From f84dbb912f344270f31d5cce974f12908a47798d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 10 Jul 2008 14:48:54 -0700
Subject: [PATCH 001/895] genirq: enable polling for disabled screaming irqs

When we disable a screaming irq we never see it again.  If the irq
line is shared or if the driver half works this is a real pain.  So
periodically poll the handlers for screaming interrupts.

I use a timer instead of the classic irq poll technique of working off
the timer interrupt because when we use the local apic timers
note_interrupt is never called (bug?).  Further on a system with
dynamic ticks the timer interrupt might not even fire unless there is
a timer telling it it needs to.

I forced this case on my test system with an e1000 nic and my ssh
session remained responsive despite the interrupt handler only being
called every 10th of a second.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/spurious.c | 146 ++++++++++++++++++++++++++----------------
 1 file changed, 91 insertions(+), 55 deletions(-)

diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index c66d3f10e853..19fe9d6ebfe8 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -12,83 +12,117 @@
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
+#include <linux/timer.h>
 
 static int irqfixup __read_mostly;
 
+#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
+static void poll_spurious_irqs(unsigned long dummy);
+static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
+
 /*
  * Recovery handler for misrouted interrupts.
  */
-static int misrouted_irq(int irq)
+static int try_one_irq(int irq, struct irq_desc *desc)
 {
-	int i;
+	struct irqaction *action;
 	int ok = 0;
 	int work = 0;	/* Did we do work for a real IRQ */
 
-	for (i = 1; i < NR_IRQS; i++) {
-		struct irq_desc *desc = irq_desc + i;
-		struct irqaction *action;
-
-		if (i == irq)	/* Already tried */
-			continue;
-
-		spin_lock(&desc->lock);
-		/* Already running on another processor */
-		if (desc->status & IRQ_INPROGRESS) {
-			/*
-			 * Already running: If it is shared get the other
-			 * CPU to go looking for our mystery interrupt too
-			 */
-			if (desc->action && (desc->action->flags & IRQF_SHARED))
-				desc->status |= IRQ_PENDING;
-			spin_unlock(&desc->lock);
-			continue;
-		}
-		/* Honour the normal IRQ locking */
-		desc->status |= IRQ_INPROGRESS;
-		action = desc->action;
+	spin_lock(&desc->lock);
+	/* Already running on another processor */
+	if (desc->status & IRQ_INPROGRESS) {
+		/*
+		 * Already running: If it is shared get the other
+		 * CPU to go looking for our mystery interrupt too
+		 */
+		if (desc->action && (desc->action->flags & IRQF_SHARED))
+			desc->status |= IRQ_PENDING;
 		spin_unlock(&desc->lock);
+		return ok;
+	}
+	/* Honour the normal IRQ locking */
+	desc->status |= IRQ_INPROGRESS;
+	action = desc->action;
+	spin_unlock(&desc->lock);
 
-		while (action) {
-			/* Only shared IRQ handlers are safe to call */
-			if (action->flags & IRQF_SHARED) {
-				if (action->handler(i, action->dev_id) ==
-						IRQ_HANDLED)
-					ok = 1;
-			}
-			action = action->next;
+	while (action) {
+		/* Only shared IRQ handlers are safe to call */
+		if (action->flags & IRQF_SHARED) {
+			if (action->handler(irq, action->dev_id) ==
+				IRQ_HANDLED)
+				ok = 1;
 		}
-		local_irq_disable();
-		/* Now clean up the flags */
-		spin_lock(&desc->lock);
-		action = desc->action;
+		action = action->next;
+	}
+	local_irq_disable();
+	/* Now clean up the flags */
+	spin_lock(&desc->lock);
+	action = desc->action;
 
+	/*
+	 * While we were looking for a fixup someone queued a real
+	 * IRQ clashing with our walk:
+	 */
+	while ((desc->status & IRQ_PENDING) && action) {
 		/*
-		 * While we were looking for a fixup someone queued a real
-		 * IRQ clashing with our walk:
-		 */
-		while ((desc->status & IRQ_PENDING) && action) {
-			/*
-			 * Perform real IRQ processing for the IRQ we deferred
-			 */
-			work = 1;
-			spin_unlock(&desc->lock);
-			handle_IRQ_event(i, action);
-			spin_lock(&desc->lock);
-			desc->status &= ~IRQ_PENDING;
-		}
-		desc->status &= ~IRQ_INPROGRESS;
-		/*
-		 * If we did actual work for the real IRQ line we must let the
-		 * IRQ controller clean up too
+		 * Perform real IRQ processing for the IRQ we deferred
 		 */
-		if (work && desc->chip && desc->chip->end)
-			desc->chip->end(i);
+		work = 1;
 		spin_unlock(&desc->lock);
+		handle_IRQ_event(irq, action);
+		spin_lock(&desc->lock);
+		desc->status &= ~IRQ_PENDING;
+	}
+	desc->status &= ~IRQ_INPROGRESS;
+	/*
+	 * If we did actual work for the real IRQ line we must let the
+	 * IRQ controller clean up too
+	 */
+	if (work && desc->chip && desc->chip->end)
+		desc->chip->end(irq);
+	spin_unlock(&desc->lock);
+
+	return ok;
+}
+
+static int misrouted_irq(int irq)
+{
+	int i;
+	int ok = 0;
+
+	for (i = 1; i < NR_IRQS; i++) {
+		struct irq_desc *desc = irq_desc + i;
+
+		if (i == irq)	/* Already tried */
+			continue;
+
+		if (try_one_irq(i, desc))
+			ok = 1;
 	}
 	/* So the caller can adjust the irq error counts */
 	return ok;
 }
 
+static void poll_spurious_irqs(unsigned long dummy)
+{
+	int i;
+	for (i = 1; i < NR_IRQS; i++) {
+		struct irq_desc *desc = irq_desc + i;
+		unsigned int status;
+
+		/* Racy but it doesn't matter */
+		status = desc->status;
+		barrier();
+		if (!(status & IRQ_SPURIOUS_DISABLED))
+			continue;
+
+		try_one_irq(i, desc);
+	}
+
+	mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
+}
+
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled
  * then assume that the IRQ is stuck in some manner. Drop a diagnostic
@@ -212,6 +246,8 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED;
 		desc->depth++;
 		desc->chip->disable(irq);
+
+		mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
 	}
 	desc->irqs_unhandled = 0;
 }
-- 
GitLab


From 8d00a6c8f6b08e7167bc03bf955cdc7e47c5132e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 22 Jul 2008 08:39:57 +0200
Subject: [PATCH 002/895] genirq: remove last NO_IDLE_HZ leftovers

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 4 ----
 kernel/irq/handle.c | 2 --
 2 files changed, 6 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8ccb462ea42c..f3047df2d23c 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -197,10 +197,6 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);
 
 #ifdef CONFIG_GENERIC_HARDIRQS
 
-#ifndef handle_dynamic_tick
-# define handle_dynamic_tick(a)		do { } while (0)
-#endif
-
 #ifdef CONFIG_SMP
 
 #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 5fa6198e9139..f4c8a03a9fbb 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -131,8 +131,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 	irqreturn_t ret, retval = IRQ_NONE;
 	unsigned int status = 0;
 
-	handle_dynamic_tick(action);
-
 	if (!(action->flags & IRQF_DISABLED))
 		local_irq_enable_in_hardirq();
 
-- 
GitLab


From 34a82443b79dcda4304b229d555586296da40c16 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 30 Jul 2008 12:35:05 -0700
Subject: [PATCH 003/895] [MTD] dataflash OTP support

Now that we can tell when we have one of the newer DataFlash chips,
optionally expose the 128 bytes of OTP memory they provide.  Tested
on at45db642 revision B and D chips.

Switch mtdchar over to a generic HAVE_MTD_OTP flag instead of adding
another #ifdef for each type of chip whose driver has OTP support.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Bryan Wu <cooloney@kernel.org>
Cc: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/Kconfig                 |   5 +
 drivers/mtd/chips/Kconfig           |   1 +
 drivers/mtd/devices/Kconfig         |  11 ++
 drivers/mtd/devices/mtd_dataflash.c | 206 +++++++++++++++++++++++++++-
 drivers/mtd/mtdchar.c               |   4 +-
 drivers/mtd/onenand/Kconfig         |   1 +
 6 files changed, 219 insertions(+), 9 deletions(-)

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 14f11f8b9e5f..a90d50c2c3e5 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -172,6 +172,11 @@ config MTD_CHAR
 	  memory chips, and also use ioctl() to obtain information about
 	  the device, or to erase parts of it.
 
+config HAVE_MTD_OTP
+	bool
+	help
+	  Enable access to OTP regions using MTD_CHAR.
+
 config MTD_BLKDEVS
 	tristate "Common interface to block layer for MTD 'translation layers'"
 	depends on BLOCK
diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig
index 479d32b57a1e..4c35e5d77f95 100644
--- a/drivers/mtd/chips/Kconfig
+++ b/drivers/mtd/chips/Kconfig
@@ -154,6 +154,7 @@ config MTD_CFI_I8
 config MTD_OTP
 	bool "Protection Registers aka one-time programmable (OTP) bits"
 	depends on MTD_CFI_ADV_OPTIONS
+	select HAVE_MTD_OTP
 	default n
 	help
 	  This enables support for reading, writing and locking so called
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 9c613f06623c..88f4df047464 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -59,6 +59,17 @@ config MTD_DATAFLASH
 	  Sometimes DataFlash chips are packaged inside MMC-format
 	  cards; at this writing, the MMC stack won't handle those.
 
+config MTD_DATAFLASH_OTP
+	bool "DataFlash OTP support (Security Register)"
+	depends on MTD_DATAFLASH
+	select HAVE_MTD_OTP
+	help
+	  Newer DataFlash chips (revisions C and D) support 128 bytes of
+	  one-time-programmable (OTP) data.  The first half may be written
+	  (once) with up to 64 bytes of data, such as a serial number or
+	  other key product data.  The second half is programmed with a
+	  unique-to-each-chip bit pattern at the factory.
+
 config MTD_M25P80
 	tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)"
 	depends on SPI_MASTER && EXPERIMENTAL
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 8bd0dea6885f..17c9b20dca87 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -80,7 +80,8 @@
  */
 #define OP_READ_ID		0x9F
 #define OP_READ_SECURITY	0x77
-#define OP_WRITE_SECURITY	0x9A	/* OTP bits */
+#define OP_WRITE_SECURITY_REVC	0x9A
+#define OP_WRITE_SECURITY	0x9B	/* revision D */
 
 
 struct dataflash {
@@ -451,16 +452,192 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 
 /* ......................................................................... */
 
+#ifdef CONFIG_MTD_DATAFLASH_OTP
+
+static int dataflash_get_otp_info(struct mtd_info *mtd,
+		struct otp_info *info, size_t len)
+{
+	/* Report both blocks as identical:  bytes 0..64, locked.
+	 * Unless the user block changed from all-ones, we can't
+	 * tell whether it's still writable; so we assume it isn't.
+	 */
+	info->start = 0;
+	info->length = 64;
+	info->locked = 1;
+	return sizeof(*info);
+}
+
+static ssize_t otp_read(struct spi_device *spi, unsigned base,
+		uint8_t *buf, loff_t off, size_t len)
+{
+	struct spi_message	m;
+	size_t			l;
+	uint8_t			*scratch;
+	struct spi_transfer	t;
+	int			status;
+
+	if (off > 64)
+		return -EINVAL;
+
+	if ((off + len) > 64)
+		len = 64 - off;
+	if (len == 0)
+		return len;
+
+	spi_message_init(&m);
+
+	l = 4 + base + off + len;
+	scratch = kzalloc(l, GFP_KERNEL);
+	if (!scratch)
+		return -ENOMEM;
+
+	/* OUT: OP_READ_SECURITY, 3 don't-care bytes, zeroes
+	 * IN:  ignore 4 bytes, data bytes 0..N (max 127)
+	 */
+	scratch[0] = OP_READ_SECURITY;
+
+	memset(&t, 0, sizeof t);
+	t.tx_buf = scratch;
+	t.rx_buf = scratch;
+	t.len = l;
+	spi_message_add_tail(&t, &m);
+
+	dataflash_waitready(spi);
+
+	status = spi_sync(spi, &m);
+	if (status >= 0) {
+		memcpy(buf, scratch + 4 + base + off, len);
+		status = len;
+	}
+
+	kfree(scratch);
+	return status;
+}
+
+static int dataflash_read_fact_otp(struct mtd_info *mtd,
+		loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+	struct dataflash	*priv = (struct dataflash *)mtd->priv;
+	int			status;
+
+	/* 64 bytes, from 0..63 ... start at 64 on-chip */
+	mutex_lock(&priv->lock);
+	status = otp_read(priv->spi, 64, buf, from, len);
+	mutex_unlock(&priv->lock);
+
+	if (status < 0)
+		return status;
+	*retlen = status;
+	return 0;
+}
+
+static int dataflash_read_user_otp(struct mtd_info *mtd,
+		loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+	struct dataflash	*priv = (struct dataflash *)mtd->priv;
+	int			status;
+
+	/* 64 bytes, from 0..63 ... start at 0 on-chip */
+	mutex_lock(&priv->lock);
+	status = otp_read(priv->spi, 0, buf, from, len);
+	mutex_unlock(&priv->lock);
+
+	if (status < 0)
+		return status;
+	*retlen = status;
+	return 0;
+}
+
+static int dataflash_write_user_otp(struct mtd_info *mtd,
+		loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+	struct spi_message	m;
+	const size_t		l = 4 + 64;
+	uint8_t			*scratch;
+	struct spi_transfer	t;
+	struct dataflash	*priv = (struct dataflash *)mtd->priv;
+	int			status;
+
+	if (len > 64)
+		return -EINVAL;
+
+	/* Strictly speaking, we *could* truncate the write ... but
+	 * let's not do that for the only write that's ever possible.
+	 */
+	if ((from + len) > 64)
+		return -EINVAL;
+
+	/* OUT: OP_WRITE_SECURITY, 3 zeroes, 64 data-or-zero bytes
+	 * IN:  ignore all
+	 */
+	scratch = kzalloc(l, GFP_KERNEL);
+	if (!scratch)
+		return -ENOMEM;
+	scratch[0] = OP_WRITE_SECURITY;
+	memcpy(scratch + 4 + from, buf, len);
+
+	spi_message_init(&m);
+
+	memset(&t, 0, sizeof t);
+	t.tx_buf = scratch;
+	t.len = l;
+	spi_message_add_tail(&t, &m);
+
+	/* Write the OTP bits, if they've not yet been written.
+	 * This modifies SRAM buffer1.
+	 */
+	mutex_lock(&priv->lock);
+	dataflash_waitready(priv->spi);
+	status = spi_sync(priv->spi, &m);
+	mutex_unlock(&priv->lock);
+
+	kfree(scratch);
+
+	if (status >= 0) {
+		status = 0;
+		*retlen = len;
+	}
+	return status;
+}
+
+static char *otp_setup(struct mtd_info *device, char revision)
+{
+	device->get_fact_prot_info = dataflash_get_otp_info;
+	device->read_fact_prot_reg = dataflash_read_fact_otp;
+	device->get_user_prot_info = dataflash_get_otp_info;
+	device->read_user_prot_reg = dataflash_read_user_otp;
+
+	/* rev c parts (at45db321c and at45db1281 only!) use a
+	 * different write procedure; not (yet?) implemented.
+	 */
+	if (revision > 'c')
+		device->write_user_prot_reg = dataflash_write_user_otp;
+
+	return ", OTP";
+}
+
+#else
+
+static char *otp_setup(struct mtd_info *device)
+{
+	return " (OTP)";
+}
+
+#endif
+
+/* ......................................................................... */
+
 /*
  * Register DataFlash device with MTD subsystem.
  */
 static int __devinit
-add_dataflash(struct spi_device *spi, char *name,
-		int nr_pages, int pagesize, int pageoffset)
+add_dataflash_otp(struct spi_device *spi, char *name,
+		int nr_pages, int pagesize, int pageoffset, char revision)
 {
 	struct dataflash		*priv;
 	struct mtd_info			*device;
 	struct flash_platform_data	*pdata = spi->dev.platform_data;
+	char				*otp_tag = "";
 
 	priv = kzalloc(sizeof *priv, GFP_KERNEL);
 	if (!priv)
@@ -489,8 +666,12 @@ add_dataflash(struct spi_device *spi, char *name,
 	device->write = dataflash_write;
 	device->priv = priv;
 
-	dev_info(&spi->dev, "%s (%d KBytes) pagesize %d bytes\n",
-			name, DIV_ROUND_UP(device->size, 1024), pagesize);
+	if (revision >= 'c')
+		otp_tag = otp_setup(device, revision);
+
+	dev_info(&spi->dev, "%s (%d KBytes) pagesize %d bytes%s\n",
+			name, DIV_ROUND_UP(device->size, 1024),
+			pagesize, otp_tag);
 	dev_set_drvdata(&spi->dev, priv);
 
 	if (mtd_has_partitions()) {
@@ -519,6 +700,14 @@ add_dataflash(struct spi_device *spi, char *name,
 	return add_mtd_device(device) == 1 ? -ENODEV : 0;
 }
 
+static inline int __devinit
+add_dataflash(struct spi_device *spi, char *name,
+		int nr_pages, int pagesize, int pageoffset)
+{
+	return add_dataflash_otp(spi, name, nr_pages, pagesize,
+			pageoffset, 0);
+}
+
 struct flash_info {
 	char		*name;
 
@@ -664,13 +853,16 @@ static int __devinit dataflash_probe(struct spi_device *spi)
 	 * Try to detect dataflash by JEDEC ID.
 	 * If it succeeds we know we have either a C or D part.
 	 * D will support power of 2 pagesize option.
+	 * Both support the security register, though with different
+	 * write procedures.
 	 */
 	info = jedec_probe(spi);
 	if (IS_ERR(info))
 		return PTR_ERR(info);
 	if (info != NULL)
-		return add_dataflash(spi, info->name, info->nr_pages,
-				 info->pagesize, info->pageoffset);
+		return add_dataflash_otp(spi, info->name, info->nr_pages,
+				info->pagesize, info->pageoffset,
+				(info->flags & SUP_POW2PS) ? 'd' : 'c');
 
 	/*
 	 * Older chips support only legacy commands, identifing
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index d2f331876e4c..13cc67ad272a 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -350,7 +350,7 @@ static void mtdchar_erase_callback (struct erase_info *instr)
 	wake_up((wait_queue_head_t *)instr->priv);
 }
 
-#if defined(CONFIG_MTD_OTP) || defined(CONFIG_MTD_ONENAND_OTP)
+#ifdef CONFIG_HAVE_MTD_OTP
 static int otp_select_filemode(struct mtd_file_info *mfi, int mode)
 {
 	struct mtd_info *mtd = mfi->mtd;
@@ -663,7 +663,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file,
 		break;
 	}
 
-#if defined(CONFIG_MTD_OTP) || defined(CONFIG_MTD_ONENAND_OTP)
+#ifdef CONFIG_HAVE_MTD_OTP
 	case OTPSELECT:
 	{
 		int mode;
diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig
index cb41cbca64f7..b94a61b670d1 100644
--- a/drivers/mtd/onenand/Kconfig
+++ b/drivers/mtd/onenand/Kconfig
@@ -29,6 +29,7 @@ config MTD_ONENAND_GENERIC
 
 config MTD_ONENAND_OTP
 	bool "OneNAND OTP Support"
+	select HAVE_MTD_OTP
 	help
 	  One Block of the NAND Flash Array memory is reserved as
 	  a One-Time Programmable Block memory area.
-- 
GitLab


From feb2f55db45919aa80731f8877b60cab454b7b94 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Fri, 1 Aug 2008 11:53:29 +0300
Subject: [PATCH 004/895] [MTD] [OneNAND] Add defines for HF and sync write

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/onenand_regs.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h
index d1b310c92eb4..0c6bbe28f38c 100644
--- a/include/linux/mtd/onenand_regs.h
+++ b/include/linux/mtd/onenand_regs.h
@@ -152,6 +152,8 @@
 #define ONENAND_SYS_CFG1_INT		(1 << 6)
 #define ONENAND_SYS_CFG1_IOBE		(1 << 5)
 #define ONENAND_SYS_CFG1_RDY_CONF	(1 << 4)
+#define ONENAND_SYS_CFG1_HF		(1 << 2)
+#define ONENAND_SYS_CFG1_SYNC_WRITE	(1 << 1)
 
 /*
  * Controller Status Register F240h (R)
-- 
GitLab


From c4308d1076830a72e05eb3e5f58b9ed851229399 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Fri, 1 Aug 2008 11:44:20 -0500
Subject: [PATCH 005/895] [MTD] remove code associated with !CONFIG_PPC_MERGE

Now that arch/ppc is gone we don't need CONFIG_PPC_MERGE anymore
remove the dead code associated with !CONFIG_PPC_MERGE.

The mtd maps should be using the OF based mechanism.

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/Kconfig  |  24 ------
 drivers/mtd/maps/Makefile |   3 -
 drivers/mtd/maps/ebony.c  | 163 --------------------------------------
 drivers/mtd/maps/ocotea.c | 154 -----------------------------------
 drivers/mtd/maps/walnut.c | 122 ----------------------------
 5 files changed, 466 deletions(-)
 delete mode 100644 drivers/mtd/maps/ebony.c
 delete mode 100644 drivers/mtd/maps/ocotea.c
 delete mode 100644 drivers/mtd/maps/walnut.c

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index df8e00bba07b..db667b16c043 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -332,30 +332,6 @@ config MTD_CFI_FLAGADM
 	  Mapping for the Flaga digital module. If you don't have one, ignore
 	  this setting.
 
-config MTD_WALNUT
-	tristate "Flash device mapped on IBM 405GP Walnut"
-	depends on MTD_JEDECPROBE && WALNUT && !PPC_MERGE
-	help
-	  This enables access routines for the flash chips on the IBM 405GP
-	  Walnut board. If you have one of these boards and would like to
-	  use the flash chips on it, say 'Y'.
-
-config MTD_EBONY
-	tristate "Flash devices mapped on IBM 440GP Ebony"
-	depends on MTD_JEDECPROBE && EBONY && !PPC_MERGE
-	help
-	  This enables access routines for the flash chips on the IBM 440GP
-	  Ebony board. If you have one of these boards and would like to
-	  use the flash chips on it, say 'Y'.
-
-config MTD_OCOTEA
-	tristate "Flash devices mapped on IBM 440GX Ocotea"
-	depends on MTD_CFI && OCOTEA && !PPC_MERGE
-	help
-	  This enables access routines for the flash chips on the IBM 440GX
-	  Ocotea board. If you have one of these boards and would like to
-	  use the flash chips on it, say 'Y'.
-
 config MTD_REDWOOD
 	tristate "CFI Flash devices mapped on IBM Redwood"
 	depends on MTD_CFI && ( REDWOOD_4 || REDWOOD_5 || REDWOOD_6 )
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index 6cda6df973e5..b2582506cde9 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -50,9 +50,6 @@ obj-$(CONFIG_MTD_REDWOOD)	+= redwood.o
 obj-$(CONFIG_MTD_UCLINUX)	+= uclinux.o
 obj-$(CONFIG_MTD_NETtel)	+= nettel.o
 obj-$(CONFIG_MTD_SCB2_FLASH)	+= scb2_flash.o
-obj-$(CONFIG_MTD_EBONY)		+= ebony.o
-obj-$(CONFIG_MTD_OCOTEA)	+= ocotea.o
-obj-$(CONFIG_MTD_WALNUT)        += walnut.o
 obj-$(CONFIG_MTD_H720X)		+= h720x-flash.o
 obj-$(CONFIG_MTD_SBC8240)	+= sbc8240.o
 obj-$(CONFIG_MTD_NOR_TOTO)	+= omap-toto-flash.o
diff --git a/drivers/mtd/maps/ebony.c b/drivers/mtd/maps/ebony.c
deleted file mode 100644
index d92b7c70d3ed..000000000000
--- a/drivers/mtd/maps/ebony.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Mapping for Ebony user flash
- *
- * Matt Porter <mporter@kernel.crashing.org>
- *
- * Copyright 2002-2004 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm44x.h>
-#include <platforms/4xx/ebony.h>
-
-static struct mtd_info *flash;
-
-static struct map_info ebony_small_map = {
-	.name =		"Ebony small flash",
-	.size =		EBONY_SMALL_FLASH_SIZE,
-	.bankwidth =	1,
-};
-
-static struct map_info ebony_large_map = {
-	.name =		"Ebony large flash",
-	.size =		EBONY_LARGE_FLASH_SIZE,
-	.bankwidth =	1,
-};
-
-static struct mtd_partition ebony_small_partitions[] = {
-	{
-		.name =   "OpenBIOS",
-		.offset = 0x0,
-		.size =   0x80000,
-	}
-};
-
-static struct mtd_partition ebony_large_partitions[] = {
-	{
-		.name =   "fs",
-		.offset = 0,
-		.size =   0x380000,
-	},
-	{
-		.name =   "firmware",
-		.offset = 0x380000,
-		.size =   0x80000,
-	}
-};
-
-int __init init_ebony(void)
-{
-	u8 fpga0_reg;
-	u8 __iomem *fpga0_adr;
-	unsigned long long small_flash_base, large_flash_base;
-
-	fpga0_adr = ioremap64(EBONY_FPGA_ADDR, 16);
-	if (!fpga0_adr)
-		return -ENOMEM;
-
-	fpga0_reg = readb(fpga0_adr);
-	iounmap(fpga0_adr);
-
-	if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-			!EBONY_FLASH_SEL(fpga0_reg))
-		small_flash_base = EBONY_SMALL_FLASH_HIGH2;
-	else if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-			EBONY_FLASH_SEL(fpga0_reg))
-		small_flash_base = EBONY_SMALL_FLASH_HIGH1;
-	else if (!EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-			!EBONY_FLASH_SEL(fpga0_reg))
-		small_flash_base = EBONY_SMALL_FLASH_LOW2;
-	else
-		small_flash_base = EBONY_SMALL_FLASH_LOW1;
-
-	if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-			!EBONY_ONBRD_FLASH_EN(fpga0_reg))
-		large_flash_base = EBONY_LARGE_FLASH_LOW;
-	else
-		large_flash_base = EBONY_LARGE_FLASH_HIGH;
-
-	ebony_small_map.phys = small_flash_base;
-	ebony_small_map.virt = ioremap64(small_flash_base,
-					 ebony_small_map.size);
-
-	if (!ebony_small_map.virt) {
-		printk("Failed to ioremap flash\n");
-		return -EIO;
-	}
-
-	simple_map_init(&ebony_small_map);
-
-	flash = do_map_probe("jedec_probe", &ebony_small_map);
-	if (flash) {
-		flash->owner = THIS_MODULE;
-		add_mtd_partitions(flash, ebony_small_partitions,
-					ARRAY_SIZE(ebony_small_partitions));
-	} else {
-		printk("map probe failed for flash\n");
-		iounmap(ebony_small_map.virt);
-		return -ENXIO;
-	}
-
-	ebony_large_map.phys = large_flash_base;
-	ebony_large_map.virt = ioremap64(large_flash_base,
-					 ebony_large_map.size);
-
-	if (!ebony_large_map.virt) {
-		printk("Failed to ioremap flash\n");
-		iounmap(ebony_small_map.virt);
-		return -EIO;
-	}
-
-	simple_map_init(&ebony_large_map);
-
-	flash = do_map_probe("jedec_probe", &ebony_large_map);
-	if (flash) {
-		flash->owner = THIS_MODULE;
-		add_mtd_partitions(flash, ebony_large_partitions,
-					ARRAY_SIZE(ebony_large_partitions));
-	} else {
-		printk("map probe failed for flash\n");
-		iounmap(ebony_small_map.virt);
-		iounmap(ebony_large_map.virt);
-		return -ENXIO;
-	}
-
-	return 0;
-}
-
-static void __exit cleanup_ebony(void)
-{
-	if (flash) {
-		del_mtd_partitions(flash);
-		map_destroy(flash);
-	}
-
-	if (ebony_small_map.virt) {
-		iounmap(ebony_small_map.virt);
-		ebony_small_map.virt = NULL;
-	}
-
-	if (ebony_large_map.virt) {
-		iounmap(ebony_large_map.virt);
-		ebony_large_map.virt = NULL;
-	}
-}
-
-module_init(init_ebony);
-module_exit(cleanup_ebony);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 440GP Ebony boards");
diff --git a/drivers/mtd/maps/ocotea.c b/drivers/mtd/maps/ocotea.c
deleted file mode 100644
index 5522eac8c980..000000000000
--- a/drivers/mtd/maps/ocotea.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Mapping for Ocotea user flash
- *
- * Matt Porter <mporter@kernel.crashing.org>
- *
- * Copyright 2002-2004 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm44x.h>
-#include <platforms/4xx/ocotea.h>
-
-static struct mtd_info *flash;
-
-static struct map_info ocotea_small_map = {
-	.name =		"Ocotea small flash",
-	.size =		OCOTEA_SMALL_FLASH_SIZE,
-	.buswidth =	1,
-};
-
-static struct map_info ocotea_large_map = {
-	.name =		"Ocotea large flash",
-	.size =		OCOTEA_LARGE_FLASH_SIZE,
-	.buswidth =	1,
-};
-
-static struct mtd_partition ocotea_small_partitions[] = {
-	{
-		.name =   "pibs",
-		.offset = 0x0,
-		.size =   0x100000,
-	}
-};
-
-static struct mtd_partition ocotea_large_partitions[] = {
-	{
-		.name =   "fs",
-		.offset = 0,
-		.size =   0x300000,
-	},
-	{
-		.name =   "firmware",
-		.offset = 0x300000,
-		.size =   0x100000,
-	}
-};
-
-int __init init_ocotea(void)
-{
-	u8 fpga0_reg;
-	u8 *fpga0_adr;
-	unsigned long long small_flash_base, large_flash_base;
-
-	fpga0_adr = ioremap64(OCOTEA_FPGA_ADDR, 16);
-	if (!fpga0_adr)
-		return -ENOMEM;
-
-	fpga0_reg = readb((unsigned long)fpga0_adr);
-	iounmap(fpga0_adr);
-
-	if (OCOTEA_BOOT_LARGE_FLASH(fpga0_reg)) {
-		small_flash_base = OCOTEA_SMALL_FLASH_HIGH;
-		large_flash_base = OCOTEA_LARGE_FLASH_LOW;
-	}
-	else {
-		small_flash_base = OCOTEA_SMALL_FLASH_LOW;
-		large_flash_base = OCOTEA_LARGE_FLASH_HIGH;
-	}
-
-	ocotea_small_map.phys = small_flash_base;
-	ocotea_small_map.virt = ioremap64(small_flash_base,
-					 ocotea_small_map.size);
-
-	if (!ocotea_small_map.virt) {
-		printk("Failed to ioremap flash\n");
-		return -EIO;
-	}
-
-	simple_map_init(&ocotea_small_map);
-
-	flash = do_map_probe("map_rom", &ocotea_small_map);
-	if (flash) {
-		flash->owner = THIS_MODULE;
-		add_mtd_partitions(flash, ocotea_small_partitions,
-					ARRAY_SIZE(ocotea_small_partitions));
-	} else {
-		printk("map probe failed for flash\n");
-		iounmap(ocotea_small_map.virt);
-		return -ENXIO;
-	}
-
-	ocotea_large_map.phys = large_flash_base;
-	ocotea_large_map.virt = ioremap64(large_flash_base,
-					 ocotea_large_map.size);
-
-	if (!ocotea_large_map.virt) {
-		printk("Failed to ioremap flash\n");
-		iounmap(ocotea_small_map.virt);
-		return -EIO;
-	}
-
-	simple_map_init(&ocotea_large_map);
-
-	flash = do_map_probe("cfi_probe", &ocotea_large_map);
-	if (flash) {
-		flash->owner = THIS_MODULE;
-		add_mtd_partitions(flash, ocotea_large_partitions,
-					ARRAY_SIZE(ocotea_large_partitions));
-	} else {
-		printk("map probe failed for flash\n");
-		iounmap(ocotea_small_map.virt);
-		iounmap(ocotea_large_map.virt);
-		return -ENXIO;
-	}
-
-	return 0;
-}
-
-static void __exit cleanup_ocotea(void)
-{
-	if (flash) {
-		del_mtd_partitions(flash);
-		map_destroy(flash);
-	}
-
-	if (ocotea_small_map.virt) {
-		iounmap((void *)ocotea_small_map.virt);
-		ocotea_small_map.virt = 0;
-	}
-
-	if (ocotea_large_map.virt) {
-		iounmap((void *)ocotea_large_map.virt);
-		ocotea_large_map.virt = 0;
-	}
-}
-
-module_init(init_ocotea);
-module_exit(cleanup_ocotea);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 440GX Ocotea boards");
diff --git a/drivers/mtd/maps/walnut.c b/drivers/mtd/maps/walnut.c
deleted file mode 100644
index e243476c8171..000000000000
--- a/drivers/mtd/maps/walnut.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Mapping for Walnut flash
- * (used ebony.c as a "framework")
- *
- * Heikki Lindholm <holindho@infradead.org>
- *
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm4xx.h>
-#include <platforms/4xx/walnut.h>
-
-/* these should be in platforms/4xx/walnut.h ? */
-#define WALNUT_FLASH_ONBD_N(x)		(x & 0x02)
-#define WALNUT_FLASH_SRAM_SEL(x)	(x & 0x01)
-#define WALNUT_FLASH_LOW		0xFFF00000
-#define WALNUT_FLASH_HIGH		0xFFF80000
-#define WALNUT_FLASH_SIZE		0x80000
-
-static struct mtd_info *flash;
-
-static struct map_info walnut_map = {
-	.name =		"Walnut flash",
-	.size =		WALNUT_FLASH_SIZE,
-	.bankwidth =	1,
-};
-
-/* Actually, OpenBIOS is the last 128 KiB of the flash - better
- * partitioning could be made */
-static struct mtd_partition walnut_partitions[] = {
-	{
-		.name =   "OpenBIOS",
-		.offset = 0x0,
-		.size =   WALNUT_FLASH_SIZE,
-		/*.mask_flags = MTD_WRITEABLE, */ /* force read-only */
-	}
-};
-
-int __init init_walnut(void)
-{
-	u8 fpga_brds1;
-	void *fpga_brds1_adr;
-	void *fpga_status_adr;
-	unsigned long flash_base;
-
-	/* this should already be mapped (platform/4xx/walnut.c) */
-	fpga_status_adr = ioremap(WALNUT_FPGA_BASE, 8);
-	if (!fpga_status_adr)
-		return -ENOMEM;
-
-	fpga_brds1_adr = fpga_status_adr+5;
-	fpga_brds1 = readb(fpga_brds1_adr);
-	/* iounmap(fpga_status_adr); */
-
-	if (WALNUT_FLASH_ONBD_N(fpga_brds1)) {
-		printk("The on-board flash is disabled (U79 sw 5)!");
-		iounmap(fpga_status_adr);
-		return -EIO;
-	}
-	if (WALNUT_FLASH_SRAM_SEL(fpga_brds1))
-		flash_base = WALNUT_FLASH_LOW;
-	else
-		flash_base = WALNUT_FLASH_HIGH;
-
-	walnut_map.phys = flash_base;
-	walnut_map.virt =
-		(void __iomem *)ioremap(flash_base, walnut_map.size);
-
-	if (!walnut_map.virt) {
-		printk("Failed to ioremap flash.\n");
-		iounmap(fpga_status_adr);
-		return -EIO;
-	}
-
-	simple_map_init(&walnut_map);
-
-	flash = do_map_probe("jedec_probe", &walnut_map);
-	if (flash) {
-		flash->owner = THIS_MODULE;
-		add_mtd_partitions(flash, walnut_partitions,
-					ARRAY_SIZE(walnut_partitions));
-	} else {
-		printk("map probe failed for flash\n");
-		iounmap(fpga_status_adr);
-		return -ENXIO;
-	}
-
-	iounmap(fpga_status_adr);
-	return 0;
-}
-
-static void __exit cleanup_walnut(void)
-{
-	if (flash) {
-		del_mtd_partitions(flash);
-		map_destroy(flash);
-	}
-
-	if (walnut_map.virt) {
-		iounmap((void *)walnut_map.virt);
-		walnut_map.virt = 0;
-	}
-}
-
-module_init(init_walnut);
-module_exit(cleanup_walnut);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Heikki Lindholm <holindho@infradead.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 405GP Walnut boards");
-- 
GitLab


From c8872b069c536976b81bccfc95dda945594bc504 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 2 Aug 2008 17:14:21 +0200
Subject: [PATCH 006/895] [MTD] Use DIV_ROUND_UP

The kernel.h macro DIV_ROUND_UP performs the computation (((n) + (d) - 1) /
(d)) but is perhaps more readable.

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0001.c | 2 +-
 drivers/mtd/chips/gen_probe.c       | 2 +-
 drivers/mtd/ssfdc.c                 | 3 +--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 5f1b472137a0..d49cbe2738a8 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -1640,7 +1640,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
 	/* Figure out the number of words to write */
 	word_gap = (-adr & (map_bankwidth(map)-1));
-	words = (len - word_gap + map_bankwidth(map) - 1) / map_bankwidth(map);
+	words = DIV_ROUND_UP(len - word_gap, map_bankwidth(map));
 	if (!word_gap) {
 		words--;
 	} else {
diff --git a/drivers/mtd/chips/gen_probe.c b/drivers/mtd/chips/gen_probe.c
index f061885b2812..e2dc96441e05 100644
--- a/drivers/mtd/chips/gen_probe.c
+++ b/drivers/mtd/chips/gen_probe.c
@@ -111,7 +111,7 @@ static struct cfi_private *genprobe_ident_chips(struct map_info *map, struct chi
 		max_chips = 1;
 	}
 
-	mapsize = sizeof(long) * ( (max_chips + BITS_PER_LONG-1) / BITS_PER_LONG );
+	mapsize = sizeof(long) * DIV_ROUND_UP(max_chips, BITS_PER_LONG);
 	chip_map = kzalloc(mapsize, GFP_KERNEL);
 	if (!chip_map) {
 		printk(KERN_WARNING "%s: kmalloc failed for CFI chip map\n", map->name);
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index a5f3d60047d4..33a5d6ed6f18 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -321,8 +321,7 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	DEBUG(MTD_DEBUG_LEVEL1,
 		"SSFDC_RO: cis_block=%d,erase_size=%d,map_len=%d,n_zones=%d\n",
 		ssfdc->cis_block, ssfdc->erase_size, ssfdc->map_len,
-		(ssfdc->map_len + MAX_PHYS_BLK_PER_ZONE - 1) /
-		MAX_PHYS_BLK_PER_ZONE);
+		DIV_ROUND_UP(ssfdc->map_len, MAX_PHYS_BLK_PER_ZONE));
 
 	/* Set geometry */
 	ssfdc->heads = 16;
-- 
GitLab


From 16e00b609aed439453d57b954b449f647466e0d7 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Mon, 4 Aug 2008 11:25:23 +0100
Subject: [PATCH 007/895] [MTD] Remove references to TI 'toto' platform.

This was a reference board for which support never got merged upstream.
Kill it off, at rmk's suggestion.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/Kconfig           |   7 -
 drivers/mtd/maps/Makefile          |   1 -
 drivers/mtd/maps/omap-toto-flash.c | 133 -------------------
 drivers/mtd/nand/Kconfig           |   6 -
 drivers/mtd/nand/Makefile          |   1 -
 drivers/mtd/nand/toto.c            | 206 -----------------------------
 6 files changed, 354 deletions(-)
 delete mode 100644 drivers/mtd/maps/omap-toto-flash.c
 delete mode 100644 drivers/mtd/nand/toto.c

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index db667b16c043..3ae76ecc07d7 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -434,13 +434,6 @@ config MTD_CEIVA
 	  PhotoMax Digital Picture Frame.
 	  If you have such a device, say 'Y'.
 
-config MTD_NOR_TOTO
-	tristate "NOR Flash device on TOTO board"
-	depends on ARCH_OMAP && OMAP_TOTO
-	help
-	  This enables access to the NOR flash on the Texas Instruments
-	  TOTO board.
-
 config MTD_H720X
 	tristate "Hynix evaluation board mappings"
 	depends on MTD_CFI && ( ARCH_H7201 || ARCH_H7202 )
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index b2582506cde9..6d9ba35caf11 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -52,7 +52,6 @@ obj-$(CONFIG_MTD_NETtel)	+= nettel.o
 obj-$(CONFIG_MTD_SCB2_FLASH)	+= scb2_flash.o
 obj-$(CONFIG_MTD_H720X)		+= h720x-flash.o
 obj-$(CONFIG_MTD_SBC8240)	+= sbc8240.o
-obj-$(CONFIG_MTD_NOR_TOTO)	+= omap-toto-flash.o
 obj-$(CONFIG_MTD_IXP4XX)	+= ixp4xx.o
 obj-$(CONFIG_MTD_IXP2000)	+= ixp2000.o
 obj-$(CONFIG_MTD_WRSBC8260)	+= wr_sbc82xx_flash.o
diff --git a/drivers/mtd/maps/omap-toto-flash.c b/drivers/mtd/maps/omap-toto-flash.c
deleted file mode 100644
index 0a60ebbc2175..000000000000
--- a/drivers/mtd/maps/omap-toto-flash.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * NOR Flash memory access on TI Toto board
- *
- * jzhang@ti.com (C) 2003 Texas Instruments.
- *
- *  (C) 2002 MontVista Software, Inc.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-
-#include <asm/hardware.h>
-#include <asm/io.h>
-
-
-#ifndef CONFIG_ARCH_OMAP
-#error This is for OMAP architecture only
-#endif
-
-//these lines need be moved to a hardware header file
-#define OMAP_TOTO_FLASH_BASE 0xd8000000
-#define OMAP_TOTO_FLASH_SIZE 0x80000
-
-static struct map_info omap_toto_map_flash = {
-	.name =		"OMAP Toto flash",
-	.bankwidth =	2,
-	.virt =		(void __iomem *)OMAP_TOTO_FLASH_BASE,
-};
-
-
-static struct mtd_partition toto_flash_partitions[] = {
-	{
-		.name =		"BootLoader",
-		.size =		0x00040000,     /* hopefully u-boot will stay 128k + 128*/
-		.offset =	0,
-		.mask_flags =	MTD_WRITEABLE,  /* force read-only */
-	}, {
-		.name =		"ReservedSpace",
-		.size =		0x00030000,
-		.offset =	MTDPART_OFS_APPEND,
-		//mask_flags:	MTD_WRITEABLE,  /* force read-only */
-	}, {
-		.name =		"EnvArea",      /* bottom 64KiB for env vars */
-		.size =		MTDPART_SIZ_FULL,
-		.offset =	MTDPART_OFS_APPEND,
-	}
-};
-
-static struct mtd_partition *parsed_parts;
-
-static struct mtd_info *flash_mtd;
-
-static int __init init_flash (void)
-{
-
-	struct mtd_partition *parts;
-	int nb_parts = 0;
-	int parsed_nr_parts = 0;
-	const char *part_type;
-
-	/*
-	 * Static partition definition selection
-	 */
-	part_type = "static";
-
- 	parts = toto_flash_partitions;
-	nb_parts = ARRAY_SIZE(toto_flash_partitions);
-	omap_toto_map_flash.size = OMAP_TOTO_FLASH_SIZE;
-	omap_toto_map_flash.phys = virt_to_phys(OMAP_TOTO_FLASH_BASE);
-
-	simple_map_init(&omap_toto_map_flash);
-	/*
-	 * Now let's probe for the actual flash.  Do it here since
-	 * specific machine settings might have been set above.
-	 */
-	printk(KERN_NOTICE "OMAP toto flash: probing %d-bit flash bus\n",
-		omap_toto_map_flash.bankwidth*8);
-	flash_mtd = do_map_probe("jedec_probe", &omap_toto_map_flash);
-	if (!flash_mtd)
-		return -ENXIO;
-
- 	if (parsed_nr_parts > 0) {
-		parts = parsed_parts;
-		nb_parts = parsed_nr_parts;
-	}
-
-	if (nb_parts == 0) {
-		printk(KERN_NOTICE "OMAP toto flash: no partition info available,"
-			"registering whole flash at once\n");
-		if (add_mtd_device(flash_mtd)){
-            return -ENXIO;
-        }
-	} else {
-		printk(KERN_NOTICE "Using %s partition definition\n",
-			part_type);
-		return add_mtd_partitions(flash_mtd, parts, nb_parts);
-	}
-	return 0;
-}
-
-int __init omap_toto_mtd_init(void)
-{
-	int status;
-
- 	if (status = init_flash()) {
-		printk(KERN_ERR "OMAP Toto Flash: unable to init map for toto flash\n");
-	}
-    return status;
-}
-
-static void  __exit omap_toto_mtd_cleanup(void)
-{
-	if (flash_mtd) {
-		del_mtd_partitions(flash_mtd);
-		map_destroy(flash_mtd);
-		kfree(parsed_parts);
-	}
-}
-
-module_init(omap_toto_mtd_init);
-module_exit(omap_toto_mtd_cleanup);
-
-MODULE_AUTHOR("Jian Zhang");
-MODULE_DESCRIPTION("OMAP Toto board map driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 02f9cc30d77b..572c842e9f40 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -68,12 +68,6 @@ config MTD_NAND_AMS_DELTA
 	help
 	  Support for NAND flash on Amstrad E3 (Delta).
 
-config MTD_NAND_TOTO
-	tristate "NAND Flash device on TOTO board"
-	depends on ARCH_OMAP && BROKEN
-	help
-	  Support for NAND flash on Texas Instruments Toto platform.
-
 config MTD_NAND_TS7250
 	tristate "NAND Flash device on TS-7250 board"
 	depends on MACH_TS72XX
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index d772581de573..b55e4c69fea6 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_MTD_NAND_IDS)		+= nand_ids.o
 obj-$(CONFIG_MTD_NAND_CAFE)		+= cafe_nand.o
 obj-$(CONFIG_MTD_NAND_SPIA)		+= spia.o
 obj-$(CONFIG_MTD_NAND_AMS_DELTA)	+= ams-delta.o
-obj-$(CONFIG_MTD_NAND_TOTO)		+= toto.o
 obj-$(CONFIG_MTD_NAND_AUTCPU12)		+= autcpu12.o
 obj-$(CONFIG_MTD_NAND_EDB7312)		+= edb7312.o
 obj-$(CONFIG_MTD_NAND_AU1550)		+= au1550nd.o
diff --git a/drivers/mtd/nand/toto.c b/drivers/mtd/nand/toto.c
deleted file mode 100644
index bbf492e6830d..000000000000
--- a/drivers/mtd/nand/toto.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- *  drivers/mtd/nand/toto.c
- *
- *  Copyright (c) 2003 Texas Instruments
- *
- *  Derived from drivers/mtd/autcpu12.c
- *
- *  Copyright (c) 2002 Thomas Gleixner <tgxl@linutronix.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Overview:
- *   This is a device driver for the NAND flash device found on the
- *   TI fido board. It supports 32MiB and 64MiB cards
- */
-
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/nand.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/arch/hardware.h>
-#include <asm/sizes.h>
-#include <asm/arch/toto.h>
-#include <asm/arch-omap1510/hardware.h>
-#include <asm/arch/gpio.h>
-
-#define CONFIG_NAND_WORKAROUND 1
-
-/*
- * MTD structure for TOTO board
- */
-static struct mtd_info *toto_mtd = NULL;
-
-static unsigned long toto_io_base = OMAP_FLASH_1_BASE;
-
-/*
- * Define partitions for flash devices
- */
-
-static struct mtd_partition partition_info64M[] = {
-	{ .name =	"toto kernel partition 1",
-	  .offset =	0,
-	  .size	=	2 * SZ_1M },
-	{ .name =	"toto file sys partition 2",
-	  .offset =	2 * SZ_1M,
-	  .size =	14 * SZ_1M },
-	{ .name =	"toto user partition 3",
-	  .offset =	16 * SZ_1M,
-	  .size =	16 * SZ_1M },
-	{ .name =	"toto devboard extra partition 4",
-	  .offset =	32 * SZ_1M,
-	  .size =	32 * SZ_1M },
-};
-
-static struct mtd_partition partition_info32M[] = {
-	{ .name =	"toto kernel partition 1",
-	  .offset =	0,
-	  .size =	2 * SZ_1M },
-	{ .name =	"toto file sys partition 2",
-	  .offset =	2 * SZ_1M,
-	  .size =	14 * SZ_1M },
-	{ .name =	"toto user partition 3",
-	  .offset =	16 * SZ_1M,
-	  .size =	16 * SZ_1M },
-};
-
-#define NUM_PARTITIONS32M 3
-#define NUM_PARTITIONS64M 4
-
-/*
- *	hardware specific access to control-lines
- *
- *	ctrl:
- *	NAND_NCE: bit 0 -> bit 14 (0x4000)
- *	NAND_CLE: bit 1 -> bit 12 (0x1000)
- *	NAND_ALE: bit 2 -> bit 1  (0x0002)
- */
-static void toto_hwcontrol(struct mtd_info *mtd, int cmd,
-			   unsigned int ctrl)
-{
-	struct nand_chip *chip = mtd->priv;
-
-	if (ctrl & NAND_CTRL_CHANGE) {
-		unsigned long bits;
-
-		/* hopefully enough time for tc make proceding write to clear */
-		udelay(1);
-
-		bits = (~ctrl & NAND_NCE) << 14;
-		bits |= (ctrl & NAND_CLE) << 12;
-		bits |= (ctrl & NAND_ALE) >> 1;
-
-#warning Wild guess as gpiosetout() is nowhere defined in the kernel source - tglx
-		gpiosetout(0x5002, bits);
-
-#ifdef CONFIG_NAND_WORKAROUND
-		/* "some" dev boards busted, blue wired to rts2 :( */
-		rts2setout(2, (ctrl & NAND_CLE) << 1);
-#endif
-		/* allow time to ensure gpio state to over take memory write */
-		udelay(1);
-	}
-
-	if (cmd != NAND_CMD_NONE)
-		writeb(cmd, chip->IO_ADDR_W);
-}
-
-/*
- * Main initialization routine
- */
-static int __init toto_init(void)
-{
-	struct nand_chip *this;
-	int err = 0;
-
-	/* Allocate memory for MTD device structure and private data */
-	toto_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL);
-	if (!toto_mtd) {
-		printk(KERN_WARNING "Unable to allocate toto NAND MTD device structure.\n");
-		err = -ENOMEM;
-		goto out;
-	}
-
-	/* Get pointer to private data */
-	this = (struct nand_chip *)(&toto_mtd[1]);
-
-	/* Initialize structures */
-	memset(toto_mtd, 0, sizeof(struct mtd_info));
-	memset(this, 0, sizeof(struct nand_chip));
-
-	/* Link the private data with the MTD structure */
-	toto_mtd->priv = this;
-	toto_mtd->owner = THIS_MODULE;
-
-	/* Set address of NAND IO lines */
-	this->IO_ADDR_R = toto_io_base;
-	this->IO_ADDR_W = toto_io_base;
-	this->cmd_ctrl = toto_hwcontrol;
-	this->dev_ready = NULL;
-	/* 25 us command delay time */
-	this->chip_delay = 30;
-	this->ecc.mode = NAND_ECC_SOFT;
-
-	/* Scan to find existance of the device */
-	if (nand_scan(toto_mtd, 1)) {
-		err = -ENXIO;
-		goto out_mtd;
-	}
-
-	/* Register the partitions */
-	switch (toto_mtd->size) {
-	case SZ_64M:
-		add_mtd_partitions(toto_mtd, partition_info64M, NUM_PARTITIONS64M);
-		break;
-	case SZ_32M:
-		add_mtd_partitions(toto_mtd, partition_info32M, NUM_PARTITIONS32M);
-		break;
-	default:{
-			printk(KERN_WARNING "Unsupported Nand device\n");
-			err = -ENXIO;
-			goto out_buf;
-		}
-	}
-
-	gpioreserve(NAND_MASK);	/* claim our gpios */
-	archflashwp(0, 0);	/* open up flash for writing */
-
-	goto out;
-
- out_mtd:
-	kfree(toto_mtd);
- out:
-	return err;
-}
-
-module_init(toto_init);
-
-/*
- * Clean up routine
- */
-static void __exit toto_cleanup(void)
-{
-	/* Release resources, unregister device */
-	nand_release(toto_mtd);
-
-	/* Free the MTD device structure */
-	kfree(toto_mtd);
-
-	/* stop flash writes */
-	archflashwp(0, 1);
-
-	/* release gpios to system */
-	gpiorelease(NAND_MASK);
-}
-
-module_exit(toto_cleanup);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Richard Woodruff <r-woodruff2@ti.com>");
-MODULE_DESCRIPTION("Glue layer for NAND flash on toto board");
-- 
GitLab


From a0e7229edbfef9495e73bc8baea2131a7e69e365 Mon Sep 17 00:00:00 2001
From: "George G. Davis" <gdavis@mvista.com>
Date: Mon, 4 Aug 2008 19:43:25 -0400
Subject: [PATCH 008/895] [MTD] [NOR] Add "Spansion" to MTD_CFI_AMDSTD kconfig
 menu description

This long overdue trivial change to the MTD_CFI_AMDSTD kconfig menu
description is intended to help clarify that this option also supports
Spansion flash devices.

Signed-off-by: George G. Davis <gdavis@mvista.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig
index 4c35e5d77f95..9401bfec4623 100644
--- a/drivers/mtd/chips/Kconfig
+++ b/drivers/mtd/chips/Kconfig
@@ -188,7 +188,7 @@ config MTD_CFI_INTELEXT
 	  StrataFlash and other parts.
 
 config MTD_CFI_AMDSTD
-	tristate "Support for AMD/Fujitsu flash chips"
+	tristate "Support for AMD/Fujitsu/Spansion flash chips"
 	depends on MTD_GEN_PROBE
 	select MTD_CFI_UTIL
 	help
-- 
GitLab


From 2e489e077a6ad118c4f247faedf330117b107cce Mon Sep 17 00:00:00 2001
From: Alexey Korolev <akorolev@infradead.org>
Date: Tue, 5 Aug 2008 16:39:42 +0100
Subject: [PATCH 009/895] [MTD] [NOR] Add qry_mode_on()/qry_omde_off() to deal
 with odd chips

There are some CFI chips which require non standard procedures to get
into QRY mode. The possible way to support them would be trying
different modes till QRY will be read. This patch introduce two new
functions qry_mode_on qry_mode_off. qry_mode_on tries different commands
in order switch chip into QRY mode.

So if we have one more "odd" chip - we just could add several lines to
qry_mode_on. Also using these functions remove unnecessary code
duplicaton in porbe procedure.

Currently there are two "odd" cases
1. Some old intel chips which require 0xFF before 0x98
2. ST M29DW chip which requires 0x98 to be sent at 0x555 (according to
CFI should be 0x55)

This patch is partialy based on the patch from Uwe
(see "[PATCH 2/4] [RFC][MTD] cfi_probe: remove Intel chip workaround"
thread )

Signed-off-by: Alexey Korolev <akorolev@infradead.org>
Signed-off-by: Alexander Belyakov <abelyako@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_probe.c | 52 ++++-------------------------
 drivers/mtd/chips/cfi_util.c  | 62 ++++++++++++++++++++++++++++++++---
 include/linux/mtd/cfi.h       |  9 ++++-
 3 files changed, 73 insertions(+), 50 deletions(-)

diff --git a/drivers/mtd/chips/cfi_probe.c b/drivers/mtd/chips/cfi_probe.c
index c418e92e1d92..e706be2ad0cb 100644
--- a/drivers/mtd/chips/cfi_probe.c
+++ b/drivers/mtd/chips/cfi_probe.c
@@ -44,17 +44,14 @@ do { \
 
 #define xip_enable(base, map, cfi) \
 do { \
-	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \
-	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \
+	qry_mode_off(base, map, cfi); \
 	xip_allowed(base, map); \
 } while (0)
 
 #define xip_disable_qry(base, map, cfi) \
 do { \
 	xip_disable(); \
-	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \
-	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \
-	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); \
+	qry_mode_on(base, map, cfi); \
 } while (0)
 
 #else
@@ -70,32 +67,6 @@ do { \
    in: interleave,type,mode
    ret: table index, <0 for error
  */
-static int __xipram qry_present(struct map_info *map, __u32 base,
-				struct cfi_private *cfi)
-{
-	int osf = cfi->interleave * cfi->device_type;	// scale factor
-	map_word val[3];
-	map_word qry[3];
-
-	qry[0] = cfi_build_cmd('Q', map, cfi);
-	qry[1] = cfi_build_cmd('R', map, cfi);
-	qry[2] = cfi_build_cmd('Y', map, cfi);
-
-	val[0] = map_read(map, base + osf*0x10);
-	val[1] = map_read(map, base + osf*0x11);
-	val[2] = map_read(map, base + osf*0x12);
-
-	if (!map_word_equal(map, qry[0], val[0]))
-		return 0;
-
-	if (!map_word_equal(map, qry[1], val[1]))
-		return 0;
-
-	if (!map_word_equal(map, qry[2], val[2]))
-		return 0;
-
-	return 1; 	// "QRY" found
-}
 
 static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 				   unsigned long *chip_map, struct cfi_private *cfi)
@@ -116,11 +87,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 	}
 
 	xip_disable();
-	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
-	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
-
-	if (!qry_present(map,base,cfi)) {
+	if (!qry_mode_on(base, map, cfi)) {
 		xip_enable(base, map, cfi);
 		return 0;
 	}
@@ -144,8 +111,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 		if (qry_present(map, start, cfi)) {
 			/* Eep. This chip also had the QRY marker.
 			 * Is it an alias for the new one? */
-			cfi_send_gen_cmd(0xF0, 0, start, map, cfi, cfi->device_type, NULL);
-			cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL);
+			qry_mode_off(start, map, cfi);
 
 			/* If the QRY marker goes away, it's an alias */
 			if (!qry_present(map, start, cfi)) {
@@ -158,8 +124,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 			 * unfortunate. Stick the new chip in read mode
 			 * too and if it's the same, assume it's an alias. */
 			/* FIXME: Use other modes to do a proper check */
-			cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-			cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL);
+			qry_mode_off(base, map, cfi);
 
 			if (qry_present(map, base, cfi)) {
 				xip_allowed(base, map);
@@ -176,8 +141,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 	cfi->numchips++;
 
 	/* Put it back into Read Mode */
-	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+	qry_mode_off(base, map, cfi);
 	xip_allowed(base, map);
 
 	printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n",
@@ -237,9 +201,7 @@ static int __xipram cfi_chip_setup(struct map_info *map,
 			  cfi_read_query(map, base + 0xf * ofs_factor);
 
 	/* Put it back into Read Mode */
-	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-	/* ... even if it's an Intel chip */
-	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+	qry_mode_off(base, map, cfi);
 	xip_allowed(base, map);
 
 	/* Do any necessary byteswapping */
diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c
index 0ee457018016..8d7553670526 100644
--- a/drivers/mtd/chips/cfi_util.c
+++ b/drivers/mtd/chips/cfi_util.c
@@ -24,6 +24,62 @@
 #include <linux/mtd/cfi.h>
 #include <linux/mtd/compatmac.h>
 
+int __xipram qry_present(struct map_info *map, __u32 base,
+				struct cfi_private *cfi)
+{
+	int osf = cfi->interleave * cfi->device_type;	/* scale factor */
+	map_word val[3];
+	map_word qry[3];
+
+	qry[0] = cfi_build_cmd('Q', map, cfi);
+	qry[1] = cfi_build_cmd('R', map, cfi);
+	qry[2] = cfi_build_cmd('Y', map, cfi);
+
+	val[0] = map_read(map, base + osf*0x10);
+	val[1] = map_read(map, base + osf*0x11);
+	val[2] = map_read(map, base + osf*0x12);
+
+	if (!map_word_equal(map, qry[0], val[0]))
+		return 0;
+
+	if (!map_word_equal(map, qry[1], val[1]))
+		return 0;
+
+	if (!map_word_equal(map, qry[2], val[2]))
+		return 0;
+
+	return 1; 	/* "QRY" found */
+}
+
+int __xipram qry_mode_on(uint32_t base, struct map_info *map,
+				struct cfi_private *cfi)
+{
+	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
+	if (qry_present(map, base, cfi))
+		return 1;
+	/* QRY not found probably we deal with some odd CFI chips */
+	/* Some revisions of some old Intel chips? */
+	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
+	if (qry_present(map, base, cfi))
+		return 1;
+	/* ST M29DW chips */
+	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x98, 0x555, base, map, cfi, cfi->device_type, NULL);
+	if (qry_present(map, base, cfi))
+		return 1;
+	/* QRY not found */
+	return 0;
+}
+void __xipram qry_mode_off(uint32_t base, struct map_info *map,
+				struct cfi_private *cfi)
+{
+	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+}
+
 struct cfi_extquery *
 __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* name)
 {
@@ -48,8 +104,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
 #endif
 
 	/* Switch it into Query Mode */
-	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
-
+	qry_mode_on(base, map, cfi);
 	/* Read in the Extended Query Table */
 	for (i=0; i<size; i++) {
 		((unsigned char *)extp)[i] =
@@ -57,8 +112,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
 	}
 
 	/* Make sure it returns to read mode */
-	cfi_send_gen_cmd(0xf0, 0, base, map, cfi, cfi->device_type, NULL);
-	cfi_send_gen_cmd(0xff, 0, base, map, cfi, cfi->device_type, NULL);
+	qry_mode_off(base, map, cfi);
 
 #ifdef CONFIG_MTD_XIP
 	(void) map_read(map, base);
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index d6fb115f5a07..3058917d7b92 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -12,6 +12,7 @@
 #include <linux/mtd/flashchip.h>
 #include <linux/mtd/map.h>
 #include <linux/mtd/cfi_endian.h>
+#include <linux/mtd/xip.h>
 
 #ifdef CONFIG_MTD_CFI_I1
 #define cfi_interleave(cfi) 1
@@ -430,7 +431,6 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t
 {
 	map_word val;
 	uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type);
-
 	val = cfi_build_cmd(cmd, map, cfi);
 
 	if (prev_val)
@@ -483,6 +483,13 @@ static inline void cfi_udelay(int us)
 	}
 }
 
+int __xipram qry_present(struct map_info *map, __u32 base,
+				struct cfi_private *cfi);
+int __xipram qry_mode_on(uint32_t base, struct map_info *map,
+				struct cfi_private *cfi);
+void __xipram qry_mode_off(uint32_t base, struct map_info *map,
+				struct cfi_private *cfi);
+
 struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t size,
 			     const char* name);
 struct cfi_fixup {
-- 
GitLab


From e93cafe45fd74935e0aca2b79e533f0e3ed9640f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Anders=20Grafstr=C3=B6m?= <grfstrm@users.sourceforge.net>
Date: Tue, 5 Aug 2008 18:37:41 +0200
Subject: [PATCH 010/895] [MTD] [NOR] cfi_cmdset_0001: Timeouts for erase,
 write and unlock operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Timeouts are currently given by the typical operation time times 8.
It works in the general well-behaved case but not when an erase block is
failing. For erase operations, it seems that a failing erase block will
keep the device state machine in erasing state until the vendor
specified maximum timeout period has passed. By this time the driver
would have long since timed out, left erasing state and attempted
further operations which all fail. This patch implements timeouts using
values from the CFI Query structure when available.
The patch also sets a longer timeout for locking operations. The current
value used for locking/unlocking given by 1000000/HZ microseconds is too
short for devices like J3 and J5 Strataflash which have a typical clear
lock-bits time of 0.5 seconds.

Signed-off-by: Anders GrafstrÃ¶m <grfstrm@users.sourceforge.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0001.c | 52 +++++++++++++++++++++--------
 include/linux/mtd/flashchip.h       |  4 +++
 2 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index d49cbe2738a8..5157e3cb4b9e 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -478,6 +478,28 @@ struct mtd_info *cfi_cmdset_0001(struct map_info *map, int primary)
 		else
 			cfi->chips[i].erase_time = 2000000;
 
+		if (cfi->cfiq->WordWriteTimeoutTyp &&
+		    cfi->cfiq->WordWriteTimeoutMax)
+			cfi->chips[i].word_write_time_max =
+				1<<(cfi->cfiq->WordWriteTimeoutTyp +
+				    cfi->cfiq->WordWriteTimeoutMax);
+		else
+			cfi->chips[i].word_write_time_max = 50000 * 8;
+
+		if (cfi->cfiq->BufWriteTimeoutTyp &&
+		    cfi->cfiq->BufWriteTimeoutMax)
+			cfi->chips[i].buffer_write_time_max =
+				1<<(cfi->cfiq->BufWriteTimeoutTyp +
+				    cfi->cfiq->BufWriteTimeoutMax);
+
+		if (cfi->cfiq->BlockEraseTimeoutTyp &&
+		    cfi->cfiq->BlockEraseTimeoutMax)
+			cfi->chips[i].erase_time_max =
+				1000<<(cfi->cfiq->BlockEraseTimeoutTyp +
+				       cfi->cfiq->BlockEraseTimeoutMax);
+		else
+			cfi->chips[i].erase_time_max = 2000000 * 8;
+
 		cfi->chips[i].ref_point_counter = 0;
 		init_waitqueue_head(&(cfi->chips[i].wq));
 	}
@@ -1012,7 +1034,7 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip,
 
 static int __xipram xip_wait_for_operation(
 		struct map_info *map, struct flchip *chip,
-		unsigned long adr, unsigned int chip_op_time )
+		unsigned long adr, unsigned int chip_op_time_max)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
@@ -1021,7 +1043,7 @@ static int __xipram xip_wait_for_operation(
 	flstate_t oldstate, newstate;
 
        	start = xip_currtime();
-	usec = chip_op_time * 8;
+	usec = chip_op_time_max;
 	if (usec == 0)
 		usec = 500000;
 	done = 0;
@@ -1131,8 +1153,8 @@ static int __xipram xip_wait_for_operation(
 #define XIP_INVAL_CACHED_RANGE(map, from, size)  \
 	INVALIDATE_CACHED_RANGE(map, from, size)
 
-#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec) \
-	xip_wait_for_operation(map, chip, cmd_adr, usec)
+#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec, usec_max) \
+	xip_wait_for_operation(map, chip, cmd_adr, usec_max)
 
 #else
 
@@ -1144,7 +1166,7 @@ static int __xipram xip_wait_for_operation(
 static int inval_cache_and_wait_for_operation(
 		struct map_info *map, struct flchip *chip,
 		unsigned long cmd_adr, unsigned long inval_adr, int inval_len,
-		unsigned int chip_op_time)
+		unsigned int chip_op_time, unsigned int chip_op_time_max)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	map_word status, status_OK = CMD(0x80);
@@ -1156,8 +1178,7 @@ static int inval_cache_and_wait_for_operation(
 		INVALIDATE_CACHED_RANGE(map, inval_adr, inval_len);
 	spin_lock(chip->mutex);
 
-	/* set our timeout to 8 times the expected delay */
-	timeo = chip_op_time * 8;
+	timeo = chip_op_time_max;
 	if (!timeo)
 		timeo = 500000;
 	reset_timeo = timeo;
@@ -1217,8 +1238,8 @@ static int inval_cache_and_wait_for_operation(
 
 #endif
 
-#define WAIT_TIMEOUT(map, chip, adr, udelay) \
-	INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay);
+#define WAIT_TIMEOUT(map, chip, adr, udelay, udelay_max) \
+	INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay, udelay_max);
 
 
 static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t adr, size_t len)
@@ -1452,7 +1473,8 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
 				   adr, map_bankwidth(map),
-				   chip->word_write_time);
+				   chip->word_write_time,
+				   chip->word_write_time_max);
 	if (ret) {
 		xip_enable(map, chip, adr);
 		printk(KERN_ERR "%s: word write error (status timeout)\n", map->name);
@@ -1623,7 +1645,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
 	chip->state = FL_WRITING_TO_BUFFER;
 	map_write(map, write_cmd, cmd_adr);
-	ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0);
+	ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0, 0);
 	if (ret) {
 		/* Argh. Not ready for write to buffer */
 		map_word Xstatus = map_read(map, cmd_adr);
@@ -1692,7 +1714,8 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, cmd_adr,
 				   initial_adr, initial_len,
-				   chip->buffer_write_time);
+				   chip->buffer_write_time,
+				   chip->buffer_write_time_max);
 	if (ret) {
 		map_write(map, CMD(0x70), cmd_adr);
 		chip->state = FL_STATUS;
@@ -1827,7 +1850,8 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
 				   adr, len,
-				   chip->erase_time);
+				   chip->erase_time,
+				   chip->erase_time_max);
 	if (ret) {
 		map_write(map, CMD(0x70), adr);
 		chip->state = FL_STATUS;
@@ -2006,7 +2030,7 @@ static int __xipram do_xxlock_oneblock(struct map_info *map, struct flchip *chip
 	 */
 	udelay = (!extp || !(extp->FeatureSupport & (1 << 5))) ? 1000000/HZ : 0;
 
-	ret = WAIT_TIMEOUT(map, chip, adr, udelay);
+	ret = WAIT_TIMEOUT(map, chip, adr, udelay, udelay * 100);
 	if (ret) {
 		map_write(map, CMD(0x70), adr);
 		chip->state = FL_STATUS;
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index 08dd131301c1..d4f38c5fd44e 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -73,6 +73,10 @@ struct flchip {
 	int buffer_write_time;
 	int erase_time;
 
+	int word_write_time_max;
+	int buffer_write_time_max;
+	int erase_time_max;
+
 	void *priv;
 };
 
-- 
GitLab


From cf93ae02600e2c752bf2570085e7970a1c0f2b94 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 6 Aug 2008 13:12:04 -0700
Subject: [PATCH 011/895] [MTD] Compile fix for dataflash OTP support

> > linux-next-20080805/drivers/mtd/devices/mtd_dataflash.c: In function 'add_dataflash_otp':
> > linux-next-20080805/drivers/mtd/devices/mtd_dataflash.c:670: error: too many arguments to function 'otp_setup'

Whoops, sorry ... I see what was going on.  My bad.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/devices/mtd_dataflash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 17c9b20dca87..90161277902b 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -618,7 +618,7 @@ static char *otp_setup(struct mtd_info *device, char revision)
 
 #else
 
-static char *otp_setup(struct mtd_info *device)
+static char *otp_setup(struct mtd_info *device, char revision)
 {
 	return " (OTP)";
 }
-- 
GitLab


From 8c64038e4c077b2b37c6b27d0c40c77a3ddfaeef Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 6 Aug 2008 21:55:14 -0700
Subject: [PATCH 012/895] [MTD] make dataflash write-verify be optional

This adds a WRITE_VERIFY Kconfig option to the DataFlash driver,
closely mirroring the similar NAND and ONENAND options, giving
an option to disable some code that's currently always enabled.

Removing this step probably saves a millisecond or so per page
when writing data, which will add up quickly since these pages
are small (the largest is 1 KiB).  It doesn't seem to add a
lot in terms of reliability, and wouldn't detect errors which
crop up when transferring data to the on-chip SRAM buffer.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Acked-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Acked-by: Andrew Victor <linux@maxim.org.za>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/devices/Kconfig         | 10 ++++++++++
 drivers/mtd/devices/mtd_dataflash.c |  8 +++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 88f4df047464..6fde0a2e3567 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -59,6 +59,16 @@ config MTD_DATAFLASH
 	  Sometimes DataFlash chips are packaged inside MMC-format
 	  cards; at this writing, the MMC stack won't handle those.
 
+config MTD_DATAFLASH_WRITE_VERIFY
+	bool "Verify DataFlash page writes"
+	depends on MTD_DATAFLASH
+	help
+	  This adds an extra check when data is written to the flash.
+	  It may help if you are verifying chip setup (timings etc) on
+	  your board.  There is a rare possibility that even though the
+	  device thinks the write was successful, a bit could have been
+	  flipped accidentally due to device wear or something else.
+
 config MTD_DATAFLASH_OTP
 	bool "DataFlash OTP support (Security Register)"
 	depends on MTD_DATAFLASH
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 90161277902b..6dd9aff8bb2d 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -30,12 +30,10 @@
  * doesn't (yet) use these for any kind of i/o overlap or prefetching.
  *
  * Sometimes DataFlash is packaged in MMC-format cards, although the
- * MMC stack can't use SPI (yet), or distinguish between MMC and DataFlash
+ * MMC stack can't (yet?) distinguish between MMC and DataFlash
  * protocols during enumeration.
  */
 
-#define CONFIG_DATAFLASH_WRITE_VERIFY
-
 /* reads can bypass the buffers */
 #define OP_READ_CONTINUOUS	0xE8
 #define OP_READ_PAGE		0xD2
@@ -403,7 +401,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		(void) dataflash_waitready(priv->spi);
 
 
-#ifdef	CONFIG_DATAFLASH_WRITE_VERIFY
+#ifdef CONFIG_MTD_DATAFLASH_VERIFY_WRITE
 
 		/* (3) Compare to Buffer1 */
 		addr = pageaddr << priv->page_offset;
@@ -432,7 +430,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		} else
 			status = 0;
 
-#endif	/* CONFIG_DATAFLASH_WRITE_VERIFY */
+#endif	/* CONFIG_MTD_DATAFLASH_VERIFY_WRITE */
 
 		remaining = remaining - writelen;
 		pageaddr++;
-- 
GitLab


From c314dfdc358847eef0fc07ec8682e1acc8cadd00 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Thu, 7 Aug 2008 11:55:07 +0100
Subject: [PATCH 013/895] [MTD] [NOR] Rename and export new cfi_qry_*()
 functions

They need to be exported, so let's give them less generic-sounding names
while we're at it.

Original export patch, along with the suggestion about the nomenclature,
from Stephen Rothwell.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_probe.c | 20 ++++++++++----------
 drivers/mtd/chips/cfi_util.c  | 26 +++++++++++++++-----------
 include/linux/mtd/cfi.h       | 12 ++++++------
 3 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/drivers/mtd/chips/cfi_probe.c b/drivers/mtd/chips/cfi_probe.c
index e706be2ad0cb..e63e6749429a 100644
--- a/drivers/mtd/chips/cfi_probe.c
+++ b/drivers/mtd/chips/cfi_probe.c
@@ -44,14 +44,14 @@ do { \
 
 #define xip_enable(base, map, cfi) \
 do { \
-	qry_mode_off(base, map, cfi); \
+	cfi_qry_mode_off(base, map, cfi);		\
 	xip_allowed(base, map); \
 } while (0)
 
 #define xip_disable_qry(base, map, cfi) \
 do { \
 	xip_disable(); \
-	qry_mode_on(base, map, cfi); \
+	cfi_qry_mode_on(base, map, cfi); \
 } while (0)
 
 #else
@@ -87,7 +87,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 	}
 
 	xip_disable();
-	if (!qry_mode_on(base, map, cfi)) {
+	if (!cfi_qry_mode_on(base, map, cfi)) {
 		xip_enable(base, map, cfi);
 		return 0;
 	}
@@ -108,13 +108,13 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
  		start = i << cfi->chipshift;
 		/* This chip should be in read mode if it's one
 		   we've already touched. */
-		if (qry_present(map, start, cfi)) {
+		if (cfi_qry_present(map, start, cfi)) {
 			/* Eep. This chip also had the QRY marker.
 			 * Is it an alias for the new one? */
-			qry_mode_off(start, map, cfi);
+			cfi_qry_mode_off(start, map, cfi);
 
 			/* If the QRY marker goes away, it's an alias */
-			if (!qry_present(map, start, cfi)) {
+			if (!cfi_qry_present(map, start, cfi)) {
 				xip_allowed(base, map);
 				printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n",
 				       map->name, base, start);
@@ -124,9 +124,9 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 			 * unfortunate. Stick the new chip in read mode
 			 * too and if it's the same, assume it's an alias. */
 			/* FIXME: Use other modes to do a proper check */
-			qry_mode_off(base, map, cfi);
+			cfi_qry_mode_off(base, map, cfi);
 
-			if (qry_present(map, base, cfi)) {
+			if (cfi_qry_present(map, base, cfi)) {
 				xip_allowed(base, map);
 				printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n",
 				       map->name, base, start);
@@ -141,7 +141,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 	cfi->numchips++;
 
 	/* Put it back into Read Mode */
-	qry_mode_off(base, map, cfi);
+	cfi_qry_mode_off(base, map, cfi);
 	xip_allowed(base, map);
 
 	printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n",
@@ -201,7 +201,7 @@ static int __xipram cfi_chip_setup(struct map_info *map,
 			  cfi_read_query(map, base + 0xf * ofs_factor);
 
 	/* Put it back into Read Mode */
-	qry_mode_off(base, map, cfi);
+	cfi_qry_mode_off(base, map, cfi);
 	xip_allowed(base, map);
 
 	/* Do any necessary byteswapping */
diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c
index 8d7553670526..34d40e25d312 100644
--- a/drivers/mtd/chips/cfi_util.c
+++ b/drivers/mtd/chips/cfi_util.c
@@ -24,8 +24,8 @@
 #include <linux/mtd/cfi.h>
 #include <linux/mtd/compatmac.h>
 
-int __xipram qry_present(struct map_info *map, __u32 base,
-				struct cfi_private *cfi)
+int __xipram cfi_qry_present(struct map_info *map, __u32 base,
+			     struct cfi_private *cfi)
 {
 	int osf = cfi->interleave * cfi->device_type;	/* scale factor */
 	map_word val[3];
@@ -50,35 +50,39 @@ int __xipram qry_present(struct map_info *map, __u32 base,
 
 	return 1; 	/* "QRY" found */
 }
+EXPORT_SYMBOL_GPL(cfi_qry_present);
 
-int __xipram qry_mode_on(uint32_t base, struct map_info *map,
-				struct cfi_private *cfi)
+int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map,
+			     struct cfi_private *cfi)
 {
 	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
-	if (qry_present(map, base, cfi))
+	if (cfi_qry_present(map, base, cfi))
 		return 1;
 	/* QRY not found probably we deal with some odd CFI chips */
 	/* Some revisions of some old Intel chips? */
 	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
-	if (qry_present(map, base, cfi))
+	if (cfi_qry_present(map, base, cfi))
 		return 1;
 	/* ST M29DW chips */
 	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x98, 0x555, base, map, cfi, cfi->device_type, NULL);
-	if (qry_present(map, base, cfi))
+	if (cfi_qry_present(map, base, cfi))
 		return 1;
 	/* QRY not found */
 	return 0;
 }
-void __xipram qry_mode_off(uint32_t base, struct map_info *map,
-				struct cfi_private *cfi)
+EXPORT_SYMBOL_GPL(cfi_qry_mode_on);
+
+void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map,
+			       struct cfi_private *cfi)
 {
 	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
 }
+EXPORT_SYMBOL_GPL(cfi_qry_mode_off);
 
 struct cfi_extquery *
 __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* name)
@@ -104,7 +108,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
 #endif
 
 	/* Switch it into Query Mode */
-	qry_mode_on(base, map, cfi);
+	cfi_qry_mode_on(base, map, cfi);
 	/* Read in the Extended Query Table */
 	for (i=0; i<size; i++) {
 		((unsigned char *)extp)[i] =
@@ -112,7 +116,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
 	}
 
 	/* Make sure it returns to read mode */
-	qry_mode_off(base, map, cfi);
+	cfi_qry_mode_off(base, map, cfi);
 
 #ifdef CONFIG_MTD_XIP
 	(void) map_read(map, base);
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 3058917d7b92..ee5124ec319e 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -483,12 +483,12 @@ static inline void cfi_udelay(int us)
 	}
 }
 
-int __xipram qry_present(struct map_info *map, __u32 base,
-				struct cfi_private *cfi);
-int __xipram qry_mode_on(uint32_t base, struct map_info *map,
-				struct cfi_private *cfi);
-void __xipram qry_mode_off(uint32_t base, struct map_info *map,
-				struct cfi_private *cfi);
+int __xipram cfi_qry_present(struct map_info *map, __u32 base,
+			     struct cfi_private *cfi);
+int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map,
+			     struct cfi_private *cfi);
+void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map,
+			       struct cfi_private *cfi);
 
 struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t size,
 			     const char* name);
-- 
GitLab


From d483492cb5401283b3c6e46b829fec0b42297e68 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Sun, 10 Aug 2008 18:46:50 +0800
Subject: [PATCH 014/895] [MTD] [NAND] drivers/mtd/nand/nandsim.c: remove
 duplicated #include

Removed duplicated include <asm/div64.h> in
drivers/mtd/nand/nandsim.c.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nandsim.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 556e8131ecdc..ae7c57781a68 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -38,7 +38,6 @@
 #include <linux/delay.h>
 #include <linux/list.h>
 #include <linux/random.h>
-#include <asm/div64.h>
 
 /* Default simulator parameters values */
 #if !defined(CONFIG_NANDSIM_FIRST_ID_BYTE)  || \
-- 
GitLab


From faff37508a104e9ec5285d5adecaab7e8dde472a Mon Sep 17 00:00:00 2001
From: Chen Gong <g.chen@freescale.com>
Date: Mon, 11 Aug 2008 16:59:13 +0800
Subject: [PATCH 015/895] [MTD] m25p80.c erase enhance

This patch adds an erase_block command to enhance erase operation

Signed-off-by: Chen Gong <g.chen@freescale.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/devices/m25p80.c | 48 ++++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index b35c3333e210..8fbd1b57f606 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -39,6 +39,7 @@
 #define	OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
 #define	OPCODE_BE_4K 		0x20	/* Erase 4KiB block */
 #define	OPCODE_BE_32K		0x52	/* Erase 32KiB block */
+#define	OPCODE_BE		0xc7	/* Erase whole flash block */
 #define	OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
 #define	OPCODE_RDID		0x9f	/* Read JEDEC ID */
 
@@ -161,6 +162,31 @@ static int wait_till_ready(struct m25p *flash)
 	return 1;
 }
 
+/*
+ * Erase the whole flash memory
+ *
+ * Returns 0 if successful, non-zero otherwise.
+ */
+static int erase_block(struct m25p *flash)
+{
+	DEBUG(MTD_DEBUG_LEVEL3, "%s: %s %dKiB\n",
+			flash->spi->dev.bus_id, __func__,
+			flash->mtd.size / 1024);
+
+	/* Wait until finished previous write command. */
+	if (wait_till_ready(flash))
+		return 1;
+
+	/* Send write enable, then erase commands. */
+	write_enable(flash);
+
+	/* Set up command buffer. */
+	flash->command[0] = OPCODE_BE;
+
+	spi_write(flash->spi, flash->command, 1);
+
+	return 0;
+}
 
 /*
  * Erase one sector of flash memory at offset ``offset'' which is any
@@ -229,15 +255,21 @@ static int m25p80_erase(struct mtd_info *mtd, struct erase_info *instr)
 	 */
 
 	/* now erase those sectors */
-	while (len) {
-		if (erase_sector(flash, addr)) {
-			instr->state = MTD_ERASE_FAILED;
-			mutex_unlock(&flash->lock);
-			return -EIO;
-		}
+	if (len == flash->mtd.size && erase_block(flash)) {
+		instr->state = MTD_ERASE_FAILED;
+		mutex_unlock(&flash->lock);
+		return -EIO;
+	} else {
+		while (len) {
+			if (erase_sector(flash, addr)) {
+				instr->state = MTD_ERASE_FAILED;
+				mutex_unlock(&flash->lock);
+				return -EIO;
+			}
 
-		addr += mtd->erasesize;
-		len -= mtd->erasesize;
+			addr += mtd->erasesize;
+			len -= mtd->erasesize;
+		}
 	}
 
 	mutex_unlock(&flash->lock);
-- 
GitLab


From 75d0ee2202b5740e94e913d8a52f91c6557c4c81 Mon Sep 17 00:00:00 2001
From: Chen Gong <g.chen@freescale.com>
Date: Mon, 11 Aug 2008 16:59:14 +0800
Subject: [PATCH 016/895] [MTD] m25p80.c code cleanup

code cleanup for m25p80.c

Signed-off-by: Chen Gong <g.chen@freescale.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/devices/m25p80.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 8fbd1b57f606..b2b58c1bb321 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -134,7 +134,7 @@ static inline int write_enable(struct m25p *flash)
 {
 	u8	code = OPCODE_WREN;
 
-	return spi_write_then_read(flash->spi, &code, 1, NULL, 0);
+	return spi_write(flash->spi, &code, 1);
 }
 
 
-- 
GitLab


From d0e8c47c58575b9131e786edb488fd029eba443e Mon Sep 17 00:00:00 2001
From: Chen Gong <g.chen@freescale.com>
Date: Mon, 11 Aug 2008 16:59:15 +0800
Subject: [PATCH 017/895] [MTD] m25p80.c extended jedec support

- add extended device information support
- add s25sl128 device support

Signed-off-by: Chen Gong <g.chen@freescale.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/devices/m25p80.c | 86 ++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 39 deletions(-)

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index b2b58c1bb321..4d3ae085b1d2 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -469,6 +469,7 @@ struct flash_info {
 	 * then a two byte device id.
 	 */
 	u32		jedec_id;
+	u16             ext_id;
 
 	/* The size listed here is what works with OPCODE_SE, which isn't
 	 * necessarily called a "sector" by the vendor.
@@ -488,57 +489,59 @@ struct flash_info {
 static struct flash_info __devinitdata m25p_data [] = {
 
 	/* Atmel -- some are (confusingly) marketed as "DataFlash" */
-	{ "at25fs010",  0x1f6601, 32 * 1024, 4, SECT_4K, },
-	{ "at25fs040",  0x1f6604, 64 * 1024, 8, SECT_4K, },
+	{ "at25fs010",  0x1f6601, 0, 32 * 1024, 4, SECT_4K, },
+	{ "at25fs040",  0x1f6604, 0, 64 * 1024, 8, SECT_4K, },
 
-	{ "at25df041a", 0x1f4401, 64 * 1024, 8, SECT_4K, },
-	{ "at25df641",  0x1f4800, 64 * 1024, 128, SECT_4K, },
+	{ "at25df041a", 0x1f4401, 0, 64 * 1024, 8, SECT_4K, },
+	{ "at25df641",  0x1f4800, 0, 64 * 1024, 128, SECT_4K, },
 
-	{ "at26f004",   0x1f0400, 64 * 1024, 8, SECT_4K, },
-	{ "at26df081a", 0x1f4501, 64 * 1024, 16, SECT_4K, },
-	{ "at26df161a", 0x1f4601, 64 * 1024, 32, SECT_4K, },
-	{ "at26df321",  0x1f4701, 64 * 1024, 64, SECT_4K, },
+	{ "at26f004",   0x1f0400, 0, 64 * 1024, 8, SECT_4K, },
+	{ "at26df081a", 0x1f4501, 0, 64 * 1024, 16, SECT_4K, },
+	{ "at26df161a", 0x1f4601, 0, 64 * 1024, 32, SECT_4K, },
+	{ "at26df321",  0x1f4701, 0, 64 * 1024, 64, SECT_4K, },
 
 	/* Spansion -- single (large) sector size only, at least
 	 * for the chips listed here (without boot sectors).
 	 */
-	{ "s25sl004a", 0x010212, 64 * 1024, 8, },
-	{ "s25sl008a", 0x010213, 64 * 1024, 16, },
-	{ "s25sl016a", 0x010214, 64 * 1024, 32, },
-	{ "s25sl032a", 0x010215, 64 * 1024, 64, },
-	{ "s25sl064a", 0x010216, 64 * 1024, 128, },
+	{ "s25sl004a", 0x010212, 0, 64 * 1024, 8, },
+	{ "s25sl008a", 0x010213, 0, 64 * 1024, 16, },
+	{ "s25sl016a", 0x010214, 0, 64 * 1024, 32, },
+	{ "s25sl032a", 0x010215, 0, 64 * 1024, 64, },
+	{ "s25sl064a", 0x010216, 0, 64 * 1024, 128, },
+        { "s25sl12800", 0x012018, 0x0300, 256 * 1024, 64, },
+	{ "s25sl12801", 0x012018, 0x0301, 64 * 1024, 256, },
 
 	/* SST -- large erase sizes are "overlays", "sectors" are 4K */
-	{ "sst25vf040b", 0xbf258d, 64 * 1024, 8, SECT_4K, },
-	{ "sst25vf080b", 0xbf258e, 64 * 1024, 16, SECT_4K, },
-	{ "sst25vf016b", 0xbf2541, 64 * 1024, 32, SECT_4K, },
-	{ "sst25vf032b", 0xbf254a, 64 * 1024, 64, SECT_4K, },
+	{ "sst25vf040b", 0xbf258d, 0, 64 * 1024, 8, SECT_4K, },
+	{ "sst25vf080b", 0xbf258e, 0, 64 * 1024, 16, SECT_4K, },
+	{ "sst25vf016b", 0xbf2541, 0, 64 * 1024, 32, SECT_4K, },
+	{ "sst25vf032b", 0xbf254a, 0, 64 * 1024, 64, SECT_4K, },
 
 	/* ST Microelectronics -- newer production may have feature updates */
-	{ "m25p05",  0x202010,  32 * 1024, 2, },
-	{ "m25p10",  0x202011,  32 * 1024, 4, },
-	{ "m25p20",  0x202012,  64 * 1024, 4, },
-	{ "m25p40",  0x202013,  64 * 1024, 8, },
-	{ "m25p80",         0,  64 * 1024, 16, },
-	{ "m25p16",  0x202015,  64 * 1024, 32, },
-	{ "m25p32",  0x202016,  64 * 1024, 64, },
-	{ "m25p64",  0x202017,  64 * 1024, 128, },
-	{ "m25p128", 0x202018, 256 * 1024, 64, },
-
-	{ "m45pe80", 0x204014,  64 * 1024, 16, },
-	{ "m45pe16", 0x204015,  64 * 1024, 32, },
-
-	{ "m25pe80", 0x208014,  64 * 1024, 16, },
-	{ "m25pe16", 0x208015,  64 * 1024, 32, SECT_4K, },
+	{ "m25p05",  0x202010,  0, 32 * 1024, 2, },
+	{ "m25p10",  0x202011,  0, 32 * 1024, 4, },
+	{ "m25p20",  0x202012,  0, 64 * 1024, 4, },
+	{ "m25p40",  0x202013,  0, 64 * 1024, 8, },
+	{ "m25p80",         0,  0, 64 * 1024, 16, },
+	{ "m25p16",  0x202015,  0, 64 * 1024, 32, },
+	{ "m25p32",  0x202016,  0, 64 * 1024, 64, },
+	{ "m25p64",  0x202017,  0, 64 * 1024, 128, },
+	{ "m25p128", 0x202018, 0, 256 * 1024, 64, },
+
+	{ "m45pe80", 0x204014,  0, 64 * 1024, 16, },
+	{ "m45pe16", 0x204015,  0, 64 * 1024, 32, },
+
+	{ "m25pe80", 0x208014,  0, 64 * 1024, 16, },
+	{ "m25pe16", 0x208015,  0, 64 * 1024, 32, SECT_4K, },
 
 	/* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
-	{ "w25x10", 0xef3011, 64 * 1024, 2, SECT_4K, },
-	{ "w25x20", 0xef3012, 64 * 1024, 4, SECT_4K, },
-	{ "w25x40", 0xef3013, 64 * 1024, 8, SECT_4K, },
-	{ "w25x80", 0xef3014, 64 * 1024, 16, SECT_4K, },
-	{ "w25x16", 0xef3015, 64 * 1024, 32, SECT_4K, },
-	{ "w25x32", 0xef3016, 64 * 1024, 64, SECT_4K, },
-	{ "w25x64", 0xef3017, 64 * 1024, 128, SECT_4K, },
+	{ "w25x10", 0xef3011, 0, 64 * 1024, 2, SECT_4K, },
+	{ "w25x20", 0xef3012, 0, 64 * 1024, 4, SECT_4K, },
+	{ "w25x40", 0xef3013, 0, 64 * 1024, 8, SECT_4K, },
+	{ "w25x80", 0xef3014, 0, 64 * 1024, 16, SECT_4K, },
+	{ "w25x16", 0xef3015, 0, 64 * 1024, 32, SECT_4K, },
+	{ "w25x32", 0xef3016, 0, 64 * 1024, 64, SECT_4K, },
+	{ "w25x64", 0xef3017, 0, 64 * 1024, 128, SECT_4K, },
 };
 
 static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
@@ -547,6 +550,7 @@ static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
 	u8			code = OPCODE_RDID;
 	u8			id[3];
 	u32			jedec;
+	u16                     ext_jedec;
 	struct flash_info	*info;
 
 	/* JEDEC also defines an optional "extended device information"
@@ -565,10 +569,14 @@ static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
 	jedec = jedec << 8;
 	jedec |= id[2];
 
+	ext_jedec = id[3] << 8 | id[4];
+
 	for (tmp = 0, info = m25p_data;
 			tmp < ARRAY_SIZE(m25p_data);
 			tmp++, info++) {
 		if (info->jedec_id == jedec)
+			if (ext_jedec != 0 && info->ext_id != ext_jedec)
+				continue;
 			return info;
 	}
 	dev_err(&spi->dev, "unrecognized JEDEC id %06x\n", jedec);
-- 
GitLab


From bb0eb217c980d50c45f3e793b4dcc70ab9ee820d Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Tue, 12 Aug 2008 12:40:50 +0300
Subject: [PATCH 018/895] [MTD] Define and use MTD_FAIL_ADDR_UNKNOWN instead of
 0xffffffff

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdconcat.c            | 4 ++--
 drivers/mtd/mtdpart.c              | 4 ++--
 drivers/mtd/nand/nand_base.c       | 2 +-
 drivers/mtd/onenand/onenand_base.c | 2 +-
 fs/jffs2/erase.c                   | 4 ++--
 include/linux/mtd/mtd.h            | 4 +++-
 6 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 2972a5edb73d..789842d0e6f2 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -444,7 +444,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
 			return -EINVAL;
 	}
 
-	instr->fail_addr = 0xffffffff;
+	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
 	/* make a local copy of instr to avoid modifying the caller's struct */
 	erase = kmalloc(sizeof (struct erase_info), GFP_KERNEL);
@@ -493,7 +493,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
 			/* sanity check: should never happen since
 			 * block alignment has been checked above */
 			BUG_ON(err == -EINVAL);
-			if (erase->fail_addr != 0xffffffff)
+			if (erase->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 				instr->fail_addr = erase->fail_addr + offset;
 			break;
 		}
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index edb90b58a9b1..8e77e36e75ee 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -214,7 +214,7 @@ static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
 	instr->addr += part->offset;
 	ret = part->master->erase(part->master, instr);
 	if (ret) {
-		if (instr->fail_addr != 0xffffffff)
+		if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 			instr->fail_addr -= part->offset;
 		instr->addr -= part->offset;
 	}
@@ -226,7 +226,7 @@ void mtd_erase_callback(struct erase_info *instr)
 	if (instr->mtd->erase == part_erase) {
 		struct mtd_part *part = PART(instr->mtd);
 
-		if (instr->fail_addr != 0xffffffff)
+		if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 			instr->fail_addr -= part->offset;
 		instr->addr -= part->offset;
 	}
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index d1129bae6c27..582280560c89 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2042,7 +2042,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		return -EINVAL;
 	}
 
-	instr->fail_addr = 0xffffffff;
+	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
 	/* Grab the lock and see if the device is available */
 	nand_get_device(chip, mtd, FL_ERASING);
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 926cf3a4135d..90ed319f26e6 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1794,7 +1794,7 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
 		return -EINVAL;
 	}
 
-	instr->fail_addr = 0xffffffff;
+	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
 	/* Grab the lock and see if the device is available */
 	onenand_get_device(mtd, FL_ERASING);
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index dddb2a6c9e2c..259461b910af 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -68,7 +68,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
 	instr->len = c->sector_size;
 	instr->callback = jffs2_erase_callback;
 	instr->priv = (unsigned long)(&instr[1]);
-	instr->fail_addr = 0xffffffff;
+	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
 	((struct erase_priv_struct *)instr->priv)->jeb = jeb;
 	((struct erase_priv_struct *)instr->priv)->c = c;
@@ -175,7 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
 {
 	/* For NAND, if the failure did not occur at the device level for a
 	   specific physical page, don't bother updating the bad block table. */
-	if (jffs2_cleanmarker_oob(c) && (bad_offset != 0xffffffff)) {
+	if (jffs2_cleanmarker_oob(c) && (bad_offset != MTD_FAIL_ADDR_UNKNOWN)) {
 		/* We had a device-level failure to erase.  Let's see if we've
 		   failed too many times. */
 		if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 922636548558..eae26bb6430a 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -25,8 +25,10 @@
 #define MTD_ERASE_DONE          0x08
 #define MTD_ERASE_FAILED        0x10
 
+#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff
+
 /* If the erase fails, fail_addr might indicate exactly which block failed.  If
-   fail_addr = 0xffffffff, the failure was not at the device level or was not
+   fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not
    specific to any particular block. */
 struct erase_info {
 	struct mtd_info *mtd;
-- 
GitLab


From 36cd4fb5d277f34fe9e4db0deac2d4efd7dff735 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Wed, 6 Aug 2008 10:08:46 +0300
Subject: [PATCH 019/895] [MTD] [OneNAND] Add OMAP2 / OMAP3 OneNAND driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver had resided in the OMAP tree but is now to be in MTD.

Original authors were:
	Jarkko Lavinen <jarkko.lavinen@nokia.com> and Juha YrjÃ¶lÃ¤
	IRQ and DMA support written by Timo Teras

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/Kconfig         |   7 +
 drivers/mtd/onenand/Makefile        |   1 +
 drivers/mtd/onenand/omap2.c         | 777 ++++++++++++++++++++++++++++
 include/asm-arm/arch-omap/onenand.h |   7 +-
 4 files changed, 791 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/onenand/omap2.c

diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig
index b94a61b670d1..79fa79e8f8de 100644
--- a/drivers/mtd/onenand/Kconfig
+++ b/drivers/mtd/onenand/Kconfig
@@ -27,6 +27,13 @@ config MTD_ONENAND_GENERIC
 	help
 	  Support for OneNAND flash via platform device driver.
 
+config MTD_ONENAND_OMAP2
+	tristate "OneNAND on OMAP2/OMAP3 support"
+	depends on MTD_ONENAND && (ARCH_OMAP2 || ARCH_OMAP3)
+	help
+	  Support for a OneNAND flash device connected to an OMAP2/OMAP3 CPU
+	  via the GPMC memory controller.
+
 config MTD_ONENAND_OTP
 	bool "OneNAND OTP Support"
 	select HAVE_MTD_OTP
diff --git a/drivers/mtd/onenand/Makefile b/drivers/mtd/onenand/Makefile
index 4d2eacfd7e11..64b6cc61a520 100644
--- a/drivers/mtd/onenand/Makefile
+++ b/drivers/mtd/onenand/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_MTD_ONENAND)		+= onenand.o
 
 # Board specific.
 obj-$(CONFIG_MTD_ONENAND_GENERIC)	+= generic.o
+obj-$(CONFIG_MTD_ONENAND_OMAP2)		+= omap2.o
 
 # Simulator
 obj-$(CONFIG_MTD_ONENAND_SIM)		+= onenand_sim.o
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
new file mode 100644
index 000000000000..40153ace5df6
--- /dev/null
+++ b/drivers/mtd/onenand/omap2.c
@@ -0,0 +1,777 @@
+/*
+ *  linux/drivers/mtd/onenand/omap2.c
+ *
+ *  OneNAND driver for OMAP2 / OMAP3
+ *
+ *  Copyright Â© 2005-2006 Nokia Corporation
+ *
+ *  Author: Jarkko Lavinen <jarkko.lavinen@nokia.com> and Juha YrjÃ¶lÃ¤
+ *  IRQ and DMA support written by Timo Teras
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; see the file COPYING. If not, write to the Free Software
+ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/onenand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include <asm/io.h>
+#include <asm/mach/flash.h>
+#include <asm/arch/gpmc.h>
+#include <asm/arch/onenand.h>
+#include <asm/arch/gpio.h>
+#include <asm/arch/gpmc.h>
+#include <asm/arch/pm.h>
+
+#include <linux/dma-mapping.h>
+#include <asm/dma-mapping.h>
+#include <asm/arch/dma.h>
+
+#include <asm/arch/board.h>
+
+#define DRIVER_NAME "omap2-onenand"
+
+#define ONENAND_IO_SIZE		SZ_128K
+#define ONENAND_BUFRAM_SIZE	(1024 * 5)
+
+struct omap2_onenand {
+	struct platform_device *pdev;
+	int gpmc_cs;
+	unsigned long phys_base;
+	int gpio_irq;
+	struct mtd_info mtd;
+	struct mtd_partition *parts;
+	struct onenand_chip onenand;
+	struct completion irq_done;
+	struct completion dma_done;
+	int dma_channel;
+	int freq;
+	int (*setup)(void __iomem *base, int freq);
+};
+
+static void omap2_onenand_dma_cb(int lch, u16 ch_status, void *data)
+{
+	struct omap2_onenand *c = data;
+
+	complete(&c->dma_done);
+}
+
+static irqreturn_t omap2_onenand_interrupt(int irq, void *dev_id)
+{
+	struct omap2_onenand *c = dev_id;
+
+	complete(&c->irq_done);
+
+	return IRQ_HANDLED;
+}
+
+static inline unsigned short read_reg(struct omap2_onenand *c, int reg)
+{
+	return readw(c->onenand.base + reg);
+}
+
+static inline void write_reg(struct omap2_onenand *c, unsigned short value,
+			     int reg)
+{
+	writew(value, c->onenand.base + reg);
+}
+
+static void wait_err(char *msg, int state, unsigned int ctrl, unsigned int intr)
+{
+	printk(KERN_ERR "onenand_wait: %s! state %d ctrl 0x%04x intr 0x%04x\n",
+	       msg, state, ctrl, intr);
+}
+
+static void wait_warn(char *msg, int state, unsigned int ctrl,
+		      unsigned int intr)
+{
+	printk(KERN_WARNING "onenand_wait: %s! state %d ctrl 0x%04x "
+	       "intr 0x%04x\n", msg, state, ctrl, intr);
+}
+
+static int omap2_onenand_wait(struct mtd_info *mtd, int state)
+{
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+	unsigned int intr = 0;
+	unsigned int ctrl;
+	unsigned long timeout;
+	u32 syscfg;
+
+	if (state == FL_RESETING) {
+		int i;
+
+		for (i = 0; i < 20; i++) {
+			udelay(1);
+			intr = read_reg(c, ONENAND_REG_INTERRUPT);
+			if (intr & ONENAND_INT_MASTER)
+				break;
+		}
+		ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+		if (ctrl & ONENAND_CTRL_ERROR) {
+			wait_err("controller error", state, ctrl, intr);
+			return -EIO;
+		}
+		if (!(intr & ONENAND_INT_RESET)) {
+			wait_err("timeout", state, ctrl, intr);
+			return -EIO;
+		}
+		return 0;
+	}
+
+	if (state != FL_READING) {
+		int result;
+
+		/* Turn interrupts on */
+		syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+		syscfg |= ONENAND_SYS_CFG1_IOBE;
+		write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
+
+		INIT_COMPLETION(c->irq_done);
+		if (c->gpio_irq) {
+			result = omap_get_gpio_datain(c->gpio_irq);
+			if (result == -1) {
+				ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+				intr = read_reg(c, ONENAND_REG_INTERRUPT);
+				wait_err("gpio error", state, ctrl, intr);
+				return -EIO;
+			}
+		} else
+			result = 0;
+		if (result == 0) {
+			int retry_cnt = 0;
+retry:
+			result = wait_for_completion_timeout(&c->irq_done,
+						    msecs_to_jiffies(20));
+			if (result == 0) {
+				/* Timeout after 20ms */
+				ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+				if (ctrl & ONENAND_CTRL_ONGO) {
+					/*
+					 * The operation seems to be still going
+					 * so give it some more time.
+					 */
+					retry_cnt += 1;
+					if (retry_cnt < 3)
+						goto retry;
+					intr = read_reg(c,
+							ONENAND_REG_INTERRUPT);
+					wait_err("timeout", state, ctrl, intr);
+					return -EIO;
+				}
+				intr = read_reg(c, ONENAND_REG_INTERRUPT);
+				if ((intr & ONENAND_INT_MASTER) == 0)
+					wait_warn("timeout", state, ctrl, intr);
+			}
+		}
+	} else {
+		/* Turn interrupts off */
+		syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+		syscfg &= ~ONENAND_SYS_CFG1_IOBE;
+		write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
+
+		timeout = jiffies + msecs_to_jiffies(20);
+		while (time_before(jiffies, timeout)) {
+			intr = read_reg(c, ONENAND_REG_INTERRUPT);
+			if (intr & ONENAND_INT_MASTER)
+				break;
+		}
+	}
+
+	intr = read_reg(c, ONENAND_REG_INTERRUPT);
+	ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+
+	if (intr & ONENAND_INT_READ) {
+		int ecc = read_reg(c, ONENAND_REG_ECC_STATUS);
+
+		if (ecc) {
+			unsigned int addr1, addr8;
+
+			addr1 = read_reg(c, ONENAND_REG_START_ADDRESS1);
+			addr8 = read_reg(c, ONENAND_REG_START_ADDRESS8);
+			if (ecc & ONENAND_ECC_2BIT_ALL) {
+				printk(KERN_ERR "onenand_wait: ECC error = "
+				       "0x%04x, addr1 %#x, addr8 %#x\n",
+				       ecc, addr1, addr8);
+				mtd->ecc_stats.failed++;
+				return -EBADMSG;
+			} else if (ecc & ONENAND_ECC_1BIT_ALL) {
+				printk(KERN_NOTICE "onenand_wait: correctable "
+				       "ECC error = 0x%04x, addr1 %#x, "
+				       "addr8 %#x\n", ecc, addr1, addr8);
+				mtd->ecc_stats.corrected++;
+			}
+		}
+	} else if (state == FL_READING) {
+		wait_err("timeout", state, ctrl, intr);
+		return -EIO;
+	}
+
+	if (ctrl & ONENAND_CTRL_ERROR) {
+		wait_err("controller error", state, ctrl, intr);
+		if (ctrl & ONENAND_CTRL_LOCK)
+			printk(KERN_ERR "onenand_wait: "
+					"Device is write protected!!!\n");
+		return -EIO;
+	}
+
+	if (ctrl & 0xFE9F)
+		wait_warn("unexpected controller status", state, ctrl, intr);
+
+	return 0;
+}
+
+static inline int omap2_onenand_bufferram_offset(struct mtd_info *mtd, int area)
+{
+	struct onenand_chip *this = mtd->priv;
+
+	if (ONENAND_CURRENT_BUFFERRAM(this)) {
+		if (area == ONENAND_DATARAM)
+			return mtd->writesize;
+		if (area == ONENAND_SPARERAM)
+			return mtd->oobsize;
+	}
+
+	return 0;
+}
+
+#if defined(CONFIG_ARCH_OMAP3) || defined(MULTI_OMAP2)
+
+static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area,
+					unsigned char *buffer, int offset,
+					size_t count)
+{
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+	struct onenand_chip *this = mtd->priv;
+	dma_addr_t dma_src, dma_dst;
+	int bram_offset;
+	unsigned long timeout;
+	void *buf = (void *)buffer;
+	size_t xtra;
+	volatile unsigned *done;
+
+	bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+	if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
+		goto out_copy;
+
+	if (buf >= high_memory) {
+		struct page *p1;
+
+		if (((size_t)buf & PAGE_MASK) !=
+		    ((size_t)(buf + count - 1) & PAGE_MASK))
+			goto out_copy;
+		p1 = vmalloc_to_page(buf);
+		if (!p1)
+			goto out_copy;
+		buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
+	}
+
+	xtra = count & 3;
+	if (xtra) {
+		count -= xtra;
+		memcpy(buf + count, this->base + bram_offset + count, xtra);
+	}
+
+	dma_src = c->phys_base + bram_offset;
+	dma_dst = dma_map_single(&c->pdev->dev, buf, count, DMA_FROM_DEVICE);
+	if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
+		dev_err(&c->pdev->dev,
+			"Couldn't DMA map a %d byte buffer\n",
+			count);
+		goto out_copy;
+	}
+
+	omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+				     count >> 2, 1, 0, 0, 0);
+	omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				dma_src, 0, 0);
+	omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				 dma_dst, 0, 0);
+
+	INIT_COMPLETION(c->dma_done);
+	omap_start_dma(c->dma_channel);
+
+	timeout = jiffies + msecs_to_jiffies(20);
+	done = &c->dma_done.done;
+	while (time_before(jiffies, timeout))
+		if (*done)
+			break;
+
+	dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
+
+	if (!*done) {
+		dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+		goto out_copy;
+	}
+
+	return 0;
+
+out_copy:
+	memcpy(buf, this->base + bram_offset, count);
+	return 0;
+}
+
+static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area,
+					 const unsigned char *buffer,
+					 int offset, size_t count)
+{
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+	struct onenand_chip *this = mtd->priv;
+	dma_addr_t dma_src, dma_dst;
+	int bram_offset;
+	unsigned long timeout;
+	void *buf = (void *)buffer;
+	volatile unsigned *done;
+
+	bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+	if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
+		goto out_copy;
+
+	/* panic_write() may be in an interrupt context */
+	if (in_interrupt())
+		goto out_copy;
+
+	if (buf >= high_memory) {
+		struct page *p1;
+
+		if (((size_t)buf & PAGE_MASK) !=
+		    ((size_t)(buf + count - 1) & PAGE_MASK))
+			goto out_copy;
+		p1 = vmalloc_to_page(buf);
+		if (!p1)
+			goto out_copy;
+		buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
+	}
+
+	dma_src = dma_map_single(&c->pdev->dev, buf, count, DMA_TO_DEVICE);
+	dma_dst = c->phys_base + bram_offset;
+	if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
+		dev_err(&c->pdev->dev,
+			"Couldn't DMA map a %d byte buffer\n",
+			count);
+		return -1;
+	}
+
+	omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+				     count >> 2, 1, 0, 0, 0);
+	omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				dma_src, 0, 0);
+	omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				 dma_dst, 0, 0);
+
+	INIT_COMPLETION(c->dma_done);
+	omap_start_dma(c->dma_channel);
+
+	timeout = jiffies + msecs_to_jiffies(20);
+	done = &c->dma_done.done;
+	while (time_before(jiffies, timeout))
+		if (*done)
+			break;
+
+	dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_TO_DEVICE);
+
+	if (!*done) {
+		dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+		goto out_copy;
+	}
+
+	return 0;
+
+out_copy:
+	memcpy(this->base + bram_offset, buf, count);
+	return 0;
+}
+
+#else
+
+int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area,
+				 unsigned char *buffer, int offset,
+				 size_t count);
+
+int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area,
+				  const unsigned char *buffer,
+				  int offset, size_t count);
+
+#endif
+
+#if defined(CONFIG_ARCH_OMAP2) || defined(MULTI_OMAP2)
+
+static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
+					unsigned char *buffer, int offset,
+					size_t count)
+{
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+	struct onenand_chip *this = mtd->priv;
+	dma_addr_t dma_src, dma_dst;
+	int bram_offset;
+
+	bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+	/* DMA is not used.  Revisit PM requirements before enabling it. */
+	if (1 || (c->dma_channel < 0) ||
+	    ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) ||
+	    (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) {
+		memcpy(buffer, (__force void *)(this->base + bram_offset),
+		       count);
+		return 0;
+	}
+
+	dma_src = c->phys_base + bram_offset;
+	dma_dst = dma_map_single(&c->pdev->dev, buffer, count,
+				 DMA_FROM_DEVICE);
+	if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
+		dev_err(&c->pdev->dev,
+			"Couldn't DMA map a %d byte buffer\n",
+			count);
+		return -1;
+	}
+
+	omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+				     count / 4, 1, 0, 0, 0);
+	omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				dma_src, 0, 0);
+	omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				 dma_dst, 0, 0);
+
+	INIT_COMPLETION(c->dma_done);
+	omap_start_dma(c->dma_channel);
+	wait_for_completion(&c->dma_done);
+
+	dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
+
+	return 0;
+}
+
+static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
+					 const unsigned char *buffer,
+					 int offset, size_t count)
+{
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+	struct onenand_chip *this = mtd->priv;
+	dma_addr_t dma_src, dma_dst;
+	int bram_offset;
+
+	bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+	/* DMA is not used.  Revisit PM requirements before enabling it. */
+	if (1 || (c->dma_channel < 0) ||
+	    ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) ||
+	    (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) {
+		memcpy((__force void *)(this->base + bram_offset), buffer,
+		       count);
+		return 0;
+	}
+
+	dma_src = dma_map_single(&c->pdev->dev, (void *) buffer, count,
+				 DMA_TO_DEVICE);
+	dma_dst = c->phys_base + bram_offset;
+	if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
+		dev_err(&c->pdev->dev,
+			"Couldn't DMA map a %d byte buffer\n",
+			count);
+		return -1;
+	}
+
+	omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S16,
+				     count / 2, 1, 0, 0, 0);
+	omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				dma_src, 0, 0);
+	omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				 dma_dst, 0, 0);
+
+	INIT_COMPLETION(c->dma_done);
+	omap_start_dma(c->dma_channel);
+	wait_for_completion(&c->dma_done);
+
+	dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_TO_DEVICE);
+
+	return 0;
+}
+
+#else
+
+int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
+				 unsigned char *buffer, int offset,
+				 size_t count);
+
+int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
+				  const unsigned char *buffer,
+				  int offset, size_t count);
+
+#endif
+
+static struct platform_driver omap2_onenand_driver;
+
+static int __adjust_timing(struct device *dev, void *data)
+{
+	int ret = 0;
+	struct omap2_onenand *c;
+
+	c = dev_get_drvdata(dev);
+
+	BUG_ON(c->setup == NULL);
+
+	/* DMA is not in use so this is all that is needed */
+	/* Revisit for OMAP3! */
+	ret = c->setup(c->onenand.base, c->freq);
+
+	return ret;
+}
+
+int omap2_onenand_rephase(void)
+{
+	return driver_for_each_device(&omap2_onenand_driver.driver, NULL,
+				      NULL, __adjust_timing);
+}
+
+static void __devexit omap2_onenand_shutdown(struct platform_device *pdev)
+{
+	struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
+
+	/* With certain content in the buffer RAM, the OMAP boot ROM code
+	 * can recognize the flash chip incorrectly. Zero it out before
+	 * soft reset.
+	 */
+	memset((__force void *)c->onenand.base, 0, ONENAND_BUFRAM_SIZE);
+}
+
+static int __devinit omap2_onenand_probe(struct platform_device *pdev)
+{
+	struct omap_onenand_platform_data *pdata;
+	struct omap2_onenand *c;
+	int r;
+
+	pdata = pdev->dev.platform_data;
+	if (pdata == NULL) {
+		dev_err(&pdev->dev, "platform data missing\n");
+		return -ENODEV;
+	}
+
+	c = kzalloc(sizeof(struct omap2_onenand), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+
+	init_completion(&c->irq_done);
+	init_completion(&c->dma_done);
+	c->gpmc_cs = pdata->cs;
+	c->gpio_irq = pdata->gpio_irq;
+	c->dma_channel = pdata->dma_channel;
+	if (c->dma_channel < 0) {
+		/* if -1, don't use DMA */
+		c->gpio_irq = 0;
+	}
+
+	r = gpmc_cs_request(c->gpmc_cs, ONENAND_IO_SIZE, &c->phys_base);
+	if (r < 0) {
+		dev_err(&pdev->dev, "Cannot request GPMC CS\n");
+		goto err_kfree;
+	}
+
+	if (request_mem_region(c->phys_base, ONENAND_IO_SIZE,
+			       pdev->dev.driver->name) == NULL) {
+		dev_err(&pdev->dev, "Cannot reserve memory region at 0x%08lx, "
+			"size: 0x%x\n",	c->phys_base, ONENAND_IO_SIZE);
+		r = -EBUSY;
+		goto err_free_cs;
+	}
+	c->onenand.base = ioremap(c->phys_base, ONENAND_IO_SIZE);
+	if (c->onenand.base == NULL) {
+		r = -ENOMEM;
+		goto err_release_mem_region;
+	}
+
+	if (pdata->onenand_setup != NULL) {
+		r = pdata->onenand_setup(c->onenand.base, c->freq);
+		if (r < 0) {
+			dev_err(&pdev->dev, "Onenand platform setup failed: "
+				"%d\n", r);
+			goto err_iounmap;
+		}
+		c->setup = pdata->onenand_setup;
+	}
+
+	if (c->gpio_irq) {
+		if ((r = omap_request_gpio(c->gpio_irq)) < 0) {
+			dev_err(&pdev->dev,  "Failed to request GPIO%d for "
+				"OneNAND\n", c->gpio_irq);
+			goto err_iounmap;
+	}
+	omap_set_gpio_direction(c->gpio_irq, 1);
+
+	if ((r = request_irq(OMAP_GPIO_IRQ(c->gpio_irq),
+			     omap2_onenand_interrupt, IRQF_TRIGGER_RISING,
+			     pdev->dev.driver->name, c)) < 0)
+		goto err_release_gpio;
+	}
+
+	if (c->dma_channel >= 0) {
+		r = omap_request_dma(0, pdev->dev.driver->name,
+				     omap2_onenand_dma_cb, (void *) c,
+				     &c->dma_channel);
+		if (r == 0) {
+			omap_set_dma_write_mode(c->dma_channel,
+						OMAP_DMA_WRITE_NON_POSTED);
+			omap_set_dma_src_data_pack(c->dma_channel, 1);
+			omap_set_dma_src_burst_mode(c->dma_channel,
+						    OMAP_DMA_DATA_BURST_8);
+			omap_set_dma_dest_data_pack(c->dma_channel, 1);
+			omap_set_dma_dest_burst_mode(c->dma_channel,
+						     OMAP_DMA_DATA_BURST_8);
+		} else {
+			dev_info(&pdev->dev,
+				 "failed to allocate DMA for OneNAND, "
+				 "using PIO instead\n");
+			c->dma_channel = -1;
+		}
+	}
+
+	dev_info(&pdev->dev, "initializing on CS%d, phys base 0x%08lx, virtual "
+		 "base %p\n", c->gpmc_cs, c->phys_base,
+		 c->onenand.base);
+
+	c->pdev = pdev;
+	c->mtd.name = pdev->dev.bus_id;
+	c->mtd.priv = &c->onenand;
+	c->mtd.owner = THIS_MODULE;
+
+	if (c->dma_channel >= 0) {
+		struct onenand_chip *this = &c->onenand;
+
+		this->wait = omap2_onenand_wait;
+		if (cpu_is_omap34xx()) {
+			this->read_bufferram = omap3_onenand_read_bufferram;
+			this->write_bufferram = omap3_onenand_write_bufferram;
+		} else {
+			this->read_bufferram = omap2_onenand_read_bufferram;
+			this->write_bufferram = omap2_onenand_write_bufferram;
+		}
+	}
+
+	if ((r = onenand_scan(&c->mtd, 1)) < 0)
+		goto err_release_dma;
+
+	switch ((c->onenand.version_id >> 4) & 0xf) {
+	case 0:
+		c->freq = 40;
+		break;
+	case 1:
+		c->freq = 54;
+		break;
+	case 2:
+		c->freq = 66;
+		break;
+	case 3:
+		c->freq = 83;
+		break;
+	}
+
+#ifdef CONFIG_MTD_PARTITIONS
+	if (pdata->parts != NULL)
+		r = add_mtd_partitions(&c->mtd, pdata->parts,
+				       pdata->nr_parts);
+	else
+#endif
+		r = add_mtd_device(&c->mtd);
+	if (r < 0)
+		goto err_release_onenand;
+
+	platform_set_drvdata(pdev, c);
+
+	return 0;
+
+err_release_onenand:
+	onenand_release(&c->mtd);
+err_release_dma:
+	if (c->dma_channel != -1)
+		omap_free_dma(c->dma_channel);
+	if (c->gpio_irq)
+		free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c);
+err_release_gpio:
+	if (c->gpio_irq)
+		omap_free_gpio(c->gpio_irq);
+err_iounmap:
+	iounmap(c->onenand.base);
+err_release_mem_region:
+	release_mem_region(c->phys_base, ONENAND_IO_SIZE);
+err_free_cs:
+	gpmc_cs_free(c->gpmc_cs);
+err_kfree:
+	kfree(c);
+
+	return r;
+}
+
+static int __devexit omap2_onenand_remove(struct platform_device *pdev)
+{
+	struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
+
+	BUG_ON(c == NULL);
+
+#ifdef CONFIG_MTD_PARTITIONS
+	if (c->parts)
+		del_mtd_partitions(&c->mtd);
+	else
+		del_mtd_device(&c->mtd);
+#else
+	del_mtd_device(&c->mtd);
+#endif
+
+	onenand_release(&c->mtd);
+	if (c->dma_channel != -1)
+		omap_free_dma(c->dma_channel);
+	omap2_onenand_shutdown(pdev);
+	platform_set_drvdata(pdev, NULL);
+	if (c->gpio_irq) {
+		free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c);
+		omap_free_gpio(c->gpio_irq);
+	}
+	iounmap(c->onenand.base);
+	release_mem_region(c->phys_base, ONENAND_IO_SIZE);
+	kfree(c);
+
+	return 0;
+}
+
+static struct platform_driver omap2_onenand_driver = {
+	.probe		= omap2_onenand_probe,
+	.remove		= omap2_onenand_remove,
+	.shutdown	= omap2_onenand_shutdown,
+	.driver		= {
+		.name	= DRIVER_NAME,
+		.owner  = THIS_MODULE,
+	},
+};
+
+static int __init omap2_onenand_init(void)
+{
+	printk(KERN_INFO "OneNAND driver initializing\n");
+	return platform_driver_register(&omap2_onenand_driver);
+}
+
+static void __exit omap2_onenand_exit(void)
+{
+	platform_driver_unregister(&omap2_onenand_driver);
+}
+
+module_init(omap2_onenand_init);
+module_exit(omap2_onenand_exit);
+
+MODULE_ALIAS(DRIVER_NAME);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
+MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3");
diff --git a/include/asm-arm/arch-omap/onenand.h b/include/asm-arm/arch-omap/onenand.h
index 6c959d0ce470..e30237117b69 100644
--- a/include/asm-arm/arch-omap/onenand.h
+++ b/include/asm-arm/arch-omap/onenand.h
@@ -16,6 +16,11 @@ struct omap_onenand_platform_data {
 	int			gpio_irq;
 	struct mtd_partition	*parts;
 	int			nr_parts;
-	int                     (*onenand_setup)(void __iomem *);
+	int                     (*onenand_setup)(void __iomem *, int freq);
 	int			dma_channel;
 };
+
+int omap2_onenand_rephase(void);
+
+#define ONENAND_MAX_PARTITIONS 8
+
-- 
GitLab


From bde86fec7c822b6009d3cfefc20b76b8d34716af Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Thu, 14 Aug 2008 11:57:45 +0300
Subject: [PATCH 020/895] [JFFS2] Correct symlink name too long error code

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 fs/jffs2/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index cd219ef55254..b1aaae823a52 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -311,7 +311,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 	/* FIXME: If you care. We'd need to use frags for the target
 	   if it grows much more than this */
 	if (targetlen > 254)
-		return -EINVAL;
+		return -ENAMETOOLONG;
 
 	ri = jffs2_alloc_raw_inode();
 
-- 
GitLab


From 782b7a367d81da005d93b28cb00f9ae086773c24 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Thu, 14 Aug 2008 14:00:12 +0300
Subject: [PATCH 021/895] [MTD] [OneNAND] OMAP3: add delay for GPIO

On OMAP3, the driver was occasionally not seeing the GPIO
interrupt.  Adding a small delay of one register read
eliminates the problem.

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/omap2.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index 40153ace5df6..34b42533f4bb 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -141,8 +141,13 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state)
 
 		/* Turn interrupts on */
 		syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
-		syscfg |= ONENAND_SYS_CFG1_IOBE;
-		write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
+		if (!(syscfg & ONENAND_SYS_CFG1_IOBE)) {
+			syscfg |= ONENAND_SYS_CFG1_IOBE;
+			write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
+			if (cpu_is_omap34xx())
+				/* Add a delay to let GPIO settle */
+				syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+		}
 
 		INIT_COMPLETION(c->irq_done);
 		if (c->gpio_irq) {
-- 
GitLab


From e6cf5df1838c28bb060ac45b5585e48e71bbc740 Mon Sep 17 00:00:00 2001
From: frans <fransmeulenbroeks@gmail.com>
Date: Fri, 15 Aug 2008 23:14:31 +0200
Subject: [PATCH 022/895] [MTD] [NAND] nand_ecc.c: rewrite for improved
 performance

This patch improves the performance of the ecc generation code by a
factor of 18 on an INTEL D920 CPU, a factor of 7 on MIPS and a factor of 5
on ARM (NSLU2)

Signed-off-by: Frans Meulenbroeks <fransmeulenbroeks@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 Documentation/mtd/nand_ecc.txt | 714 +++++++++++++++++++++++++++++++++
 drivers/mtd/nand/nand_ecc.c    | 496 +++++++++++++++++------
 2 files changed, 1086 insertions(+), 124 deletions(-)
 create mode 100644 Documentation/mtd/nand_ecc.txt

diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
new file mode 100644
index 000000000000..bdf93b7f0f24
--- /dev/null
+++ b/Documentation/mtd/nand_ecc.txt
@@ -0,0 +1,714 @@
+Introduction
+============
+
+Having looked at the linux mtd/nand driver and more specific at nand_ecc.c
+I felt there was room for optimisation. I bashed the code for a few hours
+performing tricks like table lookup removing superfluous code etc.
+After that the speed was increased by 35-40%.
+Still I was not too happy as I felt there was additional room for improvement.
+
+Bad! I was hooked.
+I decided to annotate my steps in this file. Perhaps it is useful to someone
+or someone learns something from it.
+
+
+The problem
+===========
+
+NAND flash (at least SLC one) typically has sectors of 256 bytes.
+However NAND flash is not extremely reliable so some error detection
+(and sometimes correction) is needed.
+
+This is done by means of a Hamming code. I'll try to explain it in
+laymans terms (and apologies to all the pro's in the field in case I do
+not use the right terminology, my coding theory class was almost 30
+years ago, and I must admit it was not one of my favourites).
+
+As I said before the ecc calculation is performed on sectors of 256
+bytes. This is done by calculating several parity bits over the rows and
+columns. The parity used is even parity which means that the parity bit = 1
+if the data over which the parity is calculated is 1 and the parity bit = 0
+if the data over which the parity is calculated is 0. So the total
+number of bits over the data over which the parity is calculated + the
+parity bit is even. (see wikipedia if you can't follow this).
+Parity is often calculated by means of an exclusive or operation,
+sometimes also referred to as xor. In C the operator for xor is ^
+
+Back to ecc.
+Let's give a small figure:
+
+byte   0:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp4 ... rp14
+byte   1:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp2 rp4 ... rp14
+byte   2:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp4 ... rp14
+byte   3:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp4 ... rp14
+byte   4:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp5 ... rp14
+....
+byte 254:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp5 ... rp15
+byte 255:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp5 ... rp15
+           cp1  cp0  cp1  cp0  cp1  cp0  cp1  cp0
+           cp3  cp3  cp2  cp2  cp3  cp3  cp2  cp2
+           cp5  cp5  cp5  cp5  cp4  cp4  cp4  cp4
+
+This figure represents a sector of 256 bytes.
+cp is my abbreviaton for column parity, rp for row parity.
+
+Let's start to explain column parity.
+cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
+so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
+Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7.
+cp2 is the parity over bit0, bit1, bit4 and bit5
+cp3 is the parity over bit2, bit3, bit6 and bit7.
+cp4 is the parity over bit0, bit1, bit2 and bit3.
+cp5 is the parity over bit4, bit5, bit6 and bit7.
+Note that each of cp0 .. cp5 is exactly one bit.
+
+Row parity actually works almost the same.
+rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
+rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
+rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
+(so handle two bytes, then skip 2 bytes).
+rp3 is covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
+for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
+so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
+and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
+The story now becomes quite boring. I guess you get the idea.
+rp6 covers 8 bytes then skips 8 etc
+rp7 skips 8 bytes then covers 8 etc
+rp8 covers 16 bytes then skips 16 etc
+rp9 skips 16 bytes then covers 16 etc
+rp10 covers 32 bytes then skips 32 etc
+rp11 skips 32 bytes then covers 32 etc
+rp12 covers 64 bytes then skips 64 etc
+rp13 skips 64 bytes then covers 64 etc
+rp14 covers 128 bytes then skips 128
+rp15 skips 128 bytes then covers 128
+
+In the end the parity bits are grouped together in three bytes as
+follows:
+ECC    Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
+ECC 0   rp07  rp06  rp05  rp04  rp03  rp02  rp01  rp00
+ECC 1   rp15  rp14  rp13  rp12  rp11  rp10  rp09  rp08
+ECC 2   cp5   cp4   cp3   cp2   cp1   cp0      1     1
+
+I detected after writing this that ST application note AN1823
+(http://www.st.com/stonline/books/pdf/docs/10123.pdf) gives a much
+nicer picture.(but they use line parity as term where I use row parity)
+Oh well, I'm graphically challenged, so suffer with me for a moment :-)
+And I could not reuse the ST picture anyway for copyright reasons.
+
+
+Attempt 0
+=========
+
+Implementing the parity calculation is pretty simple.
+In C pseudocode:
+for (i = 0; i < 256; i++)
+{
+    if (i & 0x01)
+       rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
+    else
+       rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
+    if (i & 0x02)
+       rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
+    else
+       rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2;
+    if (i & 0x04)
+      rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5;
+    else
+      rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4;
+    if (i & 0x08)
+      rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7;
+    else
+      rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6;
+    if (i & 0x10)
+      rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9;
+    else
+      rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8;
+    if (i & 0x20)
+      rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
+    else
+    rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
+    if (i & 0x40)
+      rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
+    else
+      rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12;
+    if (i & 0x80)
+      rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15;
+    else
+      rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14;
+    cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0;
+    cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1;
+    cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2;
+    cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3
+    cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4
+    cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5
+}
+
+
+Analysis 0
+==========
+
+C does have bitwise operators but not really operators to do the above
+efficiently (and most hardware has no such instructions either).
+Therefore without implementing this it was clear that the code above was
+not going to bring me a Nobel prize :-)
+
+Fortunately the exclusive or operation is commutative, so we can combine
+the values in any order. So instead of calculating all the bits
+individually, let us try to rearrange things.
+For the column parity this is easy. We can just xor the bytes and in the
+end filter out the relevant bits. This is pretty nice as it will bring
+all cp calculation out of the if loop.
+
+Similarly we can first xor the bytes for the various rows.
+This leads to:
+
+
+Attempt 1
+=========
+
+const char parity[256] = {
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
+};
+
+void ecc1(const unsigned char *buf, unsigned char *code)
+{
+    int i;
+    const unsigned char *bp = buf;
+    unsigned char cur;
+    unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+    unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+    unsigned char par;
+
+    par = 0;
+    rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+    rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+    rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+    rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+    for (i = 0; i < 256; i++)
+    {
+        cur = *bp++;
+        par ^= cur;
+        if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
+        if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
+        if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
+        if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
+        if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
+        if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
+        if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
+    }
+    code[0] =
+        (parity[rp7] << 7) |
+        (parity[rp6] << 6) |
+        (parity[rp5] << 5) |
+        (parity[rp4] << 4) |
+        (parity[rp3] << 3) |
+        (parity[rp2] << 2) |
+        (parity[rp1] << 1) |
+        (parity[rp0]);
+    code[1] =
+        (parity[rp15] << 7) |
+        (parity[rp14] << 6) |
+        (parity[rp13] << 5) |
+        (parity[rp12] << 4) |
+        (parity[rp11] << 3) |
+        (parity[rp10] << 2) |
+        (parity[rp9]  << 1) |
+        (parity[rp8]);
+    code[2] =
+        (parity[par & 0xf0] << 7) |
+        (parity[par & 0x0f] << 6) |
+        (parity[par & 0xcc] << 5) |
+        (parity[par & 0x33] << 4) |
+        (parity[par & 0xaa] << 3) |
+        (parity[par & 0x55] << 2);
+    code[0] = ~code[0];
+    code[1] = ~code[1];
+    code[2] = ~code[2];
+}
+
+Still pretty straightforward. The last three invert statements are there to
+give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
+all data is 0xff, so the checksum then matches.
+
+I also introduced the parity lookup. I expected this to be the fastest
+way to calculate the parity, but I will investigate alternatives later
+on.
+
+
+Analysis 1
+==========
+
+The code works, but is not terribly efficient. On my system it took
+almost 4 times as much time as the linux driver code. But hey, if it was
+*that* easy this would have been done long before.
+No pain. no gain.
+
+Fortunately there is plenty of room for improvement.
+
+In step 1 we moved from bit-wise calculation to byte-wise calculation.
+However in C we can also use the unsigned long data type and virtually
+every modern microprocessor supports 32 bit operations, so why not try
+to write our code in such a way that we process data in 32 bit chunks.
+
+Of course this means some modification as the row parity is byte by
+byte. A quick analysis:
+for the column parity we use the par variable. When extending to 32 bits
+we can in the end easily calculate p0 and p1 from it.
+(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
+respectively)
+also rp2 and rp3 can be easily retrieved from par as rp3 covers the
+first two bytes and rp2 the last two bytes.
+
+Note that of course now the loop is executed only 64 times (256/4).
+And note that care must taken wrt byte ordering. The way bytes are
+ordered in a long is machine dependent, and might affect us.
+Anyway, if there is an issue: this code is developed on x86 (to be
+precise: a DELL PC with a D920 Intel CPU)
+
+And of course the performance might depend on alignment, but I expect
+that the I/O buffers in the nand driver are aligned properly (and
+otherwise that should be fixed to get maximum performance).
+
+Let's give it a try...
+
+
+Attempt 2
+=========
+
+extern const char parity[256];
+
+void ecc2(const unsigned char *buf, unsigned char *code)
+{
+    int i;
+    const unsigned long *bp = (unsigned long *)buf;
+    unsigned long cur;
+    unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+    unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+    unsigned long par;
+
+    par = 0;
+    rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+    rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+    rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+    rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+    for (i = 0; i < 64; i++)
+    {
+        cur = *bp++;
+        par ^= cur;
+        if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+        if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+        if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+        if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+        if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+    }
+    /*
+       we need to adapt the code generation for the fact that rp vars are now
+       long; also the column parity calculation needs to be changed.
+       we'll bring rp4 to 15 back to single byte entities by shifting and
+       xoring
+    */
+    rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
+    rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
+    rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
+    rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
+    rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
+    rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
+    rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
+    rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
+    rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
+    rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
+    rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
+    rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
+    rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
+    rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
+    par ^= (par >> 16);
+    rp1 = (par >> 8); rp1 &= 0xff;
+    rp0 = (par & 0xff);
+    par ^= (par >> 8); par &= 0xff;
+
+    code[0] =
+        (parity[rp7] << 7) |
+        (parity[rp6] << 6) |
+        (parity[rp5] << 5) |
+        (parity[rp4] << 4) |
+        (parity[rp3] << 3) |
+        (parity[rp2] << 2) |
+        (parity[rp1] << 1) |
+        (parity[rp0]);
+    code[1] =
+        (parity[rp15] << 7) |
+        (parity[rp14] << 6) |
+        (parity[rp13] << 5) |
+        (parity[rp12] << 4) |
+        (parity[rp11] << 3) |
+        (parity[rp10] << 2) |
+        (parity[rp9]  << 1) |
+        (parity[rp8]);
+    code[2] =
+        (parity[par & 0xf0] << 7) |
+        (parity[par & 0x0f] << 6) |
+        (parity[par & 0xcc] << 5) |
+        (parity[par & 0x33] << 4) |
+        (parity[par & 0xaa] << 3) |
+        (parity[par & 0x55] << 2);
+    code[0] = ~code[0];
+    code[1] = ~code[1];
+    code[2] = ~code[2];
+}
+
+The parity array is not shown any more. Note also that for these
+examples I kinda deviated from my regular programming style by allowing
+multiple statements on a line, not using { } in then and else blocks
+with only a single statement and by using operators like ^=
+
+
+Analysis 2
+==========
+
+The code (of course) works, and hurray: we are a little bit faster than
+the linux driver code (about 15%). But wait, don't cheer too quickly.
+THere is more to be gained.
+If we look at e.g. rp14 and rp15 we see that we either xor our data with
+rp14 or with rp15. However we also have par which goes over all data.
+This means there is no need to calculate rp14 as it can be calculated from
+rp15 through rp14 = par ^ rp15;
+(or if desired we can avoid calculating rp15 and calculate it from
+rp14).  That is why some places refer to inverse parity.
+Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
+Effectively this means we can eliminate the else clause from the if
+statements. Also we can optimise the calculation in the end a little bit
+by going from long to byte first. Actually we can even avoid the table
+lookups
+
+Attempt 3
+=========
+
+Odd replaced:
+        if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+        if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+        if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+        if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+        if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+with
+        if (i & 0x01) rp5 ^= cur;
+        if (i & 0x02) rp7 ^= cur;
+        if (i & 0x04) rp9 ^= cur;
+        if (i & 0x08) rp11 ^= cur;
+        if (i & 0x10) rp13 ^= cur;
+        if (i & 0x20) rp15 ^= cur;
+
+        and outside the loop added:
+    rp4  = par ^ rp5;
+    rp6  = par ^ rp7;
+    rp8  = par ^ rp9;
+    rp10  = par ^ rp11;
+    rp12  = par ^ rp13;
+    rp14  = par ^ rp15;
+
+And after that the code takes about 30% more time, although the number of
+statements is reduced. This is also reflected in the assembly code.
+
+
+Analysis 3
+==========
+
+Very weird. Guess it has to do with caching or instruction parallellism
+or so. I also tried on an eeePC (Celeron, clocked at 900 Mhz). Interesting
+observation was that this one is only 30% slower (according to time)
+executing the code as my 3Ghz D920 processor.
+
+Well, it was expected not to be easy so maybe instead move to a
+different track: let's move back to the code from attempt2 and do some
+loop unrolling. This will eliminate a few if statements. I'll try
+different amounts of unrolling to see what works best.
+
+
+Attempt 4
+=========
+
+Unrolled the loop 1, 2, 3 and 4 times.
+For 4 the code starts with:
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++;
+        par ^= cur;
+        rp4 ^= cur;
+        rp6 ^= cur;
+        rp8 ^= cur;
+        rp10 ^= cur;
+        if (i & 0x1) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x2) rp15 ^= cur; else rp14 ^= cur;
+        cur = *bp++;
+        par ^= cur;
+        rp5 ^= cur;
+        rp6 ^= cur;
+        ...
+
+
+Analysis 4
+==========
+
+Unrolling once gains about 15%
+Unrolling twice keeps the gain at about 15%
+Unrolling three times gives a gain of 30% compared to attempt 2.
+Unrolling four times gives a marginal improvement compared to unrolling
+three times.
+
+I decided to proceed with a four time unrolled loop anyway. It was my gut
+feeling that in the next steps I would obtain additional gain from it.
+
+The next step was triggered by the fact that par contains the xor of all
+bytes and rp4 and rp5 each contain the xor of half of the bytes.
+So in effect par = rp4 ^ rp5. But as xor is commutative we can also say
+that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can
+eliminate rp5 (or rp4, but I already foresaw another optimisation).
+The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15.
+
+
+Attempt 5
+=========
+
+Effectively so all odd digit rp assignments in the loop were removed.
+This included the else clause of the if statements.
+Of course after the loop we need to correct things by adding code like:
+    rp5 = par ^ rp4;
+Also the initial assignments (rp5 = 0; etc) could be removed.
+Along the line I also removed the initialisation of rp0/1/2/3.
+
+
+Analysis 5
+==========
+
+Measurements showed this was a good move. The run-time roughly halved
+compared with attempt 4 with 4 times unrolled, and we only require 1/3rd
+of the processor time compared to the current code in the linux kernel.
+
+However, still I thought there was more. I didn't like all the if
+statements. Why not keep a running parity and only keep the last if
+statement. Time for yet another version!
+
+
+Attempt 6
+=========
+
+THe code within the for loop was changed to:
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++; tmppar  = cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+	    cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= cur;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+
+	    par ^= tmppar;
+        if ((i & 0x1) == 0) rp12 ^= tmppar;
+        if ((i & 0x2) == 0) rp14 ^= tmppar;
+    }
+
+As you can see tmppar is used to accumulate the parity within a for
+iteration. In the last 3 statements is is added to par and, if needed,
+to rp12 and rp14.
+
+While making the changes I also found that I could exploit that tmppar
+contains the running parity for this iteration. So instead of having:
+rp4 ^= cur; rp6 = cur;
+I removed the rp6 = cur; statement and did rp6 ^= tmppar; on next
+statement. A similar change was done for rp8 and rp10
+
+
+Analysis 6
+==========
+
+Measuring this code again showed big gain. When executing the original
+linux code 1 million times, this took about 1 second on my system.
+(using time to measure the performance). After this iteration I was back
+to 0.075 sec. Actually I had to decide to start measuring over 10
+million interations in order not to loose too much accuracy. This one
+definitely seemed to be the jackpot!
+
+There is a little bit more room for improvement though. There are three
+places with statements:
+rp4 ^= cur; rp6 ^= cur;
+It seems more efficient to also maintain a variable rp4_6 in the while
+loop; This eliminates 3 statements per loop. Of course after the loop we
+need to correct by adding:
+    rp4 ^= rp4_6;
+    rp6 ^= rp4_6
+Furthermore there are 4 sequential assingments to rp8. This can be
+encoded slightly more efficient by saving tmppar before those 4 lines
+and later do rp8 = rp8 ^ tmppar ^ notrp8;
+(where notrp8 is the value of rp8 before those 4 lines).
+Again a use of the commutative property of xor.
+Time for a new test!
+
+
+Attempt 7
+=========
+
+The new code now looks like:
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++; tmppar  = cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+	    notrp8 = tmppar;
+	    cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+	    rp8 = rp8 ^ tmppar ^ notrp8;
+
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+
+	    par ^= tmppar;
+        if ((i & 0x1) == 0) rp12 ^= tmppar;
+        if ((i & 0x2) == 0) rp14 ^= tmppar;
+    }
+    rp4 ^= rp4_6;
+    rp6 ^= rp4_6;
+
+
+Not a big change, but every penny counts :-)
+
+
+Analysis 7
+==========
+
+Acutally this made things worse. Not very much, but I don't want to move
+into the wrong direction. Maybe something to investigate later. Could
+have to do with caching again.
+
+Guess that is what there is to win within the loop. Maybe unrolling one
+more time will help. I'll keep the optimisations from 7 for now.
+
+
+Attempt 8
+=========
+
+Unrolled the loop one more time.
+
+
+Analysis 8
+==========
+
+This makes things worse. Let's stick with attempt 6 and continue from there.
+Although it seems that the code within the loop cannot be optimised
+further there is still room to optimize the generation of the ecc codes.
+We can simply calcualate the total parity. If this is 0 then rp4 = rp5
+etc. If the parity is 1, then rp4 = !rp5;
+But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
+in the result byte and then do something like
+    code[0] |= (code[0] << 1);
+Lets test this.
+
+
+Attempt 9
+=========
+
+Changed the code but again this slightly degrades performance. Tried all
+kind of other things, like having dedicated parity arrays to avoid the
+shift after parity[rp7] << 7; No gain.
+Change the lookup using the parity array by using shift operators (e.g.
+replace parity[rp7] << 7 with:
+rp7 ^= (rp7 << 4);
+rp7 ^= (rp7 << 2);
+rp7 ^= (rp7 << 1);
+rp7 &= 0x80;
+No gain.
+
+The only marginal change was inverting the parity bits, so we can remove
+the last three invert statements.
+
+Ah well, pity this does not deliver more. Then again 10 million
+iterations using the linux driver code takes between 13 and 13.5
+seconds, whereas my code now takes about 0.73 seconds for those 10
+million iterations. So basically I've improved the performance by a
+factor 18 on my system. Not that bad. Of course on different hardware
+you will get different results. No warranties!
+
+But of course there is no such thing as a free lunch. The codesize almost
+tripled (from 562 bytes to 1434 bytes). Then again, it is not that much.
+
+
+Correcting errors
+=================
+
+For correcting errors I again used the ST application note as a starter,
+but I also peeked at the existing code.
+The algorithm itself is pretty straightforward. Just xor the given and
+the calculated ecc. If all bytes are 0 there is no problem. If 11 bits
+are 1 we have one correctable bit error. If there is 1 bit 1, we have an
+error in the given ecc code.
+It proved to be fastest to do some table lookups. Performance gain
+introduced by this is about a factor 2 on my system when a repair had to
+be done, and 1% or so if no repair had to be done.
+Code size increased from 330 bytes to 686 bytes for this function.
+(gcc 4.2, -O3)
+
+
+Conclusion
+==========
+
+The gain when calculating the ecc is tremendous. Om my development hardware
+a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
+embedded system with a MIPS core a factor 7 was obtained.
+On  a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
+5 (big endian mode, gcc 4.1.2, -O3)
+For correction not much gain could be obtained (as bitflips are rare). Then
+again there are also much less cycles spent there.
+
+It seems there is not much more gain possible in this, at least when
+programmed in C. Of course it might be possible to squeeze something more
+out of it with an assembler program, but due to pipeline behaviour etc
+this is very tricky (at least for intel hw).
+
+Author: Frans Meulenbroeks
+Copyright (C) 2008 Koninklijke Philips Electronics NV.
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index 918a806a8471..7129da51bb33 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -1,13 +1,18 @@
 /*
- * This file contains an ECC algorithm from Toshiba that detects and
- * corrects 1 bit errors in a 256 byte block of data.
+ * This file contains an ECC algorithm that detects and corrects 1 bit
+ * errors in a 256 byte block of data.
  *
  * drivers/mtd/nand/nand_ecc.c
  *
- * Copyright (C) 2000-2004 Steven J. Hill (sjhill@realitydiluted.com)
- *                         Toshiba America Electronics Components, Inc.
+ * Copyright (C) 2008 Koninklijke Philips Electronics NV.
+ *                    Author: Frans Meulenbroeks
  *
- * Copyright (C) 2006 Thomas Gleixner <tglx@linutronix.de>
+ * Completely replaces the previous ECC implementation which was written by:
+ *   Steven J. Hill (sjhill@realitydiluted.com)
+ *   Thomas Gleixner (tglx@linutronix.de)
+ *
+ * Information on how this algorithm works and how it was developed
+ * can be found in Documentation/nand/ecc.txt
  *
  * This file is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -23,174 +28,417 @@
  * with this file; if not, write to the Free Software Foundation, Inc.,
  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  *
- * As a special exception, if other files instantiate templates or use
- * macros or inline functions from these files, or you compile these
- * files and link them with other works to produce a work based on these
- * files, these files do not by themselves cause the resulting work to be
- * covered by the GNU General Public License. However the source code for
- * these files must still be made available in accordance with section (3)
- * of the GNU General Public License.
- *
- * This exception does not invalidate any other reasons why a work based on
- * this file might be covered by the GNU General Public License.
  */
 
+/*
+ * The STANDALONE macro is useful when running the code outside the kernel
+ * e.g. when running the code in a testbed or a benchmark program.
+ * When STANDALONE is used, the module related macros are commented out
+ * as well as the linux include files.
+ * Instead a private definition of mtd_into is given to satisfy the compiler
+ * (the code does not use mtd_info, so the code does not care)
+ */
+#ifndef STANDALONE
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mtd/nand_ecc.h>
+#else
+typedef uint32_t unsigned long
+struct mtd_info {
+	int dummy;
+};
+#define EXPORT_SYMBOL(x)  /* x */
+
+#define MODULE_LICENSE(x)	/* x */
+#define MODULE_AUTHOR(x)	/* x */
+#define MODULE_DESCRIPTION(x)	/* x */
+#endif
+
+/*
+ * invparity is a 256 byte table that contains the odd parity
+ * for each byte. So if the number of bits in a byte is even,
+ * the array element is 1, and when the number of bits is odd
+ * the array eleemnt is 0.
+ */
+static const char invparity[256] = {
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
+};
 
 /*
- * Pre-calculated 256-way 1 byte column parity
+ * bitsperbyte contains the number of bits per byte
+ * this is only used for testing and repairing parity
+ * (a precalculated value slightly improves performance)
  */
-static const u_char nand_ecc_precalc_table[] = {
-	0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00,
-	0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65,
-	0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66,
-	0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03,
-	0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69,
-	0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c,
-	0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f,
-	0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a,
-	0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a,
-	0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f,
-	0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c,
-	0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69,
-	0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03,
-	0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66,
-	0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65,
-	0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00
+static const char bitsperbyte[256] = {
+	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
+};
+
+/*
+ * addressbits is a lookup table to filter out the bits from the xor-ed
+ * ecc data that identify the faulty location.
+ * this is only used for repairing parity
+ * see the comments in nand_correct_data for more details
+ */
+static const char addressbits[256] = {
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
 };
 
 /**
  * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block
- * @mtd:	MTD block structure
+ * @mtd:	MTD block structure (unused)
  * @dat:	raw data
  * @ecc_code:	buffer for ECC
  */
-int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
-		       u_char *ecc_code)
+int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+		       unsigned char *code)
 {
-	uint8_t idx, reg1, reg2, reg3, tmp1, tmp2;
 	int i;
+	const uint32_t *bp = (uint32_t *)buf;
+	uint32_t cur;		/* current value in buffer */
+	/* rp0..rp15 are the various accumulated parities (per byte) */
+	uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+	uint32_t rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+	uint32_t par;		/* the cumulative parity for all data */
+	uint32_t tmppar;	/* the cumulative parity for this iteration;
+				   for rp12 and rp14 at the end of the loop */
+
+	par = 0;
+	rp4 = 0;
+	rp6 = 0;
+	rp8 = 0;
+	rp10 = 0;
+	rp12 = 0;
+	rp14 = 0;
+
+	/*
+	 * The loop is unrolled a number of times;
+	 * This avoids if statements to decide on which rp value to update
+	 * Also we process the data by longwords.
+	 * Note: passing unaligned data might give a performance penalty.
+	 * It is assumed that the buffers are aligned.
+	 * tmppar is the cumulative sum of this iteration.
+	 * needed for calculating rp12, rp14 and par
+	 * also used as a performance improvement for rp6, rp8 and rp10
+	 */
+	for (i = 0; i < 4; i++) {
+		cur = *bp++;
+		tmppar = cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= tmppar;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp8 ^= tmppar;
 
-	/* Initialize variables */
-	reg1 = reg2 = reg3 = 0;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp10 ^= tmppar;
 
-	/* Build up column parity */
-	for(i = 0; i < 256; i++) {
-		/* Get CP0 - CP5 from table */
-		idx = nand_ecc_precalc_table[*dat++];
-		reg1 ^= (idx & 0x3f);
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp6 ^= cur;
+		rp8 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= cur;
+		rp8 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp8 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp8 ^= cur;
 
-		/* All bit XOR = 1 ? */
-		if (idx & 0x40) {
-			reg3 ^= (uint8_t) i;
-			reg2 ^= ~((uint8_t) i);
-		}
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+
+		par ^= tmppar;
+		if ((i & 0x1) == 0)
+			rp12 ^= tmppar;
+		if ((i & 0x2) == 0)
+			rp14 ^= tmppar;
 	}
 
-	/* Create non-inverted ECC code from line parity */
-	tmp1  = (reg3 & 0x80) >> 0; /* B7 -> B7 */
-	tmp1 |= (reg2 & 0x80) >> 1; /* B7 -> B6 */
-	tmp1 |= (reg3 & 0x40) >> 1; /* B6 -> B5 */
-	tmp1 |= (reg2 & 0x40) >> 2; /* B6 -> B4 */
-	tmp1 |= (reg3 & 0x20) >> 2; /* B5 -> B3 */
-	tmp1 |= (reg2 & 0x20) >> 3; /* B5 -> B2 */
-	tmp1 |= (reg3 & 0x10) >> 3; /* B4 -> B1 */
-	tmp1 |= (reg2 & 0x10) >> 4; /* B4 -> B0 */
-
-	tmp2  = (reg3 & 0x08) << 4; /* B3 -> B7 */
-	tmp2 |= (reg2 & 0x08) << 3; /* B3 -> B6 */
-	tmp2 |= (reg3 & 0x04) << 3; /* B2 -> B5 */
-	tmp2 |= (reg2 & 0x04) << 2; /* B2 -> B4 */
-	tmp2 |= (reg3 & 0x02) << 2; /* B1 -> B3 */
-	tmp2 |= (reg2 & 0x02) << 1; /* B1 -> B2 */
-	tmp2 |= (reg3 & 0x01) << 1; /* B0 -> B1 */
-	tmp2 |= (reg2 & 0x01) << 0; /* B7 -> B0 */
-
-	/* Calculate final ECC code */
+	/*
+	 * handle the fact that we use longword operations
+	 * we'll bring rp4..rp14 back to single byte entities by shifting and
+	 * xoring first fold the upper and lower 16 bits,
+	 * then the upper and lower 8 bits.
+	 */
+	rp4 ^= (rp4 >> 16);
+	rp4 ^= (rp4 >> 8);
+	rp4 &= 0xff;
+	rp6 ^= (rp6 >> 16);
+	rp6 ^= (rp6 >> 8);
+	rp6 &= 0xff;
+	rp8 ^= (rp8 >> 16);
+	rp8 ^= (rp8 >> 8);
+	rp8 &= 0xff;
+	rp10 ^= (rp10 >> 16);
+	rp10 ^= (rp10 >> 8);
+	rp10 &= 0xff;
+	rp12 ^= (rp12 >> 16);
+	rp12 ^= (rp12 >> 8);
+	rp12 &= 0xff;
+	rp14 ^= (rp14 >> 16);
+	rp14 ^= (rp14 >> 8);
+	rp14 &= 0xff;
+
+	/*
+	 * we also need to calculate the row parity for rp0..rp3
+	 * This is present in par, because par is now
+	 * rp3 rp3 rp2 rp2
+	 * as well as
+	 * rp1 rp0 rp1 rp0
+	 * First calculate rp2 and rp3
+	 * (and yes: rp2 = (par ^ rp3) & 0xff; but doing that did not
+	 * give a performance improvement)
+	 */
+	rp3 = (par >> 16);
+	rp3 ^= (rp3 >> 8);
+	rp3 &= 0xff;
+	rp2 = par & 0xffff;
+	rp2 ^= (rp2 >> 8);
+	rp2 &= 0xff;
+
+	/* reduce par to 16 bits then calculate rp1 and rp0 */
+	par ^= (par >> 16);
+	rp1 = (par >> 8) & 0xff;
+	rp0 = (par & 0xff);
+
+	/* finally reduce par to 8 bits */
+	par ^= (par >> 8);
+	par &= 0xff;
+
+	/*
+	 * and calculate rp5..rp15
+	 * note that par = rp4 ^ rp5 and due to the commutative property
+	 * of the ^ operator we can say:
+	 * rp5 = (par ^ rp4);
+	 * The & 0xff seems superfluous, but benchmarking learned that
+	 * leaving it out gives slightly worse results. No idea why, probably
+	 * it has to do with the way the pipeline in pentium is organized.
+	 */
+	rp5 = (par ^ rp4) & 0xff;
+	rp7 = (par ^ rp6) & 0xff;
+	rp9 = (par ^ rp8) & 0xff;
+	rp11 = (par ^ rp10) & 0xff;
+	rp13 = (par ^ rp12) & 0xff;
+	rp15 = (par ^ rp14) & 0xff;
+
+	/*
+	 * Finally calculate the ecc bits.
+	 * Again here it might seem that there are performance optimisations
+	 * possible, but benchmarks showed that on the system this is developed
+	 * the code below is the fastest
+	 */
 #ifdef CONFIG_MTD_NAND_ECC_SMC
-	ecc_code[0] = ~tmp2;
-	ecc_code[1] = ~tmp1;
+	code[0] =
+	    (invparity[rp7] << 7) |
+	    (invparity[rp6] << 6) |
+	    (invparity[rp5] << 5) |
+	    (invparity[rp4] << 4) |
+	    (invparity[rp3] << 3) |
+	    (invparity[rp2] << 2) |
+	    (invparity[rp1] << 1) |
+	    (invparity[rp0]);
+	code[1] =
+	    (invparity[rp15] << 7) |
+	    (invparity[rp14] << 6) |
+	    (invparity[rp13] << 5) |
+	    (invparity[rp12] << 4) |
+	    (invparity[rp11] << 3) |
+	    (invparity[rp10] << 2) |
+	    (invparity[rp9] << 1)  |
+	    (invparity[rp8]);
 #else
-	ecc_code[0] = ~tmp1;
-	ecc_code[1] = ~tmp2;
+	code[1] =
+	    (invparity[rp7] << 7) |
+	    (invparity[rp6] << 6) |
+	    (invparity[rp5] << 5) |
+	    (invparity[rp4] << 4) |
+	    (invparity[rp3] << 3) |
+	    (invparity[rp2] << 2) |
+	    (invparity[rp1] << 1) |
+	    (invparity[rp0]);
+	code[0] =
+	    (invparity[rp15] << 7) |
+	    (invparity[rp14] << 6) |
+	    (invparity[rp13] << 5) |
+	    (invparity[rp12] << 4) |
+	    (invparity[rp11] << 3) |
+	    (invparity[rp10] << 2) |
+	    (invparity[rp9] << 1)  |
+	    (invparity[rp8]);
 #endif
-	ecc_code[2] = ((~reg1) << 2) | 0x03;
-
+	code[2] =
+	    (invparity[par & 0xf0] << 7) |
+	    (invparity[par & 0x0f] << 6) |
+	    (invparity[par & 0xcc] << 5) |
+	    (invparity[par & 0x33] << 4) |
+	    (invparity[par & 0xaa] << 3) |
+	    (invparity[par & 0x55] << 2) |
+	    3;
 	return 0;
 }
 EXPORT_SYMBOL(nand_calculate_ecc);
 
-static inline int countbits(uint32_t byte)
-{
-	int res = 0;
-
-	for (;byte; byte >>= 1)
-		res += byte & 0x01;
-	return res;
-}
-
 /**
  * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
- * @mtd:	MTD block structure
+ * @mtd:	MTD block structure (unused)
  * @dat:	raw data read from the chip
  * @read_ecc:	ECC from the chip
  * @calc_ecc:	the ECC calculated from raw data
  *
  * Detect and correct a 1 bit error for 256 byte block
  */
-int nand_correct_data(struct mtd_info *mtd, u_char *dat,
-		      u_char *read_ecc, u_char *calc_ecc)
+int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
+		      unsigned char *read_ecc, unsigned char *calc_ecc)
 {
-	uint8_t s0, s1, s2;
+	int nr_bits;
+	unsigned char b0, b1, b2;
+	unsigned char byte_addr, bit_addr;
 
+	/*
+	 * b0 to b2 indicate which bit is faulty (if any)
+	 * we might need the xor result  more than once,
+	 * so keep them in a local var
+	*/
 #ifdef CONFIG_MTD_NAND_ECC_SMC
-	s0 = calc_ecc[0] ^ read_ecc[0];
-	s1 = calc_ecc[1] ^ read_ecc[1];
-	s2 = calc_ecc[2] ^ read_ecc[2];
+	b0 = read_ecc[0] ^ calc_ecc[0];
+	b1 = read_ecc[1] ^ calc_ecc[1];
 #else
-	s1 = calc_ecc[0] ^ read_ecc[0];
-	s0 = calc_ecc[1] ^ read_ecc[1];
-	s2 = calc_ecc[2] ^ read_ecc[2];
+	b0 = read_ecc[1] ^ calc_ecc[1];
+	b1 = read_ecc[0] ^ calc_ecc[0];
 #endif
-	if ((s0 | s1 | s2) == 0)
-		return 0;
-
-	/* Check for a single bit error */
-	if( ((s0 ^ (s0 >> 1)) & 0x55) == 0x55 &&
-	    ((s1 ^ (s1 >> 1)) & 0x55) == 0x55 &&
-	    ((s2 ^ (s2 >> 1)) & 0x54) == 0x54) {
-
-		uint32_t byteoffs, bitnum;
+	b2 = read_ecc[2] ^ calc_ecc[2];
 
-		byteoffs = (s1 << 0) & 0x80;
-		byteoffs |= (s1 << 1) & 0x40;
-		byteoffs |= (s1 << 2) & 0x20;
-		byteoffs |= (s1 << 3) & 0x10;
+	/* check if there are any bitfaults */
 
-		byteoffs |= (s0 >> 4) & 0x08;
-		byteoffs |= (s0 >> 3) & 0x04;
-		byteoffs |= (s0 >> 2) & 0x02;
-		byteoffs |= (s0 >> 1) & 0x01;
+	/* count nr of bits; use table lookup, faster than calculating it */
+	nr_bits = bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2];
 
-		bitnum = (s2 >> 5) & 0x04;
-		bitnum |= (s2 >> 4) & 0x02;
-		bitnum |= (s2 >> 3) & 0x01;
-
-		dat[byteoffs] ^= (1 << bitnum);
-
-		return 1;
+	/* repeated if statements are slightly more efficient than switch ... */
+	/* ordered in order of likelihood */
+	if (nr_bits == 0)
+		return (0);	/* no error */
+	if (nr_bits == 11) {	/* correctable error */
+		/*
+		 * rp15/13/11/9/7/5/3/1 indicate which byte is the faulty byte
+		 * cp 5/3/1 indicate the faulty bit.
+		 * A lookup table (called addressbits) is used to filter
+		 * the bits from the byte they are in.
+		 * A marginal optimisation is possible by having three
+		 * different lookup tables.
+		 * One as we have now (for b0), one for b2
+		 * (that would avoid the >> 1), and one for b1 (with all values
+		 * << 4). However it was felt that introducing two more tables
+		 * hardly justify the gain.
+		 *
+		 * The b2 shift is there to get rid of the lowest two bits.
+		 * We could also do addressbits[b2] >> 1 but for the
+		 * performace it does not make any difference
+		 */
+		byte_addr = (addressbits[b1] << 4) + addressbits[b0];
+		bit_addr = addressbits[b2 >> 2];
+		/* flip the bit */
+		buf[byte_addr] ^= (1 << bit_addr);
+		return (1);
 	}
-
-	if(countbits(s0 | ((uint32_t)s1 << 8) | ((uint32_t)s2 <<16)) == 1)
-		return 1;
-
-	return -EBADMSG;
+	if (nr_bits == 1)
+		return (1);	/* error in ecc data; no action needed */
+	return -1;
 }
 EXPORT_SYMBOL(nand_correct_data);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Steven J. Hill <sjhill@realitydiluted.com>");
+MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
 MODULE_DESCRIPTION("Generic NAND ECC support");
-- 
GitLab


From ccbcd6cba5ef6e071deb072188ad044921f6b91e Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 16 Aug 2008 11:01:31 +0100
Subject: [PATCH 023/895] [MTD] [NAND] Minor cleanup of nand_ecc.c

Make the standalone stuff a little cleaner, fix some checkpatch warnings.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_ecc.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index 7129da51bb33..a8e8413ca2bc 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -4,15 +4,15 @@
  *
  * drivers/mtd/nand/nand_ecc.c
  *
- * Copyright (C) 2008 Koninklijke Philips Electronics NV.
- *                    Author: Frans Meulenbroeks
+ * Copyright Â© 2008 Koninklijke Philips Electronics NV.
+ *                  Author: Frans Meulenbroeks
  *
  * Completely replaces the previous ECC implementation which was written by:
  *   Steven J. Hill (sjhill@realitydiluted.com)
  *   Thomas Gleixner (tglx@linutronix.de)
  *
  * Information on how this algorithm works and how it was developed
- * can be found in Documentation/nand/ecc.txt
+ * can be found in Documentation/mtd/nand_ecc.txt
  *
  * This file is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -35,7 +35,7 @@
  * e.g. when running the code in a testbed or a benchmark program.
  * When STANDALONE is used, the module related macros are commented out
  * as well as the linux include files.
- * Instead a private definition of mtd_into is given to satisfy the compiler
+ * Instead a private definition of mtd_info is given to satisfy the compiler
  * (the code does not use mtd_info, so the code does not care)
  */
 #ifndef STANDALONE
@@ -44,10 +44,8 @@
 #include <linux/module.h>
 #include <linux/mtd/nand_ecc.h>
 #else
-typedef uint32_t unsigned long
-struct mtd_info {
-	int dummy;
-};
+#include <stdint.h>
+struct mtd_info;
 #define EXPORT_SYMBOL(x)  /* x */
 
 #define MODULE_LICENSE(x)	/* x */
@@ -409,7 +407,7 @@ int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
 	/* repeated if statements are slightly more efficient than switch ... */
 	/* ordered in order of likelihood */
 	if (nr_bits == 0)
-		return (0);	/* no error */
+		return 0;	/* no error */
 	if (nr_bits == 11) {	/* correctable error */
 		/*
 		 * rp15/13/11/9/7/5/3/1 indicate which byte is the faulty byte
@@ -431,10 +429,10 @@ int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
 		bit_addr = addressbits[b2 >> 2];
 		/* flip the bit */
 		buf[byte_addr] ^= (1 << bit_addr);
-		return (1);
+		return 1;
 	}
 	if (nr_bits == 1)
-		return (1);	/* error in ecc data; no action needed */
+		return 1;	/* error in ecc data; no action needed */
 	return -1;
 }
 EXPORT_SYMBOL(nand_correct_data);
-- 
GitLab


From 8ee991dd343df57910ff6947696afada9f02bf7e Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Sun, 17 Aug 2008 07:50:44 +0800
Subject: [PATCH 024/895] [MTD] removed unused #include <version.h>

The drivers below do not use LINUX_VERSION_CODE nor KERNEL_VERSION.
  drivers/mtd/maps/amd76xrom.c
  drivers/mtd/maps/ck804xrom.c
  drivers/mtd/maps/esb2rom.c

This patch removes the said #include <version.h>.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/amd76xrom.c | 1 -
 drivers/mtd/maps/ck804xrom.c | 1 -
 drivers/mtd/maps/esb2rom.c   | 1 -
 3 files changed, 3 deletions(-)

diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index 948b86f35ef4..d1eec7d3243f 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -6,7 +6,6 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <asm/io.h>
diff --git a/drivers/mtd/maps/ck804xrom.c b/drivers/mtd/maps/ck804xrom.c
index effaf7cdefab..1a6feb4474de 100644
--- a/drivers/mtd/maps/ck804xrom.c
+++ b/drivers/mtd/maps/ck804xrom.c
@@ -9,7 +9,6 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <asm/io.h>
diff --git a/drivers/mtd/maps/esb2rom.c b/drivers/mtd/maps/esb2rom.c
index aa64a4752781..bbbcdd4c8d13 100644
--- a/drivers/mtd/maps/esb2rom.c
+++ b/drivers/mtd/maps/esb2rom.c
@@ -12,7 +12,6 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <asm/io.h>
-- 
GitLab


From 75caf6b5acc6b895df9bdd36db631220e1096e9f Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Mon, 18 Aug 2008 16:23:53 +0100
Subject: [PATCH 025/895] [JFFS2] Fill in f_fsid field in jffs2_statfs()

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 fs/jffs2/fs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 086c43830221..89e9b735d8df 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -207,6 +207,8 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_files = 0;
 	buf->f_ffree = 0;
 	buf->f_namelen = JFFS2_MAX_NAME_LEN;
+	buf->f_fsid.val[0] = JFFS2_SUPER_MAGIC;
+	buf->f_fsid.val[1] = c->mtd->index;
 
 	spin_lock(&c->erase_completion_lock);
 	avail = c->dirty_size + c->free_size;
-- 
GitLab


From 1077be58ad7baadd86e47e8b4f6209fa5b6364a5 Mon Sep 17 00:00:00 2001
From: frans <fransmeulenbroeks@gmail.com>
Date: Wed, 20 Aug 2008 21:11:50 +0200
Subject: [PATCH 026/895] [MTD] [NAND] nand_ecc.c: fix big endian, strengthen
 test, add printk

This patch for nand_ecc.c fixes three issues

- fix code so it also works on big endian architectures
- added a printk in case of an uncorrectable ecc error
- strengthen the test for correctable errors (decreasing the chance
  that multiple bit faults by accident will be seen as correctable)

Note: the big endian code is only tested in a testbed (running on big endian
hardware) as I cannot rebuild and test a big endian kernel at the moment.
However the only thing that can go wrong is if <asm/byteorder.h> does not
give __BIG_ENDIAN in that case. In my eyes very unlikely.

Signed-off-by: Frans Meulenbroeks <fransmeulenbroeks@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_ecc.c | 44 +++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index a8e8413ca2bc..d99e569e999f 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -43,6 +43,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mtd/nand_ecc.h>
+#include <asm/byteorder.h>
 #else
 #include <stdint.h>
 struct mtd_info;
@@ -51,6 +52,9 @@ struct mtd_info;
 #define MODULE_LICENSE(x)	/* x */
 #define MODULE_AUTHOR(x)	/* x */
 #define MODULE_DESCRIPTION(x)	/* x */
+
+#define printk printf
+#define KERN_ERR		""
 #endif
 
 /*
@@ -273,24 +277,38 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 	/*
 	 * we also need to calculate the row parity for rp0..rp3
 	 * This is present in par, because par is now
-	 * rp3 rp3 rp2 rp2
+	 * rp3 rp3 rp2 rp2 in little endian and
+	 * rp2 rp2 rp3 rp3 in big endian
 	 * as well as
-	 * rp1 rp0 rp1 rp0
+	 * rp1 rp0 rp1 rp0 in little endian and
+	 * rp0 rp1 rp0 rp1 in big endian
 	 * First calculate rp2 and rp3
-	 * (and yes: rp2 = (par ^ rp3) & 0xff; but doing that did not
-	 * give a performance improvement)
 	 */
+#ifdef __BIG_ENDIAN
+	rp2 = (par >> 16);
+	rp2 ^= (rp2 >> 8);
+	rp2 &= 0xff;
+	rp3 = par & 0xffff;
+	rp3 ^= (rp3 >> 8);
+	rp3 &= 0xff;
+#else
 	rp3 = (par >> 16);
 	rp3 ^= (rp3 >> 8);
 	rp3 &= 0xff;
 	rp2 = par & 0xffff;
 	rp2 ^= (rp2 >> 8);
 	rp2 &= 0xff;
+#endif
 
 	/* reduce par to 16 bits then calculate rp1 and rp0 */
 	par ^= (par >> 16);
+#ifdef __BIG_ENDIAN
+	rp0 = (par >> 8) & 0xff;
+	rp1 = (par & 0xff);
+#else
 	rp1 = (par >> 8) & 0xff;
 	rp0 = (par & 0xff);
+#endif
 
 	/* finally reduce par to 8 bits */
 	par ^= (par >> 8);
@@ -381,7 +399,6 @@ EXPORT_SYMBOL(nand_calculate_ecc);
 int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
 		      unsigned char *read_ecc, unsigned char *calc_ecc)
 {
-	int nr_bits;
 	unsigned char b0, b1, b2;
 	unsigned char byte_addr, bit_addr;
 
@@ -401,14 +418,15 @@ int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
 
 	/* check if there are any bitfaults */
 
-	/* count nr of bits; use table lookup, faster than calculating it */
-	nr_bits = bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2];
-
 	/* repeated if statements are slightly more efficient than switch ... */
 	/* ordered in order of likelihood */
-	if (nr_bits == 0)
+
+	if ((b0 | b1 | b2) == 0)
 		return 0;	/* no error */
-	if (nr_bits == 11) {	/* correctable error */
+
+	if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
+	    (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
+	    (((b2 ^ (b2 >> 1)) & 0x54) == 0x54)) { /* single bit error */
 		/*
 		 * rp15/13/11/9/7/5/3/1 indicate which byte is the faulty byte
 		 * cp 5/3/1 indicate the faulty bit.
@@ -430,9 +448,13 @@ int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
 		/* flip the bit */
 		buf[byte_addr] ^= (1 << bit_addr);
 		return 1;
+
 	}
-	if (nr_bits == 1)
+	/* count nr of bits; use table lookup, faster than calculating it */
+	if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
 		return 1;	/* error in ecc data; no action needed */
+
+	printk(KERN_ERR "uncorrectable error : ");
 	return -1;
 }
 EXPORT_SYMBOL(nand_correct_data);
-- 
GitLab


From 17c1d2be28e485c0c8b09661db39d5bf2605069d Mon Sep 17 00:00:00 2001
From: Alexey Korolev <akorolev@infradead.org>
Date: Wed, 20 Aug 2008 22:32:08 +0100
Subject: [PATCH 027/895] [MTD] [NAND] Fix missing kernel-doc

[Reported by Randy Dunlap]

Signed-off-by: Alexey Korolev <akorolev@infradead.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_base.c | 6 +++---
 drivers/mtd/nand/nand_ecc.c  | 6 +++---
 include/linux/mtd/nand.h     | 1 +
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 582280560c89..d303db39c48d 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -801,9 +801,9 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
  * nand_read_subpage - [REPLACABLE] software ecc based sub-page read function
  * @mtd:	mtd info structure
  * @chip:	nand chip info structure
- * @dataofs	offset of requested data within the page
- * @readlen	data length
- * @buf:	buffer to store read data
+ * @data_offs:	offset of requested data within the page
+ * @readlen:	data length
+ * @bufpoi:	buffer to store read data
  */
 static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, uint32_t data_offs, uint32_t readlen, uint8_t *bufpoi)
 {
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index d99e569e999f..fd19787c9ce7 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -150,8 +150,8 @@ static const char addressbits[256] = {
 /**
  * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block
  * @mtd:	MTD block structure (unused)
- * @dat:	raw data
- * @ecc_code:	buffer for ECC
+ * @buf:	input buffer with raw data
+ * @code:	output buffer with ECC
  */
 int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 		       unsigned char *code)
@@ -390,7 +390,7 @@ EXPORT_SYMBOL(nand_calculate_ecc);
 /**
  * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
  * @mtd:	MTD block structure (unused)
- * @dat:	raw data read from the chip
+ * @buf:	raw data read from the chip
  * @read_ecc:	ECC from the chip
  * @calc_ecc:	the ECC calculated from raw data
  *
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 81774e5facf4..733d3f3b4eb8 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -248,6 +248,7 @@ struct nand_hw_control {
  * @read_page_raw:	function to read a raw page without ECC
  * @write_page_raw:	function to write a raw page without ECC
  * @read_page:	function to read a page according to the ecc generator requirements
+ * @read_subpage:	function to read parts of the page covered by ECC.
  * @write_page:	function to write a page according to the ecc generator requirements
  * @read_oob:	function to read chip OOB data
  * @write_oob:	function to write chip OOB data
-- 
GitLab


From ee974e01e5ef2914036f08c8e41d1a3fa8bfc9d9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Aug 2008 16:37:26 -0700
Subject: [PATCH 028/895] clocksource: check range

Check that the value being passed to parse_pmtmr() does not exceed the
limits of pmtmr_ioport.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/clocksource/acpi_pm.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 5ca1d80de182..3df338481004 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -226,9 +226,12 @@ static int __init parse_pmtmr(char *arg)
 
 	if (strict_strtoul(arg, 16, &base))
 		return -EINVAL;
-
+#ifdef CONFIG_X86_64
+	if (base > UINT_MAX)
+		return -ERANGE;
+#endif
 	printk(KERN_INFO "PMTMR IOPort override: 0x%04x -> 0x%04lx\n",
-	       (unsigned int)pmtmr_ioport, base);
+	       pmtmr_ioport, base);
 	pmtmr_ioport = base;
 
 	return 1;
-- 
GitLab


From 1aa5dfb751d275ae7117d3b73ac423b4a46f2a73 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Wed, 20 Aug 2008 16:37:28 -0700
Subject: [PATCH 029/895] clocksource: keep track of original clocksource
 frequency

The clocksource frequency is represented by
clocksource->mult/2^(clocksource->shift).  Currently, when NTP makes
adjustments to the clock frequency, they are made directly to the mult
value.

This has the drawback that once changed, we cannot know what the orignal
mult value was, or how much adjustment has been applied.

This property causes problems in calculating proper ntp intervals when
switching back and forth between clocksources.

This patch separates the current mult value into a mult and mult_orig
pair.  The mult_orig value stays constant, while the ntp clocksource
adjustments are done only to the mult value.

This allows for correct ntp interval calculation and additionally lays the
groundwork for a new notion of time, what I'm calling the monotonic-raw
time, which is introduced in a following patch.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/clocksource.h | 11 +++++++----
 kernel/time/clocksource.c   |  3 +++
 kernel/time/jiffies.c       |  1 +
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 55e434feec99..f0a7fb984413 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -45,7 +45,8 @@ struct clocksource;
  * @read:		returns a cycle value
  * @mask:		bitmask for two's complement
  *			subtraction of non 64 bit counters
- * @mult:		cycle to nanosecond multiplier
+ * @mult:		cycle to nanosecond multiplier (adjusted by NTP)
+ * @mult_orig:		cycle to nanosecond multiplier (unadjusted by NTP)
  * @shift:		cycle to nanosecond divisor (power of two)
  * @flags:		flags describing special properties
  * @vread:		vsyscall based read
@@ -63,6 +64,7 @@ struct clocksource {
 	cycle_t (*read)(void);
 	cycle_t mask;
 	u32 mult;
+	u32 mult_orig;
 	u32 shift;
 	unsigned long flags;
 	cycle_t (*vread)(void);
@@ -201,16 +203,17 @@ static inline void clocksource_calculate_interval(struct clocksource *c,
 {
 	u64 tmp;
 
-	/* XXX - All of this could use a whole lot of optimization */
+	/* Do the ns -> cycle conversion first, using original mult */
 	tmp = length_nsec;
 	tmp <<= c->shift;
-	tmp += c->mult/2;
-	do_div(tmp, c->mult);
+	tmp += c->mult_orig/2;
+	do_div(tmp, c->mult_orig);
 
 	c->cycle_interval = (cycle_t)tmp;
 	if (c->cycle_interval == 0)
 		c->cycle_interval = 1;
 
+	/* Go back from cycles -> shifted ns, this time use ntp adjused mult */
 	c->xtime_interval = (u64)c->cycle_interval * c->mult;
 }
 
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 093d4acf993b..9ed2eec97526 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -325,6 +325,9 @@ int clocksource_register(struct clocksource *c)
 	unsigned long flags;
 	int ret;
 
+	/* save mult_orig on registration */
+	c->mult_orig = c->mult;
+
 	spin_lock_irqsave(&clocksource_lock, flags);
 	ret = clocksource_enqueue(c);
 	if (!ret)
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 4c256fdb8875..1ca99557e929 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -61,6 +61,7 @@ struct clocksource clocksource_jiffies = {
 	.read		= jiffies_read,
 	.mask		= 0xffffffff, /*32bits*/
 	.mult		= NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
+	.mult_orig	= NSEC_PER_JIFFY << JIFFIES_SHIFT,
 	.shift		= JIFFIES_SHIFT,
 };
 
-- 
GitLab


From 9a055117d3d9cb562f83f8d4cd88772761f4cab0 Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Wed, 20 Aug 2008 16:37:28 -0700
Subject: [PATCH 030/895] clocksource: introduce clocksource_forward_now()

To keep the raw monotonic patch simple first introduce
clocksource_forward_now(), which takes care of the offset since the last
update_wall_time() call and adds it to the clock, so there is no need
anymore to deal with it explicitly at various places, which need to make
significant changes to the clock.

This is also gets rid of the timekeeping_suspend_nsecs, instead of
waiting until resume, the value is accumulated during suspend. In the end
there is only a single user of __get_nsec_offset() left, so I integrated
it back to getnstimeofday().

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/time/timekeeping.c | 71 ++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 38 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e91c29f961c9..83d3555a6998 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -58,27 +58,23 @@ struct clocksource *clock;
 
 #ifdef CONFIG_GENERIC_TIME
 /**
- * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook
+ * clocksource_forward_now - update clock to the current time
  *
- * private function, must hold xtime_lock lock when being
- * called. Returns the number of nanoseconds since the
- * last call to update_wall_time() (adjusted by NTP scaling)
+ * Forward the current clock to update its state since the last call to
+ * update_wall_time(). This is useful before significant clock changes,
+ * as it avoids having to deal with this time offset explicitly.
  */
-static inline s64 __get_nsec_offset(void)
+static void clocksource_forward_now(void)
 {
 	cycle_t cycle_now, cycle_delta;
-	s64 ns_offset;
+	s64 nsec;
 
-	/* read clocksource: */
 	cycle_now = clocksource_read(clock);
-
-	/* calculate the delta since the last update_wall_time: */
 	cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+	clock->cycle_last = cycle_now;
 
-	/* convert to nanoseconds: */
-	ns_offset = cyc2ns(clock, cycle_delta);
-
-	return ns_offset;
+	nsec = cyc2ns(clock, cycle_delta);
+	timespec_add_ns(&xtime, nsec);
 }
 
 /**
@@ -89,6 +85,7 @@ static inline s64 __get_nsec_offset(void)
  */
 void getnstimeofday(struct timespec *ts)
 {
+	cycle_t cycle_now, cycle_delta;
 	unsigned long seq;
 	s64 nsecs;
 
@@ -96,7 +93,15 @@ void getnstimeofday(struct timespec *ts)
 		seq = read_seqbegin(&xtime_lock);
 
 		*ts = xtime;
-		nsecs = __get_nsec_offset();
+
+		/* read clocksource: */
+		cycle_now = clocksource_read(clock);
+
+		/* calculate the delta since the last update_wall_time: */
+		cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+
+		/* convert to nanoseconds: */
+		nsecs = cyc2ns(clock, cycle_delta);
 
 	} while (read_seqretry(&xtime_lock, seq));
 
@@ -129,22 +134,22 @@ EXPORT_SYMBOL(do_gettimeofday);
  */
 int do_settimeofday(struct timespec *tv)
 {
+	struct timespec ts_delta;
 	unsigned long flags;
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
 
 	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
 
 	write_seqlock_irqsave(&xtime_lock, flags);
 
-	nsec -= __get_nsec_offset();
+	clocksource_forward_now();
+
+	ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec;
+	ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec;
+	wall_to_monotonic = timespec_sub(wall_to_monotonic, ts_delta);
 
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+	xtime = *tv;
 
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
 	update_xtime_cache(0);
 
 	clock->error = 0;
@@ -170,22 +175,17 @@ EXPORT_SYMBOL(do_settimeofday);
 static void change_clocksource(void)
 {
 	struct clocksource *new;
-	cycle_t now;
-	u64 nsec;
 
 	new = clocksource_get_next();
 
 	if (clock == new)
 		return;
 
-	new->cycle_last = 0;
-	now = clocksource_read(new);
-	nsec =  __get_nsec_offset();
-	timespec_add_ns(&xtime, nsec);
+	clocksource_forward_now();
 
 	clock = new;
-	clock->cycle_last = now;
-
+	clock->cycle_last = 0;
+	clock->cycle_last = clocksource_read(new);
 	clock->error = 0;
 	clock->xtime_nsec = 0;
 	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
@@ -200,8 +200,8 @@ static void change_clocksource(void)
 	 */
 }
 #else
+static inline void clocksource_forward_now(void) { }
 static inline void change_clocksource(void) { }
-static inline s64 __get_nsec_offset(void) { return 0; }
 #endif
 
 /**
@@ -265,8 +265,6 @@ void __init timekeeping_init(void)
 static int timekeeping_suspended;
 /* time in seconds when suspend began */
 static unsigned long timekeeping_suspend_time;
-/* xtime offset when we went into suspend */
-static s64 timekeeping_suspend_nsecs;
 
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
@@ -292,8 +290,6 @@ static int timekeeping_resume(struct sys_device *dev)
 		wall_to_monotonic.tv_sec -= sleep_length;
 		total_sleep_time += sleep_length;
 	}
-	/* Make sure that we have the correct xtime reference */
-	timespec_add_ns(&xtime, timekeeping_suspend_nsecs);
 	update_xtime_cache(0);
 	/* re-base the last cycle value */
 	clock->cycle_last = 0;
@@ -319,8 +315,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
 	timekeeping_suspend_time = read_persistent_clock();
 
 	write_seqlock_irqsave(&xtime_lock, flags);
-	/* Get the current xtime offset */
-	timekeeping_suspend_nsecs = __get_nsec_offset();
+	clocksource_forward_now();
 	timekeeping_suspended = 1;
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 
@@ -461,10 +456,10 @@ void update_wall_time(void)
 	 */
 	while (offset >= clock->cycle_interval) {
 		/* accumulate one interval */
-		clock->xtime_nsec += clock->xtime_interval;
-		clock->cycle_last += clock->cycle_interval;
 		offset -= clock->cycle_interval;
+		clock->cycle_last += clock->cycle_interval;
 
+		clock->xtime_nsec += clock->xtime_interval;
 		if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
 			clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
 			xtime.tv_sec++;
-- 
GitLab


From 2d42244ae71d6c7b0884b5664cf2eda30fb2ae68 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Wed, 20 Aug 2008 16:37:30 -0700
Subject: [PATCH 031/895] clocksource: introduce CLOCK_MONOTONIC_RAW

In talking with Josip Loncaric, and his work on clock synchronization (see
btime.sf.net), he mentioned that for really close synchronization, it is
useful to have access to "hardware time", that is a notion of time that is
not in any way adjusted by the clock slewing done to keep close time sync.

Part of the issue is if we are using the kernel's ntp adjusted
representation of time in order to measure how we should correct time, we
can run into what Paul McKenney aptly described as "Painting a road using
the lines we're painting as the guide".

I had been thinking of a similar problem, and was trying to come up with a
way to give users access to a purely hardware based time representation
that avoided users having to know the underlying frequency and mask values
needed to deal with the wide variety of possible underlying hardware
counters.

My solution is to introduce CLOCK_MONOTONIC_RAW.  This exposes a
nanosecond based time value, that increments starting at bootup and has no
frequency adjustments made to it what so ever.

The time is accessed from userspace via the posix_clock_gettime() syscall,
passing CLOCK_MONOTONIC_RAW as the clock_id.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/clocksource.h |  3 +++
 include/linux/time.h        |  2 ++
 kernel/posix-timers.c       | 15 +++++++++++++
 kernel/time/timekeeping.c   | 44 +++++++++++++++++++++++++++++++++++++
 4 files changed, 64 insertions(+)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f0a7fb984413..f88d32f8ff7c 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -79,6 +79,7 @@ struct clocksource {
 	/* timekeeping specific data, ignore */
 	cycle_t cycle_interval;
 	u64	xtime_interval;
+	u32	raw_interval;
 	/*
 	 * Second part is written at each timer interrupt
 	 * Keep it in a different cache line to dirty no
@@ -87,6 +88,7 @@ struct clocksource {
 	cycle_t cycle_last ____cacheline_aligned_in_smp;
 	u64 xtime_nsec;
 	s64 error;
+	struct timespec raw_time;
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 	/* Watchdog related data, used by the framework */
@@ -215,6 +217,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c,
 
 	/* Go back from cycles -> shifted ns, this time use ntp adjused mult */
 	c->xtime_interval = (u64)c->cycle_interval * c->mult;
+	c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift;
 }
 
 
diff --git a/include/linux/time.h b/include/linux/time.h
index e15206a7e82e..205f974b9ebf 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -117,6 +117,7 @@ extern int do_setitimer(int which, struct itimerval *value,
 extern unsigned int alarm_setitimer(unsigned int seconds);
 extern int do_getitimer(int which, struct itimerval *value);
 extern void getnstimeofday(struct timespec *tv);
+extern void getrawmonotonic(struct timespec *ts);
 extern void getboottime(struct timespec *ts);
 extern void monotonic_to_bootbased(struct timespec *ts);
 
@@ -214,6 +215,7 @@ struct itimerval {
 #define CLOCK_MONOTONIC			1
 #define CLOCK_PROCESS_CPUTIME_ID	2
 #define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
 
 /*
  * The IDs of various hardware clocks:
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index e36d5798cbff..d3c66b53dff6 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -222,6 +222,15 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
 	return 0;
 }
 
+/*
+ * Get monotonic time for posix timers
+ */
+static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
+{
+	getrawmonotonic(tp);
+	return 0;
+}
+
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
  */
@@ -235,9 +244,15 @@ static __init int init_posix_timers(void)
 		.clock_get = posix_ktime_get_ts,
 		.clock_set = do_posix_clock_nosettime,
 	};
+	struct k_clock clock_monotonic_raw = {
+		.clock_getres = hrtimer_get_res,
+		.clock_get = posix_get_monotonic_raw,
+		.clock_set = do_posix_clock_nosettime,
+	};
 
 	register_posix_clock(CLOCK_REALTIME, &clock_realtime);
 	register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
+	register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
 
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
 					sizeof (struct k_itimer), 0, SLAB_PANIC,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 83d3555a6998..5099c95b8aa2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -75,6 +75,9 @@ static void clocksource_forward_now(void)
 
 	nsec = cyc2ns(clock, cycle_delta);
 	timespec_add_ns(&xtime, nsec);
+
+	nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
+	clock->raw_time.tv_nsec += nsec;
 }
 
 /**
@@ -183,6 +186,8 @@ static void change_clocksource(void)
 
 	clocksource_forward_now();
 
+	new->raw_time = clock->raw_time;
+
 	clock = new;
 	clock->cycle_last = 0;
 	clock->cycle_last = clocksource_read(new);
@@ -204,6 +209,39 @@ static inline void clocksource_forward_now(void) { }
 static inline void change_clocksource(void) { }
 #endif
 
+/**
+ * getrawmonotonic - Returns the raw monotonic time in a timespec
+ * @ts:		pointer to the timespec to be set
+ *
+ * Returns the raw monotonic time (completely un-modified by ntp)
+ */
+void getrawmonotonic(struct timespec *ts)
+{
+	unsigned long seq;
+	s64 nsecs;
+	cycle_t cycle_now, cycle_delta;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+
+		/* read clocksource: */
+		cycle_now = clocksource_read(clock);
+
+		/* calculate the delta since the last update_wall_time: */
+		cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+
+		/* convert to nanoseconds: */
+		nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
+
+		*ts = clock->raw_time;
+
+	} while (read_seqretry(&xtime_lock, seq));
+
+	timespec_add_ns(ts, nsecs);
+}
+EXPORT_SYMBOL(getrawmonotonic);
+
+
 /**
  * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
  */
@@ -466,6 +504,12 @@ void update_wall_time(void)
 			second_overflow();
 		}
 
+		clock->raw_time.tv_nsec += clock->raw_interval;
+		if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) {
+			clock->raw_time.tv_nsec -= NSEC_PER_SEC;
+			clock->raw_time.tv_sec++;
+		}
+
 		/* accumulate error between NTP and clock interval */
 		clock->error += tick_length;
 		clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift);
-- 
GitLab


From d82f0b0f6f1a0a25afc288fb7135b1601fe6df18 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 20 Aug 2008 16:46:04 -0700
Subject: [PATCH 032/895] migrate_timers: add comment, use spinlock_irq()

Add the comment to explain why the double lock in migrate_timers()
can't deadlock.

Change the code to use spinlock_irq() instead of local_irq_disable()
+ spin_lock().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/hrtimer.c | 11 ++++++-----
 kernel/timer.c   | 11 ++++++-----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index b8e4dce80a74..03ea1378c43b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1620,9 +1620,11 @@ static void migrate_hrtimers(int cpu)
 	new_base = &get_cpu_var(hrtimer_bases);
 
 	tick_cancel_sched_timer(cpu);
-
-	local_irq_disable();
-	spin_lock(&new_base->lock);
+	/*
+	 * The caller is globally serialized and nobody else
+	 * takes two locks at once, deadlock is not possible.
+	 */
+	spin_lock_irq(&new_base->lock);
 	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
@@ -1631,8 +1633,7 @@ static void migrate_hrtimers(int cpu)
 	}
 
 	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
-	local_irq_enable();
+	spin_unlock_irq(&new_base->lock);
 	put_cpu_var(hrtimer_bases);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/timer.c b/kernel/timer.c
index 03bc7f1f1593..e8019cc3418d 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1435,9 +1435,11 @@ static void __cpuinit migrate_timers(int cpu)
 	BUG_ON(cpu_online(cpu));
 	old_base = per_cpu(tvec_bases, cpu);
 	new_base = get_cpu_var(tvec_bases);
-
-	local_irq_disable();
-	spin_lock(&new_base->lock);
+	/*
+	 * The caller is globally serialized and nobody else
+	 * takes two locks at once, deadlock is not possible.
+	 */
+	spin_lock_irq(&new_base->lock);
 	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	BUG_ON(old_base->running_timer);
@@ -1452,8 +1454,7 @@ static void __cpuinit migrate_timers(int cpu)
 	}
 
 	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
-	local_irq_enable();
+	spin_unlock_irq(&new_base->lock);
 	put_cpu_var(tvec_bases);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
-- 
GitLab


From 916c7a855174e3b53d182b97a26b2e27a29726a1 Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Wed, 20 Aug 2008 16:46:08 -0700
Subject: [PATCH 033/895] ntp: fix ADJ_OFFSET_SS_READ bug and do_adjtimex()
 cleanup

Thanks to the review by Michael Kerrisk a bug in the recent
ADJ_OFFSET_SS_READ option was discovered, where the ntp time_offset was
inadvertently set by it.  This fixes this by making the adjtime code
more separate from the ntp_adjtime code (both of which really want to
be separate syscalls).

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/timex.h |  9 ++++-
 kernel/time/ntp.c     | 76 +++++++++++++++++++++++--------------------
 2 files changed, 48 insertions(+), 37 deletions(-)

diff --git a/include/linux/timex.h b/include/linux/timex.h
index fc6035d29d56..c00bcdd3ae42 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -141,8 +141,15 @@ struct timex {
 #define ADJ_MICRO		0x1000	/* select microsecond resolution */
 #define ADJ_NANO		0x2000	/* select nanosecond resolution */
 #define ADJ_TICK		0x4000	/* tick value */
+
+#ifdef __KERNEL__
+#define ADJ_ADJTIME		0x8000	/* switch between adjtime/adjtimex modes */
+#define ADJ_OFFSET_SINGLESHOT	0x0001	/* old-fashioned adjtime */
+#define ADJ_OFFSET_READONLY	0x2000	/* read-only adjtime */
+#else
 #define ADJ_OFFSET_SINGLESHOT	0x8001	/* old-fashioned adjtime */
-#define ADJ_OFFSET_SS_READ	0xa001  /* read-only adjtime */
+#define ADJ_OFFSET_SS_READ	0xa001	/* read-only adjtime */
+#endif
 
 /* xntp 3.4 compatibility names */
 #define MOD_OFFSET	ADJ_OFFSET
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 5125ddd8196b..c6921aa1a42a 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -277,38 +277,50 @@ static inline void notify_cmos_timer(void) { }
 int do_adjtimex(struct timex *txc)
 {
 	struct timespec ts;
-	long save_adjust, sec;
 	int result;
 
-	/* In order to modify anything, you gotta be super-user! */
-	if (txc->modes && !capable(CAP_SYS_TIME))
-		return -EPERM;
-
-	/* Now we validate the data before disabling interrupts */
-
-	if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) {
+	/* Validate the data before disabling interrupts */
+	if (txc->modes & ADJ_ADJTIME) {
 		/* singleshot must not be used with any other mode bits */
-		if (txc->modes & ~ADJ_OFFSET_SS_READ)
+		if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
 			return -EINVAL;
+		if (!(txc->modes & ADJ_OFFSET_READONLY) &&
+		    !capable(CAP_SYS_TIME))
+			return -EPERM;
+	} else {
+		/* In order to modify anything, you gotta be super-user! */
+		 if (txc->modes && !capable(CAP_SYS_TIME))
+			return -EPERM;
+
+		/* if the quartz is off by more than 10% something is VERY wrong! */
+		if (txc->modes & ADJ_TICK &&
+		    (txc->tick <  900000/USER_HZ ||
+		     txc->tick > 1100000/USER_HZ))
+				return -EINVAL;
+
+		if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
+			hrtimer_cancel(&leap_timer);
 	}
 
-	/* if the quartz is off by more than 10% something is VERY wrong ! */
-	if (txc->modes & ADJ_TICK)
-		if (txc->tick <  900000/USER_HZ ||
-		    txc->tick > 1100000/USER_HZ)
-			return -EINVAL;
-
-	if (time_state != TIME_OK && txc->modes & ADJ_STATUS)
-		hrtimer_cancel(&leap_timer);
 	getnstimeofday(&ts);
 
 	write_seqlock_irq(&xtime_lock);
 
-	/* Save for later - semantics of adjtime is to return old value */
-	save_adjust = time_adjust;
-
 	/* If there are input parameters, then process them */
+	if (txc->modes & ADJ_ADJTIME) {
+		long save_adjust = time_adjust;
+
+		if (!(txc->modes & ADJ_OFFSET_READONLY)) {
+			/* adjtime() is independent from ntp_adjtime() */
+			time_adjust = txc->offset;
+			ntp_update_frequency();
+		}
+		txc->offset = save_adjust;
+		goto adj_done;
+	}
 	if (txc->modes) {
+		long sec;
+
 		if (txc->modes & ADJ_STATUS) {
 			if ((time_status & STA_PLL) &&
 			    !(txc->status & STA_PLL)) {
@@ -375,13 +387,8 @@ int do_adjtimex(struct timex *txc)
 		if (txc->modes & ADJ_TAI && txc->constant > 0)
 			time_tai = txc->constant;
 
-		if (txc->modes & ADJ_OFFSET) {
-			if (txc->modes == ADJ_OFFSET_SINGLESHOT)
-				/* adjtime() is independent from ntp_adjtime() */
-				time_adjust = txc->offset;
-			else
-				ntp_update_offset(txc->offset);
-		}
+		if (txc->modes & ADJ_OFFSET)
+			ntp_update_offset(txc->offset);
 		if (txc->modes & ADJ_TICK)
 			tick_usec = txc->tick;
 
@@ -389,19 +396,16 @@ int do_adjtimex(struct timex *txc)
 			ntp_update_frequency();
 	}
 
+	txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
+				  NTP_SCALE_SHIFT);
+	if (!(time_status & STA_NANO))
+		txc->offset /= NSEC_PER_USEC;
+
+adj_done:
 	result = time_state;	/* mostly `TIME_OK' */
 	if (time_status & (STA_UNSYNC|STA_CLOCKERR))
 		result = TIME_ERROR;
 
-	if ((txc->modes == ADJ_OFFSET_SINGLESHOT) ||
-	    (txc->modes == ADJ_OFFSET_SS_READ))
-		txc->offset = save_adjust;
-	else {
-		txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
-					  NTP_SCALE_SHIFT);
-		if (!(time_status & STA_NANO))
-			txc->offset /= NSEC_PER_USEC;
-	}
 	txc->freq	   = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) *
 					 (s64)PPM_SCALE_INV,
 					 NTP_SCALE_SHIFT);
-- 
GitLab


From 377bf1e4ac2d894791733270604594c7c851ef83 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Thu, 21 Aug 2008 22:58:28 +0400
Subject: [PATCH 034/895] genirq: fix irq_desc->depth handling with DEBUG_SHIRQ

When DEBUG_SHIRQ is selected, a spurious IRQ is issued before
the setup_irq() initializes the desc->depth. An IRQ handler may
call disable_irq_nosync(), but then setup_irq() will overwrite
desc->depth, and upon enable_irq() we'll catch this WARN:

------------[ cut here ]------------
Badness at kernel/irq/manage.c:180
NIP: c0061ab8 LR: c0061f10 CTR: 00000000
REGS: cf83be50 TRAP: 0700   Not tainted  (2.6.27-rc3-23450-g74919b0)
MSR: 00021032 <ME,IR,DR>  CR: 22042022  XER: 20000000
TASK = cf829100[5] 'events/0' THREAD: cf83a000
GPR00: c0061f10 cf83bf00 cf829100 c038e674 00000016 00000000 cf83bef8 00000038
GPR08: c0298910 00000000 c0310d28 cf83a000 00000c9c 1001a1a8 0fffe000 00800000
GPR16: ffffffff 00000000 007fff00 00000000 007ffeb0 c03320a0 c031095c c0310924
GPR24: cf8292ec cf807190 cf83a000 00009032 c038e6a4 c038e674 cf99b1cc c038e674
NIP [c0061ab8] __enable_irq+0x20/0x80
LR [c0061f10] enable_irq+0x50/0x70
Call Trace:
[cf83bf00] [c038e674] irq_desc+0x630/0x9000 (unreliable)
[cf83bf10] [c0061f10] enable_irq+0x50/0x70
[cf83bf30] [c01abe94] phy_change+0x68/0x108
[cf83bf50] [c0046394] run_workqueue+0xc4/0x16c
[cf83bf90] [c0046834] worker_thread+0x74/0xd4
[cf83bfd0] [c004ab7c] kthread+0x48/0x84
[cf83bff0] [c00135e0] kernel_thread+0x44/0x60
Instruction dump:
4e800020 3d20c031 38a94214 4bffffcc 9421fff0 7c0802a6 93e1000c 7c7f1b78
90010014 8123001c 2f890000 409e001c <0fe00000> 80010014 83e1000c 38210010

That trace corresponds to this line:
WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);

The patch fixes the problem by moving the SHIRQ code below the
setup_irq().

Unfortunately we can't easily move the SHIRQ code inside the
setup_irq(), since it grabs a spinlock, so to prvent a 'real'
IRQ from interfere us we should disable that IRQ.

p.s. The driver in question is drivers/net/phy/phy.c.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/manage.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 77a51be36010..ae1b684e048c 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -596,26 +596,29 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	action->next = NULL;
 	action->dev_id = dev_id;
 
+	retval = setup_irq(irq, action);
+	if (retval)
+		kfree(action);
+
 #ifdef CONFIG_DEBUG_SHIRQ
 	if (irqflags & IRQF_SHARED) {
 		/*
 		 * It's a shared IRQ -- the driver ought to be prepared for it
 		 * to happen immediately, so let's make sure....
-		 * We do this before actually registering it, to make sure that
-		 * a 'real' IRQ doesn't run in parallel with our fake
+		 * We disable the irq to make sure that a 'real' IRQ doesn't
+		 * run in parallel with our fake.
 		 */
 		unsigned long flags;
 
+		disable_irq(irq);
 		local_irq_save(flags);
+
 		handler(irq, dev_id);
+
 		local_irq_restore(flags);
+		enable_irq(irq);
 	}
 #endif
-
-	retval = setup_irq(irq, action);
-	if (retval)
-		kfree(action);
-
 	return retval;
 }
 EXPORT_SYMBOL(request_irq);
-- 
GitLab


From dffc8d66544563fe00f176f230d5d8a5b45847bb Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Sat, 23 Aug 2008 13:56:21 +0800
Subject: [PATCH 035/895] [MTD] [NAND] au1550nd.c: remove unused #include
 <version.h>

It doesn't use LINUX_VERSION_CODE nor KERNEL_VERSION.

This patch removes the said #include <version.h>.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/au1550nd.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c
index 761946ea45b1..92c334ff4508 100644
--- a/drivers/mtd/nand/au1550nd.c
+++ b/drivers/mtd/nand/au1550nd.c
@@ -16,7 +16,6 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
-#include <linux/version.h>
 #include <asm/io.h>
 
 #include <asm/mach-au1x00/au1xxx.h>
-- 
GitLab


From d68156cfad0fe09201dd049fff167a8a881427ad Mon Sep 17 00:00:00 2001
From: "Singh, Vimal" <vimalsingh@ti.com>
Date: Sat, 23 Aug 2008 18:18:34 +0200
Subject: [PATCH 036/895] [MTD] [NAND] nand_ecc.c: adding support for 512 byte
 ecc

Support 512 byte ECC calculation

[FM: updated two comments]

Signed-off-by: Vimal Singh <vimalsingh@ti.com>
Signed-off-by: Frans Meulenbroeks <fransmeulenbroeks@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_ecc.c | 86 ++++++++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 24 deletions(-)

diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index fd19787c9ce7..868147acce2c 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -42,6 +42,8 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
 #include <linux/mtd/nand_ecc.h>
 #include <asm/byteorder.h>
 #else
@@ -148,8 +150,9 @@ static const char addressbits[256] = {
 };
 
 /**
- * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block
- * @mtd:	MTD block structure (unused)
+ * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
+ *			 block
+ * @mtd:	MTD block structure
  * @buf:	input buffer with raw data
  * @code:	output buffer with ECC
  */
@@ -158,13 +161,18 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 {
 	int i;
 	const uint32_t *bp = (uint32_t *)buf;
+	/* 256 or 512 bytes/ecc  */
+	const uint32_t eccsize_mult =
+			(((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
 	uint32_t cur;		/* current value in buffer */
-	/* rp0..rp15 are the various accumulated parities (per byte) */
+	/* rp0..rp15..rp17 are the various accumulated parities (per byte) */
 	uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
-	uint32_t rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+	uint32_t rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15, rp16;
+	uint32_t uninitialized_var(rp17);	/* to make compiler happy */
 	uint32_t par;		/* the cumulative parity for all data */
 	uint32_t tmppar;	/* the cumulative parity for this iteration;
-				   for rp12 and rp14 at the end of the loop */
+				   for rp12, rp14 and rp16 at the end of the
+				   loop */
 
 	par = 0;
 	rp4 = 0;
@@ -173,6 +181,7 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 	rp10 = 0;
 	rp12 = 0;
 	rp14 = 0;
+	rp16 = 0;
 
 	/*
 	 * The loop is unrolled a number of times;
@@ -181,10 +190,10 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 	 * Note: passing unaligned data might give a performance penalty.
 	 * It is assumed that the buffers are aligned.
 	 * tmppar is the cumulative sum of this iteration.
-	 * needed for calculating rp12, rp14 and par
+	 * needed for calculating rp12, rp14, rp16 and par
 	 * also used as a performance improvement for rp6, rp8 and rp10
 	 */
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < eccsize_mult << 2; i++) {
 		cur = *bp++;
 		tmppar = cur;
 		rp4 ^= cur;
@@ -247,12 +256,14 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 			rp12 ^= tmppar;
 		if ((i & 0x2) == 0)
 			rp14 ^= tmppar;
+		if (eccsize_mult == 2 && (i & 0x4) == 0)
+			rp16 ^= tmppar;
 	}
 
 	/*
 	 * handle the fact that we use longword operations
-	 * we'll bring rp4..rp14 back to single byte entities by shifting and
-	 * xoring first fold the upper and lower 16 bits,
+	 * we'll bring rp4..rp14..rp16 back to single byte entities by
+	 * shifting and xoring first fold the upper and lower 16 bits,
 	 * then the upper and lower 8 bits.
 	 */
 	rp4 ^= (rp4 >> 16);
@@ -273,6 +284,11 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 	rp14 ^= (rp14 >> 16);
 	rp14 ^= (rp14 >> 8);
 	rp14 &= 0xff;
+	if (eccsize_mult == 2) {
+		rp16 ^= (rp16 >> 16);
+		rp16 ^= (rp16 >> 8);
+		rp16 &= 0xff;
+	}
 
 	/*
 	 * we also need to calculate the row parity for rp0..rp3
@@ -315,7 +331,7 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 	par &= 0xff;
 
 	/*
-	 * and calculate rp5..rp15
+	 * and calculate rp5..rp15..rp17
 	 * note that par = rp4 ^ rp5 and due to the commutative property
 	 * of the ^ operator we can say:
 	 * rp5 = (par ^ rp4);
@@ -329,6 +345,8 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 	rp11 = (par ^ rp10) & 0xff;
 	rp13 = (par ^ rp12) & 0xff;
 	rp15 = (par ^ rp14) & 0xff;
+	if (eccsize_mult == 2)
+		rp17 = (par ^ rp16) & 0xff;
 
 	/*
 	 * Finally calculate the ecc bits.
@@ -375,32 +393,46 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 	    (invparity[rp9] << 1)  |
 	    (invparity[rp8]);
 #endif
-	code[2] =
-	    (invparity[par & 0xf0] << 7) |
-	    (invparity[par & 0x0f] << 6) |
-	    (invparity[par & 0xcc] << 5) |
-	    (invparity[par & 0x33] << 4) |
-	    (invparity[par & 0xaa] << 3) |
-	    (invparity[par & 0x55] << 2) |
-	    3;
+	if (eccsize_mult == 1)
+		code[2] =
+		    (invparity[par & 0xf0] << 7) |
+		    (invparity[par & 0x0f] << 6) |
+		    (invparity[par & 0xcc] << 5) |
+		    (invparity[par & 0x33] << 4) |
+		    (invparity[par & 0xaa] << 3) |
+		    (invparity[par & 0x55] << 2) |
+		    3;
+	else
+		code[2] =
+		    (invparity[par & 0xf0] << 7) |
+		    (invparity[par & 0x0f] << 6) |
+		    (invparity[par & 0xcc] << 5) |
+		    (invparity[par & 0x33] << 4) |
+		    (invparity[par & 0xaa] << 3) |
+		    (invparity[par & 0x55] << 2) |
+		    (invparity[rp17] << 1) |
+		    (invparity[rp16] << 0);
 	return 0;
 }
 EXPORT_SYMBOL(nand_calculate_ecc);
 
 /**
  * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
- * @mtd:	MTD block structure (unused)
+ * @mtd:	MTD block structure
  * @buf:	raw data read from the chip
  * @read_ecc:	ECC from the chip
  * @calc_ecc:	the ECC calculated from raw data
  *
- * Detect and correct a 1 bit error for 256 byte block
+ * Detect and correct a 1 bit error for 256/512 byte block
  */
 int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
 		      unsigned char *read_ecc, unsigned char *calc_ecc)
 {
 	unsigned char b0, b1, b2;
 	unsigned char byte_addr, bit_addr;
+	/* 256 or 512 bytes/ecc  */
+	const uint32_t eccsize_mult =
+			(((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
 
 	/*
 	 * b0 to b2 indicate which bit is faulty (if any)
@@ -426,10 +458,12 @@ int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
 
 	if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
 	    (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
-	    (((b2 ^ (b2 >> 1)) & 0x54) == 0x54)) { /* single bit error */
+	    ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
+	     (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
+	/* single bit error */
 		/*
-		 * rp15/13/11/9/7/5/3/1 indicate which byte is the faulty byte
-		 * cp 5/3/1 indicate the faulty bit.
+		 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
+		 * byte, cp 5/3/1 indicate the faulty bit.
 		 * A lookup table (called addressbits) is used to filter
 		 * the bits from the byte they are in.
 		 * A marginal optimisation is possible by having three
@@ -443,7 +477,11 @@ int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
 		 * We could also do addressbits[b2] >> 1 but for the
 		 * performace it does not make any difference
 		 */
-		byte_addr = (addressbits[b1] << 4) + addressbits[b0];
+		if (eccsize_mult == 1)
+			byte_addr = (addressbits[b1] << 4) + addressbits[b0];
+		else
+			byte_addr = (addressbits[b2 & 0x3] << 8) +
+				    (addressbits[b1] << 4) + addressbits[b0];
 		bit_addr = addressbits[b2 >> 2];
 		/* flip the bit */
 		buf[byte_addr] ^= (1 << bit_addr);
-- 
GitLab


From e82374fd1a804e197fc2a54c3930e70c5d300abc Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Mon, 11 Aug 2008 22:22:27 +0200
Subject: [PATCH 037/895] pda_power: Check and handle return value of
 set_irq_wake

The recent change in commit 2db873211ba47ef704c301f9ecf4a33413a0b649
forces the calls enable_irq_wake() and disable_irq_wake() to
be balanced. But if in pda_power_suspend() the call to
enable_irq_wake() fails (because attached gpio cannot wake
up the CPU), the corresponding disable_irq_wake will WARN().
Fix it by storing success/failure of enable_irq_wake().

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/pda_power.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/power/pda_power.c b/drivers/power/pda_power.c
index 0471ec743ab9..d30bb766fcef 100644
--- a/drivers/power/pda_power.c
+++ b/drivers/power/pda_power.c
@@ -334,13 +334,16 @@ static int pda_power_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
+static int ac_wakeup_enabled;
+static int usb_wakeup_enabled;
+
 static int pda_power_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	if (device_may_wakeup(&pdev->dev)) {
 		if (ac_irq)
-			enable_irq_wake(ac_irq->start);
+			ac_wakeup_enabled = !enable_irq_wake(ac_irq->start);
 		if (usb_irq)
-			enable_irq_wake(usb_irq->start);
+			usb_wakeup_enabled = !enable_irq_wake(usb_irq->start);
 	}
 
 	return 0;
@@ -349,9 +352,9 @@ static int pda_power_suspend(struct platform_device *pdev, pm_message_t state)
 static int pda_power_resume(struct platform_device *pdev)
 {
 	if (device_may_wakeup(&pdev->dev)) {
-		if (usb_irq)
+		if (usb_irq && usb_wakeup_enabled)
 			disable_irq_wake(usb_irq->start);
-		if (ac_irq)
+		if (ac_irq && ac_wakeup_enabled)
 			disable_irq_wake(ac_irq->start);
 	}
 
-- 
GitLab


From 942ed161944b3476639916cf544e6975b29c985a Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Tue, 26 Aug 2008 21:09:59 +0100
Subject: [PATCH 038/895] power_supply: Add function to return system-wide
 power state

Certain drivers benefit from knowing whether the system is on ac or
battery, for instance when determining which backlight registers to
read. This adds a simple call to determine whether there's an online
power supply other than any batteries.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/power_supply_core.c | 25 +++++++++++++++++++++++++
 include/linux/power_supply.h      |  6 ++++++
 2 files changed, 31 insertions(+)

diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index cb1ccb472921..f44f5b608f6a 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -87,6 +87,30 @@ int power_supply_am_i_supplied(struct power_supply *psy)
 	return error;
 }
 
+static int __power_supply_is_system_supplied(struct device *dev, void *data)
+{
+	union power_supply_propval ret = {0,};
+	struct power_supply *psy = dev_get_drvdata(dev);
+
+	if (psy->type != POWER_SUPPLY_TYPE_BATTERY) {
+		if (psy->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &ret))
+			return 0;
+		if (ret.intval)
+			return ret.intval;
+	}
+	return 0;
+}
+
+int power_supply_is_system_supplied(void)
+{
+	int error;
+
+	error = class_for_each_device(power_supply_class, NULL, NULL,
+				      __power_supply_is_system_supplied);
+
+	return error;
+}
+
 int power_supply_register(struct device *parent, struct power_supply *psy)
 {
 	int rc = 0;
@@ -148,6 +172,7 @@ static void __exit power_supply_class_exit(void)
 
 EXPORT_SYMBOL_GPL(power_supply_changed);
 EXPORT_SYMBOL_GPL(power_supply_am_i_supplied);
+EXPORT_SYMBOL_GPL(power_supply_is_system_supplied);
 EXPORT_SYMBOL_GPL(power_supply_register);
 EXPORT_SYMBOL_GPL(power_supply_unregister);
 
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index ea96ead1d39d..f9348cba6dc1 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -165,6 +165,12 @@ struct power_supply_info {
 extern void power_supply_changed(struct power_supply *psy);
 extern int power_supply_am_i_supplied(struct power_supply *psy);
 
+#if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE)
+extern int power_supply_is_system_supplied(void);
+#else
+static inline int power_supply_is_system_supplied(void) { return -ENOSYS; }
+#endif
+
 extern int power_supply_register(struct device *parent,
 				 struct power_supply *psy);
 extern void power_supply_unregister(struct power_supply *psy);
-- 
GitLab


From b996ad0e9fb15ca4acc60bcd0380912117a45d13 Mon Sep 17 00:00:00 2001
From: Rodolfo Giometti <giometti@linux.it>
Date: Wed, 20 Aug 2008 16:52:58 -0700
Subject: [PATCH 039/895] power_supply: Support for Texas Instruments BQ27200
 battery managers

These battery managers came in two different packages: one for I2C
busses (BQ27200) and one for HDQ busses (BQ27000).

This driver currently supports only the I2C chip version but the code
is designed in order to easily allow the HDQ chip version integration.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: make things static, use kasprintf()]
Signed-off-by: Rodolfo Giometti <giometti@linux.it>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/Kconfig           |   6 +
 drivers/power/Makefile          |   1 +
 drivers/power/bq27x00_battery.c | 382 ++++++++++++++++++++++++++++++++
 3 files changed, 389 insertions(+)
 create mode 100644 drivers/power/bq27x00_battery.c

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 9ce55850271a..b2bd104b9869 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -62,4 +62,10 @@ config BATTERY_PALMTX
 	help
 	  Say Y to enable support for the battery in Palm T|X.
 
+config BATTERY_BQ27x00
+	tristate "BQ27200 battery driver"
+	depends on I2C
+	help
+	  Say Y here to enable support for batteries with BQ27200(I2C) chip.
+
 endif # POWER_SUPPLY
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 4706bf8ff459..6cb301b779a7 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_BATTERY_PMU)	+= pmu_battery.o
 obj-$(CONFIG_BATTERY_OLPC)	+= olpc_battery.o
 obj-$(CONFIG_BATTERY_TOSA)	+= tosa_battery.o
 obj-$(CONFIG_BATTERY_PALMTX)	+= palmtx_battery.o
+obj-$(CONFIG_BATTERY_BQ27x00)	+= bq27x00_battery.o
diff --git a/drivers/power/bq27x00_battery.c b/drivers/power/bq27x00_battery.c
new file mode 100644
index 000000000000..62d4948e8206
--- /dev/null
+++ b/drivers/power/bq27x00_battery.c
@@ -0,0 +1,382 @@
+/*
+ * BQ27x00 battery driver
+ *
+ * Copyright (C) 2008 Rodolfo Giometti <giometti@linux.it>
+ * Copyright (C) 2008 Eurotech S.p.A. <info@eurotech.it>
+ *
+ * Based on a previous work by Copyright (C) 2008 Texas Instruments, Inc.
+ *
+ * This package is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+#include <linux/module.h>
+#include <linux/param.h>
+#include <linux/jiffies.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/idr.h>
+
+#include <linux/i2c.h>
+
+#define DRIVER_VERSION			"1.0.0"
+
+#define BQ27x00_REG_TEMP		0x06
+#define BQ27x00_REG_VOLT		0x08
+#define BQ27x00_REG_RSOC		0x0B /* Relative State-of-Charge */
+#define BQ27x00_REG_AI			0x14
+#define BQ27x00_REG_FLAGS		0x0A
+#define HIGH_BYTE(A)			((A) << 8)
+
+/* If the system has several batteries we need a different name for each
+ * of them...
+ */
+static DEFINE_IDR(battery_id);
+static DEFINE_MUTEX(battery_mutex);
+
+struct bq27x00_device_info;
+struct bq27x00_access_methods {
+	int (*read)(u8 reg, int *rt_value, int b_single,
+		struct bq27x00_device_info *di);
+};
+
+struct bq27x00_device_info {
+	struct device 		*dev;
+	int			id;
+	int			voltage_uV;
+	int			current_uA;
+	int			temp_C;
+	int			charge_rsoc;
+	struct bq27x00_access_methods	*bus;
+	struct power_supply	bat;
+
+	struct i2c_client	*client;
+};
+
+static enum power_supply_property bq27x00_battery_props[] = {
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_TEMP,
+};
+
+/*
+ * Common code for BQ27x00 devices
+ */
+
+static int bq27x00_read(u8 reg, int *rt_value, int b_single,
+			struct bq27x00_device_info *di)
+{
+	int ret;
+
+	ret = di->bus->read(reg, rt_value, b_single, di);
+	*rt_value = be16_to_cpu(*rt_value);
+
+	return ret;
+}
+
+/*
+ * Return the battery temperature in Celcius degrees
+ * Or < 0 if something fails.
+ */
+static int bq27x00_battery_temperature(struct bq27x00_device_info *di)
+{
+	int ret;
+	int temp = 0;
+
+	ret = bq27x00_read(BQ27x00_REG_TEMP, &temp, 0, di);
+	if (ret) {
+		dev_err(di->dev, "error reading temperature\n");
+		return ret;
+	}
+
+	return (temp >> 2) - 273;
+}
+
+/*
+ * Return the battery Voltage in milivolts
+ * Or < 0 if something fails.
+ */
+static int bq27x00_battery_voltage(struct bq27x00_device_info *di)
+{
+	int ret;
+	int volt = 0;
+
+	ret = bq27x00_read(BQ27x00_REG_VOLT, &volt, 0, di);
+	if (ret) {
+		dev_err(di->dev, "error reading voltage\n");
+		return ret;
+	}
+
+	return volt;
+}
+
+/*
+ * Return the battery average current
+ * Note that current can be negative signed as well
+ * Or 0 if something fails.
+ */
+static int bq27x00_battery_current(struct bq27x00_device_info *di)
+{
+	int ret;
+	int curr = 0;
+	int flags = 0;
+
+	ret = bq27x00_read(BQ27x00_REG_AI, &curr, 0, di);
+	if (ret) {
+		dev_err(di->dev, "error reading current\n");
+		return 0;
+	}
+	ret = bq27x00_read(BQ27x00_REG_FLAGS, &flags, 0, di);
+	if (ret < 0) {
+		dev_err(di->dev, "error reading flags\n");
+		return 0;
+	}
+	if ((flags & (1 << 7)) != 0) {
+		dev_dbg(di->dev, "negative current!\n");
+		return -curr;
+	}
+	return curr;
+}
+
+/*
+ * Return the battery Relative State-of-Charge
+ * Or < 0 if something fails.
+ */
+static int bq27x00_battery_rsoc(struct bq27x00_device_info *di)
+{
+	int ret;
+	int rsoc = 0;
+
+	ret = bq27x00_read(BQ27x00_REG_RSOC, &rsoc, 1, di);
+	if (ret) {
+		dev_err(di->dev, "error reading relative State-of-Charge\n");
+		return ret;
+	}
+
+	return rsoc >> 8;
+}
+
+#define to_bq27x00_device_info(x) container_of((x), \
+				struct bq27x00_device_info, bat);
+
+static int bq27x00_battery_get_property(struct power_supply *psy,
+					enum power_supply_property psp,
+					union power_supply_propval *val)
+{
+	struct bq27x00_device_info *di = to_bq27x00_device_info(psy);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+	case POWER_SUPPLY_PROP_PRESENT:
+		val->intval = bq27x00_battery_voltage(di);
+		if (psp == POWER_SUPPLY_PROP_PRESENT)
+			val->intval = val->intval <= 0 ? 0 : 1;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		val->intval = bq27x00_battery_current(di);
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		val->intval = bq27x00_battery_rsoc(di);
+		break;
+	case POWER_SUPPLY_PROP_TEMP:
+		val->intval = bq27x00_battery_temperature(di);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void bq27x00_powersupply_init(struct bq27x00_device_info *di)
+{
+	di->bat.type = POWER_SUPPLY_TYPE_BATTERY;
+	di->bat.properties = bq27x00_battery_props;
+	di->bat.num_properties = ARRAY_SIZE(bq27x00_battery_props);
+	di->bat.get_property = bq27x00_battery_get_property;
+	di->bat.external_power_changed = NULL;
+}
+
+/*
+ * BQ27200 specific code
+ */
+
+static int bq27200_read(u8 reg, int *rt_value, int b_single,
+			struct bq27x00_device_info *di)
+{
+	struct i2c_client *client = di->client;
+	struct i2c_msg msg[1];
+	unsigned char data[2];
+	int err;
+
+	if (!client->adapter)
+		return -ENODEV;
+
+	msg->addr = client->addr;
+	msg->flags = 0;
+	msg->len = 1;
+	msg->buf = data;
+
+	data[0] = reg;
+	err = i2c_transfer(client->adapter, msg, 1);
+
+	if (err >= 0) {
+		if (!b_single)
+			msg->len = 2;
+		else
+			msg->len = 1;
+
+		msg->flags = I2C_M_RD;
+		err = i2c_transfer(client->adapter, msg, 1);
+		if (err >= 0) {
+			if (!b_single)
+				*rt_value = data[1] | HIGH_BYTE(data[0]);
+			else
+				*rt_value = data[0];
+
+			return 0;
+		}
+	}
+	return err;
+}
+
+static int bq27200_battery_probe(struct i2c_client *client,
+				 const struct i2c_device_id *id)
+{
+	char *name;
+	struct bq27x00_device_info *di;
+	struct bq27x00_access_methods *bus;
+	int num;
+	int retval = 0;
+
+	/* Get new ID for the new battery device */
+	retval = idr_pre_get(&battery_id, GFP_KERNEL);
+	if (retval == 0)
+		return -ENOMEM;
+	mutex_lock(&battery_mutex);
+	retval = idr_get_new(&battery_id, client, &num);
+	mutex_unlock(&battery_mutex);
+	if (retval < 0)
+		return retval;
+
+	name = kasprintf(GFP_KERNEL, "bq27200-%d", num);
+	if (!name) {
+		dev_err(&client->dev, "failed to allocate device name\n");
+		retval = -ENOMEM;
+		goto batt_failed_1;
+	}
+
+	di = kzalloc(sizeof(*di), GFP_KERNEL);
+	if (!di) {
+		dev_err(&client->dev, "failed to allocate device info data\n");
+		retval = -ENOMEM;
+		goto batt_failed_2;
+	}
+	di->id = num;
+
+	bus = kzalloc(sizeof(*bus), GFP_KERNEL);
+	if (!bus) {
+		dev_err(&client->dev, "failed to allocate access method "
+					"data\n");
+		retval = -ENOMEM;
+		goto batt_failed_3;
+	}
+
+	i2c_set_clientdata(client, di);
+	di->dev = &client->dev;
+	di->bat.name = name;
+	bus->read = &bq27200_read;
+	di->bus = bus;
+	di->client = client;
+
+	bq27x00_powersupply_init(di);
+
+	retval = power_supply_register(&client->dev, &di->bat);
+	if (retval) {
+		dev_err(&client->dev, "failed to register battery\n");
+		goto batt_failed_4;
+	}
+
+	dev_info(&client->dev, "support ver. %s enabled\n", DRIVER_VERSION);
+
+	return 0;
+
+batt_failed_4:
+	kfree(bus);
+batt_failed_3:
+	kfree(di);
+batt_failed_2:
+	kfree(name);
+batt_failed_1:
+	mutex_lock(&battery_mutex);
+	idr_remove(&battery_id, num);
+	mutex_unlock(&battery_mutex);
+
+	return retval;
+}
+
+static int bq27200_battery_remove(struct i2c_client *client)
+{
+	struct bq27x00_device_info *di = i2c_get_clientdata(client);
+
+	power_supply_unregister(&di->bat);
+
+	kfree(di->bat.name);
+
+	mutex_lock(&battery_mutex);
+	idr_remove(&battery_id, di->id);
+	mutex_unlock(&battery_mutex);
+
+	kfree(di);
+
+	return 0;
+}
+
+/*
+ * Module stuff
+ */
+
+static const struct i2c_device_id bq27200_id[] = {
+	{ "bq27200", 0 },
+	{},
+};
+
+static struct i2c_driver bq27200_battery_driver = {
+	.driver = {
+		.name = "bq27200-battery",
+	},
+	.probe = bq27200_battery_probe,
+	.remove = bq27200_battery_remove,
+	.id_table = bq27200_id,
+};
+
+static int __init bq27x00_battery_init(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&bq27200_battery_driver);
+	if (ret)
+		printk(KERN_ERR "Unable to register BQ27200 driver\n");
+
+	return ret;
+}
+module_init(bq27x00_battery_init);
+
+static void __exit bq27x00_battery_exit(void)
+{
+	i2c_del_driver(&bq27200_battery_driver);
+}
+module_exit(bq27x00_battery_exit);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("BQ27x00 battery monitor driver");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 31db6e9ea1dbdcf66b8227b4f7035dee1b1dd8c0 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 29 Aug 2008 07:19:50 +0400
Subject: [PATCH 040/895] [JFFS2] Move JFFS2 config options out of fs/Kconfig

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 fs/Kconfig       | 190 +----------------------------------------------
 fs/jffs2/Kconfig | 188 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 189 insertions(+), 189 deletions(-)
 create mode 100644 fs/jffs2/Kconfig

diff --git a/fs/Kconfig b/fs/Kconfig
index d3873583360b..5831f9c38841 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1136,195 +1136,7 @@ config EFS_FS
 	  To compile the EFS file system support as a module, choose M here: the
 	  module will be called efs.
 
-config JFFS2_FS
-	tristate "Journalling Flash File System v2 (JFFS2) support"
-	select CRC32
-	depends on MTD
-	help
-	  JFFS2 is the second generation of the Journalling Flash File System
-	  for use on diskless embedded devices. It provides improved wear
-	  levelling, compression and support for hard links. You cannot use
-	  this on normal block devices, only on 'MTD' devices.
-
-	  Further information on the design and implementation of JFFS2 is
-	  available at <http://sources.redhat.com/jffs2/>.
-
-config JFFS2_FS_DEBUG
-	int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
-	depends on JFFS2_FS
-	default "0"
-	help
-	  This controls the amount of debugging messages produced by the JFFS2
-	  code. Set it to zero for use in production systems. For evaluation,
-	  testing and debugging, it's advisable to set it to one. This will
-	  enable a few assertions and will print debugging messages at the
-	  KERN_DEBUG loglevel, where they won't normally be visible. Level 2
-	  is unlikely to be useful - it enables extra debugging in certain
-	  areas which at one point needed debugging, but when the bugs were
-	  located and fixed, the detailed messages were relegated to level 2.
-
-	  If reporting bugs, please try to have available a full dump of the
-	  messages at debug level 1 while the misbehaviour was occurring.
-
-config JFFS2_FS_WRITEBUFFER
-	bool "JFFS2 write-buffering support"
-	depends on JFFS2_FS
-	default y
-	help
-	  This enables the write-buffering support in JFFS2.
-
-	  This functionality is required to support JFFS2 on the following
-	  types of flash devices:
-	    - NAND flash
-	    - NOR flash with transparent ECC
-	    - DataFlash
-
-config JFFS2_FS_WBUF_VERIFY
-	bool "Verify JFFS2 write-buffer reads"
-	depends on JFFS2_FS_WRITEBUFFER
-	default n
-	help
-	  This causes JFFS2 to read back every page written through the
-	  write-buffer, and check for errors.
-
-config JFFS2_SUMMARY
-	bool "JFFS2 summary support (EXPERIMENTAL)"
-	depends on JFFS2_FS && EXPERIMENTAL
-	default n
-	help
-	  This feature makes it possible to use summary information
-	  for faster filesystem mount.
-
-	  The summary information can be inserted into a filesystem image
-	  by the utility 'sumtool'.
-
-	  If unsure, say 'N'.
-
-config JFFS2_FS_XATTR
-	bool "JFFS2 XATTR support (EXPERIMENTAL)"
-	depends on JFFS2_FS && EXPERIMENTAL
-	default n
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-config JFFS2_FS_POSIX_ACL
-	bool "JFFS2 POSIX Access Control Lists"
-	depends on JFFS2_FS_XATTR
-	default y
-	select FS_POSIX_ACL
-	help
-	  Posix Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the Posix ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config JFFS2_FS_SECURITY
-	bool "JFFS2 Security Labels"
-	depends on JFFS2_FS_XATTR
-	default y
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the jffs2 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config JFFS2_COMPRESSION_OPTIONS
-	bool "Advanced compression options for JFFS2"
-	depends on JFFS2_FS
-	default n
-	help
-	  Enabling this option allows you to explicitly choose which
-	  compression modules, if any, are enabled in JFFS2. Removing
-	  compressors can mean you cannot read existing file systems,
-	  and enabling experimental compressors can mean that you
-	  write a file system which cannot be read by a standard kernel.
-
-	  If unsure, you should _definitely_ say 'N'.
-
-config JFFS2_ZLIB
-	bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
-	select ZLIB_INFLATE
-	select ZLIB_DEFLATE
-	depends on JFFS2_FS
-	default y
-	help
-	  Zlib is designed to be a free, general-purpose, legally unencumbered,
-	  lossless data-compression library for use on virtually any computer
-	  hardware and operating system. See <http://www.gzip.org/zlib/> for
-	  further information.
-
-	  Say 'Y' if unsure.
-
-config JFFS2_LZO
-	bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
-	select LZO_COMPRESS
-	select LZO_DECOMPRESS
-	depends on JFFS2_FS
-	default n
-	help
-	  minilzo-based compression. Generally works better than Zlib.
-
-	  This feature was added in July, 2007. Say 'N' if you need
-	  compatibility with older bootloaders or kernels.
-
-config JFFS2_RTIME
-	bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
-	depends on JFFS2_FS
-	default y
-	help
-	  Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
-
-config JFFS2_RUBIN
-	bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
-	depends on JFFS2_FS
-	default n
-	help
-	  RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
-
-choice
-	prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
-	default JFFS2_CMODE_PRIORITY
-	depends on JFFS2_FS
-	help
-	  You can set here the default compression mode of JFFS2 from
-	  the available compression modes. Don't touch if unsure.
-
-config JFFS2_CMODE_NONE
-	bool "no compression"
-	help
-	  Uses no compression.
-
-config JFFS2_CMODE_PRIORITY
-	bool "priority"
-	help
-	  Tries the compressors in a predefined order and chooses the first
-	  successful one.
-
-config JFFS2_CMODE_SIZE
-	bool "size (EXPERIMENTAL)"
-	help
-	  Tries all compressors and chooses the one which has the smallest
-	  result.
-
-config JFFS2_CMODE_FAVOURLZO
-	bool "Favour LZO"
-	help
-	  Tries all compressors and chooses the one which has the smallest
-	  result but gives some preference to LZO (which has faster
-	  decompression) at the expense of size.
-
-endchoice
-
+source "fs/jffs2/Kconfig"
 # UBIFS File system configuration
 source "fs/ubifs/Kconfig"
 
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
new file mode 100644
index 000000000000..6ae169cd8faa
--- /dev/null
+++ b/fs/jffs2/Kconfig
@@ -0,0 +1,188 @@
+config JFFS2_FS
+	tristate "Journalling Flash File System v2 (JFFS2) support"
+	select CRC32
+	depends on MTD
+	help
+	  JFFS2 is the second generation of the Journalling Flash File System
+	  for use on diskless embedded devices. It provides improved wear
+	  levelling, compression and support for hard links. You cannot use
+	  this on normal block devices, only on 'MTD' devices.
+
+	  Further information on the design and implementation of JFFS2 is
+	  available at <http://sources.redhat.com/jffs2/>.
+
+config JFFS2_FS_DEBUG
+	int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
+	depends on JFFS2_FS
+	default "0"
+	help
+	  This controls the amount of debugging messages produced by the JFFS2
+	  code. Set it to zero for use in production systems. For evaluation,
+	  testing and debugging, it's advisable to set it to one. This will
+	  enable a few assertions and will print debugging messages at the
+	  KERN_DEBUG loglevel, where they won't normally be visible. Level 2
+	  is unlikely to be useful - it enables extra debugging in certain
+	  areas which at one point needed debugging, but when the bugs were
+	  located and fixed, the detailed messages were relegated to level 2.
+
+	  If reporting bugs, please try to have available a full dump of the
+	  messages at debug level 1 while the misbehaviour was occurring.
+
+config JFFS2_FS_WRITEBUFFER
+	bool "JFFS2 write-buffering support"
+	depends on JFFS2_FS
+	default y
+	help
+	  This enables the write-buffering support in JFFS2.
+
+	  This functionality is required to support JFFS2 on the following
+	  types of flash devices:
+	    - NAND flash
+	    - NOR flash with transparent ECC
+	    - DataFlash
+
+config JFFS2_FS_WBUF_VERIFY
+	bool "Verify JFFS2 write-buffer reads"
+	depends on JFFS2_FS_WRITEBUFFER
+	default n
+	help
+	  This causes JFFS2 to read back every page written through the
+	  write-buffer, and check for errors.
+
+config JFFS2_SUMMARY
+	bool "JFFS2 summary support (EXPERIMENTAL)"
+	depends on JFFS2_FS && EXPERIMENTAL
+	default n
+	help
+	  This feature makes it possible to use summary information
+	  for faster filesystem mount.
+
+	  The summary information can be inserted into a filesystem image
+	  by the utility 'sumtool'.
+
+	  If unsure, say 'N'.
+
+config JFFS2_FS_XATTR
+	bool "JFFS2 XATTR support (EXPERIMENTAL)"
+	depends on JFFS2_FS && EXPERIMENTAL
+	default n
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+config JFFS2_FS_POSIX_ACL
+	bool "JFFS2 POSIX Access Control Lists"
+	depends on JFFS2_FS_XATTR
+	default y
+	select FS_POSIX_ACL
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the Posix ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N
+
+config JFFS2_FS_SECURITY
+	bool "JFFS2 Security Labels"
+	depends on JFFS2_FS_XATTR
+	default y
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux.  This option
+	  enables an extended attribute handler for file security
+	  labels in the jffs2 filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
+
+config JFFS2_COMPRESSION_OPTIONS
+	bool "Advanced compression options for JFFS2"
+	depends on JFFS2_FS
+	default n
+	help
+	  Enabling this option allows you to explicitly choose which
+	  compression modules, if any, are enabled in JFFS2. Removing
+	  compressors can mean you cannot read existing file systems,
+	  and enabling experimental compressors can mean that you
+	  write a file system which cannot be read by a standard kernel.
+
+	  If unsure, you should _definitely_ say 'N'.
+
+config JFFS2_ZLIB
+	bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
+	select ZLIB_INFLATE
+	select ZLIB_DEFLATE
+	depends on JFFS2_FS
+	default y
+	help
+	  Zlib is designed to be a free, general-purpose, legally unencumbered,
+	  lossless data-compression library for use on virtually any computer
+	  hardware and operating system. See <http://www.gzip.org/zlib/> for
+	  further information.
+
+	  Say 'Y' if unsure.
+
+config JFFS2_LZO
+	bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
+	select LZO_COMPRESS
+	select LZO_DECOMPRESS
+	depends on JFFS2_FS
+	default n
+	help
+	  minilzo-based compression. Generally works better than Zlib.
+
+	  This feature was added in July, 2007. Say 'N' if you need
+	  compatibility with older bootloaders or kernels.
+
+config JFFS2_RTIME
+	bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
+	depends on JFFS2_FS
+	default y
+	help
+	  Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
+
+config JFFS2_RUBIN
+	bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
+	depends on JFFS2_FS
+	default n
+	help
+	  RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
+
+choice
+	prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
+	default JFFS2_CMODE_PRIORITY
+	depends on JFFS2_FS
+	help
+	  You can set here the default compression mode of JFFS2 from
+	  the available compression modes. Don't touch if unsure.
+
+config JFFS2_CMODE_NONE
+	bool "no compression"
+	help
+	  Uses no compression.
+
+config JFFS2_CMODE_PRIORITY
+	bool "priority"
+	help
+	  Tries the compressors in a predefined order and chooses the first
+	  successful one.
+
+config JFFS2_CMODE_SIZE
+	bool "size (EXPERIMENTAL)"
+	help
+	  Tries all compressors and chooses the one which has the smallest
+	  result.
+
+config JFFS2_CMODE_FAVOURLZO
+	bool "Favour LZO"
+	help
+	  Tries all compressors and chooses the one which has the smallest
+	  result but gives some preference to LZO (which has faster
+	  decompression) at the expense of size.
+
+endchoice
-- 
GitLab


From 3fc678a0e63138f56109ea31850f19b2e29c45b8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 27 Aug 2008 14:48:32 +0100
Subject: [PATCH 041/895] CRED: Wrap task credential accesses in the JFFS2
 filesystem

Wrap access to task credentials so that they can be separated more easily from
the task_struct during the introduction of COW creds.

Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id().

Change some task->e?[ug]id to task_e?[ug]id().  In some places it makes more
sense to use RCU directly rather than a convenient wrapper; these will be
addressed by later patches.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 fs/jffs2/fs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 89e9b735d8df..249305d65d5b 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -442,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
 
 	memset(ri, 0, sizeof(*ri));
 	/* Set OS-specific defaults for new inodes */
-	ri->uid = cpu_to_je16(current->fsuid);
+	ri->uid = cpu_to_je16(current_fsuid());
 
 	if (dir_i->i_mode & S_ISGID) {
 		ri->gid = cpu_to_je16(dir_i->i_gid);
 		if (S_ISDIR(mode))
 			mode |= S_ISGID;
 	} else {
-		ri->gid = cpu_to_je16(current->fsgid);
+		ri->gid = cpu_to_je16(current_fsgid());
 	}
 
 	/* POSIX ACLs have to be processed now, at least partly.
-- 
GitLab


From 4262bd2981307258b31e15f1a526d2b3884e77b5 Mon Sep 17 00:00:00 2001
From: Semun Lee <semun.lee@samsung.com>
Date: Mon, 1 Sep 2008 11:49:27 +0100
Subject: [PATCH 042/895] [MTD] [NAND] pxa3xx_nand_flash: Add definition of
 STM2GbX16 NAND flashes

Signed-off-by: Semun Lee <semun.lee@samsung.com>
Acked-by: Eric Miao <eric.miao@marvell.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/pxa3xx_nand.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index a64ad15b8fdd..0cd213c8e69f 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -291,10 +291,33 @@ static struct pxa3xx_nand_flash micron1GbX16 = {
 	.chip_id	= 0xb12c,
 };
 
+static struct pxa3xx_nand_timing stm2GbX16_timing = {
+	.tCH = 10,
+	.tCS = 35,
+	.tWH = 15,
+	.tWP = 25,
+	.tRH = 15,
+	.tRP = 25,
+	.tR = 25000,
+	.tWHR = 60,
+	.tAR = 10,
+};
+
+static struct pxa3xx_nand_flash stm2GbX16 = {
+	.timing = &stm2GbX16_timing,
+	.page_per_block = 64,
+	.page_size = 2048,
+	.flash_width = 16,
+	.dfc_width = 16,
+	.num_blocks = 2048,
+	.chip_id = 0xba20,
+};
+
 static struct pxa3xx_nand_flash *builtin_flash_types[] = {
 	&samsung512MbX16,
 	&micron1GbX8,
 	&micron1GbX16,
+	&stm2GbX16,
 };
 
 #define NDTR0_tCH(c)	(min((c), 7) << 19)
-- 
GitLab


From 5e706469a0518ec640a122aa5da22035e2af003a Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Mon, 1 Sep 2008 12:21:05 +0100
Subject: [PATCH 043/895] [MTD] [NOR] Select MTD_CFI_UTIL when MTD_CFI probe
 routine is enabled

It requires cfi_qry_mode_on(), which is in cfi_util.c

Reported by Russell King

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig
index 9401bfec4623..9408099eec48 100644
--- a/drivers/mtd/chips/Kconfig
+++ b/drivers/mtd/chips/Kconfig
@@ -6,6 +6,7 @@ menu "RAM/ROM/Flash chip drivers"
 config MTD_CFI
 	tristate "Detect flash chips by Common Flash Interface (CFI) probe"
 	select MTD_GEN_PROBE
+	select MTD_CFI_UTIL
 	help
 	  The Common Flash Interface specification was developed by Intel,
 	  AMD and other flash manufactures that provides a universal method
-- 
GitLab


From 43035338ad772b6a4097b2ac530b75390bee87c1 Mon Sep 17 00:00:00 2001
From: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Date: Fri, 29 Aug 2008 12:57:28 +0200
Subject: [PATCH 044/895] [MTD] [NAND] pxa3xx_nand: moved nand definitions into
 shared platform header

This patch moves the exported datastructures from the pxa3xx_nand.c driver
into the <mach/pxa3xx_nand.h> header. This is a plain movement without
any modification of the attributes.

This is the first one of a set of patches which:

 * allows to specify used NAND flash in the platform code and allows to turn
   off the old way to specify NAND characteristics in the driver.  This way did
   not worked well as these characteristics depend on the platform and can not be
   derived from NAND id alone.

   E.g.  some NAND chips share the same ID (e.g.  K9K8G08U0A and K9NBG08U5A) but
   have different timings (which are written in the common driver currently and
   must be modified there).

 * adds 'const' annotations at various places

Further patches will be sent to the mtd-list.

Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/arm/mach-pxa/include/mach/pxa3xx_nand.h | 44 ++++++++++++++++++++
 drivers/mtd/nand/pxa3xx_nand.c               | 44 --------------------
 2 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
index eb4b190b6657..0dc50d82ea73 100644
--- a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
+++ b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
@@ -4,6 +4,50 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 
+struct pxa3xx_nand_timing {
+	unsigned int	tCH;  /* Enable signal hold time */
+	unsigned int	tCS;  /* Enable signal setup time */
+	unsigned int	tWH;  /* ND_nWE high duration */
+	unsigned int	tWP;  /* ND_nWE pulse time */
+	unsigned int	tRH;  /* ND_nRE high duration */
+	unsigned int	tRP;  /* ND_nRE pulse width */
+	unsigned int	tR;   /* ND_nWE high to ND_nRE low for read */
+	unsigned int	tWHR; /* ND_nWE high to ND_nRE low for status read */
+	unsigned int	tAR;  /* ND_ALE low to ND_nRE low delay */
+};
+
+struct pxa3xx_nand_cmdset {
+	uint16_t	read1;
+	uint16_t	read2;
+	uint16_t	program;
+	uint16_t	read_status;
+	uint16_t	read_id;
+	uint16_t	erase;
+	uint16_t	reset;
+	uint16_t	lock;
+	uint16_t	unlock;
+	uint16_t	lock_status;
+};
+
+struct pxa3xx_nand_flash {
+	struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
+	struct pxa3xx_nand_cmdset *cmdset;
+
+	uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
+	uint32_t page_size;	/* Page size in bytes (PAGE_SZ) */
+	uint32_t flash_width;	/* Width of Flash memory (DWIDTH_M) */
+	uint32_t dfc_width;	/* Width of flash controller(DWIDTH_C) */
+	uint32_t num_blocks;	/* Number of physical blocks in Flash */
+	uint32_t chip_id;
+
+	/* NOTE: these are automatically calculated, do not define */
+	size_t		oob_size;
+	size_t		read_id_bytes;
+
+	unsigned int	col_addr_cycles;
+	unsigned int	row_addr_cycles;
+};
+
 struct pxa3xx_nand_platform_data {
 
 	/* the data flash bus is shared between the Static Memory
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 0cd213c8e69f..203e8efefb30 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -115,50 +115,6 @@ enum {
 	STATE_PIO_WRITING,
 };
 
-struct pxa3xx_nand_timing {
-	unsigned int	tCH;  /* Enable signal hold time */
-	unsigned int	tCS;  /* Enable signal setup time */
-	unsigned int	tWH;  /* ND_nWE high duration */
-	unsigned int	tWP;  /* ND_nWE pulse time */
-	unsigned int	tRH;  /* ND_nRE high duration */
-	unsigned int	tRP;  /* ND_nRE pulse width */
-	unsigned int	tR;   /* ND_nWE high to ND_nRE low for read */
-	unsigned int	tWHR; /* ND_nWE high to ND_nRE low for status read */
-	unsigned int	tAR;  /* ND_ALE low to ND_nRE low delay */
-};
-
-struct pxa3xx_nand_cmdset {
-	uint16_t	read1;
-	uint16_t	read2;
-	uint16_t	program;
-	uint16_t	read_status;
-	uint16_t	read_id;
-	uint16_t	erase;
-	uint16_t	reset;
-	uint16_t	lock;
-	uint16_t	unlock;
-	uint16_t	lock_status;
-};
-
-struct pxa3xx_nand_flash {
-	struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
-	struct pxa3xx_nand_cmdset *cmdset;
-
-	uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
-	uint32_t page_size;	/* Page size in bytes (PAGE_SZ) */
-	uint32_t flash_width;	/* Width of Flash memory (DWIDTH_M) */
-	uint32_t dfc_width;	/* Width of flash controller(DWIDTH_C) */
-	uint32_t num_blocks;	/* Number of physical blocks in Flash */
-	uint32_t chip_id;
-
-	/* NOTE: these are automatically calculated, do not define */
-	size_t		oob_size;
-	size_t		read_id_bytes;
-
-	unsigned int	col_addr_cycles;
-	unsigned int	row_addr_cycles;
-};
-
 struct pxa3xx_nand_info {
 	struct nand_chip	nand_chip;
 
-- 
GitLab


From c8ac3f818e1183eab8d08a41b01b6078c5df4b43 Mon Sep 17 00:00:00 2001
From: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Date: Fri, 29 Aug 2008 12:59:48 +0200
Subject: [PATCH 045/895] [MTD] [NAND] pxa3xx_nand: allow to define flash types
 in the platform data

This patch adds 'flash' and 'num_flash' attributes to the platform data.
There was added code in the driver to iterate across these attributes in the
detect-flash routine.  This is done similarly to the existing method
which uses a 'builtin_flash_types' field.

Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/arm/mach-pxa/include/mach/pxa3xx_nand.h |  3 +++
 drivers/mtd/nand/pxa3xx_nand.c               | 18 ++++++++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
index 0dc50d82ea73..6ac9aea0b0b5 100644
--- a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
+++ b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
@@ -58,6 +58,9 @@ struct pxa3xx_nand_platform_data {
 
 	struct mtd_partition *parts;
 	unsigned int	nr_parts;
+
+	struct pxa3xx_nand_flash * const	flash;
+	size_t					num_flash;
 };
 
 extern void pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info);
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 203e8efefb30..1906aba7e73c 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -911,12 +911,26 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
 	return 0;
 }
 
-static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info)
+static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
+				    const struct pxa3xx_nand_platform_data *pdata)
 {
 	struct pxa3xx_nand_flash *f;
 	uint32_t id;
 	int i;
 
+	for (i = 0; i<pdata->num_flash; ++i) {
+		f = pdata->flash + i;
+
+		if (pxa3xx_nand_config_flash(info, f))
+			continue;
+
+		if (__readid(info, &id))
+			continue;
+
+		if (id == f->chip_id)
+			return 0;
+	}
+
 	for (i = 0; i < ARRAY_SIZE(builtin_flash_types); i++) {
 
 		f = builtin_flash_types[i];
@@ -1114,7 +1128,7 @@ static int pxa3xx_nand_probe(struct platform_device *pdev)
 		goto fail_free_buf;
 	}
 
-	ret = pxa3xx_nand_detect_flash(info);
+	ret = pxa3xx_nand_detect_flash(info, pdata);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to detect flash\n");
 		ret = -ENODEV;
-- 
GitLab


From 80ebf20f34c30760cfba7b5e0a418241181d2cd9 Mon Sep 17 00:00:00 2001
From: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Date: Fri, 29 Aug 2008 12:59:49 +0200
Subject: [PATCH 046/895] [MTD] [NAND] pxa3xx_nand: allow to disable builtin
 flash-type table

This patch adds a MTD_NAND_PXA3xx_BUILTIN configuration variables which
allows to disable usage of builtin flash-type table.  Not enabling this
option saves some space in the generated driver.

Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/Kconfig       | 7 +++++++
 drivers/mtd/nand/pxa3xx_nand.c | 4 ++++
 2 files changed, 11 insertions(+)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 8eb2b06cf0d9..6eebe852b9b3 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -334,6 +334,13 @@ config MTD_NAND_PXA3xx
 	  This enables the driver for the NAND flash device found on
 	  PXA3xx processors
 
+config MTD_NAND_PXA3xx_BUILTIN
+	bool "Use builtin definitions for some NAND chips (deprecated)"
+	depends on MTD_NAND_PXA3xx
+	help
+	  This enables builtin definitions for some NAND chips. This
+	  is deprecated in favor of platform specific data.
+
 config MTD_NAND_CM_X270
 	tristate "Support for NAND Flash on CM-X270 modules"
 	depends on MTD_NAND && MACH_ARMCORE
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 1906aba7e73c..e492804b3d98 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -164,6 +164,7 @@ static int use_dma = 1;
 module_param(use_dma, bool, 0444);
 MODULE_PARM_DESC(use_dma, "enable DMA for data transfering to/from NAND HW");
 
+#ifdef CONFIG_MTD_NAND_PXA3xx_BUILTIN
 static struct pxa3xx_nand_cmdset smallpage_cmdset = {
 	.read1		= 0x0000,
 	.read2		= 0x0050,
@@ -275,6 +276,7 @@ static struct pxa3xx_nand_flash *builtin_flash_types[] = {
 	&micron1GbX16,
 	&stm2GbX16,
 };
+#endif /* CONFIG_MTD_NAND_PXA3xx_BUILTIN */
 
 #define NDTR0_tCH(c)	(min((c), 7) << 19)
 #define NDTR0_tCS(c)	(min((c), 7) << 16)
@@ -931,6 +933,7 @@ static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
 			return 0;
 	}
 
+#ifdef CONFIG_MTD_NAND_PXA3xx_BUILTIN
 	for (i = 0; i < ARRAY_SIZE(builtin_flash_types); i++) {
 
 		f = builtin_flash_types[i];
@@ -944,6 +947,7 @@ static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
 		if (id == f->chip_id)
 			return 0;
 	}
+#endif
 
 	return -ENODEV;
 }
-- 
GitLab


From 7dad482ed0648a40e403d1ed44e0ea92248632f1 Mon Sep 17 00:00:00 2001
From: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Date: Fri, 29 Aug 2008 12:59:50 +0200
Subject: [PATCH 047/895] [MTD] [NAND] pxa3xx_nand: added some 'const'
 annotations to the exported API

This patch marks some attributes as 'const' which are set only once and
never be modified by the driver.  There are some changes in parameter
list and variable declarations too which mark them as 'const'.

Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/arm/mach-pxa/include/mach/pxa3xx_nand.h |  8 ++++----
 drivers/mtd/nand/pxa3xx_nand.c               | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
index 6ac9aea0b0b5..cfcb2e21aeeb 100644
--- a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
+++ b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
@@ -30,8 +30,8 @@ struct pxa3xx_nand_cmdset {
 };
 
 struct pxa3xx_nand_flash {
-	struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
-	struct pxa3xx_nand_cmdset *cmdset;
+	const struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
+	const struct pxa3xx_nand_cmdset *cmdset;
 
 	uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
 	uint32_t page_size;	/* Page size in bytes (PAGE_SZ) */
@@ -56,8 +56,8 @@ struct pxa3xx_nand_platform_data {
 	 */
 	int	enable_arbiter;
 
-	struct mtd_partition *parts;
-	unsigned int	nr_parts;
+	const struct mtd_partition		*parts;
+	unsigned int				nr_parts;
 
 	struct pxa3xx_nand_flash * const	flash;
 	size_t					num_flash;
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index e492804b3d98..af174054656e 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -293,7 +293,7 @@ static struct pxa3xx_nand_flash *builtin_flash_types[] = {
 #define ns2cycle(ns, clk)	(int)(((ns) * (clk / 1000000) / 1000) + 1)
 
 static void pxa3xx_nand_set_timing(struct pxa3xx_nand_info *info,
-				   struct pxa3xx_nand_timing *t)
+				   const struct pxa3xx_nand_timing *t)
 {
 	unsigned long nand_clk = clk_get_rate(info->clk);
 	uint32_t ndtr0, ndtr1;
@@ -336,7 +336,7 @@ static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
 			uint16_t cmd, int column, int page_addr)
 {
 	struct pxa3xx_nand_flash *f = info->flash_info;
-	struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
+	const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
 
 	/* calculate data size */
 	switch (f->page_size) {
@@ -387,7 +387,7 @@ static int prepare_erase_cmd(struct pxa3xx_nand_info *info,
 
 static int prepare_other_cmd(struct pxa3xx_nand_info *info, uint16_t cmd)
 {
-	struct pxa3xx_nand_cmdset *cmdset = info->flash_info->cmdset;
+	const struct pxa3xx_nand_cmdset *cmdset = info->flash_info->cmdset;
 
 	info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
 	info->ndcb1 = 0;
@@ -623,7 +623,7 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 {
 	struct pxa3xx_nand_info *info = mtd->priv;
 	struct pxa3xx_nand_flash *flash_info = info->flash_info;
-	struct pxa3xx_nand_cmdset *cmdset = flash_info->cmdset;
+	const struct pxa3xx_nand_cmdset *cmdset = flash_info->cmdset;
 	int ret;
 
 	info->use_dma = (use_dma) ? 1 : 0;
@@ -843,7 +843,7 @@ static int pxa3xx_nand_ecc_correct(struct mtd_info *mtd,
 static int __readid(struct pxa3xx_nand_info *info, uint32_t *id)
 {
 	struct pxa3xx_nand_flash *f = info->flash_info;
-	struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
+	const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
 	uint32_t ndcr;
 	uint8_t  id_buff[8];
 
-- 
GitLab


From c8c17c888d936c58ceb28b084a6272d67e10ea28 Mon Sep 17 00:00:00 2001
From: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Date: Fri, 29 Aug 2008 12:59:51 +0200
Subject: [PATCH 048/895] [MTD] [NAND] pxa3xx_nand: moved some helper variables
 out from platform data

This patch moves some attributes out from the platform data into the
dynamically created nand device.   This results into a cleaner interface
and allows to use constant pxa3xx_nand_flash definitions.

Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/arm/mach-pxa/include/mach/pxa3xx_nand.h |  9 +---
 drivers/mtd/nand/pxa3xx_nand.c               | 43 ++++++++++++--------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
index cfcb2e21aeeb..eb35fca9aea5 100644
--- a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
+++ b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
@@ -39,13 +39,6 @@ struct pxa3xx_nand_flash {
 	uint32_t dfc_width;	/* Width of flash controller(DWIDTH_C) */
 	uint32_t num_blocks;	/* Number of physical blocks in Flash */
 	uint32_t chip_id;
-
-	/* NOTE: these are automatically calculated, do not define */
-	size_t		oob_size;
-	size_t		read_id_bytes;
-
-	unsigned int	col_addr_cycles;
-	unsigned int	row_addr_cycles;
 };
 
 struct pxa3xx_nand_platform_data {
@@ -59,7 +52,7 @@ struct pxa3xx_nand_platform_data {
 	const struct mtd_partition		*parts;
 	unsigned int				nr_parts;
 
-	struct pxa3xx_nand_flash * const	flash;
+	const struct pxa3xx_nand_flash * 	flash;
 	size_t					num_flash;
 };
 
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index af174054656e..bc37f551edf5 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -119,7 +119,7 @@ struct pxa3xx_nand_info {
 	struct nand_chip	nand_chip;
 
 	struct platform_device	 *pdev;
-	struct pxa3xx_nand_flash *flash_info;
+	const struct pxa3xx_nand_flash *flash_info;
 
 	struct clk		*clk;
 	void __iomem		*mmio_base;
@@ -158,6 +158,13 @@ struct pxa3xx_nand_info {
 	uint32_t		ndcb0;
 	uint32_t		ndcb1;
 	uint32_t		ndcb2;
+
+	/* calculated from pxa3xx_nand_flash data */
+	size_t		oob_size;
+	size_t		read_id_bytes;
+
+	unsigned int	col_addr_cycles;
+	unsigned int	row_addr_cycles;
 };
 
 static int use_dma = 1;
@@ -335,7 +342,7 @@ static int wait_for_event(struct pxa3xx_nand_info *info, uint32_t event)
 static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
 			uint16_t cmd, int column, int page_addr)
 {
-	struct pxa3xx_nand_flash *f = info->flash_info;
+	const struct pxa3xx_nand_flash *f = info->flash_info;
 	const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
 
 	/* calculate data size */
@@ -354,14 +361,14 @@ static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
 	info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
 	info->ndcb1 = 0;
 	info->ndcb2 = 0;
-	info->ndcb0 |= NDCB0_ADDR_CYC(f->row_addr_cycles + f->col_addr_cycles);
+	info->ndcb0 |= NDCB0_ADDR_CYC(info->row_addr_cycles + info->col_addr_cycles);
 
-	if (f->col_addr_cycles == 2) {
+	if (info->col_addr_cycles == 2) {
 		/* large block, 2 cycles for column address
 		 * row address starts from 3rd cycle
 		 */
 		info->ndcb1 |= (page_addr << 16) | (column & 0xffff);
-		if (f->row_addr_cycles == 3)
+		if (info->row_addr_cycles == 3)
 			info->ndcb2 = (page_addr >> 16) & 0xff;
 	} else
 		/* small block, 1 cycles for column address
@@ -622,7 +629,7 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 				int column, int page_addr)
 {
 	struct pxa3xx_nand_info *info = mtd->priv;
-	struct pxa3xx_nand_flash *flash_info = info->flash_info;
+	const struct pxa3xx_nand_flash *flash_info = info->flash_info;
 	const struct pxa3xx_nand_cmdset *cmdset = flash_info->cmdset;
 	int ret;
 
@@ -701,7 +708,7 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 		info->use_dma = 0;	/* force PIO read */
 		info->buf_start = 0;
 		info->buf_count = (command == NAND_CMD_READID) ?
-				flash_info->read_id_bytes : 1;
+				info->read_id_bytes : 1;
 
 		if (prepare_other_cmd(info, (command == NAND_CMD_READID) ?
 				cmdset->read_id : cmdset->read_status))
@@ -842,7 +849,7 @@ static int pxa3xx_nand_ecc_correct(struct mtd_info *mtd,
 
 static int __readid(struct pxa3xx_nand_info *info, uint32_t *id)
 {
-	struct pxa3xx_nand_flash *f = info->flash_info;
+	const struct pxa3xx_nand_flash *f = info->flash_info;
 	const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
 	uint32_t ndcr;
 	uint8_t  id_buff[8];
@@ -872,7 +879,7 @@ fail_timeout:
 }
 
 static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
-				    struct pxa3xx_nand_flash *f)
+				    const struct pxa3xx_nand_flash *f)
 {
 	struct platform_device *pdev = info->pdev;
 	struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data;
@@ -885,25 +892,25 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
 		return -EINVAL;
 
 	/* calculate flash information */
-	f->oob_size = (f->page_size == 2048) ? 64 : 16;
-	f->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
+	info->oob_size = (f->page_size == 2048) ? 64 : 16;
+	info->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
 
 	/* calculate addressing information */
-	f->col_addr_cycles = (f->page_size == 2048) ? 2 : 1;
+	info->col_addr_cycles = (f->page_size == 2048) ? 2 : 1;
 
 	if (f->num_blocks * f->page_per_block > 65536)
-		f->row_addr_cycles = 3;
+		info->row_addr_cycles = 3;
 	else
-		f->row_addr_cycles = 2;
+		info->row_addr_cycles = 2;
 
 	ndcr |= (pdata->enable_arbiter) ? NDCR_ND_ARB_EN : 0;
-	ndcr |= (f->col_addr_cycles == 2) ? NDCR_RA_START : 0;
+	ndcr |= (info->col_addr_cycles == 2) ? NDCR_RA_START : 0;
 	ndcr |= (f->page_per_block == 64) ? NDCR_PG_PER_BLK : 0;
 	ndcr |= (f->page_size == 2048) ? NDCR_PAGE_SZ : 0;
 	ndcr |= (f->flash_width == 16) ? NDCR_DWIDTH_M : 0;
 	ndcr |= (f->dfc_width == 16) ? NDCR_DWIDTH_C : 0;
 
-	ndcr |= NDCR_RD_ID_CNT(f->read_id_bytes);
+	ndcr |= NDCR_RD_ID_CNT(info->read_id_bytes);
 	ndcr |= NDCR_SPARE_EN; /* enable spare by default */
 
 	info->reg_ndcr = ndcr;
@@ -916,7 +923,7 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
 static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
 				    const struct pxa3xx_nand_platform_data *pdata)
 {
-	struct pxa3xx_nand_flash *f;
+	const struct pxa3xx_nand_flash *f;
 	uint32_t id;
 	int i;
 
@@ -1011,7 +1018,7 @@ static struct nand_ecclayout hw_largepage_ecclayout = {
 static void pxa3xx_nand_init_mtd(struct mtd_info *mtd,
 				 struct pxa3xx_nand_info *info)
 {
-	struct pxa3xx_nand_flash *f = info->flash_info;
+	const struct pxa3xx_nand_flash *f = info->flash_info;
 	struct nand_chip *this = &info->nand_chip;
 
 	this->options = (f->flash_width == 16) ? NAND_BUSWIDTH_16: 0;
-- 
GitLab


From 2675e9447bb5c861dbd29c5fe55b7ce2ad3ff0f5 Mon Sep 17 00:00:00 2001
From: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Date: Fri, 29 Aug 2008 12:59:52 +0200
Subject: [PATCH 049/895] [MTD] [NAND] pxa3xx_nand: added warning which tells
 id of detected NAND

Minor patch to help debugging of NAND detection.

Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/pxa3xx_nand.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index bc37f551edf5..c0fa9c9edf08 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -924,7 +924,7 @@ static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
 				    const struct pxa3xx_nand_platform_data *pdata)
 {
 	const struct pxa3xx_nand_flash *f;
-	uint32_t id;
+	uint32_t id = -1;
 	int i;
 
 	for (i = 0; i<pdata->num_flash; ++i) {
@@ -956,6 +956,9 @@ static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
 	}
 #endif
 
+	dev_warn(&info->pdev->dev,
+		 "failed to detect configured nand flash; found %04x instead of\n",
+		 id);
 	return -ENODEV;
 }
 
-- 
GitLab


From 34f6e15786293e8d6ed05f9c19ed784ff15d2702 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Tue, 2 Sep 2008 17:16:59 +0200
Subject: [PATCH 050/895] [MTD] [NAND] Freescale i.MX2 NAND driver

This patch adds support for the integrated NAND flash controller of the
i.MX2 and i.MX3 family. It is tested on MX27 but should work on MX3
aswell.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Juergen Beisert <j.beisert@pengutronix.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/arm/plat-mxc/include/mach/mxc_nand.h |   27 +
 drivers/mtd/nand/Kconfig                  |    7 +
 drivers/mtd/nand/Makefile                 |    1 +
 drivers/mtd/nand/mxc_nand.c               | 1077 +++++++++++++++++++++
 4 files changed, 1112 insertions(+)
 create mode 100644 arch/arm/plat-mxc/include/mach/mxc_nand.h
 create mode 100644 drivers/mtd/nand/mxc_nand.c

diff --git a/arch/arm/plat-mxc/include/mach/mxc_nand.h b/arch/arm/plat-mxc/include/mach/mxc_nand.h
new file mode 100644
index 000000000000..2b972df22d12
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/mxc_nand.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Sascha Hauer, kernel@pengutronix.de
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __ASM_ARCH_NAND_H
+#define __ASM_ARCH_NAND_H
+
+struct mxc_nand_platform_data {
+	int width;	/* data bus width in bytes */
+	int hw_ecc;	/* 0 if supress hardware ECC */
+};
+#endif /* __ASM_ARCH_NAND_H */
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 6eebe852b9b3..7153854eb83a 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -407,4 +407,11 @@ config MTD_NAND_FSL_UPM
 	  Enables support for NAND Flash chips wired onto Freescale PowerPC
 	  processor localbus with User-Programmable Machine support.
 
+config MTD_NAND_MXC
+	tristate "MXC NAND support"
+	depends on ARCH_MX2
+	help
+	  This enables the driver for the NAND flash controller on the
+	  MXC processors.
+
 endif # MTD_NAND
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 8540c46ffba9..e0fee048c1b4 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -33,5 +33,6 @@ obj-$(CONFIG_MTD_NAND_PASEMI)		+= pasemi_nand.o
 obj-$(CONFIG_MTD_NAND_ORION)		+= orion_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_ELBC)		+= fsl_elbc_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_UPM)		+= fsl_upm.o
+obj-$(CONFIG_MTD_NAND_MXC)		+= mxc_nand.o
 
 nand-objs := nand_base.o nand_bbt.o
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
new file mode 100644
index 000000000000..21fd4f1c4806
--- /dev/null
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -0,0 +1,1077 @@
+/*
+ * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Sascha Hauer, kernel@pengutronix.de
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+
+#include <asm/mach/flash.h>
+#include <mach/mxc_nand.h>
+
+#define DRIVER_NAME "mxc_nand"
+
+/* Addresses for NFC registers */
+#define NFC_BUF_SIZE		0xE00
+#define NFC_BUF_ADDR		0xE04
+#define NFC_FLASH_ADDR		0xE06
+#define NFC_FLASH_CMD		0xE08
+#define NFC_CONFIG		0xE0A
+#define NFC_ECC_STATUS_RESULT	0xE0C
+#define NFC_RSLTMAIN_AREA	0xE0E
+#define NFC_RSLTSPARE_AREA	0xE10
+#define NFC_WRPROT		0xE12
+#define NFC_UNLOCKSTART_BLKADDR	0xE14
+#define NFC_UNLOCKEND_BLKADDR	0xE16
+#define NFC_NF_WRPRST		0xE18
+#define NFC_CONFIG1		0xE1A
+#define NFC_CONFIG2		0xE1C
+
+/* Addresses for NFC RAM BUFFER Main area 0 */
+#define MAIN_AREA0		0x000
+#define MAIN_AREA1		0x200
+#define MAIN_AREA2		0x400
+#define MAIN_AREA3		0x600
+
+/* Addresses for NFC SPARE BUFFER Spare area 0 */
+#define SPARE_AREA0		0x800
+#define SPARE_AREA1		0x810
+#define SPARE_AREA2		0x820
+#define SPARE_AREA3		0x830
+
+/* Set INT to 0, FCMD to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Command operation */
+#define NFC_CMD            0x1
+
+/* Set INT to 0, FADD to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Address operation */
+#define NFC_ADDR           0x2
+
+/* Set INT to 0, FDI to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Input operation */
+#define NFC_INPUT          0x4
+
+/* Set INT to 0, FDO to 001, rest to 0 in NFC_CONFIG2 Register
+ * for Data Output operation */
+#define NFC_OUTPUT         0x8
+
+/* Set INT to 0, FD0 to 010, rest to 0 in NFC_CONFIG2 Register
+ * for Read ID operation */
+#define NFC_ID             0x10
+
+/* Set INT to 0, FDO to 100, rest to 0 in NFC_CONFIG2 Register
+ * for Read Status operation */
+#define NFC_STATUS         0x20
+
+/* Set INT to 1, rest to 0 in NFC_CONFIG2 Register for Read
+ * Status operation */
+#define NFC_INT            0x8000
+
+#define NFC_SP_EN           (1 << 2)
+#define NFC_ECC_EN          (1 << 3)
+#define NFC_INT_MSK         (1 << 4)
+#define NFC_BIG             (1 << 5)
+#define NFC_RST             (1 << 6)
+#define NFC_CE              (1 << 7)
+#define NFC_ONE_CYCLE       (1 << 8)
+
+struct mxc_nand_host {
+	struct mtd_info		mtd;
+	struct nand_chip	nand;
+	struct mtd_partition	*parts;
+	struct device		*dev;
+
+	void __iomem		*regs;
+	int			spare_only;
+	int			status_request;
+	int			pagesize_2k;
+	uint16_t		col_addr;
+	struct clk		*clk;
+	int			clk_act;
+	int			irq;
+
+	wait_queue_head_t	irq_waitq;
+};
+
+/* Define delays in microsec for NAND device operations */
+#define TROP_US_DELAY   2000
+/* Macros to get byte and bit positions of ECC */
+#define COLPOS(x)  ((x) >> 3)
+#define BITPOS(x) ((x) & 0xf)
+
+/* Define single bit Error positions in Main & Spare area */
+#define MAIN_SINGLEBIT_ERROR 0x4
+#define SPARE_SINGLEBIT_ERROR 0x1
+
+/* OOB placement block for use with hardware ecc generation */
+static struct nand_ecclayout nand_hw_eccoob_8 = {
+	.eccbytes = 5,
+	.eccpos = {6, 7, 8, 9, 10},
+	.oobfree = {{0, 5}, {11, 5}, }
+};
+
+static struct nand_ecclayout nand_hw_eccoob_16 = {
+	.eccbytes = 5,
+	.eccpos = {6, 7, 8, 9, 10},
+	.oobfree = {{0, 6}, {12, 4}, }
+};
+
+#ifdef CONFIG_MTD_PARTITIONS
+static const char *part_probes[] = { "RedBoot", "cmdlinepart", NULL };
+#endif
+
+static irqreturn_t mxc_nfc_irq(int irq, void *dev_id)
+{
+	struct mxc_nand_host *host = dev_id;
+
+	uint16_t tmp;
+
+	tmp = readw(host->regs + NFC_CONFIG1);
+	tmp |= NFC_INT_MSK; /* Disable interrupt */
+	writew(tmp, host->regs + NFC_CONFIG1);
+
+	wake_up(&host->irq_waitq);
+
+	return IRQ_HANDLED;
+}
+
+/* This function polls the NANDFC to wait for the basic operation to
+ * complete by checking the INT bit of config2 register.
+ */
+static void wait_op_done(struct mxc_nand_host *host, int max_retries,
+				uint16_t param, int useirq)
+{
+	uint32_t tmp;
+
+	if (useirq) {
+		if ((readw(host->regs + NFC_CONFIG2) & NFC_INT) == 0) {
+
+			tmp = readw(host->regs + NFC_CONFIG1);
+			tmp  &= ~NFC_INT_MSK;	/* Enable interrupt */
+			writew(tmp, host->regs + NFC_CONFIG1);
+
+			wait_event(host->irq_waitq,
+				readw(host->regs + NFC_CONFIG2) & NFC_INT);
+
+			tmp = readw(host->regs + NFC_CONFIG2);
+			tmp  &= ~NFC_INT;
+			writew(tmp, host->regs + NFC_CONFIG2);
+		}
+	} else {
+		while (max_retries-- > 0) {
+			if (readw(host->regs + NFC_CONFIG2) & NFC_INT) {
+				tmp = readw(host->regs + NFC_CONFIG2);
+				tmp  &= ~NFC_INT;
+				writew(tmp, host->regs + NFC_CONFIG2);
+				break;
+			}
+			udelay(1);
+		}
+		if (max_retries <= 0)
+			DEBUG(MTD_DEBUG_LEVEL0, "%s(%d): INT not set\n",
+			      __func__, param);
+	}
+}
+
+/* This function issues the specified command to the NAND device and
+ * waits for completion. */
+static void send_cmd(struct mxc_nand_host *host, uint16_t cmd, int useirq)
+{
+	DEBUG(MTD_DEBUG_LEVEL3, "send_cmd(host, 0x%x, %d)\n", cmd, useirq);
+
+	writew(cmd, host->regs + NFC_FLASH_CMD);
+	writew(NFC_CMD, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, cmd, useirq);
+}
+
+/* This function sends an address (or partial address) to the
+ * NAND device. The address is used to select the source/destination for
+ * a NAND command. */
+static void send_addr(struct mxc_nand_host *host, uint16_t addr, int islast)
+{
+	DEBUG(MTD_DEBUG_LEVEL3, "send_addr(host, 0x%x %d)\n", addr, islast);
+
+	writew(addr, host->regs + NFC_FLASH_ADDR);
+	writew(NFC_ADDR, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, addr, islast);
+}
+
+/* This function requests the NANDFC to initate the transfer
+ * of data currently in the NANDFC RAM buffer to the NAND device. */
+static void send_prog_page(struct mxc_nand_host *host, uint8_t buf_id,
+			int spare_only)
+{
+	DEBUG(MTD_DEBUG_LEVEL3, "send_prog_page (%d)\n", spare_only);
+
+	/* NANDFC buffer 0 is used for page read/write */
+	writew(buf_id, host->regs + NFC_BUF_ADDR);
+
+	/* Configure spare or page+spare access */
+	if (!host->pagesize_2k) {
+		uint16_t config1 = readw(host->regs + NFC_CONFIG1);
+		if (spare_only)
+			config1 |= NFC_SP_EN;
+		else
+			config1 &= ~(NFC_SP_EN);
+		writew(config1, host->regs + NFC_CONFIG1);
+	}
+
+	writew(NFC_INPUT, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, spare_only, true);
+}
+
+/* Requests NANDFC to initated the transfer of data from the
+ * NAND device into in the NANDFC ram buffer. */
+static void send_read_page(struct mxc_nand_host *host, uint8_t buf_id,
+		int spare_only)
+{
+	DEBUG(MTD_DEBUG_LEVEL3, "send_read_page (%d)\n", spare_only);
+
+	/* NANDFC buffer 0 is used for page read/write */
+	writew(buf_id, host->regs + NFC_BUF_ADDR);
+
+	/* Configure spare or page+spare access */
+	if (!host->pagesize_2k) {
+		uint32_t config1 = readw(host->regs + NFC_CONFIG1);
+		if (spare_only)
+			config1 |= NFC_SP_EN;
+		else
+			config1 &= ~NFC_SP_EN;
+		writew(config1, host->regs + NFC_CONFIG1);
+	}
+
+	writew(NFC_OUTPUT, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, spare_only, true);
+}
+
+/* Request the NANDFC to perform a read of the NAND device ID. */
+static void send_read_id(struct mxc_nand_host *host)
+{
+	struct nand_chip *this = &host->nand;
+	uint16_t tmp;
+
+	/* NANDFC buffer 0 is used for device ID output */
+	writew(0x0, host->regs + NFC_BUF_ADDR);
+
+	/* Read ID into main buffer */
+	tmp = readw(host->regs + NFC_CONFIG1);
+	tmp &= ~NFC_SP_EN;
+	writew(tmp, host->regs + NFC_CONFIG1);
+
+	writew(NFC_ID, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, 0, true);
+
+	if (this->options & NAND_BUSWIDTH_16) {
+		void __iomem *main_buf = host->regs + MAIN_AREA0;
+		/* compress the ID info */
+		writeb(readb(main_buf + 2), main_buf + 1);
+		writeb(readb(main_buf + 4), main_buf + 2);
+		writeb(readb(main_buf + 6), main_buf + 3);
+		writeb(readb(main_buf + 8), main_buf + 4);
+		writeb(readb(main_buf + 10), main_buf + 5);
+	}
+}
+
+/* This function requests the NANDFC to perform a read of the
+ * NAND device status and returns the current status. */
+static uint16_t get_dev_status(struct mxc_nand_host *host)
+{
+	void __iomem *main_buf = host->regs + MAIN_AREA1;
+	uint32_t store;
+	uint16_t ret, tmp;
+	/* Issue status request to NAND device */
+
+	/* store the main area1 first word, later do recovery */
+	store = readl(main_buf);
+	/* NANDFC buffer 1 is used for device status to prevent
+	 * corruption of read/write buffer on status requests. */
+	writew(1, host->regs + NFC_BUF_ADDR);
+
+	/* Read status into main buffer */
+	tmp = readw(host->regs + NFC_CONFIG1);
+	tmp &= ~NFC_SP_EN;
+	writew(tmp, host->regs + NFC_CONFIG1);
+
+	writew(NFC_STATUS, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, 0, true);
+
+	/* Status is placed in first word of main buffer */
+	/* get status, then recovery area 1 data */
+	ret = readw(main_buf);
+	writel(store, main_buf);
+
+	return ret;
+}
+
+/* This functions is used by upper layer to checks if device is ready */
+static int mxc_nand_dev_ready(struct mtd_info *mtd)
+{
+	/*
+	 * NFC handles R/B internally. Therefore, this function
+	 * always returns status as ready.
+	 */
+	return 1;
+}
+
+static void mxc_nand_enable_hwecc(struct mtd_info *mtd, int mode)
+{
+	/*
+	 * If HW ECC is enabled, we turn it on during init. There is
+	 * no need to enable again here.
+	 */
+}
+
+static int mxc_nand_correct_data(struct mtd_info *mtd, u_char *dat,
+				 u_char *read_ecc, u_char *calc_ecc)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+
+	/*
+	 * 1-Bit errors are automatically corrected in HW.  No need for
+	 * additional correction.  2-Bit errors cannot be corrected by
+	 * HW ECC, so we need to return failure
+	 */
+	uint16_t ecc_status = readw(host->regs + NFC_ECC_STATUS_RESULT);
+
+	if (((ecc_status & 0x3) == 2) || ((ecc_status >> 2) == 2)) {
+		DEBUG(MTD_DEBUG_LEVEL0,
+		      "MXC_NAND: HWECC uncorrectable 2-bit ECC error\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int mxc_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
+				  u_char *ecc_code)
+{
+	return 0;
+}
+
+static u_char mxc_nand_read_byte(struct mtd_info *mtd)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+	uint8_t ret = 0;
+	uint16_t col, rd_word;
+	uint16_t __iomem *main_buf = host->regs + MAIN_AREA0;
+	uint16_t __iomem *spare_buf = host->regs + SPARE_AREA0;
+
+	/* Check for status request */
+	if (host->status_request)
+		return get_dev_status(host) & 0xFF;
+
+	/* Get column for 16-bit access */
+	col = host->col_addr >> 1;
+
+	/* If we are accessing the spare region */
+	if (host->spare_only)
+		rd_word = readw(&spare_buf[col]);
+	else
+		rd_word = readw(&main_buf[col]);
+
+	/* Pick upper/lower byte of word from RAM buffer */
+	if (host->col_addr & 0x1)
+		ret = (rd_word >> 8) & 0xFF;
+	else
+		ret = rd_word & 0xFF;
+
+	/* Update saved column address */
+	host->col_addr++;
+
+	return ret;
+}
+
+static uint16_t mxc_nand_read_word(struct mtd_info *mtd)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+	uint16_t col, rd_word, ret;
+	uint16_t __iomem *p;
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+	      "mxc_nand_read_word(col = %d)\n", host->col_addr);
+
+	col = host->col_addr;
+	/* Adjust saved column address */
+	if (col < mtd->writesize && host->spare_only)
+		col += mtd->writesize;
+
+	if (col < mtd->writesize)
+		p = (host->regs + MAIN_AREA0) + (col >> 1);
+	else
+		p = (host->regs + SPARE_AREA0) + ((col - mtd->writesize) >> 1);
+
+	if (col & 1) {
+		rd_word = readw(p);
+		ret = (rd_word >> 8) & 0xff;
+		rd_word = readw(&p[1]);
+		ret |= (rd_word << 8) & 0xff00;
+
+	} else
+		ret = readw(p);
+
+	/* Update saved column address */
+	host->col_addr = col + 2;
+
+	return ret;
+}
+
+/* Write data of length len to buffer buf. The data to be
+ * written on NAND Flash is first copied to RAMbuffer. After the Data Input
+ * Operation by the NFC, the data is written to NAND Flash */
+static void mxc_nand_write_buf(struct mtd_info *mtd,
+				const u_char *buf, int len)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+	int n, col, i = 0;
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+	      "mxc_nand_write_buf(col = %d, len = %d)\n", host->col_addr,
+	      len);
+
+	col = host->col_addr;
+
+	/* Adjust saved column address */
+	if (col < mtd->writesize && host->spare_only)
+		col += mtd->writesize;
+
+	n = mtd->writesize + mtd->oobsize - col;
+	n = min(len, n);
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+	      "%s:%d: col = %d, n = %d\n", __func__, __LINE__, col, n);
+
+	while (n) {
+		void __iomem *p;
+
+		if (col < mtd->writesize)
+			p = host->regs + MAIN_AREA0 + (col & ~3);
+		else
+			p = host->regs + SPARE_AREA0 -
+						mtd->writesize + (col & ~3);
+
+		DEBUG(MTD_DEBUG_LEVEL3, "%s:%d: p = %p\n", __func__,
+		      __LINE__, p);
+
+		if (((col | (int)&buf[i]) & 3) || n < 16) {
+			uint32_t data = 0;
+
+			if (col & 3 || n < 4)
+				data = readl(p);
+
+			switch (col & 3) {
+			case 0:
+				if (n) {
+					data = (data & 0xffffff00) |
+					    (buf[i++] << 0);
+					n--;
+					col++;
+				}
+			case 1:
+				if (n) {
+					data = (data & 0xffff00ff) |
+					    (buf[i++] << 8);
+					n--;
+					col++;
+				}
+			case 2:
+				if (n) {
+					data = (data & 0xff00ffff) |
+					    (buf[i++] << 16);
+					n--;
+					col++;
+				}
+			case 3:
+				if (n) {
+					data = (data & 0x00ffffff) |
+					    (buf[i++] << 24);
+					n--;
+					col++;
+				}
+			}
+
+			writel(data, p);
+		} else {
+			int m = mtd->writesize - col;
+
+			if (col >= mtd->writesize)
+				m += mtd->oobsize;
+
+			m = min(n, m) & ~3;
+
+			DEBUG(MTD_DEBUG_LEVEL3,
+			      "%s:%d: n = %d, m = %d, i = %d, col = %d\n",
+			      __func__,  __LINE__, n, m, i, col);
+
+			memcpy(p, &buf[i], m);
+			col += m;
+			i += m;
+			n -= m;
+		}
+	}
+	/* Update saved column address */
+	host->col_addr = col;
+}
+
+/* Read the data buffer from the NAND Flash. To read the data from NAND
+ * Flash first the data output cycle is initiated by the NFC, which copies
+ * the data to RAMbuffer. This data of length len is then copied to buffer buf.
+ */
+static void mxc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+	int n, col, i = 0;
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+	      "mxc_nand_read_buf(col = %d, len = %d)\n", host->col_addr, len);
+
+	col = host->col_addr;
+
+	/* Adjust saved column address */
+	if (col < mtd->writesize && host->spare_only)
+		col += mtd->writesize;
+
+	n = mtd->writesize + mtd->oobsize - col;
+	n = min(len, n);
+
+	while (n) {
+		void __iomem *p;
+
+		if (col < mtd->writesize)
+			p = host->regs + MAIN_AREA0 + (col & ~3);
+		else
+			p = host->regs + SPARE_AREA0 -
+					mtd->writesize + (col & ~3);
+
+		if (((col | (int)&buf[i]) & 3) || n < 16) {
+			uint32_t data;
+
+			data = readl(p);
+			switch (col & 3) {
+			case 0:
+				if (n) {
+					buf[i++] = (uint8_t) (data);
+					n--;
+					col++;
+				}
+			case 1:
+				if (n) {
+					buf[i++] = (uint8_t) (data >> 8);
+					n--;
+					col++;
+				}
+			case 2:
+				if (n) {
+					buf[i++] = (uint8_t) (data >> 16);
+					n--;
+					col++;
+				}
+			case 3:
+				if (n) {
+					buf[i++] = (uint8_t) (data >> 24);
+					n--;
+					col++;
+				}
+			}
+		} else {
+			int m = mtd->writesize - col;
+
+			if (col >= mtd->writesize)
+				m += mtd->oobsize;
+
+			m = min(n, m) & ~3;
+			memcpy(&buf[i], p, m);
+			col += m;
+			i += m;
+			n -= m;
+		}
+	}
+	/* Update saved column address */
+	host->col_addr = col;
+
+}
+
+/* Used by the upper layer to verify the data in NAND Flash
+ * with the data in the buf. */
+static int mxc_nand_verify_buf(struct mtd_info *mtd,
+				const u_char *buf, int len)
+{
+	return -EFAULT;
+}
+
+/* This function is used by upper layer for select and
+ * deselect of the NAND chip */
+static void mxc_nand_select_chip(struct mtd_info *mtd, int chip)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+
+#ifdef CONFIG_MTD_NAND_MXC_FORCE_CE
+	if (chip > 0) {
+		DEBUG(MTD_DEBUG_LEVEL0,
+		      "ERROR:  Illegal chip select (chip = %d)\n", chip);
+		return;
+	}
+
+	if (chip == -1) {
+		writew(readw(host->regs + NFC_CONFIG1) & ~NFC_CE,
+				host->regs + NFC_CONFIG1);
+		return;
+	}
+
+	writew(readw(host->regs + NFC_CONFIG1) | NFC_CE,
+			host->regs + NFC_CONFIG1);
+#endif
+
+	switch (chip) {
+	case -1:
+		/* Disable the NFC clock */
+		if (host->clk_act) {
+			clk_disable(host->clk);
+			host->clk_act = 0;
+		}
+		break;
+	case 0:
+		/* Enable the NFC clock */
+		if (!host->clk_act) {
+			clk_enable(host->clk);
+			host->clk_act = 1;
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
+/* Used by the upper layer to write command to NAND Flash for
+ * different operations to be carried out on NAND Flash */
+static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
+				int column, int page_addr)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+	int useirq = true;
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+	      "mxc_nand_command (cmd = 0x%x, col = 0x%x, page = 0x%x)\n",
+	      command, column, page_addr);
+
+	/* Reset command state information */
+	host->status_request = false;
+
+	/* Command pre-processing step */
+	switch (command) {
+
+	case NAND_CMD_STATUS:
+		host->col_addr = 0;
+		host->status_request = true;
+		break;
+
+	case NAND_CMD_READ0:
+		host->col_addr = column;
+		host->spare_only = false;
+		useirq = false;
+		break;
+
+	case NAND_CMD_READOOB:
+		host->col_addr = column;
+		host->spare_only = true;
+		useirq = false;
+		if (host->pagesize_2k)
+			command = NAND_CMD_READ0; /* only READ0 is valid */
+		break;
+
+	case NAND_CMD_SEQIN:
+		if (column >= mtd->writesize) {
+			/*
+			 * FIXME: before send SEQIN command for write OOB,
+			 * We must read one page out.
+			 * For K9F1GXX has no READ1 command to set current HW
+			 * pointer to spare area, we must write the whole page
+			 * including OOB together.
+			 */
+			if (host->pagesize_2k)
+				/* call ourself to read a page */
+				mxc_nand_command(mtd, NAND_CMD_READ0, 0,
+						page_addr);
+
+			host->col_addr = column - mtd->writesize;
+			host->spare_only = true;
+
+			/* Set program pointer to spare region */
+			if (!host->pagesize_2k)
+				send_cmd(host, NAND_CMD_READOOB, false);
+		} else {
+			host->spare_only = false;
+			host->col_addr = column;
+
+			/* Set program pointer to page start */
+			if (!host->pagesize_2k)
+				send_cmd(host, NAND_CMD_READ0, false);
+		}
+		useirq = false;
+		break;
+
+	case NAND_CMD_PAGEPROG:
+		send_prog_page(host, 0, host->spare_only);
+
+		if (host->pagesize_2k) {
+			/* data in 4 areas datas */
+			send_prog_page(host, 1, host->spare_only);
+			send_prog_page(host, 2, host->spare_only);
+			send_prog_page(host, 3, host->spare_only);
+		}
+
+		break;
+
+	case NAND_CMD_ERASE1:
+		useirq = false;
+		break;
+	}
+
+	/* Write out the command to the device. */
+	send_cmd(host, command, useirq);
+
+	/* Write out column address, if necessary */
+	if (column != -1) {
+		/*
+		 * MXC NANDFC can only perform full page+spare or
+		 * spare-only read/write.  When the upper layers
+		 * layers perform a read/write buf operation,
+		 * we will used the saved column adress to index into
+		 * the full page.
+		 */
+		send_addr(host, 0, page_addr == -1);
+		if (host->pagesize_2k)
+			/* another col addr cycle for 2k page */
+			send_addr(host, 0, false);
+	}
+
+	/* Write out page address, if necessary */
+	if (page_addr != -1) {
+		/* paddr_0 - p_addr_7 */
+		send_addr(host, (page_addr & 0xff), false);
+
+		if (host->pagesize_2k) {
+			send_addr(host, (page_addr >> 8) & 0xFF, false);
+			if (mtd->size >= 0x40000000)
+				send_addr(host, (page_addr >> 16) & 0xff, true);
+		} else {
+			/* One more address cycle for higher density devices */
+			if (mtd->size >= 0x4000000) {
+				/* paddr_8 - paddr_15 */
+				send_addr(host, (page_addr >> 8) & 0xff, false);
+				send_addr(host, (page_addr >> 16) & 0xff, true);
+			} else
+				/* paddr_8 - paddr_15 */
+				send_addr(host, (page_addr >> 8) & 0xff, true);
+		}
+	}
+
+	/* Command post-processing step */
+	switch (command) {
+
+	case NAND_CMD_RESET:
+		break;
+
+	case NAND_CMD_READOOB:
+	case NAND_CMD_READ0:
+		if (host->pagesize_2k) {
+			/* send read confirm command */
+			send_cmd(host, NAND_CMD_READSTART, true);
+			/* read for each AREA */
+			send_read_page(host, 0, host->spare_only);
+			send_read_page(host, 1, host->spare_only);
+			send_read_page(host, 2, host->spare_only);
+			send_read_page(host, 3, host->spare_only);
+		} else
+			send_read_page(host, 0, host->spare_only);
+		break;
+
+	case NAND_CMD_READID:
+		send_read_id(host);
+		break;
+
+	case NAND_CMD_PAGEPROG:
+		break;
+
+	case NAND_CMD_STATUS:
+		break;
+
+	case NAND_CMD_ERASE2:
+		break;
+	}
+}
+
+static int __init mxcnd_probe(struct platform_device *pdev)
+{
+	struct nand_chip *this;
+	struct mtd_info *mtd;
+	struct mxc_nand_platform_data *pdata = pdev->dev.platform_data;
+	struct mxc_nand_host *host;
+	struct resource *res;
+	uint16_t tmp;
+	int err = 0, nr_parts = 0;
+
+	/* Allocate memory for MTD device structure and private data */
+	host = kzalloc(sizeof(struct mxc_nand_host), GFP_KERNEL);
+	if (!host)
+		return -ENOMEM;
+
+	host->dev = &pdev->dev;
+	/* structures must be linked */
+	this = &host->nand;
+	mtd = &host->mtd;
+	mtd->priv = this;
+	mtd->owner = THIS_MODULE;
+
+	/* 50 us command delay time */
+	this->chip_delay = 5;
+
+	this->priv = host;
+	this->dev_ready = mxc_nand_dev_ready;
+	this->cmdfunc = mxc_nand_command;
+	this->select_chip = mxc_nand_select_chip;
+	this->read_byte = mxc_nand_read_byte;
+	this->read_word = mxc_nand_read_word;
+	this->write_buf = mxc_nand_write_buf;
+	this->read_buf = mxc_nand_read_buf;
+	this->verify_buf = mxc_nand_verify_buf;
+
+	host->clk = clk_get(&pdev->dev, "nfc_clk");
+	if (IS_ERR(host->clk))
+		goto eclk;
+
+	clk_enable(host->clk);
+	host->clk_act = 1;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		err = -ENODEV;
+		goto eres;
+	}
+
+	host->regs = ioremap(res->start, res->end - res->start + 1);
+	if (!host->regs) {
+		err = -EIO;
+		goto eres;
+	}
+
+	tmp = readw(host->regs + NFC_CONFIG1);
+	tmp |= NFC_INT_MSK;
+	writew(tmp, host->regs + NFC_CONFIG1);
+
+	init_waitqueue_head(&host->irq_waitq);
+
+	host->irq = platform_get_irq(pdev, 0);
+
+	err = request_irq(host->irq, mxc_nfc_irq, 0, "mxc_nd", host);
+	if (err)
+		goto eirq;
+
+	if (pdata->hw_ecc) {
+		this->ecc.calculate = mxc_nand_calculate_ecc;
+		this->ecc.hwctl = mxc_nand_enable_hwecc;
+		this->ecc.correct = mxc_nand_correct_data;
+		this->ecc.mode = NAND_ECC_HW;
+		this->ecc.size = 512;
+		this->ecc.bytes = 3;
+		this->ecc.layout = &nand_hw_eccoob_8;
+		tmp = readw(host->regs + NFC_CONFIG1);
+		tmp |= NFC_ECC_EN;
+		writew(tmp, host->regs + NFC_CONFIG1);
+	} else {
+		this->ecc.size = 512;
+		this->ecc.bytes = 3;
+		this->ecc.layout = &nand_hw_eccoob_8;
+		this->ecc.mode = NAND_ECC_SOFT;
+		tmp = readw(host->regs + NFC_CONFIG1);
+		tmp &= ~NFC_ECC_EN;
+		writew(tmp, host->regs + NFC_CONFIG1);
+	}
+
+	/* Reset NAND */
+	this->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+
+	/* preset operation */
+	/* Unlock the internal RAM Buffer */
+	writew(0x2, host->regs + NFC_CONFIG);
+
+	/* Blocks to be unlocked */
+	writew(0x0, host->regs + NFC_UNLOCKSTART_BLKADDR);
+	writew(0x4000, host->regs + NFC_UNLOCKEND_BLKADDR);
+
+	/* Unlock Block Command for given address range */
+	writew(0x4, host->regs + NFC_WRPROT);
+
+	/* NAND bus width determines access funtions used by upper layer */
+	if (pdata->width == 2) {
+		this->options |= NAND_BUSWIDTH_16;
+		this->ecc.layout = &nand_hw_eccoob_16;
+	}
+
+	host->pagesize_2k = 0;
+
+	/* Scan to find existence of the device */
+	if (nand_scan(mtd, 1)) {
+		DEBUG(MTD_DEBUG_LEVEL0,
+		      "MXC_ND: Unable to find any NAND device.\n");
+		err = -ENXIO;
+		goto escan;
+	}
+
+	/* Register the partitions */
+#ifdef CONFIG_MTD_PARTITIONS
+	nr_parts =
+	    parse_mtd_partitions(mtd, part_probes, &host->parts, 0);
+	if (nr_parts > 0)
+		add_mtd_partitions(mtd, host->parts, nr_parts);
+	else
+#endif
+	{
+		pr_info("Registering %s as whole device\n", mtd->name);
+		add_mtd_device(mtd);
+	}
+
+	platform_set_drvdata(pdev, host);
+
+	return 0;
+
+escan:
+	free_irq(host->irq, NULL);
+eirq:
+	iounmap(host->regs);
+eres:
+	clk_put(host->clk);
+eclk:
+	kfree(host);
+
+	return err;
+}
+
+static int __devexit mxcnd_remove(struct platform_device *pdev)
+{
+	struct mxc_nand_host *host = platform_get_drvdata(pdev);
+
+	clk_put(host->clk);
+
+	platform_set_drvdata(pdev, NULL);
+
+	nand_release(&host->mtd);
+	free_irq(host->irq, NULL);
+	iounmap(host->regs);
+	kfree(host);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int mxcnd_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct mtd_info *info = platform_get_drvdata(pdev);
+	int ret = 0;
+
+	DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND suspend\n");
+	if (info)
+		ret = info->suspend(info);
+
+	/* Disable the NFC clock */
+	clk_disable(nfc_clk);	/* FIXME */
+
+	return ret;
+}
+
+static int mxcnd_resume(struct platform_device *pdev)
+{
+	struct mtd_info *info = platform_get_drvdata(pdev);
+	int ret = 0;
+
+	DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND resume\n");
+	/* Enable the NFC clock */
+	clk_enable(nfc_clk);	/* FIXME */
+
+	if (info)
+		info->resume(info);
+
+	return ret;
+}
+
+#else
+# define mxcnd_suspend   NULL
+# define mxcnd_resume    NULL
+#endif				/* CONFIG_PM */
+
+static struct platform_driver mxcnd_driver = {
+	.driver = {
+		   .name = DRIVER_NAME,
+		   },
+	.remove = __exit_p(mxcnd_remove),
+	.suspend = mxcnd_suspend,
+	.resume = mxcnd_resume,
+};
+
+static int __init mxc_nd_init(void)
+{
+	/* Register the device driver structure. */
+	pr_info("MXC MTD nand Driver\n");
+	if (platform_driver_probe(&mxcnd_driver, mxcnd_probe) != 0) {
+		printk(KERN_ERR "Driver register failed for mxcnd_driver\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static void __exit mxc_nd_cleanup(void)
+{
+	/* Unregister the device structure */
+	platform_driver_unregister(&mxcnd_driver);
+}
+
+module_init(mxc_nd_init);
+module_exit(mxc_nd_cleanup);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("MXC NAND MTD driver");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 7d200e88cbdff5334d23d3af8d444eb9cc041962 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 31 Aug 2008 19:32:13 +0300
Subject: [PATCH 051/895] UBI: remove BKL

We do not need BKL in UBI because we serialize things
properly.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/cdev.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 03c759b4eeb5..b30a0b83d7f1 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -104,12 +104,9 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
 	struct ubi_volume_desc *desc;
 	int vol_id = iminor(inode) - 1, mode, ubi_num;
 
-	lock_kernel();
 	ubi_num = ubi_major2num(imajor(inode));
-	if (ubi_num < 0) {
-		unlock_kernel();
+	if (ubi_num < 0)
 		return ubi_num;
-	}
 
 	if (file->f_mode & FMODE_WRITE)
 		mode = UBI_READWRITE;
@@ -119,7 +116,6 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
 	dbg_gen("open volume %d, mode %d", vol_id, mode);
 
 	desc = ubi_open_volume(ubi_num, vol_id, mode);
-	unlock_kernel();
 	if (IS_ERR(desc))
 		return PTR_ERR(desc);
 
-- 
GitLab


From 7e6e178ab1548c8d894a77593e757acf4510b8ba Mon Sep 17 00:00:00 2001
From: Pawel MOLL <pawel.moll@st.com>
Date: Mon, 1 Sep 2008 10:12:11 +0100
Subject: [PATCH 052/895] genirq: irq_chip->startup() usage in setup_irq and
 set_irq_chained handler

This patch clarifies usage of irq_chip->startup() callback:

1. The "if (startup) startup(); else enabled();" code in setup_irq()
   is unnecessary, as startup() falls back to enabled() via
   default callbacks, set by irq_chip_set_defaults().

2. When using set_irq_chained_handler() the startup() was never called,
   which is not good at all... Fixed. And again - when startup() is not
   defined the call will fall back to enable() than to unmask() via
   default callbacks.

Signed-off-by: Pawel Moll <pawel.moll@st.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/chip.c   | 2 +-
 kernel/irq/manage.c | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 964964baefa2..240c64d59267 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -587,7 +587,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		desc->status &= ~IRQ_DISABLED;
 		desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
 		desc->depth = 0;
-		desc->chip->unmask(irq);
+		desc->chip->startup(irq);
 	}
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ae1b684e048c..9aa3e7b81389 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -397,10 +397,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 		if (!(desc->status & IRQ_NOAUTOEN)) {
 			desc->depth = 0;
 			desc->status &= ~IRQ_DISABLED;
-			if (desc->chip->startup)
-				desc->chip->startup(irq);
-			else
-				desc->chip->enable(irq);
+			desc->chip->startup(irq);
 		} else
 			/* Undo nested disables: */
 			desc->depth = 1;
-- 
GitLab


From 742fd1bcfb475c702c9b1dd6afc79c08f8dbf7dd Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 7 Aug 2008 17:36:12 +0900
Subject: [PATCH 053/895] sh: Provide movli.l/movco.l-based bitops.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/bitops-llsc.h | 144 ++++++++++++++++++++++++++++++
 arch/sh/include/asm/bitops.h      |   2 +
 2 files changed, 146 insertions(+)
 create mode 100644 arch/sh/include/asm/bitops-llsc.h

diff --git a/arch/sh/include/asm/bitops-llsc.h b/arch/sh/include/asm/bitops-llsc.h
new file mode 100644
index 000000000000..43b8e1a8239e
--- /dev/null
+++ b/arch/sh/include/asm/bitops-llsc.h
@@ -0,0 +1,144 @@
+#ifndef __ASM_SH_BITOPS_LLSC_H
+#define __ASM_SH_BITOPS_LLSC_H
+
+static inline void set_bit(int nr, volatile void * addr)
+{
+	int	mask;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+
+	a += nr >> 5;
+	mask = 1 << (nr & 0x1f);
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! set_bit		\n\t"
+		"or		%3, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp), "=r" (a)
+		: "1" (a), "r" (mask)
+		: "t", "memory"
+	);
+}
+
+static inline void clear_bit(int nr, volatile void * addr)
+{
+	int	mask;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+
+	a += nr >> 5;
+	mask = 1 << (nr & 0x1f);
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! clear_bit		\n\t"
+		"and		%3, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp), "=r" (a)
+		: "1" (a), "r" (~mask)
+		: "t", "memory"
+	);
+}
+
+static inline void change_bit(int nr, volatile void * addr)
+{
+	int	mask;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+
+	a += nr >> 5;
+	mask = 1 << (nr & 0x1f);
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! change_bit		\n\t"
+		"xor		%3, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp), "=r" (a)
+		: "1" (a), "r" (mask)
+		: "t", "memory"
+	);
+}
+
+static inline int test_and_set_bit(int nr, volatile void * addr)
+{
+	int	mask, retval;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+
+	a += nr >> 5;
+	mask = 1 << (nr & 0x1f);
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! test_and_set_bit	\n\t"
+		"mov		%0, %2				\n\t"
+		"or		%4, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		"and		%4, %2				\n\t"
+		: "=&z" (tmp), "=r" (a), "=&r" (retval)
+		: "1" (a), "r" (mask)
+		: "t", "memory"
+	);
+
+	return retval != 0;
+}
+
+static inline int test_and_clear_bit(int nr, volatile void * addr)
+{
+	int	mask, retval;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+
+	a += nr >> 5;
+	mask = 1 << (nr & 0x1f);
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! test_and_clear_bit	\n\t"
+		"mov		%0, %2				\n\t"
+		"and		%5, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		"and		%4, %2				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=r" (a), "=&r" (retval)
+		: "1" (a), "r" (mask), "r" (~mask)
+		: "t", "memory"
+	);
+
+	return retval != 0;
+}
+
+static inline int test_and_change_bit(int nr, volatile void * addr)
+{
+	int	mask, retval;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+
+	a += nr >> 5;
+	mask = 1 << (nr & 0x1f);
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! test_and_change_bit	\n\t"
+		"mov		%0, %2				\n\t"
+		"xor		%4, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		"and		%4, %2				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=r" (a), "=&r" (retval)
+		: "1" (a), "r" (mask)
+		: "t", "memory"
+	);
+
+	return retval != 0;
+}
+
+#endif /* __ASM_SH_BITOPS_LLSC_H */
diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h
index d7d382f63ee5..367930d8e5ae 100644
--- a/arch/sh/include/asm/bitops.h
+++ b/arch/sh/include/asm/bitops.h
@@ -13,6 +13,8 @@
 
 #ifdef CONFIG_GUSA_RB
 #include <asm/bitops-grb.h>
+#elif defined(CONFIG_CPU_SH4A)
+#include <asm/bitops-llsc.h>
 #else
 #include <asm/bitops-irq.h>
 #endif
-- 
GitLab


From ee43a8442bd7a5d611f11958e6f8c8953d26f907 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 7 Aug 2008 18:01:43 +0900
Subject: [PATCH 054/895] sh: Provide movli.l/movco.l-based cmpxchg.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/cmpxchg-llsc.h | 71 ++++++++++++++++++++++++++++++
 arch/sh/include/asm/system.h       |  2 +
 2 files changed, 73 insertions(+)
 create mode 100644 arch/sh/include/asm/cmpxchg-llsc.h

diff --git a/arch/sh/include/asm/cmpxchg-llsc.h b/arch/sh/include/asm/cmpxchg-llsc.h
new file mode 100644
index 000000000000..aee3bf286581
--- /dev/null
+++ b/arch/sh/include/asm/cmpxchg-llsc.h
@@ -0,0 +1,71 @@
+#ifndef __ASM_SH_CMPXCHG_LLSC_H
+#define __ASM_SH_CMPXCHG_LLSC_H
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	unsigned long retval;
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:					\n\t"
+		"movli.l	@%1, %0	! xchg_u32	\n\t"
+		"mov		%0, %2			\n\t"
+		"mov		%4, %0			\n\t"
+		"movco.l	%0, @%1			\n\t"
+		"bf		1b			\n\t"
+		"synco					\n\t"
+		: "=&z"(tmp), "=r" (m), "=&r" (retval)
+		: "1" (m), "r" (val)
+		: "t", "memory"
+	);
+
+	return retval;
+}
+
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+	unsigned long retval;
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:					\n\t"
+		"movli.l	@%1, %0	! xchg_u8	\n\t"
+		"mov		%0, %2			\n\t"
+		"mov		%4, %0			\n\t"
+		"movco.l	%0, @%1			\n\t"
+		"bf		1b			\n\t"
+		"synco					\n\t"
+		: "=&z"(tmp), "=r" (m), "=&r" (retval)
+		: "1" (m), "r" (val & 0xff)
+		: "t", "memory"
+	);
+
+	return retval;
+}
+
+static inline unsigned long
+__cmpxchg_u32(volatile int *m, unsigned long old, unsigned long new)
+{
+	unsigned long retval;
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! __cmpxchg_u32		\n\t"
+		"mov		%0, %2				\n\t"
+		"cmp/eq		%2, %4				\n\t"
+		"bf		2f				\n\t"
+		"mov		%5, %0				\n\t"
+		"2:						\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=r" (m), "=&r" (retval)
+		: "1" (m), "r" (old), "r" (new)
+		: "t", "memory"
+	);
+
+	return retval;
+}
+
+#endif /* __ASM_SH_CMPXCHG_LLSC_H */
diff --git a/arch/sh/include/asm/system.h b/arch/sh/include/asm/system.h
index 056d68cd2108..fbac113bbfbf 100644
--- a/arch/sh/include/asm/system.h
+++ b/arch/sh/include/asm/system.h
@@ -70,6 +70,8 @@
 
 #ifdef CONFIG_GUSA_RB
 #include <asm/cmpxchg-grb.h>
+#elif defined(CONFIG_CPU_SH4A)
+#include <asm/cmpxchg-llsc.h>
 #else
 #include <asm/cmpxchg-irq.h>
 #endif
-- 
GitLab


From 04ec080dcaad7d3c6d0b40b599c8e63da618f784 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 6 Aug 2008 17:29:14 +0900
Subject: [PATCH 055/895] sh: Kill off unused defines from asm/smp.h.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/smp.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index 593343cd26ee..e2b79e6864e7 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -21,11 +21,6 @@ extern int __cpu_number_map[NR_CPUS];
 extern int __cpu_logical_map[NR_CPUS];
 #define cpu_logical_map(cpu)  __cpu_logical_map[cpu]
 
-/* I've no idea what the real meaning of this is */
-#define PROC_CHANGE_PENALTY	20
-
-#define NO_PROC_ID	(-1)
-
 #define SMP_MSG_FUNCTION	0
 #define SMP_MSG_RESCHEDULE	1
 #define SMP_MSG_FUNCTION_SINGLE	2
-- 
GitLab


From 173a44dd1f406e9aa6fcf46c83b7c972d10ec930 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 6 Aug 2008 18:02:48 +0900
Subject: [PATCH 056/895] sh: smp: Provide a generic IPI handler.

This provides a generic smp_message_recv() routine (based on the PPC
one), that IPI IRQs can wrap in to.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/smp.h |  1 +
 arch/sh/kernel/smp.c      | 20 +++++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index e2b79e6864e7..1292c6d3c53e 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -26,6 +26,7 @@ extern int __cpu_logical_map[NR_CPUS];
 #define SMP_MSG_FUNCTION_SINGLE	2
 #define SMP_MSG_NR		3
 
+void smp_message_recv(unsigned int msg);
 void plat_smp_setup(void);
 void plat_prepare_cpus(unsigned int max_cpus);
 int plat_smp_processor_id(void);
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 60c50841143e..ebfdd364544f 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -3,7 +3,7 @@
  *
  * SMP support for the SuperH processors.
  *
- * Copyright (C) 2002 - 2007 Paul Mundt
+ * Copyright (C) 2002 - 2008 Paul Mundt
  * Copyright (C) 2006 - 2007 Akio Idehara
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -184,6 +184,24 @@ void arch_send_call_function_single_ipi(int cpu)
 	plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE);
 }
 
+void smp_message_recv(unsigned int msg)
+{
+	switch (msg) {
+	case SMP_MSG_FUNCTION:
+		generic_smp_call_function_interrupt();
+		break;
+	case SMP_MSG_RESCHEDULE:
+		break;
+	case SMP_MSG_FUNCTION_SINGLE:
+		generic_smp_call_function_single_interrupt();
+		break;
+	default:
+		printk(KERN_WARNING "SMP %d: %s(): unknown IPI %d\n",
+		       smp_processor_id(), __func__, msg);
+		break;
+	}
+}
+
 /* Not really SMP stuff ... */
 int setup_profiling_timer(unsigned int multiplier)
 {
-- 
GitLab


From c7936b9abcf5e043e73f183a37e81787f6178dd0 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 6 Aug 2008 18:05:09 +0900
Subject: [PATCH 057/895] sh: smp: Hook in to the generic IPI handler for SH-X3
 SMP.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/smp.h          |  2 -
 arch/sh/kernel/cpu/sh4a/smp-shx3.c | 65 +++++++++++-------------------
 2 files changed, 24 insertions(+), 43 deletions(-)

diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index 1292c6d3c53e..5ebe0d09559e 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -32,8 +32,6 @@ void plat_prepare_cpus(unsigned int max_cpus);
 int plat_smp_processor_id(void);
 void plat_start_cpu(unsigned int cpu, unsigned long entry_point);
 void plat_send_ipi(unsigned int cpu, unsigned int message);
-int plat_register_ipi_handler(unsigned int message,
-			      void (*handler)(void *), void *arg);
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi(cpumask_t mask);
 
diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
index e5e06845fa43..edb4da037e0c 100644
--- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
@@ -1,7 +1,7 @@
 /*
  * SH-X3 SMP
  *
- *  Copyright (C) 2007  Paul Mundt
+ *  Copyright (C) 2007 - 2008  Paul Mundt
  *  Copyright (C) 2007  Magnus Damm
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -14,6 +14,22 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 
+static irqreturn_t ipi_interrupt_handler(int irq, void *arg)
+{
+	unsigned int message = (unsigned int)(long)arg;
+	unsigned int cpu = hard_smp_processor_id();
+	unsigned int offs = 4 * cpu;
+	unsigned int x;
+
+	x = ctrl_inl(0xfe410070 + offs); /* C0INITICI..CnINTICI */
+	x &= (1 << (message << 2));
+	ctrl_outl(x, 0xfe410080 + offs); /* C0INTICICLR..CnINTICICLR */
+
+	smp_message_recv(message);
+
+	return IRQ_HANDLED;
+}
+
 void __init plat_smp_setup(void)
 {
 	unsigned int cpu = 0;
@@ -40,6 +56,13 @@ void __init plat_smp_setup(void)
 
 void __init plat_prepare_cpus(unsigned int max_cpus)
 {
+	int i;
+
+	BUILD_BUG_ON(SMP_MSG_NR >= 8);
+
+	for (i = 0; i < SMP_MSG_NR; i++)
+		request_irq(104 + i, ipi_interrupt_handler, IRQF_DISABLED,
+			    "IPI", (void *)(long)i);
 }
 
 #define STBCR_REG(phys_id) (0xfe400004 | (phys_id << 12))
@@ -75,46 +98,6 @@ void plat_send_ipi(unsigned int cpu, unsigned int message)
 	unsigned long addr = 0xfe410070 + (cpu * 4);
 
 	BUG_ON(cpu >= 4);
-	BUG_ON(message >= SMP_MSG_NR);
 
 	ctrl_outl(1 << (message << 2), addr); /* C0INTICI..CnINTICI */
 }
-
-struct ipi_data {
-	void (*handler)(void *);
-	void *arg;
-	unsigned int message;
-};
-
-static irqreturn_t ipi_interrupt_handler(int irq, void *arg)
-{
-	struct ipi_data *id = arg;
-	unsigned int cpu = hard_smp_processor_id();
-	unsigned int offs = 4 * cpu;
-	unsigned int x;
-
-	x = ctrl_inl(0xfe410070 + offs); /* C0INITICI..CnINTICI */
-	x &= (1 << (id->message << 2));
-	ctrl_outl(x, 0xfe410080 + offs); /* C0INTICICLR..CnINTICICLR */
-
-	id->handler(id->arg);
-
-	return IRQ_HANDLED;
-}
-
-static struct ipi_data ipi_handlers[SMP_MSG_NR];
-
-int plat_register_ipi_handler(unsigned int message,
-			      void (*handler)(void *), void *arg)
-{
-	struct ipi_data *id = &ipi_handlers[message];
-
-	BUG_ON(SMP_MSG_NR >= 8);
-	BUG_ON(message >= SMP_MSG_NR);
-
-	id->handler = handler;
-	id->arg = arg;
-	id->message = message;
-
-	return request_irq(104 + message, ipi_interrupt_handler, 0, "IPI", id);
-}
-- 
GitLab


From 6f52707e6882eb3bc6920c3f59beb05d23d68354 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 6 Aug 2008 18:21:03 +0900
Subject: [PATCH 058/895] sh: smp: Hook up a timer IPI stub.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/smp.h | 19 +++++++++++++------
 arch/sh/kernel/smp.c      | 18 ++++++++++++++++++
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index 5ebe0d09559e..9d22cda67c29 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -21,19 +21,26 @@ extern int __cpu_number_map[NR_CPUS];
 extern int __cpu_logical_map[NR_CPUS];
 #define cpu_logical_map(cpu)  __cpu_logical_map[cpu]
 
-#define SMP_MSG_FUNCTION	0
-#define SMP_MSG_RESCHEDULE	1
-#define SMP_MSG_FUNCTION_SINGLE	2
-#define SMP_MSG_NR		3
+enum {
+	SMP_MSG_FUNCTION,
+	SMP_MSG_RESCHEDULE,
+	SMP_MSG_FUNCTION_SINGLE,
+	SMP_MSG_TIMER,
+
+	SMP_MSG_NR,	/* must be last */
+};
 
 void smp_message_recv(unsigned int msg);
+void smp_timer_broadcast(cpumask_t mask);
+
 void plat_smp_setup(void);
 void plat_prepare_cpus(unsigned int max_cpus);
 int plat_smp_processor_id(void);
 void plat_start_cpu(unsigned int cpu, unsigned long entry_point);
 void plat_send_ipi(unsigned int cpu, unsigned int message);
-extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+
+void arch_send_call_function_single_ipi(int cpu);
+void arch_send_call_function_ipi(cpumask_t mask);
 
 #else
 
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index ebfdd364544f..9cb3734dbd49 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -184,6 +184,21 @@ void arch_send_call_function_single_ipi(int cpu)
 	plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE);
 }
 
+void smp_timer_broadcast(cpumask_t mask)
+{
+	int cpu;
+
+	for_each_cpu_mask(cpu, mask)
+		plat_send_ipi(cpu, SMP_MSG_TIMER);
+}
+
+static void ipi_timer(void)
+{
+	irq_enter();
+	/* XXX ... */
+	irq_exit();
+}
+
 void smp_message_recv(unsigned int msg)
 {
 	switch (msg) {
@@ -195,6 +210,9 @@ void smp_message_recv(unsigned int msg)
 	case SMP_MSG_FUNCTION_SINGLE:
 		generic_smp_call_function_single_interrupt();
 		break;
+	case SMP_MSG_TIMER:
+		ipi_timer();
+		break;
 	default:
 		printk(KERN_WARNING "SMP %d: %s(): unknown IPI %d\n",
 		       smp_processor_id(), __func__, msg);
-- 
GitLab


From 8c24594deab89a484879bee270e948f0a556ed75 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 6 Aug 2008 18:37:07 +0900
Subject: [PATCH 059/895] sh: generic clockevent broadcast support.

This hooks up GENERIC_CLOCKEVENTS_BROADCAST and a dummy local timer,
which we call in to from the timer IPI when no other local timer is
provided.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig                         |  4 ++
 arch/sh/include/asm/smp.h               |  3 ++
 arch/sh/kernel/smp.c                    |  7 ++-
 arch/sh/kernel/time_32.c                |  9 +++-
 arch/sh/kernel/timers/Makefile          |  1 +
 arch/sh/kernel/timers/timer-broadcast.c | 57 +++++++++++++++++++++++++
 6 files changed, 77 insertions(+), 4 deletions(-)
 create mode 100644 arch/sh/kernel/timers/timer-broadcast.c

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 5131d50f851a..399664c1f386 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -66,6 +66,9 @@ config GENERIC_TIME
 config GENERIC_CLOCKEVENTS
 	def_bool n
 
+config GENERIC_CLOCKEVENTS_BROADCAST
+	bool
+
 config GENERIC_LOCKBREAK
 	def_bool y
 	depends on SMP && PREEMPT
@@ -323,6 +326,7 @@ config CPU_SUBTYPE_SHX3
 	select ARCH_SPARSEMEM_ENABLE
 	select SYS_SUPPORTS_NUMA
 	select SYS_SUPPORTS_SMP
+	select GENERIC_CLOCKEVENTS_BROADCAST
 
 # SH4AL-DSP Processor Support
 
diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index 9d22cda67c29..85b660c17eb0 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -33,6 +33,9 @@ enum {
 void smp_message_recv(unsigned int msg);
 void smp_timer_broadcast(cpumask_t mask);
 
+void local_timer_interrupt(void);
+void local_timer_setup(unsigned int cpu);
+
 void plat_smp_setup(void);
 void plat_prepare_cpus(unsigned int max_cpus);
 int plat_smp_processor_id(void);
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 9cb3734dbd49..c55d314166c7 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -84,9 +84,12 @@ asmlinkage void __cpuinit start_secondary(void)
 
 	local_irq_enable();
 
+	cpu = smp_processor_id();
+
+	/* Enable local timers */
+	local_timer_setup(cpu);
 	calibrate_delay();
 
-	cpu = smp_processor_id();
 	smp_store_cpu_info(cpu);
 
 	cpu_set(cpu, cpu_online_map);
@@ -195,7 +198,7 @@ void smp_timer_broadcast(cpumask_t mask)
 static void ipi_timer(void)
 {
 	irq_enter();
-	/* XXX ... */
+	local_timer_interrupt();
 	irq_exit();
 }
 
diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
index 0758b5ee8180..decee0a453ab 100644
--- a/arch/sh/kernel/time_32.c
+++ b/arch/sh/kernel/time_32.c
@@ -1,9 +1,9 @@
 /*
- *  arch/sh/kernel/time.c
+ *  arch/sh/kernel/time_32.c
  *
  *  Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
  *  Copyright (C) 2000  Philipp Rumpf <prumpf@tux.org>
- *  Copyright (C) 2002 - 2007  Paul Mundt
+ *  Copyright (C) 2002 - 2008  Paul Mundt
  *  Copyright (C) 2002  M. R. Brown  <mrbrown@linux-sh.org>
  *
  *  Some code taken from i386 version.
@@ -16,6 +16,7 @@
 #include <linux/timex.h>
 #include <linux/sched.h>
 #include <linux/clockchips.h>
+#include <linux/smp.h>
 #include <asm/clock.h>
 #include <asm/rtc.h>
 #include <asm/timer.h>
@@ -260,6 +261,10 @@ void __init time_init(void)
 	sys_timer = get_sys_timer();
 	printk(KERN_INFO "Using %s for system timer\n", sys_timer->name);
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	local_timer_setup(smp_processor_id());
+#endif
+
 	if (sys_timer->ops->read)
 		clocksource_sh.read = sys_timer->ops->read;
 
diff --git a/arch/sh/kernel/timers/Makefile b/arch/sh/kernel/timers/Makefile
index bcf244ff6a12..0b7f8577193f 100644
--- a/arch/sh/kernel/timers/Makefile
+++ b/arch/sh/kernel/timers/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_SH_TMU)		+= timer-tmu.o
 obj-$(CONFIG_SH_MTU2)		+= timer-mtu2.o
 obj-$(CONFIG_SH_CMT)		+= timer-cmt.o
 
+obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= timer-broadcast.o
diff --git a/arch/sh/kernel/timers/timer-broadcast.c b/arch/sh/kernel/timers/timer-broadcast.c
new file mode 100644
index 000000000000..c2317635230f
--- /dev/null
+++ b/arch/sh/kernel/timers/timer-broadcast.c
@@ -0,0 +1,57 @@
+/*
+ * Dummy local timer
+ *
+ * Copyright (C) 2008  Paul Mundt
+ *
+ * cloned from:
+ *
+ *  linux/arch/arm/mach-realview/localtimer.c
+ *
+ *  Copyright (C) 2002 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/smp.h>
+#include <linux/jiffies.h>
+#include <linux/percpu.h>
+#include <linux/clockchips.h>
+#include <linux/irq.h>
+
+static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
+
+/*
+ * Used on SMP for either the local timer or SMP_MSG_TIMER
+ */
+void local_timer_interrupt(void)
+{
+	struct clock_event_device *clk = &__get_cpu_var(local_clockevent);
+
+	clk->event_handler(clk);
+}
+
+static void dummy_timer_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *clk)
+{
+}
+
+void __cpuinit local_timer_setup(unsigned int cpu)
+{
+	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
+
+	clk->name		= "dummy_timer";
+	clk->features		= CLOCK_EVT_FEAT_DUMMY;
+	clk->rating		= 200;
+	clk->mult		= 1;
+	clk->set_mode		= dummy_timer_set_mode;
+	clk->broadcast		= smp_timer_broadcast;
+	clk->cpumask		= cpumask_of_cpu(cpu);
+
+	clockevents_register_device(clk);
+}
-- 
GitLab


From 71f0bdcab69ab36b1e939d36063aaf6c4a164ed3 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 6 Aug 2008 18:39:32 +0900
Subject: [PATCH 060/895] sh: smp: shove a cpu_relax() in the plat_start_cpu()
 busy loop.

Without this, certain versions of GCC will happily optimize the entire
loop out.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/sh4a/smp-shx3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
index edb4da037e0c..b8869aa20dec 100644
--- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
@@ -82,7 +82,7 @@ void plat_start_cpu(unsigned int cpu, unsigned long entry_point)
 		ctrl_outl(STBCR_MSTP, STBCR_REG(cpu));
 
 	while (!(ctrl_inl(STBCR_REG(cpu)) & STBCR_MSTP))
-		;
+		cpu_relax();
 
 	/* Start up secondary processor by sending a reset */
 	ctrl_outl(STBCR_AP_VAL, STBCR_REG(cpu));
-- 
GitLab


From 53c01d2dc38cd3cfaf5591ec5c6c9c4e437cfec2 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 Aug 2008 01:18:48 +0900
Subject: [PATCH 061/895] sh: Early dummy clockevent registration on boot CPU.

The dummy timer needs to be registered on the boot CPU before the
system timer clockevent is registered, or broadcasting doesn't work
as advertized.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/time_32.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
index decee0a453ab..e2f74cc71d8c 100644
--- a/arch/sh/kernel/time_32.c
+++ b/arch/sh/kernel/time_32.c
@@ -254,6 +254,10 @@ void __init time_init(void)
 	set_normalized_timespec(&wall_to_monotonic,
 				-xtime.tv_sec, -xtime.tv_nsec);
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	local_timer_setup(smp_processor_id());
+#endif
+
 	/*
 	 * Find the timer to use as the system timer, it will be
 	 * initialized for us.
@@ -261,9 +265,6 @@ void __init time_init(void)
 	sys_timer = get_sys_timer();
 	printk(KERN_INFO "Using %s for system timer\n", sys_timer->name);
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-	local_timer_setup(smp_processor_id());
-#endif
 
 	if (sys_timer->ops->read)
 		clocksource_sh.read = sys_timer->ops->read;
-- 
GitLab


From 7d96169cb769f459dd6730b06fa3a88cb0c9297d Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 Aug 2008 01:23:34 +0900
Subject: [PATCH 062/895] sh: Display CPU information in show_regs().

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/process_32.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 3326a45749d9..7326313bcfdb 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -111,15 +111,21 @@ void show_regs(struct pt_regs * regs)
 {
 	printk("\n");
 	printk("Pid : %d, Comm: %20s\n", task_pid_nr(current), current->comm);
+	printk("CPU : %d    %s  (%s %.*s)\n",
+	       smp_processor_id(), print_tainted(), init_utsname()->release,
+	       (int)strcspn(init_utsname()->version, " "),
+	       init_utsname()->version);
+
 	print_symbol("PC is at %s\n", instruction_pointer(regs));
+	print_symbol("PR is at %s\n", regs->pr);
+
 	printk("PC  : %08lx SP  : %08lx SR  : %08lx ",
 	       regs->pc, regs->regs[15], regs->sr);
 #ifdef CONFIG_MMU
-	printk("TEA : %08x    ", ctrl_inl(MMU_TEA));
+	printk("TEA : %08x\n", ctrl_inl(MMU_TEA));
 #else
-	printk("                  ");
+	printk("\n");
 #endif
-	printk("%s\n", print_tainted());
 
 	printk("R0  : %08lx R1  : %08lx R2  : %08lx R3  : %08lx\n",
 	       regs->regs[0],regs->regs[1],
-- 
GitLab


From fa43972fab24a3c050e880a7831f9378c6cebc0b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 4 Sep 2008 18:53:58 +0900
Subject: [PATCH 063/895] sh: fixup many sparse errors.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boot/compressed/misc_32.c  |  2 +-
 arch/sh/include/asm/clock.h        |  1 +
 arch/sh/include/asm/io.h           |  4 +-
 arch/sh/include/asm/irq.h          |  3 ++
 arch/sh/include/asm/processor.h    |  4 ++
 arch/sh/include/asm/processor_32.h |  3 ++
 arch/sh/include/asm/processor_64.h |  2 -
 arch/sh/include/asm/rtc.h          |  1 +
 arch/sh/include/asm/setup.h        |  2 +
 arch/sh/include/asm/syscalls.h     | 25 +++++++++++
 arch/sh/include/asm/syscalls_32.h  | 56 ++++++++++++++++++++++++
 arch/sh/include/asm/syscalls_64.h  | 34 +++++++++++++++
 arch/sh/include/asm/system.h       |  6 ++-
 arch/sh/include/asm/system_32.h    | 16 +++++++
 arch/sh/kernel/cpu/clock.c         |  5 ++-
 arch/sh/kernel/io_generic.c        |  6 +--
 arch/sh/kernel/machvec.c           |  1 +
 arch/sh/kernel/process_32.c        |  1 +
 arch/sh/kernel/process_64.c        |  1 +
 arch/sh/kernel/ptrace_32.c         |  6 ++-
 arch/sh/kernel/ptrace_64.c         |  1 +
 arch/sh/kernel/setup.c             |  1 +
 arch/sh/kernel/signal_32.c         | 10 ++---
 arch/sh/kernel/sys_sh.c            |  7 +--
 arch/sh/kernel/sys_sh32.c          |  5 ++-
 arch/sh/kernel/time_32.c           |  1 +
 arch/sh/kernel/timers/timer-cmt.c  |  2 +-
 arch/sh/kernel/traps_32.c          | 68 +++++++++++++++---------------
 arch/sh/lib/div64-generic.c        |  1 +
 arch/sh/mm/consistent.c            |  2 +-
 arch/sh/mm/pg-nommu.c              |  1 +
 arch/sh/mm/tlb-nommu.c             |  1 +
 drivers/serial/sh-sci.c            |  2 +-
 33 files changed, 220 insertions(+), 61 deletions(-)
 create mode 100644 arch/sh/include/asm/syscalls.h
 create mode 100644 arch/sh/include/asm/syscalls_32.h
 create mode 100644 arch/sh/include/asm/syscalls_64.h

diff --git a/arch/sh/boot/compressed/misc_32.c b/arch/sh/boot/compressed/misc_32.c
index f386997e4d9c..efdba6b29572 100644
--- a/arch/sh/boot/compressed/misc_32.c
+++ b/arch/sh/boot/compressed/misc_32.c
@@ -191,7 +191,7 @@ long* stack_start = &user_stack[STACK_SIZE];
 
 void decompress_kernel(void)
 {
-	output_data = 0;
+	output_data = NULL;
 	output_ptr = PHYSADDR((unsigned long)&_text+PAGE_SIZE);
 #ifdef CONFIG_29BIT
 	output_ptr |= P2SEG;
diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 720dfab7b15e..f9c88583d90a 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -39,6 +39,7 @@ struct clk {
 
 /* Should be defined by processor-specific code */
 void arch_init_clk_ops(struct clk_ops **, int type);
+int __init arch_clk_init(void);
 
 /* arch/sh/kernel/cpu/clock.c */
 int clk_init(void);
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index a4fbf0c84fb1..e49cfee95fb2 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -194,6 +194,8 @@ __BUILD_MEMORY_STRING(w, u16)
 
 #define IO_SPACE_LIMIT 0xffffffff
 
+extern unsigned long generic_io_base;
+
 /*
  * This function provides a method for the generic case where a board-specific
  * ioport_map simply needs to return the port + some arbitrary port base.
@@ -203,8 +205,6 @@ __BUILD_MEMORY_STRING(w, u16)
  */
 static inline void __set_io_port_base(unsigned long pbase)
 {
-	extern unsigned long generic_io_base;
-
 	generic_io_base = pbase;
 }
 
diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h
index 6195a531c1b0..d319baaf4fbd 100644
--- a/arch/sh/include/asm/irq.h
+++ b/arch/sh/include/asm/irq.h
@@ -41,6 +41,9 @@ static inline int generic_irq_demux(int irq)
 #define irq_canonicalize(irq)	(irq)
 #define irq_demux(irq)		sh_mv.mv_irq_demux(irq)
 
+void init_IRQ(void);
+asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs);
+
 #ifdef CONFIG_IRQSTACKS
 extern void irq_ctx_init(int cpu);
 extern void irq_ctx_exit(int cpu);
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 15d9f92ca383..58e2be55ab93 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -45,9 +45,13 @@ enum cpu_type {
 
 /* Forward decl */
 struct sh_cpuinfo;
+struct seq_operations;
+
+extern struct pt_regs fake_swapper_regs;
 
 /* arch/sh/kernel/setup.c */
 const char *get_cpu_subtype(struct sh_cpuinfo *c);
+extern const struct seq_operations cpuinfo_op;
 
 #ifdef CONFIG_VSYSCALL
 int vsyscall_init(void);
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index 0dadd75bd93c..41d23210583c 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -10,6 +10,7 @@
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
+#include <linux/linkage.h>
 #include <asm/page.h>
 #include <asm/types.h>
 #include <asm/cache.h>
@@ -44,6 +45,8 @@ extern struct sh_cpuinfo cpu_data[];
 #define current_cpu_data cpu_data[smp_processor_id()]
 #define raw_current_cpu_data cpu_data[raw_smp_processor_id()]
 
+asmlinkage void __init sh_cpu_init(void);
+
 /*
  * User space process size: 2GB.
  *
diff --git a/arch/sh/include/asm/processor_64.h b/arch/sh/include/asm/processor_64.h
index 770d5169983b..16609bc11627 100644
--- a/arch/sh/include/asm/processor_64.h
+++ b/arch/sh/include/asm/processor_64.h
@@ -169,8 +169,6 @@ struct thread_struct {
 #define INIT_MMAP \
 { &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }
 
-extern  struct pt_regs fake_swapper_regs;
-
 #define INIT_THREAD  {				\
 	.sp		= sizeof(init_stack) +	\
 			  (long) &init_stack,	\
diff --git a/arch/sh/include/asm/rtc.h b/arch/sh/include/asm/rtc.h
index 1813f4202a24..f7b010d48af7 100644
--- a/arch/sh/include/asm/rtc.h
+++ b/arch/sh/include/asm/rtc.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_RTC_H
 #define _ASM_RTC_H
 
+void time_init(void);
 extern void (*board_time_init)(void);
 extern void (*rtc_sh_get_time)(struct timespec *);
 extern int (*rtc_sh_set_time)(const time_t);
diff --git a/arch/sh/include/asm/setup.h b/arch/sh/include/asm/setup.h
index 55a2bd328d99..554f865075ca 100644
--- a/arch/sh/include/asm/setup.h
+++ b/arch/sh/include/asm/setup.h
@@ -1,6 +1,8 @@
 #ifndef _SH_SETUP_H
 #define _SH_SETUP_H
 
+#include <asm/mmzone.h>
+
 #define COMMAND_LINE_SIZE 256
 
 #ifdef __KERNEL__
diff --git a/arch/sh/include/asm/syscalls.h b/arch/sh/include/asm/syscalls.h
new file mode 100644
index 000000000000..c1e2b8deb837
--- /dev/null
+++ b/arch/sh/include/asm/syscalls.h
@@ -0,0 +1,25 @@
+#ifndef __ASM_SH_SYSCALLS_H
+#define __ASM_SH_SYSCALLS_H
+
+#ifdef __KERNEL__
+
+struct old_utsname;
+
+asmlinkage int old_mmap(unsigned long addr, unsigned long len,
+			unsigned long prot, unsigned long flags,
+			int fd, unsigned long off);
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			  unsigned long prot, unsigned long flags,
+			  unsigned long fd, unsigned long pgoff);
+asmlinkage int sys_ipc(uint call, int first, int second,
+		       int third, void __user *ptr, long fifth);
+asmlinkage int sys_uname(struct old_utsname __user *name);
+
+#ifdef CONFIG_SUPERH32
+# include "syscalls_32.h"
+#else
+# include "syscalls_64.h"
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_SH_SYSCALLS_H */
diff --git a/arch/sh/include/asm/syscalls_32.h b/arch/sh/include/asm/syscalls_32.h
new file mode 100644
index 000000000000..104c5e686106
--- /dev/null
+++ b/arch/sh/include/asm/syscalls_32.h
@@ -0,0 +1,56 @@
+#ifndef __ASM_SH_SYSCALLS_32_H
+#define __ASM_SH_SYSCALLS_32_H
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+struct pt_regs;
+
+asmlinkage int sys_fork(unsigned long r4, unsigned long r5,
+			unsigned long r6, unsigned long r7,
+			struct pt_regs __regs);
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
+			 unsigned long parent_tidptr,
+			 unsigned long child_tidptr,
+			 struct pt_regs __regs);
+asmlinkage int sys_vfork(unsigned long r4, unsigned long r5,
+			 unsigned long r6, unsigned long r7,
+			 struct pt_regs __regs);
+asmlinkage int sys_execve(char __user *ufilename, char __user * __user *uargv,
+			  char __user * __user *uenvp, unsigned long r7,
+			  struct pt_regs __regs);
+asmlinkage int sys_sigsuspend(old_sigset_t mask, unsigned long r5,
+			      unsigned long r6, unsigned long r7,
+			      struct pt_regs __regs);
+asmlinkage int sys_sigaction(int sig, const struct old_sigaction __user *act,
+			     struct old_sigaction __user *oact);
+asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+			       unsigned long r6, unsigned long r7,
+			       struct pt_regs __regs);
+asmlinkage int sys_sigreturn(unsigned long r4, unsigned long r5,
+			     unsigned long r6, unsigned long r7,
+			     struct pt_regs __regs);
+asmlinkage int sys_rt_sigreturn(unsigned long r4, unsigned long r5,
+				unsigned long r6, unsigned long r7,
+				struct pt_regs __regs);
+asmlinkage int sys_pipe(unsigned long r4, unsigned long r5,
+			unsigned long r6, unsigned long r7,
+			struct pt_regs __regs);
+asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char __user *buf,
+				     size_t count, long dummy, loff_t pos);
+asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char __user *buf,
+				      size_t count, long dummy, loff_t pos);
+asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1,
+					u32 len0, u32 len1, int advice);
+
+/* Misc syscall related bits */
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
+asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0,
+				 unsigned long thread_info_flags);
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_SH_SYSCALLS_32_H */
diff --git a/arch/sh/include/asm/syscalls_64.h b/arch/sh/include/asm/syscalls_64.h
new file mode 100644
index 000000000000..751fd8811364
--- /dev/null
+++ b/arch/sh/include/asm/syscalls_64.h
@@ -0,0 +1,34 @@
+#ifndef __ASM_SH_SYSCALLS_64_H
+#define __ASM_SH_SYSCALLS_64_H
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+struct pt_regs;
+
+asmlinkage int sys_fork(unsigned long r2, unsigned long r3,
+			unsigned long r4, unsigned long r5,
+			unsigned long r6, unsigned long r7,
+			struct pt_regs *pregs);
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
+			 unsigned long r4, unsigned long r5,
+			 unsigned long r6, unsigned long r7,
+			 struct pt_regs *pregs);
+asmlinkage int sys_vfork(unsigned long r2, unsigned long r3,
+			 unsigned long r4, unsigned long r5,
+			 unsigned long r6, unsigned long r7,
+			 struct pt_regs *pregs);
+asmlinkage int sys_execve(char *ufilename, char **uargv,
+			  char **uenvp, unsigned long r5,
+			  unsigned long r6, unsigned long r7,
+			  struct pt_regs *pregs);
+
+/* Misc syscall related bits */
+asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs);
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_SH_SYSCALLS_64_H */
diff --git a/arch/sh/include/asm/system.h b/arch/sh/include/asm/system.h
index fbac113bbfbf..6160fe445161 100644
--- a/arch/sh/include/asm/system.h
+++ b/arch/sh/include/asm/system.h
@@ -127,6 +127,8 @@ static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
   })
 
 extern void die(const char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
+void free_initmem(void);
+void free_initrd_mem(unsigned long start, unsigned long end);
 
 extern void *set_exception_table_vec(unsigned int vec, void *handler);
 
@@ -179,8 +181,8 @@ BUILD_TRAP_HANDLER(fpu_state_restore);
 #define arch_align_stack(x) (x)
 
 struct mem_access {
-	unsigned long (*from)(void *dst, const void *src, unsigned long cnt);
-	unsigned long (*to)(void *dst, const void *src, unsigned long cnt);
+	unsigned long (*from)(void *dst, const void __user *src, unsigned long cnt);
+	unsigned long (*to)(void __user *dst, const void *src, unsigned long cnt);
 };
 
 #ifdef CONFIG_SUPERH32
diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h
index f11bcf0855ed..16509ed2bb60 100644
--- a/arch/sh/include/asm/system_32.h
+++ b/arch/sh/include/asm/system_32.h
@@ -99,4 +99,20 @@ do {							\
 int handle_unaligned_access(opcode_t instruction, struct pt_regs *regs,
 			    struct mem_access *ma);
 
+asmlinkage void do_address_error(struct pt_regs *regs,
+				 unsigned long writeaccess,
+				 unsigned long address);
+asmlinkage void do_divide_error(unsigned long r4, unsigned long r5,
+				unsigned long r6, unsigned long r7,
+				struct pt_regs __regs);
+asmlinkage void do_reserved_inst(unsigned long r4, unsigned long r5,
+				unsigned long r6, unsigned long r7,
+				struct pt_regs __regs);
+asmlinkage void do_illegal_slot_inst(unsigned long r4, unsigned long r5,
+				unsigned long r6, unsigned long r7,
+				struct pt_regs __regs);
+asmlinkage void do_exception_error(unsigned long r4, unsigned long r5,
+				   unsigned long r6, unsigned long r7,
+				   struct pt_regs __regs);
+
 #endif /* __ASM_SH_SYSTEM_32_H */
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index f5eb56e6bc59..b7e46d5bba43 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -294,9 +294,10 @@ arch_init_clk_ops(struct clk_ops **ops, int type)
 {
 }
 
-void __init __attribute__ ((weak))
+int __init __attribute__ ((weak))
 arch_clk_init(void)
 {
+	return 0;
 }
 
 static int show_clocks(char *buf, char **start, off_t off,
@@ -331,7 +332,7 @@ int __init clk_init(void)
 		ret |= clk_register(clk);
 	}
 
-	arch_clk_init();
+	ret |= arch_clk_init();
 
 	/* Kick the child clocks.. */
 	propagate_rate(&master_clk);
diff --git a/arch/sh/kernel/io_generic.c b/arch/sh/kernel/io_generic.c
index db769449f5a7..f1b214d3bce3 100644
--- a/arch/sh/kernel/io_generic.c
+++ b/arch/sh/kernel/io_generic.c
@@ -81,7 +81,7 @@ void generic_insb(unsigned long port, void *dst, unsigned long count)
 	volatile u8 *port_addr;
 	u8 *buf = dst;
 
-	port_addr = (volatile u8 *)__ioport_map(port, 1);
+	port_addr = (volatile u8 __force *)__ioport_map(port, 1);
 	while (count--)
 		*buf++ = *port_addr;
 }
@@ -91,7 +91,7 @@ void generic_insw(unsigned long port, void *dst, unsigned long count)
 	volatile u16 *port_addr;
 	u16 *buf = dst;
 
-	port_addr = (volatile u16 *)__ioport_map(port, 2);
+	port_addr = (volatile u16 __force *)__ioport_map(port, 2);
 	while (count--)
 		*buf++ = *port_addr;
 
@@ -103,7 +103,7 @@ void generic_insl(unsigned long port, void *dst, unsigned long count)
 	volatile u32 *port_addr;
 	u32 *buf = dst;
 
-	port_addr = (volatile u32 *)__ioport_map(port, 4);
+	port_addr = (volatile u32 __force *)__ioport_map(port, 4);
 	while (count--)
 		*buf++ = *port_addr;
 
diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c
index 129b2cfd18a8..8bfdd275e940 100644
--- a/arch/sh/kernel/machvec.c
+++ b/arch/sh/kernel/machvec.c
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <asm/machvec.h>
 #include <asm/sections.h>
+#include <asm/setup.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 7326313bcfdb..914e543102df 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -26,6 +26,7 @@
 #include <asm/system.h>
 #include <asm/ubc.h>
 #include <asm/fpu.h>
+#include <asm/syscalls.h>
 
 static int hlt_counter;
 int ubc_usercnt = 0;
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index b9dbd2d3b4a5..d0dddc438c0c 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/io.h>
+#include <asm/syscalls.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 035cb300d3dc..84bf3420597c 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -27,6 +27,7 @@
 #include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/mmu_context.h>
+#include <asm/syscalls.h>
 
 /*
  * does not yet catch signals sent when the child dies.
@@ -105,6 +106,7 @@ void ptrace_disable(struct task_struct *child)
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
 	struct user * dummy = NULL;
+	unsigned long __user *datap = (unsigned long __user *)data;
 	int ret;
 
 	switch (request) {
@@ -133,7 +135,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			tmp = !!tsk_used_math(child);
 		else
 			tmp = 0;
-		ret = put_user(tmp, (unsigned long __user *)data);
+		ret = put_user(tmp, datap);
 		break;
 	}
 
@@ -202,7 +204,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		}
 
 		ret = 0;
-		if (put_user(tmp, (unsigned long *) data)) {
+		if (put_user(tmp, datap)) {
 			ret = -EFAULT;
 			break;
 		}
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index 9c6424892bd3..e15b099c1f06 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -35,6 +35,7 @@
 #include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/mmu_context.h>
+#include <asm/syscalls.h>
 #include <asm/fpu.h>
 
 /* This mask defines the bits of the SR which the user is not allowed to
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index de832056bf1b..6d0899e890d0 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -26,6 +26,7 @@
 #include <linux/err.h>
 #include <linux/debugfs.h>
 #include <linux/crash_dump.h>
+#include <linux/mmzone.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/page.h>
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 51689d29ad45..345de2f1d53c 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -30,6 +30,7 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
+#include <asm/syscalls.h>
 #include <asm/fpu.h>
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -247,7 +248,6 @@ asmlinkage int sys_rt_sigreturn(unsigned long r4, unsigned long r5,
 	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
 	struct rt_sigframe __user *frame = (struct rt_sigframe __user *)regs->regs[15];
 	sigset_t set;
-	stack_t st;
 	int r0;
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -265,11 +265,9 @@ asmlinkage int sys_rt_sigreturn(unsigned long r4, unsigned long r5,
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0))
 		goto badframe;
 
-	if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st)))
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL,
+			   regs->regs[15]) == -EFAULT)
 		goto badframe;
-	/* It is more difficult to avoid calling this function than to
-	   call it and ignore errors.  */
-	do_sigaltstack((const stack_t __user *)&st, NULL, (unsigned long)frame);
 
 	return r0;
 
@@ -429,7 +427,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(NULL, &frame->uc.uc_link);
 	err |= __put_user((void *)current->sas_ss_sp,
 			  &frame->uc.uc_stack.ss_sp);
 	err |= __put_user(sas_ss_flags(regs->regs[15]),
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index 9061b86d73fa..0dfb88925add 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -23,6 +23,7 @@
 #include <linux/fs.h>
 #include <linux/ipc.h>
 #include <asm/cacheflush.h>
+#include <asm/syscalls.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
@@ -186,7 +187,7 @@ asmlinkage int sys_ipc(uint call, int first, int second,
 			union semun fourth;
 			if (!ptr)
 				return -EINVAL;
-			if (get_user(fourth.__pad, (void * __user *) ptr))
+			if (get_user(fourth.__pad, (void __user * __user *) ptr))
 				return -EFAULT;
 			return sys_semctl (first, second, third, fourth);
 			}
@@ -261,13 +262,13 @@ asmlinkage int sys_ipc(uint call, int first, int second,
 	return -EINVAL;
 }
 
-asmlinkage int sys_uname(struct old_utsname * name)
+asmlinkage int sys_uname(struct old_utsname __user *name)
 {
 	int err;
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof(*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index f0aa5c398656..dbba1e1833d4 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -16,6 +16,7 @@
 #include <asm/cacheflush.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
+#include <asm/syscalls.h>
 
 /*
  * sys_pipe() is the normal C calling standard for creating
@@ -37,13 +38,13 @@ asmlinkage int sys_pipe(unsigned long r4, unsigned long r5,
 	return error;
 }
 
-asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char * buf,
+asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char __user *buf,
 			     size_t count, long dummy, loff_t pos)
 {
 	return sys_pread64(fd, buf, count, pos);
 }
 
-asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char * buf,
+asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char __user *buf,
 			      size_t count, long dummy, loff_t pos)
 {
 	return sys_pwrite64(fd, buf, count, pos);
diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
index e2f74cc71d8c..23ca711c27d2 100644
--- a/arch/sh/kernel/time_32.c
+++ b/arch/sh/kernel/time_32.c
@@ -16,6 +16,7 @@
 #include <linux/timex.h>
 #include <linux/sched.h>
 #include <linux/clockchips.h>
+#include <linux/mc146818rtc.h>	/* for rtc_lock */
 #include <linux/smp.h>
 #include <asm/clock.h>
 #include <asm/rtc.h>
diff --git a/arch/sh/kernel/timers/timer-cmt.c b/arch/sh/kernel/timers/timer-cmt.c
index d20c8c375881..c127293271e1 100644
--- a/arch/sh/kernel/timers/timer-cmt.c
+++ b/arch/sh/kernel/timers/timer-cmt.c
@@ -174,7 +174,7 @@ static int cmt_timer_init(void)
 	return 0;
 }
 
-struct sys_timer_ops cmt_timer_ops = {
+static struct sys_timer_ops cmt_timer_ops = {
 	.init		= cmt_timer_init,
 	.start		= cmt_timer_start,
 	.stop		= cmt_timer_stop,
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 511a9426cec5..4901f6732162 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -192,6 +192,7 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 	int ret, index, count;
 	unsigned long *rm, *rn;
 	unsigned char *src, *dst;
+	unsigned char __user *srcu, *dstu;
 
 	index = (instruction>>8)&15;	/* 0x0F00 */
 	rn = &regs->regs[index];
@@ -206,28 +207,28 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 	case 0: /* mov.[bwl] to/from memory via r0+rn */
 		if (instruction & 8) {
 			/* from memory */
-			src = (unsigned char*) *rm;
-			src += regs->regs[0];
-			dst = (unsigned char*) rn;
-			*(unsigned long*)dst = 0;
+			srcu = (unsigned char __user *)*rm;
+			srcu += regs->regs[0];
+			dst = (unsigned char *)rn;
+			*(unsigned long *)dst = 0;
 
 #if !defined(__LITTLE_ENDIAN__)
 			dst += 4-count;
 #endif
-			if (ma->from(dst, src, count))
+			if (ma->from(dst, srcu, count))
 				goto fetch_fault;
 
 			sign_extend(count, dst);
 		} else {
 			/* to memory */
-			src = (unsigned char*) rm;
+			src = (unsigned char *)rm;
 #if !defined(__LITTLE_ENDIAN__)
 			src += 4-count;
 #endif
-			dst = (unsigned char*) *rn;
-			dst += regs->regs[0];
+			dstu = (unsigned char __user *)*rn;
+			dstu += regs->regs[0];
 
-			if (ma->to(dst, src, count))
+			if (ma->to(dstu, src, count))
 				goto fetch_fault;
 		}
 		ret = 0;
@@ -235,10 +236,10 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 
 	case 1: /* mov.l Rm,@(disp,Rn) */
 		src = (unsigned char*) rm;
-		dst = (unsigned char*) *rn;
-		dst += (instruction&0x000F)<<2;
+		dstu = (unsigned char __user *)*rn;
+		dstu += (instruction&0x000F)<<2;
 
-		if (ma->to(dst, src, 4))
+		if (ma->to(dstu, src, 4))
 			goto fetch_fault;
 		ret = 0;
 		break;
@@ -247,28 +248,28 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 		if (instruction & 4)
 			*rn -= count;
 		src = (unsigned char*) rm;
-		dst = (unsigned char*) *rn;
+		dstu = (unsigned char __user *)*rn;
 #if !defined(__LITTLE_ENDIAN__)
 		src += 4-count;
 #endif
-		if (ma->to(dst, src, count))
+		if (ma->to(dstu, src, count))
 			goto fetch_fault;
 		ret = 0;
 		break;
 
 	case 5: /* mov.l @(disp,Rm),Rn */
-		src = (unsigned char*) *rm;
-		src += (instruction&0x000F)<<2;
-		dst = (unsigned char*) rn;
-		*(unsigned long*)dst = 0;
+		srcu = (unsigned char __user *)*rm;
+		srcu += (instruction & 0x000F) << 2;
+		dst = (unsigned char *)rn;
+		*(unsigned long *)dst = 0;
 
-		if (ma->from(dst, src, 4))
+		if (ma->from(dst, srcu, 4))
 			goto fetch_fault;
 		ret = 0;
 		break;
 
 	case 6:	/* mov.[bwl] from memory, possibly with post-increment */
-		src = (unsigned char*) *rm;
+		srcu = (unsigned char __user *)*rm;
 		if (instruction & 4)
 			*rm += count;
 		dst = (unsigned char*) rn;
@@ -277,7 +278,7 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 #if !defined(__LITTLE_ENDIAN__)
 		dst += 4-count;
 #endif
-		if (ma->from(dst, src, count))
+		if (ma->from(dst, srcu, count))
 			goto fetch_fault;
 		sign_extend(count, dst);
 		ret = 0;
@@ -286,28 +287,28 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 	case 8:
 		switch ((instruction&0xFF00)>>8) {
 		case 0x81: /* mov.w R0,@(disp,Rn) */
-			src = (unsigned char*) &regs->regs[0];
+			src = (unsigned char *) &regs->regs[0];
 #if !defined(__LITTLE_ENDIAN__)
 			src += 2;
 #endif
-			dst = (unsigned char*) *rm; /* called Rn in the spec */
-			dst += (instruction&0x000F)<<1;
+			dstu = (unsigned char __user *)*rm; /* called Rn in the spec */
+			dstu += (instruction & 0x000F) << 1;
 
-			if (ma->to(dst, src, 2))
+			if (ma->to(dstu, src, 2))
 				goto fetch_fault;
 			ret = 0;
 			break;
 
 		case 0x85: /* mov.w @(disp,Rm),R0 */
-			src = (unsigned char*) *rm;
-			src += (instruction&0x000F)<<1;
-			dst = (unsigned char*) &regs->regs[0];
-			*(unsigned long*)dst = 0;
+			srcu = (unsigned char __user *)*rm;
+			srcu += (instruction & 0x000F) << 1;
+			dst = (unsigned char *) &regs->regs[0];
+			*(unsigned long *)dst = 0;
 
 #if !defined(__LITTLE_ENDIAN__)
 			dst += 2;
 #endif
-			if (ma->from(dst, src, 2))
+			if (ma->from(dst, srcu, 2))
 				goto fetch_fault;
 			sign_extend(2, dst);
 			ret = 0;
@@ -333,7 +334,8 @@ static inline int handle_delayslot(struct pt_regs *regs,
 				   struct mem_access *ma)
 {
 	opcode_t instruction;
-	void *addr = (void *)(regs->pc + instruction_size(old_instruction));
+	void __user *addr = (void __user *)(regs->pc +
+		instruction_size(old_instruction));
 
 	if (copy_from_user(&instruction, addr, sizeof(instruction))) {
 		/* the instruction-fetch faulted */
@@ -559,7 +561,7 @@ asmlinkage void do_address_error(struct pt_regs *regs,
 		}
 
 		set_fs(USER_DS);
-		if (copy_from_user(&instruction, (void *)(regs->pc),
+		if (copy_from_user(&instruction, (void __user *)(regs->pc),
 				   sizeof(instruction))) {
 			/* Argh. Fault on the instruction itself.
 			   This should never happen non-SMP
@@ -589,7 +591,7 @@ uspace_segv:
 			die("unaligned program counter", regs, error_code);
 
 		set_fs(KERNEL_DS);
-		if (copy_from_user(&instruction, (void *)(regs->pc),
+		if (copy_from_user(&instruction, (void __user *)(regs->pc),
 				   sizeof(instruction))) {
 			/* Argh. Fault on the instruction itself.
 			   This should never happen non-SMP
diff --git a/arch/sh/lib/div64-generic.c b/arch/sh/lib/div64-generic.c
index 4bef3b5d964a..60e76aa8b53e 100644
--- a/arch/sh/lib/div64-generic.c
+++ b/arch/sh/lib/div64-generic.c
@@ -3,6 +3,7 @@
  */
 
 #include <linux/types.h>
+#include <asm/div64.h>
 
 extern uint64_t __xdiv64_32(u64 n, u32 d);
 
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 64b8f7f96f9a..7619a0fae086 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -44,7 +44,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 	 */
 	dma_cache_sync(dev, ret, size, DMA_BIDIRECTIONAL);
 
-	ret_nocache = ioremap_nocache(virt_to_phys(ret), size);
+	ret_nocache = (void __force *)ioremap_nocache(virt_to_phys(ret), size);
 	if (!ret_nocache) {
 		free_pages((unsigned long)ret, order);
 		return NULL;
diff --git a/arch/sh/mm/pg-nommu.c b/arch/sh/mm/pg-nommu.c
index 677dd57f0877..91ed4e695ff7 100644
--- a/arch/sh/mm/pg-nommu.c
+++ b/arch/sh/mm/pg-nommu.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <asm/page.h>
+#include <asm/uaccess.h>
 
 void copy_page(void *to, void *from)
 {
diff --git a/arch/sh/mm/tlb-nommu.c b/arch/sh/mm/tlb-nommu.c
index 15111bc7ddd6..71c742b5aee3 100644
--- a/arch/sh/mm/tlb-nommu.c
+++ b/arch/sh/mm/tlb-nommu.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 
 /*
  * Nothing too terribly exciting here ..
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 3df2aaec829f..f5aebc9f27e7 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -1113,7 +1113,7 @@ static const char *sci_type(struct uart_port *port)
 		case PORT_IRDA: return "irda";
 	}
 
-	return 0;
+	return NULL;
 }
 
 static void sci_release_port(struct uart_port *port)
-- 
GitLab


From 5840263ecb95e55a2d248fc740644a2c9171a61c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 5 Sep 2008 15:36:39 +0900
Subject: [PATCH 064/895] sh: Don't enable clockevents broadcasting on UP SH-X3
 builds.

Fixes up compile errors with missing timer definitions. It's pointless to
have this enabled anyways if CONFIG_SMP=n.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 399664c1f386..af2b174fd7d9 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -326,7 +326,7 @@ config CPU_SUBTYPE_SHX3
 	select ARCH_SPARSEMEM_ENABLE
 	select SYS_SUPPORTS_NUMA
 	select SYS_SUPPORTS_SMP
-	select GENERIC_CLOCKEVENTS_BROADCAST
+	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 
 # SH4AL-DSP Processor Support
 
-- 
GitLab


From 3159e7d62ad13f71ef3fe029c145594d8caa580d Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 5 Sep 2008 15:39:12 +0900
Subject: [PATCH 065/895] sh: Add support for memory hot-remove.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/mm/Kconfig |  4 ++++
 arch/sh/mm/init.c  | 17 +++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 8a03926ea84f..f8e6dc5e056f 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -134,6 +134,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
 	depends on SPARSEMEM
 
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+	def_bool y
+	depends on SPARSEMEM
+
 config ARCH_MEMORY_PROBE
 	def_bool y
 	depends on MEMORY_HOTPLUG
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index b75a7acd62fb..d4681a55c852 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -292,4 +292,21 @@ int memory_add_physaddr_to_nid(u64 addr)
 }
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = start_pfn + (size >> PAGE_SHIFT);
+	int ret;
+
+	ret = offline_pages(start_pfn, end_pfn, 120 * HZ);
+	if (unlikely(ret))
+		printk("%s: Failed, offline_pages() == %d\n", __func__, ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(remove_memory);
 #endif
+
+#endif /* CONFIG_MEMORY_HOTPLUG */
-- 
GitLab


From c6feb6142cb85228e73497a309f475a0d7279318 Mon Sep 17 00:00:00 2001
From: Stuart Menefy <stuart.menefy@st.com>
Date: Fri, 5 Sep 2008 16:06:42 +0900
Subject: [PATCH 066/895] sh: early cached_to_uncached initialization.

statically initialise the cached_to_uncached offset, so that we can use
it immediatly.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/system_32.h |  3 ++-
 arch/sh/mm/init.c               | 27 ++++++++++++++-------------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h
index 16509ed2bb60..f7f105627fd9 100644
--- a/arch/sh/include/asm/system_32.h
+++ b/arch/sh/include/asm/system_32.h
@@ -58,7 +58,8 @@ do {									\
 	last = __last;							\
 } while (0)
 
-#define __uses_jump_to_uncached __attribute__ ((__section__ (".uncached.text")))
+#define __uses_jump_to_uncached \
+	noinline __attribute__ ((__section__ (".uncached.text")))
 
 /*
  * Jump to uncached area.
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index d4681a55c852..f1a494283c4e 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -23,7 +23,19 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 pgd_t swapper_pg_dir[PTRS_PER_PGD];
-unsigned long cached_to_uncached = 0;
+
+#ifdef CONFIG_SUPERH32
+/*
+ * Handle trivial transitions between cached and uncached
+ * segments, making use of the 1:1 mapping relationship in
+ * 512MB lowmem.
+ *
+ * This is the offset of the uncached section from its cached alias.
+ * Default value only valid in 29 bit mode, in 32bit mode will be
+ * overridden in pmb_init.
+ */
+unsigned long cached_to_uncached = P2SEG - P1SEG;
+#endif
 
 #ifdef CONFIG_MMU
 static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
@@ -58,9 +70,7 @@ static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
 	}
 
 	set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, prot));
-
-	if (cached_to_uncached)
-		flush_tlb_one(get_asid(), addr);
+	flush_tlb_one(get_asid(), addr);
 }
 
 /*
@@ -165,15 +175,6 @@ void __init paging_init(void)
 #ifdef CONFIG_SUPERH32
 	/* Set up the uncached fixmap */
 	set_fixmap_nocache(FIX_UNCACHED, __pa(&__uncached_start));
-
-#ifdef CONFIG_29BIT
-	/*
-	 * Handle trivial transitions between cached and uncached
-	 * segments, making use of the 1:1 mapping relationship in
-	 * 512MB lowmem.
-	 */
-	cached_to_uncached = P2SEG - P1SEG;
-#endif
 #endif
 }
 
-- 
GitLab


From 28d6e52cf7e881834d2dab370afa20b6223f726c Mon Sep 17 00:00:00 2001
From: Stuart Menefy <stuart.menefy@st.com>
Date: Fri, 5 Sep 2008 16:14:17 +0900
Subject: [PATCH 067/895] sh: Fix up broken 32-bit initrd support.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/setup.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 6d0899e890d0..fc7171a9c804 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -249,17 +249,18 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
 	ROOT_DEV = Root_RAM0;
 
 	if (LOADER_TYPE && INITRD_START) {
-		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
-			reserve_bootmem(INITRD_START + __MEMORY_START,
-					INITRD_SIZE, BOOTMEM_DEFAULT);
-			initrd_start = INITRD_START + PAGE_OFFSET +
-					__MEMORY_START;
+		unsigned long initrd_start_phys = INITRD_START + __MEMORY_START;
+
+		if (initrd_start_phys + INITRD_SIZE <= PFN_PHYS(max_low_pfn)) {
+			reserve_bootmem(initrd_start_phys, INITRD_SIZE,
+					BOOTMEM_DEFAULT);
+			initrd_start = (unsigned long)__va(initrd_start_phys);
 			initrd_end = initrd_start + INITRD_SIZE;
 		} else {
 			printk("initrd extends beyond end of memory "
-			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
-				    INITRD_START + INITRD_SIZE,
-				    max_low_pfn << PAGE_SHIFT);
+			       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+			       initrd_start_phys + INITRD_SIZE,
+			       PFN_PHYS(max_low_pfn));
 			initrd_start = 0;
 		}
 	}
-- 
GitLab


From 96e14e54a6abd5a4bcd75e33962f87bef145d1f6 Mon Sep 17 00:00:00 2001
From: Stuart Menefy <stuart.menefy@st.com>
Date: Fri, 5 Sep 2008 16:17:15 +0900
Subject: [PATCH 068/895] sh: vmalloc pgtable sync fix.

This fixes a problem in the code which copies the vmalloc portion of the
kernel's page table into the current user space page table. The addition
of the four level page table code breaks on folded page tables, because
the pud level is always present (although folded). This updates the code
to use the same style of updates for the pud as is used for the pgd
level.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/mm/fault_32.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index 0c776fdfbdda..e8efda9846bb 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -61,7 +61,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		pgd = get_TTB() + offset;
 		pgd_k = swapper_pg_dir + offset;
 
-		/* This will never happen with the folded page table. */
 		if (!pgd_present(*pgd)) {
 			if (!pgd_present(*pgd_k))
 				goto bad_area_nosemaphore;
@@ -71,9 +70,13 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 
 		pud = pud_offset(pgd, address);
 		pud_k = pud_offset(pgd_k, address);
-		if (pud_present(*pud) || !pud_present(*pud_k))
-			goto bad_area_nosemaphore;
-		set_pud(pud, *pud_k);
+
+		if (!pud_present(*pud)) {
+			if (!pud_present(*pud_k))
+				goto bad_area_nosemaphore;
+			set_pud(pud, *pud_k);
+			return;
+		}
 
 		pmd = pmd_offset(pud, address);
 		pmd_k = pmd_offset(pud_k, address);
-- 
GitLab


From 664718a34348a9ef6f966c3977e8df927a378134 Mon Sep 17 00:00:00 2001
From: Chris Smith <chris.smith@st.com>
Date: Fri, 5 Sep 2008 16:24:13 +0900
Subject: [PATCH 069/895] sh: Fix uImage load address in 32-bit mode.

Fix "make uImage" load and entry addresses in 32-bit mode.

Signed-off-by: Chris Smith <chris.smith@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boot/Makefile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile
index 5b54965eef98..c16ccd4bfa16 100644
--- a/arch/sh/boot/Makefile
+++ b/arch/sh/boot/Makefile
@@ -33,10 +33,16 @@ $(obj)/zImage: $(obj)/compressed/vmlinux FORCE
 $(obj)/compressed/vmlinux: FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
 
+ifeq ($(CONFIG_32BIT),y)
+KERNEL_LOAD	:= $(shell /bin/bash -c 'printf "0x%08x" \
+		     $$[$(CONFIG_PAGE_OFFSET)  + \
+			$(CONFIG_ZERO_PAGE_OFFSET)]')
+else
 KERNEL_LOAD	:= $(shell /bin/bash -c 'printf "0x%08x" \
 		     $$[$(CONFIG_PAGE_OFFSET)  + \
 			$(CONFIG_MEMORY_START) + \
 			$(CONFIG_ZERO_PAGE_OFFSET)]')
+endif
 
 KERNEL_ENTRY	:= $(shell /bin/bash -c 'printf "0x%08x" \
 		     $$[$(CONFIG_PAGE_OFFSET)  + \
-- 
GitLab


From f040ddaf4cfd28f25ea9d6a42d3c734d5c3f6798 Mon Sep 17 00:00:00 2001
From: Stuart Menefy <stuart.menefy@st.com>
Date: Fri, 5 Sep 2008 16:29:40 +0900
Subject: [PATCH 070/895] sh: Fix an unusual memory initialisation error.

This fixes a problems with the set up of Linux memory:

 - When reserving memory at boot time, the code previously reserved
   the bottom page of memory, and then from one page up to the end of
   the bootmap. This had the desired effect, but was strictly speaking
   wrong, as the one page was actually whatever CONFIG_ZERO_PAGE_OFFSET
   had been set to.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/setup.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index fc7171a9c804..fc098c8af052 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -233,15 +233,17 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
 	 * case of us accidentally initializing the bootmem allocator with
 	 * an invalid RAM area.
 	 */
-	reserve_bootmem(__MEMORY_START+PAGE_SIZE,
-		(PFN_PHYS(free_pfn)+bootmap_size+PAGE_SIZE-1)-__MEMORY_START,
-		BOOTMEM_DEFAULT);
+	reserve_bootmem(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET,
+			(PFN_PHYS(free_pfn) + bootmap_size + PAGE_SIZE - 1) -
+			(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET),
+			BOOTMEM_DEFAULT);
 
 	/*
 	 * reserve physical page 0 - it's a special BIOS page on many boxes,
 	 * enabling clean reboots, SMP operation, laptop functions.
 	 */
-	reserve_bootmem(__MEMORY_START, PAGE_SIZE, BOOTMEM_DEFAULT);
+	reserve_bootmem(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET,
+			BOOTMEM_DEFAULT);
 
 	sparse_memory_present_with_active_regions(0);
 
-- 
GitLab


From b6ad1e8c3f76fcc5dee506d5e79e752d296ff745 Mon Sep 17 00:00:00 2001
From: Carl Shaw <carl.shaw@st.com>
Date: Fri, 5 Sep 2008 16:36:19 +0900
Subject: [PATCH 071/895] sh: Subnormal double to float conversion

This patch adds support for the SH4 to convert a subnormal double
into a float by catching the FPE and implementing the FCNVDS
instruction in software.

Signed-off-by: Carl Shaw <carl.shaw@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/sh4/fpu.c       | 25 ++++++++++++++++++++++++-
 arch/sh/kernel/cpu/sh4/softfloat.c | 29 +++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c
index 2d452f67fb87..2780917c0088 100644
--- a/arch/sh/kernel/cpu/sh4/fpu.c
+++ b/arch/sh/kernel/cpu/sh4/fpu.c
@@ -36,7 +36,7 @@ extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
 extern unsigned long long float64_sub(unsigned long long a,
 				      unsigned long long b);
 extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
-
+extern unsigned long int float64_to_float32(unsigned long long a);
 static unsigned int fpu_exception_flags;
 
 /*
@@ -415,6 +415,29 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 		} else
 			return 0;
 
+		regs->pc = nextpc;
+		return 1;
+	} else if ((finsn & 0xf0bd) == 0xf0bd) {
+		/* fcnvds - double to single precision convert */
+		struct task_struct *tsk = current;
+		int m;
+		unsigned int hx;
+
+		m = (finsn >> 9) & 0x7;
+		hx = tsk->thread.fpu.hard.fp_regs[m];
+
+		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
+			&& ((hx & 0x7fffffff) < 0x00100000)) {
+			/* subnormal double to float conversion */
+			long long llx;
+
+			llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
+			    | tsk->thread.fpu.hard.fp_regs[m + 1];
+
+			tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
+		} else
+			return 0;
+
 		regs->pc = nextpc;
 		return 1;
 	}
diff --git a/arch/sh/kernel/cpu/sh4/softfloat.c b/arch/sh/kernel/cpu/sh4/softfloat.c
index 828cb57cb959..2b747f3b02bd 100644
--- a/arch/sh/kernel/cpu/sh4/softfloat.c
+++ b/arch/sh/kernel/cpu/sh4/softfloat.c
@@ -85,6 +85,7 @@ float64 float64_div(float64 a, float64 b);
 float32 float32_div(float32 a, float32 b);
 float32 float32_mul(float32 a, float32 b);
 float64 float64_mul(float64 a, float64 b);
+float32 float64_to_float32(float64 a);
 inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
 		   bits64 * z1Ptr);
 inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
@@ -890,3 +891,31 @@ float64 float64_mul(float64 a, float64 b)
 	}
 	return roundAndPackFloat64(zSign, zExp, zSig0);
 }
+
+/*
+ * -------------------------------------------------------------------------------
+ *  Returns the result of converting the double-precision floating-point value
+ *  `a' to the single-precision floating-point format.  The conversion is
+ *  performed according to the IEC/IEEE Standard for Binary Floating-point
+ *  Arithmetic.
+ *  -------------------------------------------------------------------------------
+ *  */
+float32 float64_to_float32(float64 a)
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig;
+    bits32 zSig;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+
+    shift64RightJamming( aSig, 22, &aSig );
+    zSig = aSig;
+    if ( aExp || zSig ) {
+        zSig |= 0x40000000;
+        aExp -= 0x381;
+    }
+    return roundAndPackFloat32(aSign, aExp, zSig);
+}
-- 
GitLab


From 61c66387e640abc0e0aa11519bc48ff9bb50580a Mon Sep 17 00:00:00 2001
From: Francesco Virlinzi <francesco.virlinzi@st.com>
Date: Fri, 5 Sep 2008 16:40:22 +0900
Subject: [PATCH 072/895] sh: fix the TMU code to allow a fully running NO_HZ
 system

This patch fixes the TMU code to allow NO_HZ to work on sh

Signed-off-by: Francesco Virlinzi <francesco.virlinzi@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/timers/timer-tmu.c | 177 ++++++++++++++++++++----------
 1 file changed, 119 insertions(+), 58 deletions(-)

diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c
index 1ca9ad49b541..aaaf90d06b85 100644
--- a/arch/sh/kernel/timers/timer-tmu.c
+++ b/arch/sh/kernel/timers/timer-tmu.c
@@ -28,43 +28,90 @@
 #define TMU_TOCR_INIT	0x00
 #define TMU_TCR_INIT	0x0020
 
-static int tmu_timer_start(void)
+#define TMU0		(0)
+#define TMU1		(1)
+
+static inline void _tmu_start(int tmu_num)
 {
-	ctrl_outb(ctrl_inb(TMU_012_TSTR) | 0x3, TMU_012_TSTR);
-	return 0;
+	ctrl_outb(ctrl_inb(TMU_012_TSTR) | (0x1<<tmu_num), TMU_012_TSTR);
 }
 
-static void tmu0_timer_set_interval(unsigned long interval, unsigned int reload)
+static inline void _tmu_set_irq(int tmu_num, int enabled)
 {
-	ctrl_outl(interval, TMU0_TCNT);
+	register unsigned long tmu_tcr = TMU0_TCR + (0xc*tmu_num);
+	ctrl_outw( (enabled ? ctrl_inw(tmu_tcr) | (1<<5) : ctrl_inw(tmu_tcr) & ~(1<<5)), tmu_tcr);
+}
 
-	/*
-	 * TCNT reloads from TCOR on underflow, clear it if we don't
-	 * intend to auto-reload
-	 */
-	if (reload)
-		ctrl_outl(interval, TMU0_TCOR);
-	else
-		ctrl_outl(0, TMU0_TCOR);
+static inline void _tmu_stop(int tmu_num)
+{
+	ctrl_outb(ctrl_inb(TMU_012_TSTR) & ~(0x1<<tmu_num), TMU_012_TSTR);
+}
+
+static inline void _tmu_clear_status(int tmu_num)
+{
+	register unsigned long tmu_tcr = TMU0_TCR + (0xc*tmu_num);
+	/* Clear UNF bit */
+	ctrl_outw(ctrl_inw(tmu_tcr) & ~0x100, tmu_tcr);
+}
 
-	tmu_timer_start();
+static inline unsigned long _tmu_read(int tmu_num)
+{
+        return ctrl_inl(TMU0_TCNT+0xC*tmu_num);
+}
+
+static int tmu_timer_start(void)
+{
+	_tmu_start(TMU0);
+	_tmu_start(TMU1);
+	_tmu_set_irq(TMU0,1);
+	return 0;
 }
 
 static int tmu_timer_stop(void)
 {
-	ctrl_outb(ctrl_inb(TMU_012_TSTR) & ~0x3, TMU_012_TSTR);
+	_tmu_stop(TMU0);
+	_tmu_stop(TMU1);
+	_tmu_clear_status(TMU0);
 	return 0;
 }
 
+/*
+ * also when the module_clk is scaled the TMU1
+ * will show the same frequency
+ */
+static int tmus_are_scaled;
+
 static cycle_t tmu_timer_read(void)
 {
-	return ~ctrl_inl(TMU1_TCNT);
+	return ((cycle_t)(~_tmu_read(TMU1)))<<tmus_are_scaled;
+}
+
+
+static unsigned long tmu_latest_interval[3];
+static void tmu_timer_set_interval(int tmu_num, unsigned long interval, unsigned int reload)
+{
+	unsigned long tmu_tcnt = TMU0_TCNT + tmu_num*0xC;
+	unsigned long tmu_tcor = TMU0_TCOR + tmu_num*0xC;
+
+	_tmu_stop(tmu_num);
+
+	ctrl_outl(interval, tmu_tcnt);
+	tmu_latest_interval[tmu_num] = interval;
+
+	/*
+	 * TCNT reloads from TCOR on underflow, clear it if we don't
+	 * intend to auto-reload
+	 */
+	ctrl_outl( reload ? interval : 0 , tmu_tcor);
+
+	_tmu_start(tmu_num);
 }
 
 static int tmu_set_next_event(unsigned long cycles,
 			      struct clock_event_device *evt)
 {
-	tmu0_timer_set_interval(cycles, 1);
+	tmu_timer_set_interval(TMU0,cycles, evt->mode == CLOCK_EVT_MODE_PERIODIC);
+	_tmu_set_irq(TMU0,1);
 	return 0;
 }
 
@@ -96,12 +143,8 @@ static struct clock_event_device tmu0_clockevent = {
 static irqreturn_t tmu_timer_interrupt(int irq, void *dummy)
 {
 	struct clock_event_device *evt = &tmu0_clockevent;
-	unsigned long timer_status;
-
-	/* Clear UNF bit */
-	timer_status = ctrl_inw(TMU0_TCR);
-	timer_status &= ~0x100;
-	ctrl_outw(timer_status, TMU0_TCR);
+	_tmu_clear_status(TMU0);
+	_tmu_set_irq(TMU0,tmu0_clockevent.mode != CLOCK_EVT_MODE_ONESHOT);
 
 	evt->event_handler(evt);
 
@@ -109,56 +152,73 @@ static irqreturn_t tmu_timer_interrupt(int irq, void *dummy)
 }
 
 static struct irqaction tmu0_irq = {
-	.name		= "periodic timer",
+	.name		= "periodic/oneshot timer",
 	.handler	= tmu_timer_interrupt,
 	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
 	.mask		= CPU_MASK_NONE,
 };
 
-static void tmu0_clk_init(struct clk *clk)
+static void __init tmu_clk_init(struct clk *clk)
 {
-	u8 divisor = TMU_TCR_INIT & 0x7;
-	ctrl_outw(TMU_TCR_INIT, TMU0_TCR);
-	clk->rate = clk->parent->rate / (4 << (divisor << 1));
+	u8 divisor  = TMU_TCR_INIT & 0x7;
+	int tmu_num = clk->name[3]-'0';
+	ctrl_outw(TMU_TCR_INIT, TMU0_TCR+(tmu_num*0xC));
+	clk->rate = clk_get_rate(clk->parent) / (4 << (divisor << 1));
 }
 
-static void tmu0_clk_recalc(struct clk *clk)
+static void tmu_clk_recalc(struct clk *clk)
 {
-	u8 divisor = ctrl_inw(TMU0_TCR) & 0x7;
-	clk->rate = clk->parent->rate / (4 << (divisor << 1));
-}
+	int tmu_num = clk->name[3]-'0';
+	unsigned long prev_rate = clk_get_rate(clk);
+	unsigned long flags;
+	u8 divisor = ctrl_inw(TMU0_TCR+tmu_num*0xC) & 0x7;
+	clk->rate  = clk_get_rate(clk->parent) / (4 << (divisor << 1));
 
-static struct clk_ops tmu0_clk_ops = {
-	.init		= tmu0_clk_init,
-	.recalc		= tmu0_clk_recalc,
-};
+	if(prev_rate==clk_get_rate(clk))
+		return;
 
-static struct clk tmu0_clk = {
-	.name		= "tmu0_clk",
-	.ops		= &tmu0_clk_ops,
-};
+	if(tmu_num)
+		return; /* No more work on TMU1 */
 
-static void tmu1_clk_init(struct clk *clk)
-{
-	u8 divisor = TMU_TCR_INIT & 0x7;
-	ctrl_outw(divisor, TMU1_TCR);
-	clk->rate = clk->parent->rate / (4 << (divisor << 1));
-}
+	local_irq_save(flags);
+	tmus_are_scaled = (prev_rate > clk->rate);
 
-static void tmu1_clk_recalc(struct clk *clk)
-{
-	u8 divisor = ctrl_inw(TMU1_TCR) & 0x7;
-	clk->rate = clk->parent->rate / (4 << (divisor << 1));
+	_tmu_stop(TMU0);
+
+	tmu0_clockevent.mult = div_sc(clk->rate, NSEC_PER_SEC,
+				tmu0_clockevent.shift);
+	tmu0_clockevent.max_delta_ns =
+			clockevent_delta2ns(-1, &tmu0_clockevent);
+	tmu0_clockevent.min_delta_ns =
+			clockevent_delta2ns(1, &tmu0_clockevent);
+
+	if (tmus_are_scaled)
+		tmu_latest_interval[TMU0] >>= 1;
+	else
+		tmu_latest_interval[TMU0] <<= 1;
+
+	tmu_timer_set_interval(TMU0,
+		tmu_latest_interval[TMU0],
+		tmu0_clockevent.mode == CLOCK_EVT_MODE_PERIODIC);
+
+	_tmu_start(TMU0);
+
+	local_irq_restore(flags);
 }
 
-static struct clk_ops tmu1_clk_ops = {
-	.init		= tmu1_clk_init,
-	.recalc		= tmu1_clk_recalc,
+static struct clk_ops tmu_clk_ops = {
+	.init		= tmu_clk_init,
+	.recalc		= tmu_clk_recalc,
+};
+
+static struct clk tmu0_clk = {
+	.name		= "tmu0_clk",
+	.ops		= &tmu_clk_ops,
 };
 
 static struct clk tmu1_clk = {
 	.name		= "tmu1_clk",
-	.ops		= &tmu1_clk_ops,
+	.ops		= &tmu_clk_ops,
 };
 
 static int tmu_timer_init(void)
@@ -189,11 +249,12 @@ static int tmu_timer_init(void)
 	frequency = clk_get_rate(&tmu0_clk);
 	interval = (frequency + HZ / 2) / HZ;
 
-	sh_hpt_frequency = clk_get_rate(&tmu1_clk);
-	ctrl_outl(~0, TMU1_TCNT);
-	ctrl_outl(~0, TMU1_TCOR);
+	tmu_timer_set_interval(TMU0,interval, 1);
+	tmu_timer_set_interval(TMU1,~0,1);
 
-	tmu0_timer_set_interval(interval, 1);
+	_tmu_start(TMU1);
+
+	sh_hpt_frequency = clk_get_rate(&tmu1_clk);
 
 	tmu0_clockevent.mult = div_sc(frequency, NSEC_PER_SEC,
 				      tmu0_clockevent.shift);
-- 
GitLab


From b6c20e4290a1ef92bcef5ec9dd8e5c7d036153aa Mon Sep 17 00:00:00 2001
From: Marek Skuczynski <M.Skuczynski@adbglobal.com>
Date: Fri, 5 Sep 2008 16:42:58 +0900
Subject: [PATCH 073/895] sh: remove unnecessary memset after
 alloc_bootmem_low_pages

Because alloc_bootmem functions return the allocated memory always
zeroed, an additional call of memset on allocated memory is
unnecessary.

Signed-off-by: Marek Skuczynski <M.Skuczynski@adbglobal.com>
Signed-off-by: Carl Shaw <carl.shaw@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/mm/init.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index f1a494283c4e..31211bfdc6d8 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -123,7 +123,6 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
 		if (!pmd_present(*pmd)) {
 			pte_t *pte_table;
 			pte_table = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-			memset(pte_table, 0, PAGE_SIZE);
 			pmd_populate_kernel(&init_mm, pmd, pte_table);
 		}
 
-- 
GitLab


From d39f5450146ff39f66cfde9d5184420627d0ac51 Mon Sep 17 00:00:00 2001
From: Chris Smith <chris.smith@st.com>
Date: Fri, 5 Sep 2008 17:15:39 +0900
Subject: [PATCH 074/895] sh: Add kprobes support.

Initial support for kprobes/kretprobes for 32-bit SH platforms.

[ General cleanup and some rework for the kretprobe hash lock. -- PFM ]

Signed-off-by: Chris Smith <chris.smith@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig               |   2 +
 arch/sh/include/asm/kprobes.h |  59 ++++
 arch/sh/kernel/Makefile_32    |   1 +
 arch/sh/kernel/kprobes.c      | 568 ++++++++++++++++++++++++++++++++++
 arch/sh/kernel/traps_32.c     |   5 +
 5 files changed, 635 insertions(+)
 create mode 100644 arch/sh/include/asm/kprobes.h
 create mode 100644 arch/sh/kernel/kprobes.c

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index af2b174fd7d9..334917a62ecf 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -20,6 +20,8 @@ config SUPERH
 
 config SUPERH32
 	def_bool !SUPERH64
+	select HAVE_KPROBES
+	select HAVE_KRETPROBES
 
 config SUPERH64
 	def_bool y if CPU_SH5
diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h
new file mode 100644
index 000000000000..70fc629df904
--- /dev/null
+++ b/arch/sh/include/asm/kprobes.h
@@ -0,0 +1,59 @@
+#ifndef __ASM_SH_KPROBES_H
+#define __ASM_SH_KPROBES_H
+
+#ifdef CONFIG_KPROBES
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct pt_regs;
+
+typedef u16 kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION	0xc3ff
+
+#define MAX_INSN_SIZE 16
+#define MAX_STACK_SIZE 64
+#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
+	(((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \
+	? (MAX_STACK_SIZE) \
+	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
+
+#define regs_return_value(regs)		((regs)->regs[0])
+#define flush_insn_slot(p)		do { } while (0)
+#define kretprobe_blacklist_size	0
+
+struct kprobe;
+
+void arch_remove_kprobe(struct kprobe *);
+void kretprobe_trampoline(void);
+void jprobe_return_end(void);
+
+/* Architecture specific copy of original instruction*/
+struct arch_specific_insn {
+	/* copy of the original instruction */
+	kprobe_opcode_t insn[MAX_INSN_SIZE];
+};
+
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned long status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	unsigned long jprobe_saved_r15;
+	struct pt_regs jprobe_saved_regs;
+	kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
+	struct prev_kprobe prev_kprobe;
+};
+
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+				    unsigned long val, void *data);
+extern int kprobe_handle_illslot(unsigned long pc);
+#else
+
+#define kprobe_handle_illslot(pc)	(-1)
+
+#endif /* CONFIG_KPROBES */
+#endif /* __ASM_SH_KPROBES_H */
diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
index 0e6905fe9fec..12e617beba20 100644
--- a/arch/sh/kernel/Makefile_32
+++ b/arch/sh/kernel/Makefile_32
@@ -23,5 +23,6 @@ obj-$(CONFIG_PM)		+= pm.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_ELF_CORE)		+= dump_task.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
 
 EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
new file mode 100644
index 000000000000..c4f4a0923eb9
--- /dev/null
+++ b/arch/sh/kernel/kprobes.c
@@ -0,0 +1,568 @@
+/*
+ * Kernel probes (kprobes) for SuperH
+ *
+ * Copyright (C) 2007 Chris Smith <chris.smith@st.com>
+ * Copyright (C) 2006 Lineo Solutions, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/kdebug.h>
+#include <asm/cacheflush.h>
+#include <asm/uaccess.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+static struct kprobe saved_current_opcode;
+static struct kprobe saved_next_opcode;
+static struct kprobe saved_next_opcode2;
+
+#define OPCODE_JMP(x)	(((x) & 0xF0FF) == 0x402b)
+#define OPCODE_JSR(x)	(((x) & 0xF0FF) == 0x400b)
+#define OPCODE_BRA(x)	(((x) & 0xF000) == 0xa000)
+#define OPCODE_BRAF(x)	(((x) & 0xF0FF) == 0x0023)
+#define OPCODE_BSR(x)	(((x) & 0xF000) == 0xb000)
+#define OPCODE_BSRF(x)	(((x) & 0xF0FF) == 0x0003)
+
+#define OPCODE_BF_S(x)	(((x) & 0xFF00) == 0x8f00)
+#define OPCODE_BT_S(x)	(((x) & 0xFF00) == 0x8d00)
+
+#define OPCODE_BF(x)	(((x) & 0xFF00) == 0x8b00)
+#define OPCODE_BT(x)	(((x) & 0xFF00) == 0x8900)
+
+#define OPCODE_RTS(x)	(((x) & 0x000F) == 0x000b)
+#define OPCODE_RTE(x)	(((x) & 0xFFFF) == 0x002b)
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	kprobe_opcode_t opcode = *(kprobe_opcode_t *) (p->addr);
+
+	if (OPCODE_RTE(opcode))
+		return -EFAULT;	/* Bad breakpoint */
+
+	p->opcode = opcode;
+
+	return 0;
+}
+
+void __kprobes arch_copy_kprobe(struct kprobe *p)
+{
+	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	p->opcode = *p->addr;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	*p->addr = p->opcode;
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+	if (*p->addr == BREAKPOINT_INSTRUCTION)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * If an illegal slot instruction exception occurs for an address
+ * containing a kprobe, remove the probe.
+ *
+ * Returns 0 if the exception was handled successfully, 1 otherwise.
+ */
+int __kprobes kprobe_handle_illslot(unsigned long pc)
+{
+	struct kprobe *p = get_kprobe((kprobe_opcode_t *) pc + 1);
+
+	if (p != NULL) {
+		printk("Warning: removing kprobe from delay slot: 0x%.8x\n",
+		       (unsigned int)pc + 2);
+		unregister_kprobe(p);
+		return 0;
+	}
+
+	return 1;
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	if (saved_next_opcode.addr != 0x0) {
+		arch_disarm_kprobe(p);
+		arch_disarm_kprobe(&saved_next_opcode);
+		saved_next_opcode.addr = 0x0;
+		saved_next_opcode.opcode = 0x0;
+
+		if (saved_next_opcode2.addr != 0x0) {
+			arch_disarm_kprobe(&saved_next_opcode2);
+			saved_next_opcode2.addr = 0x0;
+			saved_next_opcode2.opcode = 0x0;
+		}
+	}
+}
+
+static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
+static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+				      struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = p;
+}
+
+/*
+ * Singlestep is implemented by disabling the current kprobe and setting one
+ * on the next instruction, following branches. Two probes are set if the
+ * branch is conditional.
+ */
+static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t *addr = NULL;
+	saved_current_opcode.addr = (kprobe_opcode_t *) (regs->pc);
+	addr = saved_current_opcode.addr;
+
+	if (p != NULL) {
+		arch_disarm_kprobe(p);
+
+		if (OPCODE_JSR(p->opcode) || OPCODE_JMP(p->opcode)) {
+			unsigned int reg_nr = ((p->opcode >> 8) & 0x000F);
+			saved_next_opcode.addr =
+			    (kprobe_opcode_t *) regs->regs[reg_nr];
+		} else if (OPCODE_BRA(p->opcode) || OPCODE_BSR(p->opcode)) {
+			unsigned long disp = (p->opcode & 0x0FFF);
+			saved_next_opcode.addr =
+			    (kprobe_opcode_t *) (regs->pc + 4 + disp * 2);
+
+		} else if (OPCODE_BRAF(p->opcode) || OPCODE_BSRF(p->opcode)) {
+			unsigned int reg_nr = ((p->opcode >> 8) & 0x000F);
+			saved_next_opcode.addr =
+			    (kprobe_opcode_t *) (regs->pc + 4 +
+						 regs->regs[reg_nr]);
+
+		} else if (OPCODE_RTS(p->opcode)) {
+			saved_next_opcode.addr = (kprobe_opcode_t *) regs->pr;
+
+		} else if (OPCODE_BF(p->opcode) || OPCODE_BT(p->opcode)) {
+			unsigned long disp = (p->opcode & 0x00FF);
+			/* case 1 */
+			saved_next_opcode.addr = p->addr + 1;
+			/* case 2 */
+			saved_next_opcode2.addr =
+			    (kprobe_opcode_t *) (regs->pc + 4 + disp * 2);
+			saved_next_opcode2.opcode = *(saved_next_opcode2.addr);
+			arch_arm_kprobe(&saved_next_opcode2);
+
+		} else if (OPCODE_BF_S(p->opcode) || OPCODE_BT_S(p->opcode)) {
+			unsigned long disp = (p->opcode & 0x00FF);
+			/* case 1 */
+			saved_next_opcode.addr = p->addr + 2;
+			/* case 2 */
+			saved_next_opcode2.addr =
+			    (kprobe_opcode_t *) (regs->pc + 4 + disp * 2);
+			saved_next_opcode2.opcode = *(saved_next_opcode2.addr);
+			arch_arm_kprobe(&saved_next_opcode2);
+
+		} else {
+			saved_next_opcode.addr = p->addr + 1;
+		}
+
+		saved_next_opcode.opcode = *(saved_next_opcode.addr);
+		arch_arm_kprobe(&saved_next_opcode);
+	}
+}
+
+/* Called with kretprobe_lock held */
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+				      struct pt_regs *regs)
+{
+	ri->ret_addr = (kprobe_opcode_t *) regs->pr;
+
+	/* Replace the return addr with trampoline addr */
+	regs->pr = (unsigned long)kretprobe_trampoline;
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	int ret = 0;
+	kprobe_opcode_t *addr = NULL;
+	struct kprobe_ctlblk *kcb;
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+
+	addr = (kprobe_opcode_t *) (regs->pc);
+
+	/* Check we're not actually recursing */
+	if (kprobe_running()) {
+		p = get_kprobe(addr);
+		if (p) {
+			if (kcb->kprobe_status == KPROBE_HIT_SS &&
+			    *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
+				goto no_kprobe;
+			}
+			/* We have reentered the kprobe_handler(), since
+			 * another probe was hit while within the handler.
+			 * We here save the original kprobes variables and
+			 * just single step on the instruction of the new probe
+			 * without calling any user handlers.
+			 */
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p, regs, kcb);
+			kprobes_inc_nmissed_count(p);
+			prepare_singlestep(p, regs);
+			kcb->kprobe_status = KPROBE_REENTER;
+			return 1;
+		} else {
+			p = __get_cpu_var(current_kprobe);
+			if (p->break_handler && p->break_handler(p, regs)) {
+				goto ss_probe;
+			}
+		}
+		goto no_kprobe;
+	}
+
+	p = get_kprobe(addr);
+	if (!p) {
+		/* Not one of ours: let kernel handle it */
+		goto no_kprobe;
+	}
+
+	set_current_kprobe(p, regs, kcb);
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+	if (p->pre_handler && p->pre_handler(p, regs))
+		/* handler has already set things up, so skip ss setup */
+		return 1;
+
+      ss_probe:
+	prepare_singlestep(p, regs);
+	kcb->kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+      no_kprobe:
+	preempt_enable_no_resched();
+	return ret;
+}
+
+/*
+ * For function-return probes, init_kprobes() establishes a probepoint
+ * here. When a retprobed function returns, this probe is hit and
+ * trampoline_probe_handler() runs, calling the kretprobe's handler.
+ */
+void kretprobe_trampoline_holder(void)
+{
+	asm volatile ("kretprobe_trampoline: \n" "nop\n");
+}
+
+/*
+ * Called when we hit the probe point at kretprobe_trampoline
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head, empty_rp;
+	struct hlist_node *node, *tmp;
+	unsigned long flags, orig_ret_address = 0;
+	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+
+	INIT_HLIST_HEAD(&empty_rp);
+	kretprobe_hash_lock(current, &head, &flags);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler) {
+			__get_cpu_var(current_kprobe) = &ri->rp->kp;
+			ri->rp->handler(ri, regs);
+			__get_cpu_var(current_kprobe) = NULL;
+		}
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri, &empty_rp);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+	regs->pc = orig_ret_address;
+	kretprobe_hash_unlock(current, &flags);
+
+	preempt_enable_no_resched();
+
+	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+		hlist_del(&ri->hlist);
+		kfree(ri);
+	}
+
+	return orig_ret_address;
+}
+
+static inline int post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	kprobe_opcode_t *addr = NULL;
+	struct kprobe *p = NULL;
+
+	if (!cur)
+		return 0;
+
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	if (saved_next_opcode.addr != 0x0) {
+		arch_disarm_kprobe(&saved_next_opcode);
+		saved_next_opcode.addr = 0x0;
+		saved_next_opcode.opcode = 0x0;
+
+		addr = saved_current_opcode.addr;
+		saved_current_opcode.addr = 0x0;
+
+		p = get_kprobe(addr);
+		arch_arm_kprobe(p);
+
+		if (saved_next_opcode2.addr != 0x0) {
+			arch_disarm_kprobe(&saved_next_opcode2);
+			saved_next_opcode2.addr = 0x0;
+			saved_next_opcode2.opcode = 0x0;
+		}
+	}
+
+	/*Restore back the original saved kprobes variables and continue. */
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
+		goto out;
+	}
+	reset_current_kprobe();
+
+      out:
+	preempt_enable_no_resched();
+
+	return 1;
+}
+
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	const struct exception_table_entry *entry;
+
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe, point the pc back to the probe address
+		 * and allow the page fault handler to continue as a
+		 * normal page fault.
+		 */
+		regs->pc = (unsigned long)cur->addr;
+		if (kcb->kprobe_status == KPROBE_REENTER)
+			restore_previous_kprobe(kcb);
+		else
+			reset_current_kprobe();
+		preempt_enable_no_resched();
+		break;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * We increment the nmissed count for accounting,
+		 * we can also use npre/npostfault count for accounting
+		 * these specific fault cases.
+		 */
+		kprobes_inc_nmissed_count(cur);
+
+		/*
+		 * We come here because instructions in the pre/post
+		 * handler caused the page_fault, this could happen
+		 * if handler tries to access user space by
+		 * copy_from_user(), get_user() etc. Let the
+		 * user-specified handler try to fix it first.
+		 */
+		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+			return 1;
+
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+		if ((entry = search_exception_tables(regs->pc)) != NULL) {
+			regs->pc = entry->fixup;
+			return 1;
+		}
+
+		/*
+		 * fixup_exception() could not handle it,
+		 * Let do_page_fault() fix it.
+		 */
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct kprobe *p = NULL;
+	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+	kprobe_opcode_t *addr = NULL;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	addr = (kprobe_opcode_t *) (args->regs->pc);
+	if (val == DIE_TRAP) {
+		if (!kprobe_running()) {
+			if (kprobe_handler(args->regs)) {
+				ret = NOTIFY_STOP;
+			} else {
+				/* Not a kprobe trap */
+				force_sig(SIGTRAP, current);
+			}
+		} else {
+			p = get_kprobe(addr);
+			if ((kcb->kprobe_status == KPROBE_HIT_SS) ||
+			    (kcb->kprobe_status == KPROBE_REENTER)) {
+				if (post_kprobe_handler(args->regs))
+					ret = NOTIFY_STOP;
+			} else {
+				if (kprobe_handler(args->regs)) {
+					ret = NOTIFY_STOP;
+				} else {
+					p = __get_cpu_var(current_kprobe);
+					if (p->break_handler
+					    && p->break_handler(p, args->regs))
+						ret = NOTIFY_STOP;
+				}
+			}
+		}
+	}
+
+	return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	unsigned long addr;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	kcb->jprobe_saved_regs = *regs;
+	kcb->jprobe_saved_r15 = regs->regs[15];
+	addr = kcb->jprobe_saved_r15;
+
+	/*
+	 * TBD: As Linus pointed out, gcc assumes that the callee
+	 * owns the argument space and could overwrite it, e.g.
+	 * tailcall optimization. So, to be absolutely safe
+	 * we also save and restore enough stack bytes to cover
+	 * the argument area.
+	 */
+	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *) addr,
+	       MIN_STACK_SIZE(addr));
+
+	regs->pc = (unsigned long)(jp->entry);
+
+	return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+	__asm("trapa #-1\n\t" "jprobe_return_end:\n\t" "nop\n\t");
+
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	u8 *addr = (u8 *) regs->pc;
+	unsigned long stack_addr = kcb->jprobe_saved_r15;
+
+	if ((addr >= (u8 *) jprobe_return)
+	    && (addr <= (u8 *) jprobe_return_end)) {
+		*regs = kcb->jprobe_saved_regs;
+
+		memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
+		       MIN_STACK_SIZE(stack_addr));
+
+		kcb->kprobe_status = KPROBE_HIT_SS;
+		return 1;
+	}
+	return 0;
+}
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	saved_next_opcode.addr = 0x0;
+	saved_next_opcode.opcode = 0x0;
+
+	saved_current_opcode.addr = 0x0;
+	saved_current_opcode.opcode = 0x0;
+
+	saved_next_opcode2.addr = 0x0;
+	saved_next_opcode2.opcode = 0x0;
+
+	return register_kprobe(&trampoline_p);
+}
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 4901f6732162..862667a341fd 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -26,6 +26,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/fpu.h>
+#include <asm/kprobes.h>
 
 #ifdef CONFIG_SH_KGDB
 #include <asm/kgdb.h>
@@ -743,6 +744,10 @@ asmlinkage void do_illegal_slot_inst(unsigned long r4, unsigned long r5,
 	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
 	unsigned long error_code;
 	struct task_struct *tsk = current;
+
+	if (kprobe_handle_illslot(regs->pc) == 0)
+		return;
+
 #ifdef CONFIG_SH_FPU_EMU
 	unsigned short inst = 0;
 
-- 
GitLab


From 6907e6a601a4fd442535d977886bbd3761aa2b56 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 5 Sep 2008 17:27:37 +0900
Subject: [PATCH 075/895] sh: Add the rest of the boot targets to
 arch/sh/boot/.gitignore.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boot/.gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/sh/boot/.gitignore b/arch/sh/boot/.gitignore
index b6718de23693..aad5edddf93b 100644
--- a/arch/sh/boot/.gitignore
+++ b/arch/sh/boot/.gitignore
@@ -1 +1,4 @@
 zImage
+vmlinux.srec
+uImage
+uImage.srec
-- 
GitLab


From 205a3b4328de1c8ddd99ddd5092bed1344068213 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 5 Sep 2008 18:00:29 +0900
Subject: [PATCH 076/895] sh: uninline flush_icache_all().

This uses jump_to_uncached() which is now given the noinline attribute
due to the special section mapping. Kill off the inline attribute to
fix up compilation failure.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/mm/cache-sh4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 1fdc8d90254a..5cfe08dbb59e 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -261,7 +261,7 @@ void flush_dcache_page(struct page *page)
 }
 
 /* TODO: Selective icache invalidation through IC address array.. */
-static inline void __uses_jump_to_uncached flush_icache_all(void)
+static void __uses_jump_to_uncached flush_icache_all(void)
 {
 	unsigned long flags, ccr;
 
-- 
GitLab


From 53abf911fa6753dfbd6775ae541fb2f8b9f5b825 Mon Sep 17 00:00:00 2001
From: Luca Santini <luca.santini@spesonline.com>
Date: Mon, 8 Sep 2008 11:54:56 +0900
Subject: [PATCH 077/895] sh: Enable IRLM mode for SH7760 IRQ_MODE_IRQ.

Follows the same setting as SH7750.

Signed-off-by: Luca Santini <luca.santini@spesonline.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/sh4/setup-sh7760.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index 254c5c55ab91..d9bdc931ac09 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/io.h>
 
 enum {
 	UNUSED = 0,
@@ -178,10 +179,14 @@ static int __init sh7760_devices_setup(void)
 }
 __initcall(sh7760_devices_setup);
 
+#define INTC_ICR	0xffd00000UL
+#define INTC_ICR_IRLM	(1 << 7)
+
 void __init plat_irq_setup_pins(int mode)
 {
 	switch (mode) {
 	case IRQ_MODE_IRQ:
+		ctrl_outw(ctrl_inw(INTC_ICR) | INTC_ICR_IRLM, INTC_ICR);
 		register_intc_controller(&intc_desc_irq);
 		break;
 	default:
-- 
GitLab


From 3db9170880241d63aae7ab86a03aa15418c3e5c6 Mon Sep 17 00:00:00 2001
From: Luca Santini <luca.santini@spesonline.com>
Date: Mon, 8 Sep 2008 12:01:15 +0900
Subject: [PATCH 078/895] sh: Add Renesas EDOSK7760 board support.

This adds support for the Renesas (RTE) EDOSK7760 board. Currently
supported devices are:

	 - ramdisk support
	 - ethernet support
	 - nfs support
	 - ext2/ext3 support
	 - i2c support
	 - fb support (M)

Signed-off-by: Luca Santini <luca.santini@spesonline.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/Kconfig              |    7 +
 arch/sh/boards/Makefile             |    1 +
 arch/sh/boards/board-edosk7760.c    |  144 ++++
 arch/sh/configs/edosk7760_defconfig | 1050 +++++++++++++++++++++++++++
 4 files changed, 1202 insertions(+)
 create mode 100644 arch/sh/boards/board-edosk7760.c
 create mode 100644 arch/sh/configs/edosk7760_defconfig

diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index ae194869fd60..c6b21e82755e 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -184,6 +184,13 @@ config SH_EDOSK7705
 	bool "EDOSK7705"
 	depends on CPU_SUBTYPE_SH7705
 
+config SH_EDOSK7760
+	bool "EDOSK7760"
+	depends on CPU_SUBTYPE_SH7760
+	help
+	  Select if configuring for a Renesas EDOSK7760
+	  evaluation board.
+
 config SH_SH4202_MICRODEV
 	bool "SH4-202 MicroDev"
 	depends on CPU_SUBTYPE_SH4_202
diff --git a/arch/sh/boards/Makefile b/arch/sh/boards/Makefile
index 463022c7df3c..d9efa3923721 100644
--- a/arch/sh/boards/Makefile
+++ b/arch/sh/boards/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_SH_MAGIC_PANEL_R2)	+= board-magicpanelr2.o
 obj-$(CONFIG_SH_RSK7203)	+= board-rsk7203.o
 obj-$(CONFIG_SH_SH7785LCR)	+= board-sh7785lcr.o
 obj-$(CONFIG_SH_SHMIN)		+= board-shmin.o
+obj-$(CONFIG_SH_EDOSK7760)	+= board-edosk7760.o
diff --git a/arch/sh/boards/board-edosk7760.c b/arch/sh/boards/board-edosk7760.c
new file mode 100644
index 000000000000..7cc5e1143522
--- /dev/null
+++ b/arch/sh/boards/board-edosk7760.c
@@ -0,0 +1,144 @@
+/*
+ * Renesas Europe EDOSK7760 Board Support
+ *
+ * Copyright (C) 2008 SPES Societa' Progettazione Elettronica e Software Ltd.
+ * Author: Luca Santini <luca.santini@spesonline.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/smc91x.h>
+#include <linux/interrupt.h>
+#include <linux/i2c.h>
+#include <asm/machvec.h>
+#include <asm/io.h>
+#include <asm/addrspace.h>
+#include <asm/delay.h>
+#include <asm/i2c-sh7760.h>
+
+/* Bus state controller registers for CS4 area */
+#define BSC_CS4BCR	0xA4FD0010
+#define BSC_CS4WCR	0xA4FD0030
+
+#define SMC_IOBASE	0xA2000000
+#define SMC_IO_OFFSET	0x300
+#define SMC_IOADDR	(SMC_IOBASE + SMC_IO_OFFSET)
+
+#define ETHERNET_IRQ	5
+
+/* i2c initialization functions */
+static struct sh7760_i2c_platdata i2c_pd = {
+	.speed_khz	= 400,
+};
+
+static struct resource sh7760_i2c1_res[] = {
+	{
+		.start	= SH7760_I2C1_MMIO,
+		.end	= SH7760_I2C1_MMIOEND,
+		.flags	= IORESOURCE_MEM,
+	},{
+		.start	= SH7760_I2C1_IRQ,
+		.end	= SH7760_I2C1_IRQ,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device sh7760_i2c1_dev = {
+	.dev    = {
+		.platform_data	= &i2c_pd,
+	},
+
+	.name		= SH7760_I2C_DEVNAME,
+	.id		= 1,
+	.resource	= sh7760_i2c1_res,
+	.num_resources	= ARRAY_SIZE(sh7760_i2c1_res),
+};
+
+static struct resource sh7760_i2c0_res[] = {
+	{
+		.start	= SH7760_I2C0_MMIO,
+		.end	= SH7760_I2C0_MMIOEND,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= SH7760_I2C0_IRQ,
+		.end	= SH7760_I2C0_IRQ,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device sh7760_i2c0_dev = {
+	.dev    = {
+		.platform_data	= &i2c_pd,
+	},
+	.name		= SH7760_I2C_DEVNAME,
+	.id		= 0,
+	.resource	= sh7760_i2c0_res,
+	.num_resources	= ARRAY_SIZE(sh7760_i2c0_res),
+};
+
+/* eth initialization functions */
+static struct smc91x_platdata smc91x_info = {
+	.flags = SMC91X_USE_16BIT | SMC91X_IO_SHIFT_1 | IORESOURCE_IRQ_LOWLEVEL,
+};
+
+static struct resource smc91x_res[] = {
+	[0] = {
+		.start	= SMC_IOADDR,
+		.end	= SMC_IOADDR + 0x1f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= ETHERNET_IRQ,
+		.end	= ETHERNET_IRQ,
+		.flags	= IORESOURCE_IRQ ,
+	}
+};
+
+static struct platform_device smc91x_dev = {
+	.name		= "smc91x",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(smc91x_res),
+	.resource	= smc91x_res,
+
+	.dev	= {
+		.platform_data	= &smc91x_info,
+	},
+};
+
+/* platform init code */
+static struct platform_device *edosk7760_devices[] __initdata = {
+	&sh7760_i2c0_dev,
+	&sh7760_i2c1_dev,
+	&smc91x_dev,
+};
+
+static int __init init_edosk7760_devices(void)
+{
+	plat_irq_setup_pins(IRQ_MODE_IRQ);
+
+	return platform_add_devices(edosk7760_devices,
+				    ARRAY_SIZE(edosk7760_devices));
+}
+__initcall(init_edosk7760_devices);
+
+/*
+ * The Machine Vector
+ */
+struct sh_machine_vector mv_edosk7760 __initmv = {
+	.mv_name	= "EDOSK7760",
+	.mv_nr_irqs	= 128,
+};
diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig
new file mode 100644
index 000000000000..f8ec32a5d4ae
--- /dev/null
+++ b/arch/sh/configs/edosk7760_defconfig
@@ -0,0 +1,1050 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.26
+# Tue Aug 26 11:36:09 2008
+#
+CONFIG_SUPERH=y
+CONFIG_SUPERH32=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_NO_VIRT_TO_BUS=y
+CONFIG_ARCH_SUPPORTS_AOUT=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION="_edosk7760"
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=17
+# CONFIG_CGROUPS is not set
+# CONFIG_GROUP_SCHED is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_COMPAT_BRK=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_ANON_INODES=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLUB_DEBUG=y
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_HAVE_KPROBES is not set
+# CONFIG_HAVE_KRETPROBES is not set
+# CONFIG_HAVE_DMA_ATTRS is not set
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+CONFIG_CLASSIC_RCU=y
+
+#
+# System type
+#
+CONFIG_CPU_SH4=y
+# CONFIG_CPU_SUBTYPE_SH7619 is not set
+# CONFIG_CPU_SUBTYPE_SH7203 is not set
+# CONFIG_CPU_SUBTYPE_SH7206 is not set
+# CONFIG_CPU_SUBTYPE_SH7263 is not set
+# CONFIG_CPU_SUBTYPE_MXG is not set
+# CONFIG_CPU_SUBTYPE_SH7705 is not set
+# CONFIG_CPU_SUBTYPE_SH7706 is not set
+# CONFIG_CPU_SUBTYPE_SH7707 is not set
+# CONFIG_CPU_SUBTYPE_SH7708 is not set
+# CONFIG_CPU_SUBTYPE_SH7709 is not set
+# CONFIG_CPU_SUBTYPE_SH7710 is not set
+# CONFIG_CPU_SUBTYPE_SH7712 is not set
+# CONFIG_CPU_SUBTYPE_SH7720 is not set
+# CONFIG_CPU_SUBTYPE_SH7721 is not set
+# CONFIG_CPU_SUBTYPE_SH7750 is not set
+# CONFIG_CPU_SUBTYPE_SH7091 is not set
+# CONFIG_CPU_SUBTYPE_SH7750R is not set
+# CONFIG_CPU_SUBTYPE_SH7750S is not set
+# CONFIG_CPU_SUBTYPE_SH7751 is not set
+# CONFIG_CPU_SUBTYPE_SH7751R is not set
+CONFIG_CPU_SUBTYPE_SH7760=y
+# CONFIG_CPU_SUBTYPE_SH4_202 is not set
+# CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7763 is not set
+# CONFIG_CPU_SUBTYPE_SH7770 is not set
+# CONFIG_CPU_SUBTYPE_SH7780 is not set
+# CONFIG_CPU_SUBTYPE_SH7785 is not set
+# CONFIG_CPU_SUBTYPE_SHX3 is not set
+# CONFIG_CPU_SUBTYPE_SH7343 is not set
+# CONFIG_CPU_SUBTYPE_SH7722 is not set
+# CONFIG_CPU_SUBTYPE_SH7366 is not set
+# CONFIG_CPU_SUBTYPE_SH5_101 is not set
+# CONFIG_CPU_SUBTYPE_SH5_103 is not set
+
+#
+# Memory management options
+#
+CONFIG_QUICKLIST=y
+CONFIG_MMU=y
+CONFIG_PAGE_OFFSET=0x80000000
+CONFIG_MEMORY_START=0x0c000000
+CONFIG_MEMORY_SIZE=0x04000000
+CONFIG_29BIT=y
+CONFIG_VSYSCALL=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_MAX_ACTIVE_REGIONS=1
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_PAGE_SIZE_4KB=y
+# CONFIG_PAGE_SIZE_8KB is not set
+# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_STATIC=y
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=2
+
+#
+# Cache configuration
+#
+# CONFIG_SH_DIRECT_MAPPED is not set
+CONFIG_CACHE_WRITEBACK=y
+# CONFIG_CACHE_WRITETHROUGH is not set
+# CONFIG_CACHE_OFF is not set
+
+#
+# Processor features
+#
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_SH_FPU=y
+CONFIG_SH_STORE_QUEUES=y
+CONFIG_CPU_HAS_INTEVT=y
+CONFIG_CPU_HAS_SR_RB=y
+CONFIG_CPU_HAS_PTEA=y
+CONFIG_CPU_HAS_FPU=y
+
+#
+# Board support
+#
+CONFIG_SH_EDOSK7760=y
+
+#
+# Timer and clock configuration
+#
+CONFIG_SH_TMU=y
+CONFIG_SH_TIMER_IRQ=16
+CONFIG_SH_PCLK_FREQ=33333333
+CONFIG_TICK_ONESHOT=y
+# CONFIG_NO_HZ is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# DMA support
+#
+CONFIG_SH_DMA_API=y
+CONFIG_SH_DMA=y
+CONFIG_NR_ONCHIP_DMA_CHANNELS=4
+# CONFIG_NR_DMA_CHANNELS_BOOL is not set
+# CONFIG_SH_DMABRG is not set
+
+#
+# Companion Chips
+#
+
+#
+# Additional SuperH Device Drivers
+#
+# CONFIG_HEARTBEAT is not set
+# CONFIG_PUSH_SWITCH is not set
+
+#
+# Kernel features
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+# CONFIG_SCHED_HRTICK is not set
+# CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
+# CONFIG_PREEMPT_NONE is not set
+# CONFIG_PREEMPT_VOLUNTARY is not set
+CONFIG_PREEMPT=y
+# CONFIG_PREEMPT_RCU is not set
+CONFIG_GUSA=y
+# CONFIG_GUSA_RB is not set
+
+#
+# Boot options
+#
+CONFIG_ZERO_PAGE_OFFSET=0x00001000
+CONFIG_BOOT_LINK_OFFSET=0x02000000
+# CONFIG_UBC_WAKEUP is not set
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="mem=64M console=ttySC2,115200 root=/dev/nfs rw nfsroot=192.168.0.3:/scripts/filesys ip=192.168.0.4"
+
+#
+# Bus options
+#
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+# CONFIG_IP_PNP_DHCP is not set
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_IEEE80211 is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+CONFIG_DEBUG_DRIVER=y
+CONFIG_DEBUG_DEVRES=y
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+CONFIG_MTD_DEBUG=y
+CONFIG_MTD_DEBUG_VERBOSE=3
+CONFIG_MTD_CONCAT=y
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_REDBOOT_PARTS is not set
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_GEN_PROBE=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_NOSWAP=y
+# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set
+# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set
+CONFIG_MTD_CFI_GEOMETRY=y
+# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+# CONFIG_MTD_MAP_BANK_WIDTH_4 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+# CONFIG_MTD_CFI_I2 is not set
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_OTP is not set
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_CFI_STAA=y
+CONFIG_MTD_CFI_UTIL=y
+CONFIG_MTD_RAM=y
+CONFIG_MTD_ROM=y
+CONFIG_MTD_ABSENT=y
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_START=0xA0000000
+CONFIG_MTD_PHYSMAP_LEN=0x01000000
+CONFIG_MTD_PHYSMAP_BANKWIDTH=2
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=26000
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_AX88796 is not set
+# CONFIG_STNIC is not set
+# CONFIG_SMC9194 is not set
+CONFIG_SMC91X=y
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_B44 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI_LEDS is not set
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=3
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+
+#
+# I2C Hardware Bus support
+#
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_SIMTEC is not set
+# CONFIG_I2C_TAOS_EVM is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_PCA_PLATFORM is not set
+CONFIG_I2C_SH7760=y
+# CONFIG_I2C_SH_MOBILE is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_EEPROM is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+CONFIG_I2C_DEBUG_CORE=y
+CONFIG_I2C_DEBUG_ALGO=y
+CONFIG_I2C_DEBUG_BUS=y
+CONFIG_I2C_DEBUG_CHIP=y
+# CONFIG_SPI is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+CONFIG_FB=m
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+CONFIG_FB_CFB_FILLRECT=m
+CONFIG_FB_CFB_COPYAREA=m
+CONFIG_FB_CFB_IMAGEBLIT=m
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+CONFIG_FB_TILEBLITTING=y
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_MB86290_640X480_16BPP is not set
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE is not set
+# CONFIG_LOGO is not set
+
+#
+# Sound
+#
+CONFIG_SOUND=y
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=y
+CONFIG_SND_TIMER=y
+CONFIG_SND_PCM=y
+# CONFIG_SND_SEQUENCER is not set
+# CONFIG_SND_MIXER_OSS is not set
+# CONFIG_SND_PCM_OSS is not set
+# CONFIG_SND_DYNAMIC_MINORS is not set
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_VERBOSE_PRINTK=y
+# CONFIG_SND_DEBUG is not set
+
+#
+# Generic devices
+#
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+
+#
+# SUPERH devices
+#
+
+#
+# System on Chip audio support
+#
+CONFIG_SND_SOC=y
+
+#
+# SoC Audio support for SuperH
+#
+
+#
+# ALSA SoC audio for Freescale SOCs
+#
+
+#
+# SoC Audio for the Texas Instruments OMAP
+#
+
+#
+# Open Sound System
+#
+# CONFIG_SOUND_PRIME is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+# CONFIG_EXT2_FS_POSIX_ACL is not set
+# CONFIG_EXT2_FS_SECURITY is not set
+CONFIG_EXT2_FS_XIP=y
+CONFIG_FS_XIP=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+# CONFIG_EXT4DEV_FS is not set
+CONFIG_JBD=y
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+CONFIG_GENERIC_ACL=y
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+# CONFIG_PROC_KCORE is not set
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+# CONFIG_NFS_V3 is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFSD is not set
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_SUNRPC_BIND34 is not set
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+CONFIG_NLS_ISO8859_15=y
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+CONFIG_NLS_UTF8=y
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_UNUSED_SYMBOLS=y
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_SCHEDSTATS is not set
+CONFIG_TIMER_STATS=y
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_SLUB_DEBUG_ON is not set
+# CONFIG_SLUB_STATS is not set
+CONFIG_DEBUG_PREEMPT=y
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_SAMPLES is not set
+# CONFIG_SH_STANDARD_BIOS is not set
+CONFIG_EARLY_SCIF_CONSOLE=y
+CONFIG_EARLY_SCIF_CONSOLE_PORT=0xffe80000
+CONFIG_EARLY_PRINTK=y
+# CONFIG_DEBUG_BOOTMEM is not set
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_4KSTACKS is not set
+# CONFIG_IRQSTACKS is not set
+# CONFIG_SH_KGDB is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+CONFIG_CRYPTO_ALGAPI=y
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+CONFIG_CRYPTO_HW=y
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
-- 
GitLab


From 4ad06dd6f1ec745c5ee0e37ec26b1b817f88507a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 8 Sep 2008 12:01:55 +0900
Subject: [PATCH 079/895] sh: Add EDOSK7760 mach type.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/tools/mach-types | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sh/tools/mach-types b/arch/sh/tools/mach-types
index 0a11cc08f0a5..d4fb11f7e2ee 100644
--- a/arch/sh/tools/mach-types
+++ b/arch/sh/tools/mach-types
@@ -30,6 +30,7 @@ HP6XX			SH_HP6XX
 DREAMCAST		SH_DREAMCAST
 SNAPGEAR		SH_SECUREEDGE5410
 EDOSK7705		SH_EDOSK7705
+EDOSK7760		SH_EDOSK7760
 SH4202_MICRODEV		SH_SH4202_MICRODEV
 SH03			SH_SH03
 LANDISK			SH_LANDISK
-- 
GitLab


From e7cb016e5a3163e2999d8715390d64eb46816655 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 8 Sep 2008 12:02:17 +0900
Subject: [PATCH 080/895] sh: Mark kretprobe_trampoline_holder static and
 __used.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/kprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index c4f4a0923eb9..ac6074942fd6 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -277,7 +277,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
  * here. When a retprobed function returns, this probe is hit and
  * trampoline_probe_handler() runs, calling the kretprobe's handler.
  */
-void kretprobe_trampoline_holder(void)
+static void __used kretprobe_trampoline_holder(void)
 {
 	asm volatile ("kretprobe_trampoline: \n" "nop\n");
 }
-- 
GitLab


From fc63562ac2107dfa843f5288fe985fc6f0021c17 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 8 Sep 2008 12:10:35 +0900
Subject: [PATCH 081/895] sh: Disable seccomp support by default.

This was initially checked in with a stupid default of y, while most
everyone is going to want to have this disabled anyways.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 334917a62ecf..c5b08eb3770e 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -496,7 +496,6 @@ config CRASH_DUMP
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	depends on PROC_FS
-	default y
 	help
 	  This kernel feature is useful for number crunching applications
 	  that may need to compute untrusted bytecode during their
-- 
GitLab


From 037c10a612e8b7461e33672fb3848807ac6e2346 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 8 Sep 2008 12:22:47 +0900
Subject: [PATCH 082/895] sh: kprobes: Hook up kprobe_fault_handler() in the
 page fault path.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/kprobes.h |  3 +--
 arch/sh/kernel/kprobes.c      |  2 +-
 arch/sh/mm/fault_32.c         | 29 ++++++++++++++++++++++++++++-
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h
index 70fc629df904..756a5cd96378 100644
--- a/arch/sh/include/asm/kprobes.h
+++ b/arch/sh/include/asm/kprobes.h
@@ -6,8 +6,6 @@
 #include <linux/types.h>
 #include <linux/ptrace.h>
 
-struct pt_regs;
-
 typedef u16 kprobe_opcode_t;
 #define BREAKPOINT_INSTRUCTION	0xc3ff
 
@@ -48,6 +46,7 @@ struct kprobe_ctlblk {
 	struct prev_kprobe prev_kprobe;
 };
 
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
 extern int kprobe_handle_illslot(unsigned long pc);
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index ac6074942fd6..81a3725e5155 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -393,7 +393,7 @@ static inline int post_kprobe_handler(struct pt_regs *regs)
 	return 1;
 }
 
-static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index e8efda9846bb..659811c179e6 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -2,7 +2,7 @@
  * Page fault handler for SH with an MMU.
  *
  *  Copyright (C) 1999  Niibe Yutaka
- *  Copyright (C) 2003 - 2007  Paul Mundt
+ *  Copyright (C) 2003 - 2008  Paul Mundt
  *
  *  Based on linux/arch/i386/mm/fault.c:
  *   Copyright (C) 1995  Linus Torvalds
@@ -21,6 +21,27 @@
 #include <asm/tlbflush.h>
 #include <asm/kgdb.h>
 
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
+{
+	int ret = 0;
+
+	if (!user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, trap))
+			ret = 1;
+		preempt_enable();
+	}
+
+	return ret;
+}
+#else
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
+{
+	return 0;
+}
+#endif
+
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -37,6 +58,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	int fault;
 	siginfo_t info;
 
+	if (notify_page_fault(regs, writeaccess))
+		return;
+
 #ifdef CONFIG_SH_KGDB
 	if (kgdb_nofault && kgdb_bus_err_hook)
 		kgdb_bus_err_hook();
@@ -269,6 +293,9 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
 	pte_t *pte;
 	pte_t entry;
 
+	if (notify_page_fault(regs, writeaccess))
+		return 0;
+
 #ifdef CONFIG_SH_KGDB
 	if (kgdb_nofault && kgdb_bus_err_hook)
 		kgdb_bus_err_hook();
-- 
GitLab


From 174b5c9923e0170c844e03d55a9f3fb3b329a8b7 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 8 Sep 2008 18:10:10 +0900
Subject: [PATCH 083/895] sh: kprobes: Use trapa #0x3a for breakpoint trap.

Not all parts support trapa #0xff, so use something within the debug trap
range that's accessible on all parts.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/kprobes.h | 2 +-
 arch/sh/kernel/kprobes.c      | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h
index 756a5cd96378..6078d8e551d4 100644
--- a/arch/sh/include/asm/kprobes.h
+++ b/arch/sh/include/asm/kprobes.h
@@ -7,7 +7,7 @@
 #include <linux/ptrace.h>
 
 typedef u16 kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION	0xc3ff
+#define BREAKPOINT_INSTRUCTION	0xc33a
 
 #define MAX_INSN_SIZE 16
 #define MAX_STACK_SIZE 64
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 81a3725e5155..fdd049e9ad86 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -525,8 +525,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 
 void __kprobes jprobe_return(void)
 {
-	__asm("trapa #-1\n\t" "jprobe_return_end:\n\t" "nop\n\t");
-
+	asm volatile ("trapa #0x3a\n\t" "jprobe_return_end:\n\t" "nop\n\t");
 }
 
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-- 
GitLab


From ee386de77419f9fedf206d84c4d4b2de0ead5bcb Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 8 Sep 2008 18:12:33 +0900
Subject: [PATCH 084/895] sh: kprobes: Default to NOTIFY_DONE for unhandled
 debug traps.

Presently this is doing a force_sig() SIGTRAP, which is already taken
care of in the generic code if no one asserts NOTIFY_STOP. Switch the
default return to NOTIFY_DONE in the case of unhandled traps, so that
the same trap may pass through to other users on the same die chain.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/kprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index fdd049e9ad86..75accf9b4209 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -474,7 +474,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 				ret = NOTIFY_STOP;
 			} else {
 				/* Not a kprobe trap */
-				force_sig(SIGTRAP, current);
+				ret = NOTIFY_DONE;
 			}
 		} else {
 			p = get_kprobe(addr);
-- 
GitLab


From 247bc6d2ae3e2de08529977952c7d085f9d562d4 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 8 Sep 2008 18:14:50 +0900
Subject: [PATCH 085/895] sh: kprobes: Fix up a preemption imbalance on jprobe
 return.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/kprobes.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 75accf9b4209..f0e1c78d0bff 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -542,6 +542,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 		       MIN_STACK_SIZE(stack_addr));
 
 		kcb->kprobe_status = KPROBE_HIT_SS;
+		preempt_enable_no_resched();
 		return 1;
 	}
 	return 0;
-- 
GitLab


From 734db3770de03fbe9ae4e78519a7d1678189788c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 8 Sep 2008 18:15:55 +0900
Subject: [PATCH 086/895] sh: kprobes: Fix up race against probe point removal.

Handle a corner case where another CPU or debugger removes the probe
point from underneath us.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/kprobes.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index f0e1c78d0bff..a478ba78e752 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -252,6 +252,17 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	p = get_kprobe(addr);
 	if (!p) {
 		/* Not one of ours: let kernel handle it */
+		if (*(kprobe_opcode_t *)addr != BREAKPOINT_INSTRUCTION) {
+			/*
+			 * The breakpoint instruction was removed right
+			 * after we hit it. Another cpu has removed
+			 * either a probepoint or a debugger breakpoint
+			 * at this address. In either case, no further
+			 * handling of this interrupt is appropriate.
+			 */
+			ret = 1;
+		}
+
 		goto no_kprobe;
 	}
 
-- 
GitLab


From 4eb5845d6cbdb9bf03f563c22f3a54115121858f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 8 Sep 2008 18:22:47 +0900
Subject: [PATCH 087/895] sh: kprobes: __kprobes annotations and formatting
 cleanups.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/kprobes.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index a478ba78e752..e357e3669a34 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -115,20 +115,20 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
 	}
 }
 
-static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
 	kcb->prev_kprobe.kp = kprobe_running();
 	kcb->prev_kprobe.status = kcb->kprobe_status;
 }
 
-static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
 	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
 	kcb->kprobe_status = kcb->prev_kprobe.status;
 }
 
-static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
-				      struct kprobe_ctlblk *kcb)
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+					 struct kprobe_ctlblk *kcb)
 {
 	__get_cpu_var(current_kprobe) = p;
 }
@@ -138,7 +138,7 @@ static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
  * on the next instruction, following branches. Two probes are set if the
  * branch is conditional.
  */
-static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
 	kprobe_opcode_t *addr = NULL;
 	saved_current_opcode.addr = (kprobe_opcode_t *) (regs->pc);
@@ -273,12 +273,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 		/* handler has already set things up, so skip ss setup */
 		return 1;
 
-      ss_probe:
+ss_probe:
 	prepare_singlestep(p, regs);
 	kcb->kprobe_status = KPROBE_HIT_SS;
 	return 1;
 
-      no_kprobe:
+no_kprobe:
 	preempt_enable_no_resched();
 	return ret;
 }
@@ -358,7 +358,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	return orig_ret_address;
 }
 
-static inline int post_kprobe_handler(struct pt_regs *regs)
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -391,14 +391,15 @@ static inline int post_kprobe_handler(struct pt_regs *regs)
 		}
 	}
 
-	/*Restore back the original saved kprobes variables and continue. */
+	/* Restore back the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
 		restore_previous_kprobe(kcb);
 		goto out;
 	}
+
 	reset_current_kprobe();
 
-      out:
+out:
 	preempt_enable_no_resched();
 
 	return 1;
@@ -463,6 +464,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 	default:
 		break;
 	}
+
 	return 0;
 }
 
@@ -498,8 +500,8 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 					ret = NOTIFY_STOP;
 				} else {
 					p = __get_cpu_var(current_kprobe);
-					if (p->break_handler
-					    && p->break_handler(p, args->regs))
+					if (p->break_handler &&
+					    p->break_handler(p, args->regs))
 						ret = NOTIFY_STOP;
 				}
 			}
@@ -542,25 +544,26 @@ void __kprobes jprobe_return(void)
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-	u8 *addr = (u8 *) regs->pc;
 	unsigned long stack_addr = kcb->jprobe_saved_r15;
+	u8 *addr = (u8 *)regs->pc;
 
-	if ((addr >= (u8 *) jprobe_return)
-	    && (addr <= (u8 *) jprobe_return_end)) {
+	if ((addr >= (u8 *)jprobe_return) &&
+	    (addr <= (u8 *)jprobe_return_end)) {
 		*regs = kcb->jprobe_saved_regs;
 
-		memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
+		memcpy((kprobe_opcode_t *)stack_addr, kcb->jprobes_stack,
 		       MIN_STACK_SIZE(stack_addr));
 
 		kcb->kprobe_status = KPROBE_HIT_SS;
 		preempt_enable_no_resched();
 		return 1;
 	}
+
 	return 0;
 }
 
 static struct kprobe trampoline_p = {
-	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.addr = (kprobe_opcode_t *)&kretprobe_trampoline,
 	.pre_handler = trampoline_probe_handler
 };
 
-- 
GitLab


From cf204fa797cf968de8043491cd469ad0321d0940 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 8 Sep 2008 20:47:42 +0900
Subject: [PATCH 088/895] sh: Derive calibrate_delay lpj from clk fwk.

All CPUs must have a sensible cpu_clk definition these days, which we can
safely use for deriving the preset loops_per_jiffy. The only odd one out
is SH-5, which hasn't been hammered in to the framework yet.

Based on the ST patch.

Signed-off-by: Francesco Virlinzi <francesco.virlinzi@st.com>
Signed-off-by: Carl Shaw <carl.shaw@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig        |  3 ++-
 arch/sh/kernel/setup.c | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index c5b08eb3770e..bbdcd6418ef5 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -25,6 +25,7 @@ config SUPERH32
 
 config SUPERH64
 	def_bool y if CPU_SH5
+	select GENERIC_CALIBRATE_DELAY
 
 config ARCH_DEFCONFIG
 	string
@@ -57,7 +58,7 @@ config GENERIC_IRQ_PROBE
 	def_bool y
 
 config GENERIC_CALIBRATE_DELAY
-	def_bool y
+	bool
 
 config GENERIC_IOMAP
 	bool
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index fc098c8af052..267b344099c0 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -27,6 +27,8 @@
 #include <linux/debugfs.h>
 #include <linux/crash_dump.h>
 #include <linux/mmzone.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/page.h>
@@ -180,6 +182,24 @@ static inline void __init reserve_crashkernel(void)
 {}
 #endif
 
+#ifndef CONFIG_GENERIC_CALIBRATE_DELAY
+void __cpuinit calibrate_delay(void)
+{
+	struct clk *clk = clk_get(NULL, "cpu_clk");
+
+	if (IS_ERR(clk))
+		panic("Need a sane CPU clock definition!");
+
+	loops_per_jiffy = (clk_get_rate(clk) >> 1) / HZ;
+
+	printk(KERN_INFO "Calibrating delay loop (skipped)... "
+			 "%lu.%02lu BogoMIPS PRESET (lpj=%lu)\n",
+			 loops_per_jiffy/(500000/HZ),
+			 (loops_per_jiffy/(5000/HZ)) % 100,
+			 loops_per_jiffy);
+}
+#endif
+
 void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
 						unsigned long end_pfn)
 {
-- 
GitLab


From 61098a086671e2d386104e562fb7cd97cc197d1b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 9 Sep 2008 06:15:47 +0900
Subject: [PATCH 089/895] sh: Add R2D+ defconfig for qemu system emulator.

This adds a defconfig for the R2D+ target in the qemu system emulator.
Eventually it will be possible to simply use the r2d+ defconfig as it is.

Provided by Shin-ichiro KAWASAKI.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/configs/rts7751r2dplus_qemu_defconfig | 909 ++++++++++++++++++
 1 file changed, 909 insertions(+)
 create mode 100644 arch/sh/configs/rts7751r2dplus_qemu_defconfig

diff --git a/arch/sh/configs/rts7751r2dplus_qemu_defconfig b/arch/sh/configs/rts7751r2dplus_qemu_defconfig
new file mode 100644
index 000000000000..a72796c0293c
--- /dev/null
+++ b/arch/sh/configs/rts7751r2dplus_qemu_defconfig
@@ -0,0 +1,909 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.27-rc2
+# Mon Aug 18 22:17:44 2008
+#
+CONFIG_SUPERH=y
+CONFIG_SUPERH32=y
+CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_SYS_SUPPORTS_PCI=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_NO_VIRT_TO_BUS=y
+CONFIG_IO_TRAPPED=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CGROUPS is not set
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+# CONFIG_SYSCTL_SYSCALL is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_COMPAT_BRK=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_ANON_INODES=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
+CONFIG_OPROFILE=y
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is not set
+# CONFIG_HAVE_IOREMAP_PROT is not set
+# CONFIG_HAVE_KPROBES is not set
+# CONFIG_HAVE_KRETPROBES is not set
+# CONFIG_HAVE_ARCH_TRACEHOOK is not set
+# CONFIG_HAVE_DMA_ATTRS is not set
+# CONFIG_USE_GENERIC_SMP_HELPERS is not set
+CONFIG_HAVE_CLK=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+# CONFIG_MODULE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_CLASSIC_RCU=y
+
+#
+# System type
+#
+CONFIG_CPU_SH4=y
+# CONFIG_CPU_SUBTYPE_SH7619 is not set
+# CONFIG_CPU_SUBTYPE_SH7203 is not set
+# CONFIG_CPU_SUBTYPE_SH7206 is not set
+# CONFIG_CPU_SUBTYPE_SH7263 is not set
+# CONFIG_CPU_SUBTYPE_MXG is not set
+# CONFIG_CPU_SUBTYPE_SH7705 is not set
+# CONFIG_CPU_SUBTYPE_SH7706 is not set
+# CONFIG_CPU_SUBTYPE_SH7707 is not set
+# CONFIG_CPU_SUBTYPE_SH7708 is not set
+# CONFIG_CPU_SUBTYPE_SH7709 is not set
+# CONFIG_CPU_SUBTYPE_SH7710 is not set
+# CONFIG_CPU_SUBTYPE_SH7712 is not set
+# CONFIG_CPU_SUBTYPE_SH7720 is not set
+# CONFIG_CPU_SUBTYPE_SH7721 is not set
+# CONFIG_CPU_SUBTYPE_SH7750 is not set
+# CONFIG_CPU_SUBTYPE_SH7091 is not set
+# CONFIG_CPU_SUBTYPE_SH7750R is not set
+# CONFIG_CPU_SUBTYPE_SH7750S is not set
+# CONFIG_CPU_SUBTYPE_SH7751 is not set
+CONFIG_CPU_SUBTYPE_SH7751R=y
+# CONFIG_CPU_SUBTYPE_SH7760 is not set
+# CONFIG_CPU_SUBTYPE_SH4_202 is not set
+# CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7763 is not set
+# CONFIG_CPU_SUBTYPE_SH7770 is not set
+# CONFIG_CPU_SUBTYPE_SH7780 is not set
+# CONFIG_CPU_SUBTYPE_SH7785 is not set
+# CONFIG_CPU_SUBTYPE_SHX3 is not set
+# CONFIG_CPU_SUBTYPE_SH7343 is not set
+# CONFIG_CPU_SUBTYPE_SH7722 is not set
+# CONFIG_CPU_SUBTYPE_SH7366 is not set
+# CONFIG_CPU_SUBTYPE_SH5_101 is not set
+# CONFIG_CPU_SUBTYPE_SH5_103 is not set
+
+#
+# Memory management options
+#
+CONFIG_QUICKLIST=y
+CONFIG_MMU=y
+CONFIG_PAGE_OFFSET=0x80000000
+CONFIG_MEMORY_START=0x0c000000
+CONFIG_MEMORY_SIZE=0x04000000
+CONFIG_29BIT=y
+CONFIG_VSYSCALL=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_MAX_ACTIVE_REGIONS=1
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_PAGE_SIZE_4KB=y
+# CONFIG_PAGE_SIZE_8KB is not set
+# CONFIG_PAGE_SIZE_16KB is not set
+# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_ENTRY_OFFSET=0x00001000
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_STATIC=y
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=2
+
+#
+# Cache configuration
+#
+# CONFIG_SH_DIRECT_MAPPED is not set
+CONFIG_CACHE_WRITEBACK=y
+# CONFIG_CACHE_WRITETHROUGH is not set
+# CONFIG_CACHE_OFF is not set
+
+#
+# Processor features
+#
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_SH_FPU=y
+# CONFIG_SH_STORE_QUEUES is not set
+CONFIG_CPU_HAS_INTEVT=y
+CONFIG_CPU_HAS_SR_RB=y
+CONFIG_CPU_HAS_PTEA=y
+CONFIG_CPU_HAS_FPU=y
+
+#
+# Board support
+#
+# CONFIG_SH_7751_SYSTEMH is not set
+# CONFIG_SH_SECUREEDGE5410 is not set
+CONFIG_SH_RTS7751R2D=y
+# CONFIG_SH_LANDISK is not set
+# CONFIG_SH_TITAN is not set
+# CONFIG_SH_LBOX_RE2 is not set
+
+#
+# RTS7751R2D Board Revision
+#
+CONFIG_RTS7751R2D_PLUS=y
+# CONFIG_RTS7751R2D_1 is not set
+
+#
+# Timer and clock configuration
+#
+CONFIG_SH_TMU=y
+CONFIG_SH_TIMER_IRQ=16
+CONFIG_SH_PCLK_FREQ=60000000
+# CONFIG_TICK_ONESHOT is not set
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# DMA support
+#
+# CONFIG_SH_DMA is not set
+
+#
+# Companion Chips
+#
+
+#
+# Additional SuperH Device Drivers
+#
+CONFIG_HEARTBEAT=y
+# CONFIG_PUSH_SWITCH is not set
+
+#
+# Kernel features
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+# CONFIG_SCHED_HRTICK is not set
+# CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
+CONFIG_SECCOMP=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_GUSA=y
+# CONFIG_GUSA_RB is not set
+
+#
+# Boot options
+#
+CONFIG_ZERO_PAGE_OFFSET=0x00010000
+CONFIG_BOOT_LINK_OFFSET=0x00800000
+# CONFIG_UBC_WAKEUP is not set
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=tty0 console=ttySC0,115200 root=/dev/sda1 earlyprintk=serial"
+
+#
+# Bus options
+#
+# CONFIG_PCI is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+# CONFIG_NET is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_MTD is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+CONFIG_SCSI_LOWLEVEL=y
+# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_DH is not set
+CONFIG_ATA=y
+# CONFIG_ATA_NONSTANDARD is not set
+CONFIG_SATA_PMP=y
+CONFIG_ATA_SFF=y
+# CONFIG_SATA_MV is not set
+# CONFIG_PATA_PLATFORM is not set
+# CONFIG_MD is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_CONSOLE is not set
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=1
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+CONFIG_SPI_BITBANG=y
+# CONFIG_SPI_SH_SCI is not set
+
+#
+# SPI Protocol Masters
+#
+# CONFIG_SPI_AT25 is not set
+# CONFIG_SPI_SPIDEV is not set
+# CONFIG_SPI_TLE62X0 is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_F71882FG is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM70 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_VT1211 is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+CONFIG_MFD_SM501=y
+# CONFIG_HTC_PASIC3 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+CONFIG_DAB=y
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+CONFIG_VIDEO_OUTPUT_CONTROL=m
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+CONFIG_FB_SH_MOBILE_LCDC=m
+CONFIG_FB_SM501=y
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+# CONFIG_LOGO_SUPERH_MONO is not set
+# CONFIG_LOGO_SUPERH_VGA16 is not set
+CONFIG_LOGO_SUPERH_CLUT224=y
+CONFIG_SOUND=y
+CONFIG_SND=m
+# CONFIG_SND_SEQUENCER is not set
+# CONFIG_SND_MIXER_OSS is not set
+# CONFIG_SND_PCM_OSS is not set
+# CONFIG_SND_DYNAMIC_MINORS is not set
+CONFIG_SND_SUPPORT_OLD_API=y
+CONFIG_SND_VERBOSE_PROCFS=y
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+CONFIG_SND_DRIVERS=y
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+CONFIG_SND_SPI=y
+CONFIG_SND_SUPERH=y
+# CONFIG_SND_SOC is not set
+CONFIG_SOUND_PRIME=m
+CONFIG_HID_SUPPORT=y
+CONFIG_HID=y
+# CONFIG_HID_DEBUG is not set
+# CONFIG_HIDRAW is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# SPI RTC drivers
+#
+# CONFIG_RTC_DRV_M41T94 is not set
+# CONFIG_RTC_DRV_DS1305 is not set
+# CONFIG_RTC_DRV_MAX6902 is not set
+CONFIG_RTC_DRV_R9701=y
+# CONFIG_RTC_DRV_RS5C348 is not set
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+# CONFIG_RTC_DRV_SH is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4DEV_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+CONFIG_MINIX_FS=y
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+CONFIG_NLS_CODEPAGE_932=y
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+# CONFIG_NLS_ISO8859_1 is not set
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_SCHED_DEBUG=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_SAMPLES is not set
+# CONFIG_SH_STANDARD_BIOS is not set
+CONFIG_EARLY_SCIF_CONSOLE=y
+CONFIG_EARLY_SCIF_CONSOLE_PORT=0xffe80000
+CONFIG_EARLY_PRINTK=y
+# CONFIG_DEBUG_BOOTMEM is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_4KSTACKS is not set
+# CONFIG_IRQSTACKS is not set
+# CONFIG_SH_KGDB is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+CONFIG_CRYPTO_HW=y
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+CONFIG_CRC_T10DIF=y
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
-- 
GitLab


From 6eb2139b3dc3e1c5181a7cdf83a517c57c34bb12 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 9 Sep 2008 08:13:28 +0900
Subject: [PATCH 090/895] sh: kprobes: kretprobe_trampoline needs to be global.

Needed by CONFIG_TRACING.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/kprobes.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index e357e3669a34..c96850b061fb 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -290,7 +290,9 @@ no_kprobe:
  */
 static void __used kretprobe_trampoline_holder(void)
 {
-	asm volatile ("kretprobe_trampoline: \n" "nop\n");
+	asm volatile (".globl kretprobe_trampoline\n"
+		      "kretprobe_trampoline:\n\t"
+		      "nop\n");
 }
 
 /*
-- 
GitLab


From b21a91043592434e2847c4b552be7b51851d92c3 Mon Sep 17 00:00:00 2001
From: roel kluin <roel.kluin@gmail.com>
Date: Tue, 9 Sep 2008 23:02:43 +0200
Subject: [PATCH 091/895] sh: intc_prio_data() test before subtraction on
 unsigned

bit is unsigned, so test before subtraction

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/irq/intc.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/sh/kernel/cpu/irq/intc.c b/arch/sh/kernel/cpu/irq/intc.c
index 8c70e201bde0..94536d358fe1 100644
--- a/arch/sh/kernel/cpu/irq/intc.c
+++ b/arch/sh/kernel/cpu/irq/intc.c
@@ -464,9 +464,10 @@ static unsigned int __init intc_prio_data(struct intc_desc *desc,
 			}
 
 			fn += (pr->reg_width >> 3) - 1;
-			bit = pr->reg_width - ((j + 1) * pr->field_width);
 
-			BUG_ON(bit < 0);
+			BUG_ON((j + 1) * pr->field_width > pr->reg_width);
+
+			bit = pr->reg_width - ((j + 1) * pr->field_width);
 
 			return _INTC_MK(fn, mode,
 					intc_get_reg(d, reg_e),
@@ -531,9 +532,10 @@ static unsigned int __init intc_sense_data(struct intc_desc *desc,
 
 			fn = REG_FN_MODIFY_BASE;
 			fn += (sr->reg_width >> 3) - 1;
-			bit = sr->reg_width - ((j + 1) * sr->field_width);
 
-			BUG_ON(bit < 0);
+			BUG_ON((j + 1) * sr->field_width > sr->reg_width);
+
+			bit = sr->reg_width - ((j + 1) * sr->field_width);
 
 			return _INTC_MK(fn, 0, intc_get_reg(d, sr->reg),
 					0, sr->field_width, bit);
-- 
GitLab


From cc3c080d9f4484021e7b14f99de94a8c85a668d5 Mon Sep 17 00:00:00 2001
From: roel kluin <roel.kluin@gmail.com>
Date: Wed, 10 Sep 2008 19:22:44 +0200
Subject: [PATCH 092/895] sh_eth: unsigned ndev->irq cannot be negative

unsigned ndev->irq cannot be negative

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/net/sh_eth.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c
index 1c370e6aa641..1a04814291c1 100644
--- a/drivers/net/sh_eth.c
+++ b/drivers/net/sh_eth.c
@@ -1205,11 +1205,12 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 		devno = 0;
 
 	ndev->dma = -1;
-	ndev->irq = platform_get_irq(pdev, 0);
-	if (ndev->irq < 0) {
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
 		ret = -ENODEV;
 		goto out_release;
 	}
+	ndev->irq = ret;
 
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 
-- 
GitLab


From 2641dc92b3c7f979c7e4820cff2e765664358982 Mon Sep 17 00:00:00 2001
From: roel kluin <roel.kluin@gmail.com>
Date: Wed, 10 Sep 2008 19:34:44 +0200
Subject: [PATCH 093/895] rtc-sh: Unsigned rtc->{periodic,carry,alarm}_irq
 cannot be negative

possibly since commit b420b1a7a17ea88531d0e12b2f2679a0c8365803

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/rtc/rtc-sh.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 1f88e9e914ec..690a7800805a 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -575,7 +575,7 @@ static int __devinit sh_rtc_probe(struct platform_device *pdev)
 	struct sh_rtc *rtc;
 	struct resource *res;
 	unsigned int tmp;
-	int ret = -ENOENT;
+	int ret;
 
 	rtc = kzalloc(sizeof(struct sh_rtc), GFP_KERNEL);
 	if (unlikely(!rtc))
@@ -584,26 +584,33 @@ static int __devinit sh_rtc_probe(struct platform_device *pdev)
 	spin_lock_init(&rtc->lock);
 
 	/* get periodic/carry/alarm irqs */
-	rtc->periodic_irq = platform_get_irq(pdev, 0);
-	if (unlikely(rtc->periodic_irq < 0)) {
+	ret = platform_get_irq(pdev, 0);
+	if (unlikely(ret < 0)) {
+		ret = -ENOENT;
 		dev_err(&pdev->dev, "No IRQ for period\n");
 		goto err_badres;
 	}
+	rtc->periodic_irq = ret;
 
-	rtc->carry_irq = platform_get_irq(pdev, 1);
-	if (unlikely(rtc->carry_irq < 0)) {
+	ret = platform_get_irq(pdev, 1);
+	if (unlikely(ret < 0)) {
+		ret = -ENOENT;
 		dev_err(&pdev->dev, "No IRQ for carry\n");
 		goto err_badres;
 	}
+	rtc->carry_irq = ret;
 
-	rtc->alarm_irq = platform_get_irq(pdev, 2);
-	if (unlikely(rtc->alarm_irq < 0)) {
+	ret = platform_get_irq(pdev, 2);
+	if (unlikely(ret < 0)) {
+		ret = -ENOENT;
 		dev_err(&pdev->dev, "No IRQ for alarm\n");
 		goto err_badres;
 	}
+	rtc->alarm_irq = ret;
 
 	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
 	if (unlikely(res == NULL)) {
+		ret = -ENOENT;
 		dev_err(&pdev->dev, "No IO resource\n");
 		goto err_badres;
 	}
-- 
GitLab


From 4018ffcfdf84faf17fbadcd29ea5eced26f9d9cb Mon Sep 17 00:00:00 2001
From: Luca Santini <luca.santini@spesonline.com>
Date: Fri, 12 Sep 2008 18:07:16 +0900
Subject: [PATCH 094/895] sh: edosk7760 physmap-flash support.

Signed-off-by: Luca Santini <luca.santini@spesonline.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/board-edosk7760.c | 50 +++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/arch/sh/boards/board-edosk7760.c b/arch/sh/boards/board-edosk7760.c
index 7cc5e1143522..4890ba7961ae 100644
--- a/arch/sh/boards/board-edosk7760.c
+++ b/arch/sh/boards/board-edosk7760.c
@@ -24,6 +24,7 @@
 #include <linux/smc91x.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
+#include <linux/mtd/physmap.h>
 #include <asm/machvec.h>
 #include <asm/io.h>
 #include <asm/addrspace.h>
@@ -40,6 +41,52 @@
 
 #define ETHERNET_IRQ	5
 
+/* NOR flash */
+static struct mtd_partition edosk7760_nor_flash_partitions[] = {
+	{
+		.name = "bootloader",
+		.offset = 0,
+		.size = (1 * 1024 * 1024), /*1MB*/
+		.mask_flags = MTD_WRITEABLE,	/* Read-only */
+	}, {
+		.name = "kernel",
+		.offset = MTDPART_OFS_APPEND,
+		.size = (2 * 1024 * 1024), /*2MB*/
+	}, {
+		.name = "fs",
+		.offset = MTDPART_OFS_APPEND,
+		.size = (26 * 1024 * 1024),
+	}, {
+		.name = "other",
+		.offset = MTDPART_OFS_APPEND,
+		.size = MTDPART_SIZ_FULL,
+	},
+};
+
+static struct physmap_flash_data edosk7760_nor_flash_data = {
+	.width		= 4,
+	.parts		= edosk7760_nor_flash_partitions,
+	.nr_parts	= ARRAY_SIZE(edosk7760_nor_flash_partitions),
+};
+
+static struct resource edosk7760_nor_flash_resources[] = {
+	[0] = {
+		.name	= "NOR Flash",
+		.start	= 0x00000000,
+		.end	= (32 * 1024 * 1024) -1,	/* 32MB*/
+		.flags	= IORESOURCE_MEM,
+	}
+};
+
+static struct platform_device edosk7760_nor_flash_device = {
+	.name		= "physmap-flash",
+	.resource	= edosk7760_nor_flash_resources,
+	.num_resources	= ARRAY_SIZE(edosk7760_nor_flash_resources),
+	.dev		= {
+		.platform_data = &edosk7760_nor_flash_data,
+	},
+};
+
 /* i2c initialization functions */
 static struct sh7760_i2c_platdata i2c_pd = {
 	.speed_khz	= 400,
@@ -121,9 +168,10 @@ static struct platform_device smc91x_dev = {
 
 /* platform init code */
 static struct platform_device *edosk7760_devices[] __initdata = {
+	&smc91x_dev,
+	&edosk7760_nor_flash_device,
 	&sh7760_i2c0_dev,
 	&sh7760_i2c1_dev,
-	&smc91x_dev,
 };
 
 static int __init init_edosk7760_devices(void)
-- 
GitLab


From 1b582d19ce9e14bf5bbc36688bf818b3c42574da Mon Sep 17 00:00:00 2001
From: Luca Santini <luca.santini@spesonline.com>
Date: Fri, 12 Sep 2008 18:08:01 +0900
Subject: [PATCH 095/895] sh: update edosk7760 defconfig for physmap-flash.

Signed-off-by: Luca Santini <luca.santini@spesonline.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/configs/edosk7760_defconfig | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig
index f8ec32a5d4ae..bef07fa8d858 100644
--- a/arch/sh/configs/edosk7760_defconfig
+++ b/arch/sh/configs/edosk7760_defconfig
@@ -380,11 +380,11 @@ CONFIG_DEBUG_DEVRES=y
 # CONFIG_CONNECTOR is not set
 CONFIG_MTD=y
 CONFIG_MTD_DEBUG=y
-CONFIG_MTD_DEBUG_VERBOSE=3
+CONFIG_MTD_DEBUG_VERBOSE=0
 CONFIG_MTD_CONCAT=y
 CONFIG_MTD_PARTITIONS=y
 # CONFIG_MTD_REDBOOT_PARTS is not set
-# CONFIG_MTD_CMDLINE_PARTS is not set
+CONFIG_MTD_CMDLINE_PARTS=y
 # CONFIG_MTD_AR7_PARTS is not set
 
 #
@@ -411,16 +411,16 @@ CONFIG_MTD_CFI_NOSWAP=y
 # CONFIG_MTD_CFI_BE_BYTE_SWAP is not set
 # CONFIG_MTD_CFI_LE_BYTE_SWAP is not set
 CONFIG_MTD_CFI_GEOMETRY=y
-# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
 CONFIG_MTD_MAP_BANK_WIDTH_2=y
-# CONFIG_MTD_MAP_BANK_WIDTH_4 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+CONFIG_MTD_MAP_BANK_WIDTH_8=y
+CONFIG_MTD_MAP_BANK_WIDTH_16=y
+CONFIG_MTD_MAP_BANK_WIDTH_32=y
 CONFIG_MTD_CFI_I1=y
-# CONFIG_MTD_CFI_I2 is not set
-# CONFIG_MTD_CFI_I4 is not set
-# CONFIG_MTD_CFI_I8 is not set
+CONFIG_MTD_CFI_I2=y
+CONFIG_MTD_CFI_I4=y
+CONFIG_MTD_CFI_I8=y
 # CONFIG_MTD_OTP is not set
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
@@ -435,9 +435,9 @@ CONFIG_MTD_ABSENT=y
 #
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
 CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_PHYSMAP_START=0xA0000000
-CONFIG_MTD_PHYSMAP_LEN=0x01000000
-CONFIG_MTD_PHYSMAP_BANKWIDTH=2
+CONFIG_MTD_PHYSMAP_START=0xffffffff
+CONFIG_MTD_PHYSMAP_LEN=0x0
+CONFIG_MTD_PHYSMAP_BANKWIDTH=4
 # CONFIG_MTD_PLATRAM is not set
 
 #
-- 
GitLab


From 09558748464a9afafe2848a3ad4cfd509c9b0fb6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 18:58:28 +0900
Subject: [PATCH 096/895] sh: Provide a fixed UTS_MACHINE definition for sh64.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 01d85c74481d..0bc956012c38 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -76,8 +76,10 @@ KBUILD_IMAGE		:= $(defaultimage-y)
 # error messages during linking.
 #
 ifdef CONFIG_SUPERH32
+UTS_MACHINE	:= sh
 LDFLAGS_vmlinux	+= -e _stext
 else
+UTS_MACHINE	:= sh64
 LDFLAGS_vmlinux	+= --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \
 		   --defsym phys_stext_shmedia=phys_stext+1 \
 		   -e phys_stext_shmedia
-- 
GitLab


From 934135c19d8a1be435bae75aefc09b8ae1698b16 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 19:52:36 +0900
Subject: [PATCH 097/895] sh: ptrace: Introduce user_regset interface for gp
 regs.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/elf.h    |   9 +++
 arch/sh/include/asm/ptrace.h |   8 ++-
 arch/sh/kernel/ptrace_32.c   | 116 ++++++++++++++++++++++++++++++-----
 3 files changed, 115 insertions(+), 18 deletions(-)

diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h
index f01449a8d378..455d9e1e1438 100644
--- a/arch/sh/include/asm/elf.h
+++ b/arch/sh/include/asm/elf.h
@@ -108,6 +108,15 @@ typedef struct user_fpu_struct elf_fpregset_t;
 #define elf_check_fdpic(x)		((x)->e_flags & EF_SH_FDPIC)
 #define elf_check_const_displacement(x)	((x)->e_flags & EF_SH_PIC)
 
+#if defined(CONFIG_SUPERH32) && \
+	(!defined(CONFIG_SH_FPU) && !defined(CONFIG_SH_DSP))
+/*
+ * Enable dump using regset for general purpose registers, use this as
+ * the default once the FPU and DSP registers are moved over also.
+ */
+#define CORE_DUMP_USE_REGSET
+#endif
+
 #define USE_ELF_CORE_DUMP
 #define ELF_FDPIC_CORE_EFLAGS	EF_SH_FDPIC
 #define ELF_EXEC_PAGESIZE	PAGE_SIZE
diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h
index b86aeabba61a..bf73646e2d2b 100644
--- a/arch/sh/include/asm/ptrace.h
+++ b/arch/sh/include/asm/ptrace.h
@@ -87,12 +87,18 @@ struct pt_dspregs {
 	unsigned long	mod;
 };
 
+#define PTRACE_GETREGS		12	/* General registers */
+#define PTRACE_SETREGS		13
+
+#define PTRACE_GETFPREGS	14	/* FPU registers */
+#define PTRACE_SETFPREGS	15
+
 #define PTRACE_GETFDPIC		31	/* get the ELF fdpic loadmap address */
 
 #define PTRACE_GETFDPIC_EXEC	0	/* [addr] request the executable loadmap */
 #define PTRACE_GETFDPIC_INTERP	1	/* [addr] request the interpreter loadmap */
 
-#define	PTRACE_GETDSPREGS	55
+#define	PTRACE_GETDSPREGS	55	/* DSP registers */
 #define	PTRACE_SETDSPREGS	56
 #endif
 
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 84bf3420597c..5e3ba10255cd 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -1,12 +1,14 @@
 /*
- * linux/arch/sh/kernel/ptrace.c
+ * SuperH process tracing
  *
- * Original x86 implementation:
- *	By Ross Biro 1/23/92
- *	edited by Linus Torvalds
+ * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
+ * Copyright (C) 2002 - 2008  Paul Mundt
  *
- * SuperH version:   Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
- * Audit support: Yuichi Nakamura <ynakam@hitachisoft.jp>
+ * Audit support by Yuichi Nakamura <ynakam@hitachisoft.jp>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  */
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -22,6 +24,8 @@
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/tracehook.h>
+#include <linux/elf.h>
+#include <linux/regset.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -29,11 +33,6 @@
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
 
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
 /*
  * This routine will get a word off of the process kernel stack.
  */
@@ -62,16 +61,12 @@ static inline int put_stack_long(struct task_struct *task, int offset,
 
 void user_enable_single_step(struct task_struct *child)
 {
-	struct pt_regs *regs = task_pt_regs(child);
-	long pc;
-
-	pc = get_stack_long(child, (long)&regs->pc);
-
 	/* Next scheduling will set up UBC */
 	if (child->thread.ubc_pc == 0)
 		ubc_usercnt += 1;
 
-	child->thread.ubc_pc = pc;
+	child->thread.ubc_pc = get_stack_long(child,
+				offsetof(struct pt_regs, pc));
 
 	set_tsk_thread_flag(child, TIF_SINGLESTEP);
 }
@@ -103,6 +98,83 @@ void ptrace_disable(struct task_struct *child)
 	user_disable_single_step(child);
 }
 
+static int genregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs->regs,
+				  0, 16 * sizeof(unsigned long));
+	if (!ret)
+		/* PC, PR, SR, GBR, MACH, MACL, TRA */
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &regs->pc,
+					  offsetof(struct pt_regs, pc),
+					  sizeof(struct pt_regs));
+	if (!ret)
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       sizeof(struct pt_regs), -1);
+
+	return ret;
+}
+
+static int genregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs->regs,
+				 0, 16 * sizeof(unsigned long));
+	if (!ret && count > 0)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &regs->pc,
+					 offsetof(struct pt_regs, pc),
+					 sizeof(struct pt_regs));
+	if (!ret)
+		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+						sizeof(struct pt_regs), -1);
+
+	return ret;
+}
+
+/*
+ * These are our native regset flavours.
+ */
+enum sh_regset {
+	REGSET_GENERAL,
+};
+
+static const struct user_regset sh_regsets[] = {
+	/*
+	 * Format is:
+	 *	R0 --> R15
+	 *	PC, PR, SR, GBR, MACH, MACL, TRA
+	 */
+	[REGSET_GENERAL] = {
+		.core_note_type	= NT_PRSTATUS,
+		.n		= ELF_NGREG,
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= genregs_get,
+		.set		= genregs_set,
+	},
+};
+
+static const struct user_regset_view user_sh_native_view = {
+	.name		= "sh",
+	.e_machine	= EM_SH,
+	.regsets	= sh_regsets,
+	.n		= ARRAY_SIZE(sh_regsets),
+};
+
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
 	struct user * dummy = NULL;
@@ -159,6 +231,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		}
 		break;
 
+	case PTRACE_GETREGS:
+		return copy_regset_to_user(child, &user_sh_native_view,
+					   REGSET_GENERAL,
+					   0, sizeof(struct pt_regs),
+					   (void __user *)data);
+	case PTRACE_SETREGS:
+		return copy_regset_from_user(child, &user_sh_native_view,
+					     REGSET_GENERAL,
+					     0, sizeof(struct pt_regs),
+					     (const void __user *)data);
 #ifdef CONFIG_SH_DSP
 	case PTRACE_GETDSPREGS: {
 		unsigned long dp;
-- 
GitLab


From cb700aa4f13d38726defab3060d3ebeaf67dc189 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 20:41:05 +0900
Subject: [PATCH 098/895] sh: ioremap_prot support.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig               | 1 +
 arch/sh/include/asm/io.h      | 2 ++
 arch/sh/include/asm/page.h    | 2 ++
 arch/sh/include/asm/pgtable.h | 1 +
 4 files changed, 6 insertions(+)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index bbdcd6418ef5..434183eb4f12 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -12,6 +12,7 @@ config SUPERH
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_GENERIC_DMA_COHERENT
+	select HAVE_IOREMAP_PROT
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index e49cfee95fb2..1857666136f2 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -347,6 +347,8 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
 	__ioremap_mode((offset), (size), _PAGE_CACHABLE)
 #define p3_ioremap(offset, size, flags)			\
 	__ioremap((offset), (size), (flags))
+#define ioremap_prot(offset, size, flags)		\
+	__ioremap_mode((offset), (size), (flags))
 #define iounmap(addr)					\
 	__iounmap((addr))
 
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 77fb8bf02e4e..5871d78e47e5 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -104,6 +104,8 @@ typedef struct { unsigned long pgd; } pgd_t;
 
 typedef struct page *pgtable_t;
 
+#define pte_pgprot(x) __pgprot(pte_val(x) & PTE_FLAGS_MASK)
+
 #endif /* !__ASSEMBLY__ */
 
 /*
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index a4a8f8b93463..52220d70a096 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -76,6 +76,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #endif
 
 #define PTE_PHYS_MASK		(PHYS_ADDR_MASK & PAGE_MASK)
+#define PTE_FLAGS_MASK		(~(PTE_PHYS_MASK) << PAGE_SHIFT)
 
 #ifdef CONFIG_SUPERH32
 #define VMALLOC_START	(P3SEG)
-- 
GitLab


From f8b890ab4ca60c05b5621b267712709d329f7612 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 22:08:20 +0900
Subject: [PATCH 099/895] sh: Flag T-bit for syscall restart.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/signal_32.c | 53 ++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 345de2f1d53c..be194d0bd7d2 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -490,37 +490,43 @@ give_sigsegv:
 	return -EFAULT;
 }
 
+static inline void
+handle_syscall_restart(unsigned long save_r0, struct pt_regs *regs,
+		       struct sigaction *sa)
+{
+	/* If we're not from a syscall, bail out */
+	if (regs->tra < 0)
+		return;
+
+	/* check for system call restart.. */
+	switch (regs->regs[0]) {
+		case -ERESTART_RESTARTBLOCK:
+		case -ERESTARTNOHAND:
+		no_system_call_restart:
+			regs->regs[0] = -EINTR;
+			regs->sr |= 1;
+			break;
+
+		case -ERESTARTSYS:
+			if (!(sa->sa_flags & SA_RESTART))
+				goto no_system_call_restart;
+		/* fallthrough */
+		case -ERESTARTNOINTR:
+			regs->regs[0] = save_r0;
+			regs->pc -= instruction_size(ctrl_inw(regs->pc - 4));
+			break;
+	}
+}
+
 /*
  * OK, we're invoking a handler
  */
-
 static int
 handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 	      sigset_t *oldset, struct pt_regs *regs, unsigned int save_r0)
 {
 	int ret;
 
-	/* Are we from a system call? */
-	if (regs->tra >= 0) {
-		/* If so, check system call restarting.. */
-		switch (regs->regs[0]) {
-			case -ERESTART_RESTARTBLOCK:
-			case -ERESTARTNOHAND:
-			no_system_call_restart:
-				regs->regs[0] = -EINTR;
-				break;
-
-			case -ERESTARTSYS:
-				if (!(ka->sa.sa_flags & SA_RESTART))
-					goto no_system_call_restart;
-			/* fallthrough */
-			case -ERESTARTNOINTR:
-				regs->regs[0] = save_r0;
-				regs->pc -= instruction_size(
-						ctrl_inw(regs->pc - 4));
-				break;
-		}
-	}
 
 	/* Set up the stack frame */
 	if (ka->sa.sa_flags & SA_SIGINFO)
@@ -578,6 +584,9 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0)
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
+		if (regs->sr & 1)
+			handle_syscall_restart(save_r0, regs, &ka.sa);
+
 		/* Whee!  Actually deliver the signal.  */
 		if (handle_signal(signr, &ka, &info, oldset,
 				  regs, save_r0) == 0) {
-- 
GitLab


From 9996b42ac06adb7555933366e071ec8824bcaa37 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 22:11:36 +0900
Subject: [PATCH 100/895] sh: provide user_stack_pointer(), needed for
 tracehook support.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/processor_32.h | 2 ++
 arch/sh/include/asm/processor_64.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index 41d23210583c..1cd3a144c85c 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -199,6 +199,8 @@ extern unsigned long get_wchan(struct task_struct *p);
 #define KSTK_EIP(tsk)  (task_pt_regs(tsk)->pc)
 #define KSTK_ESP(tsk)  (task_pt_regs(tsk)->regs[15])
 
+#define user_stack_pointer(regs)	((regs)->regs[15])
+
 #define cpu_sleep()	__asm__ __volatile__ ("sleep" : : : "memory")
 #define cpu_relax()	barrier()
 
diff --git a/arch/sh/include/asm/processor_64.h b/arch/sh/include/asm/processor_64.h
index 16609bc11627..ae19839f1d97 100644
--- a/arch/sh/include/asm/processor_64.h
+++ b/arch/sh/include/asm/processor_64.h
@@ -267,6 +267,8 @@ extern unsigned long get_wchan(struct task_struct *p);
 #define KSTK_EIP(tsk)  ((tsk)->thread.pc)
 #define KSTK_ESP(tsk)  ((tsk)->thread.sp)
 
+#define user_stack_pointer(regs)	((regs)->sp)
+
 #define cpu_relax()	barrier()
 
 #endif	/* __ASSEMBLY__ */
-- 
GitLab


From fb4f87a2f048b4cb1a499c9baa78f1d8437b09c3 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 22:13:13 +0900
Subject: [PATCH 101/895] sh: Provide the asm/syscall.h interface, needed by
 tracehook.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/syscall.h    |  10 +++
 arch/sh/include/asm/syscall_32.h | 110 +++++++++++++++++++++++++++++++
 arch/sh/include/asm/syscall_64.h |   6 ++
 3 files changed, 126 insertions(+)
 create mode 100644 arch/sh/include/asm/syscall.h
 create mode 100644 arch/sh/include/asm/syscall_32.h
 create mode 100644 arch/sh/include/asm/syscall_64.h

diff --git a/arch/sh/include/asm/syscall.h b/arch/sh/include/asm/syscall.h
new file mode 100644
index 000000000000..6a381429ee9d
--- /dev/null
+++ b/arch/sh/include/asm/syscall.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_SH_SYSCALL_H
+#define __ASM_SH_SYSCALL_H
+
+#ifdef CONFIG_SUPERH32
+# include "syscall_32.h"
+#else
+# include "syscall_64.h"
+#endif
+
+#endif /* __ASM_SH_SYSCALL_H */
diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h
new file mode 100644
index 000000000000..54773f26cd44
--- /dev/null
+++ b/arch/sh/include/asm/syscall_32.h
@@ -0,0 +1,110 @@
+#ifndef __ASM_SH_SYSCALL_32_H
+#define __ASM_SH_SYSCALL_32_H
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+
+/* The system call number is given by the user in %g1 */
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	return (regs->tra >= 0) ? regs->regs[3] : -1L;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	/*
+	 * XXX: This needs some thought. On SH we don't
+	 * save away the original r0 value anywhere.
+	 */
+}
+
+static inline bool syscall_has_error(struct pt_regs *regs)
+{
+	return (regs->sr & 0x1) ? true : false;
+}
+static inline void syscall_set_error(struct pt_regs *regs)
+{
+	regs->sr |= 0x1;
+}
+static inline void syscall_clear_error(struct pt_regs *regs)
+{
+	regs->sr &= ~0x1;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	return syscall_has_error(regs) ? regs->regs[0] : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->regs[0];
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	if (error) {
+		syscall_set_error(regs);
+		regs->regs[0] = -error;
+	} else {
+		syscall_clear_error(regs);
+		regs->regs[0] = val;
+	}
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	/*
+	 * Do this simply for now. If we need to start supporting
+	 * fetching arguments from arbitrary indices, this will need some
+	 * extra logic. Presently there are no in-tree users that depend
+	 * on this behaviour.
+	 */
+	BUG_ON(i);
+
+	/* Argument pattern is: R4, R5, R6, R7, R0, R1 */
+	switch (n) {
+	case 6: args[5] = regs->regs[1];
+	case 5: args[4] = regs->regs[0];
+	case 4: args[3] = regs->regs[7];
+	case 3: args[2] = regs->regs[6];
+	case 2: args[1] = regs->regs[5];
+	case 1:	args[0] = regs->regs[4];
+		break;
+	default:
+		BUG();
+	}
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	/* Same note as above applies */
+	BUG_ON(i);
+
+	switch (n) {
+	case 6: regs->regs[1] = args[5];
+	case 5: regs->regs[0] = args[4];
+	case 4: regs->regs[7] = args[3];
+	case 3: regs->regs[6] = args[2];
+	case 2: regs->regs[5] = args[1];
+	case 1: regs->regs[4] = args[0];
+		break;
+	default:
+		BUG();
+	}
+}
+
+#endif /* __ASM_SH_SYSCALL_32_H */
diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h
new file mode 100644
index 000000000000..bcaaa8ca4d70
--- /dev/null
+++ b/arch/sh/include/asm/syscall_64.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_SH_SYSCALL_64_H
+#define __ASM_SH_SYSCALL_64_H
+
+#include <asm-generic/syscall.h>
+
+#endif /* __ASM_SH_SYSCALL_64_H */
-- 
GitLab


From 3231739d97b348c628a10fb49adfa9143e1de28b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 22:13:36 +0900
Subject: [PATCH 102/895] sh: Enable HAVE_ARCH_TRACEHOOK.

Now that the rest of the support requirements are out of the way, finally
enable support for tracehook.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 434183eb4f12..f995d134ab3a 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -23,6 +23,7 @@ config SUPERH32
 	def_bool !SUPERH64
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_ARCH_TRACEHOOK if (!SH_FPU && !SH_DSP)
 
 config SUPERH64
 	def_bool y if CPU_SH5
-- 
GitLab


From 6bff1592d85c9fa1f1d9d4de1cd0e104279544a6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 22:41:30 +0900
Subject: [PATCH 103/895] sh: Fix up NUMA build error with se7722_defconfig.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/mmzone.h | 2 ++
 arch/sh/include/asm/setup.h  | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/sh/include/asm/mmzone.h b/arch/sh/include/asm/mmzone.h
index 2969253c4042..7f5363b29ba0 100644
--- a/arch/sh/include/asm/mmzone.h
+++ b/arch/sh/include/asm/mmzone.h
@@ -4,6 +4,8 @@
 #ifdef __KERNEL__
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
+#include <linux/numa.h>
+
 extern struct pglist_data *node_data[];
 #define NODE_DATA(nid)		(node_data[nid])
 
diff --git a/arch/sh/include/asm/setup.h b/arch/sh/include/asm/setup.h
index 554f865075ca..1b7856f5924c 100644
--- a/arch/sh/include/asm/setup.h
+++ b/arch/sh/include/asm/setup.h
@@ -1,7 +1,7 @@
 #ifndef _SH_SETUP_H
 #define _SH_SETUP_H
 
-#include <asm/mmzone.h>
+#include <linux/mmzone.h>
 
 #define COMMAND_LINE_SIZE 256
 
-- 
GitLab


From 5dadb34394d59313e2e763ae8e2fc911e9fc557c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 22:42:10 +0900
Subject: [PATCH 104/895] sh: Add DSP registers to regset interface.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig              |  2 +-
 arch/sh/include/asm/elf.h    |  5 +--
 arch/sh/include/asm/ptrace.h |  3 ++
 arch/sh/kernel/ptrace_32.c   | 86 +++++++++++++++++++++++++-----------
 4 files changed, 65 insertions(+), 31 deletions(-)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index f995d134ab3a..acaba1bdd8a6 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -23,7 +23,7 @@ config SUPERH32
 	def_bool !SUPERH64
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
-	select HAVE_ARCH_TRACEHOOK if (!SH_FPU && !SH_DSP)
+	select HAVE_ARCH_TRACEHOOK if !SH_FPU
 
 config SUPERH64
 	def_bool y if CPU_SH5
diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h
index 455d9e1e1438..7c2363f8250e 100644
--- a/arch/sh/include/asm/elf.h
+++ b/arch/sh/include/asm/elf.h
@@ -108,11 +108,10 @@ typedef struct user_fpu_struct elf_fpregset_t;
 #define elf_check_fdpic(x)		((x)->e_flags & EF_SH_FDPIC)
 #define elf_check_const_displacement(x)	((x)->e_flags & EF_SH_PIC)
 
-#if defined(CONFIG_SUPERH32) && \
-	(!defined(CONFIG_SH_FPU) && !defined(CONFIG_SH_DSP))
+#if defined(CONFIG_SUPERH32) && !defined(CONFIG_SH_FPU)
 /*
  * Enable dump using regset for general purpose registers, use this as
- * the default once the FPU and DSP registers are moved over also.
+ * the default once the FPU registers are moved over also.
  */
 #define CORE_DUMP_USE_REGSET
 #endif
diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h
index bf73646e2d2b..3ad18e91bca6 100644
--- a/arch/sh/include/asm/ptrace.h
+++ b/arch/sh/include/asm/ptrace.h
@@ -123,6 +123,9 @@ extern void user_disable_single_step(struct task_struct *);
 #define task_pt_regs(task) \
 	((struct pt_regs *) (task_stack_page(task) + THREAD_SIZE \
 		 - sizeof(struct pt_dspregs) - sizeof(unsigned long)) - 1)
+#define task_pt_dspregs(task) \
+	((struct pt_dspregs *) (task_stack_page(task) + THREAD_SIZE \
+		 - sizeof(unsigned long)) - 1)
 #else
 #define task_pt_regs(task) \
 	((struct pt_regs *) (task_stack_page(task) + THREAD_SIZE \
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 5e3ba10255cd..20b103f6c33b 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -145,11 +145,50 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
+#ifdef CONFIG_SH_DSP
+static int dspregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct pt_dspregs *regs = task_pt_dspregs(target);
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, regs,
+				  0, sizeof(struct pt_dspregs));
+	if (!ret)
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       sizeof(struct pt_dspregs), -1);
+
+	return ret;
+}
+
+static int dspregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct pt_dspregs *regs = task_pt_dspregs(target);
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs,
+				 0, sizeof(struct pt_dspregs));
+	if (!ret)
+		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+						sizeof(struct pt_dspregs), -1);
+
+	return ret;
+}
+#endif
+
 /*
  * These are our native regset flavours.
  */
 enum sh_regset {
 	REGSET_GENERAL,
+#ifdef CONFIG_SH_DSP
+	REGSET_DSP,
+#endif
 };
 
 static const struct user_regset sh_regsets[] = {
@@ -166,6 +205,16 @@ static const struct user_regset sh_regsets[] = {
 		.get		= genregs_get,
 		.set		= genregs_set,
 	},
+
+#ifdef CONFIG_SH_DSP
+	[REGSET_DSP] = {
+		.n		= sizeof(struct pt_dspregs) / sizeof(long),
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= dspregs_get,
+		.set		= dspregs_set,
+	},
+#endif
 };
 
 static const struct user_regset_view user_sh_native_view = {
@@ -242,33 +291,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 					     0, sizeof(struct pt_regs),
 					     (const void __user *)data);
 #ifdef CONFIG_SH_DSP
-	case PTRACE_GETDSPREGS: {
-		unsigned long dp;
-
-		ret = -EIO;
-		dp = ((unsigned long) child) + THREAD_SIZE -
-			 sizeof(struct pt_dspregs);
-		if (*((int *) (dp - 4)) == SR_FD) {
-			copy_to_user((void *)addr, (void *) dp,
-				sizeof(struct pt_dspregs));
-			ret = 0;
-		}
-		break;
-	}
-
-	case PTRACE_SETDSPREGS: {
-		unsigned long dp;
-
-		ret = -EIO;
-		dp = ((unsigned long) child) + THREAD_SIZE -
-			 sizeof(struct pt_dspregs);
-		if (*((int *) (dp - 4)) == SR_FD) {
-			copy_from_user((void *) dp, (void *)addr,
-				sizeof(struct pt_dspregs));
-			ret = 0;
-		}
-		break;
-	}
+	case PTRACE_GETDSPREGS:
+		return copy_regset_to_user(child, &user_sh_native_view,
+					   REGSET_DSP,
+					   0, sizeof(struct pt_dspregs),
+					   (void __user *)data);
+	case PTRACE_SETDSPREGS:
+		return copy_regset_from_user(child, &user_sh_native_view,
+					     REGSET_DSP,
+					     0, sizeof(struct pt_dspregs),
+					     (const void __user *)data);
 #endif
 #ifdef CONFIG_BINFMT_ELF_FDPIC
 	case PTRACE_GETFDPIC: {
-- 
GitLab


From f9540ececaa2cf94b6760741c82f25097e662383 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 22:42:43 +0900
Subject: [PATCH 105/895] sh: Add missing task_user_regset_view() definition.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/ptrace_32.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 20b103f6c33b..92fe2034f74a 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -224,6 +224,11 @@ static const struct user_regset_view user_sh_native_view = {
 	.n		= ARRAY_SIZE(sh_regsets),
 };
 
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_sh_native_view;
+}
+
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
 	struct user * dummy = NULL;
-- 
GitLab


From 72461997c3c66c29775afa68ca31bea16bf17f39 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 22:56:35 +0900
Subject: [PATCH 106/895] sh: Check SR.DSP bit for DSP regset validity.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/ptrace_32.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 92fe2034f74a..0f44f2b51a60 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -179,6 +179,14 @@ static int dspregs_set(struct task_struct *target,
 
 	return ret;
 }
+
+static int dspregs_active(struct task_struct *target,
+			  const struct user_regset *regset)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+
+	return regs->sr & SR_DSP ? regset->n : 0;
+}
 #endif
 
 /*
@@ -213,6 +221,7 @@ static const struct user_regset sh_regsets[] = {
 		.align		= sizeof(long),
 		.get		= dspregs_get,
 		.set		= dspregs_set,
+		.active		= dspregs_active,
 	},
 #endif
 };
-- 
GitLab


From 0e660d2d433393f983cd58fe8c54f831fa7c7713 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Sep 2008 23:27:46 +0900
Subject: [PATCH 107/895] sh: Tidy up ELF core dumps.

These have been using overrides for ELF_CORE_COPY_TASK_REGS and
ELF_CORE_COPY_FPREGS while the generic versions can be used instead.
Presently the pt_regs are also duplicated across elf_core_copy_regs()
and elf_core_copy_task_regs(), this switches to simply copying out
through elf_core_copy_regs() instead.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/elf.h  |  6 ------
 arch/sh/kernel/Makefile_32 |  1 -
 arch/sh/kernel/Makefile_64 |  1 -
 arch/sh/kernel/dump_task.c | 32 --------------------------------
 4 files changed, 40 deletions(-)
 delete mode 100644 arch/sh/kernel/dump_task.c

diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h
index 7c2363f8250e..6b2cec80fd15 100644
--- a/arch/sh/include/asm/elf.h
+++ b/arch/sh/include/asm/elf.h
@@ -198,12 +198,6 @@ do {									\
 #endif
 
 #define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT)
-struct task_struct;
-extern int dump_task_regs (struct task_struct *, elf_gregset_t *);
-extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
-
-#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs)
-#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
 
 #ifdef CONFIG_VSYSCALL
 /* vDSO has arch_setup_additional_pages */
diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
index 12e617beba20..8497de8afbff 100644
--- a/arch/sh/kernel/Makefile_32
+++ b/arch/sh/kernel/Makefile_32
@@ -21,7 +21,6 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_PM)		+= pm.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
-obj-$(CONFIG_ELF_CORE)		+= dump_task.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index 6edf53b93d94..e987eb2e9bb5 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -17,7 +17,6 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_PM)		+= pm.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
-obj-$(CONFIG_BINFMT_ELF)	+= dump_task.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
 
 EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/dump_task.c b/arch/sh/kernel/dump_task.c
deleted file mode 100644
index 1db7ce0f25d4..000000000000
--- a/arch/sh/kernel/dump_task.c
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <linux/elfcore.h>
-#include <linux/sched.h>
-#include <asm/fpu.h>
-
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
-	struct pt_regs ptregs;
-
-	ptregs = *task_pt_regs(tsk);
-	elf_core_copy_regs(regs, &ptregs);
-
-	return 1;
-}
-
-int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpu)
-{
-	int fpvalid = 0;
-
-#if defined(CONFIG_SH_FPU)
-	fpvalid = !!tsk_used_math(tsk);
-	if (fpvalid) {
-		unlazy_fpu(tsk, task_pt_regs(tsk));
-		memcpy(fpu, &tsk->thread.fpu.hard, sizeof(*fpu));
-	}
-#endif
-
-	return fpvalid;
-}
-
-- 
GitLab


From 5a89f1adbc5ce44988aab0c370ae2f1478061307 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 13 Sep 2008 01:44:03 +0900
Subject: [PATCH 108/895] sh: latencytop support.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig             |  4 ++++
 arch/sh/kernel/stacktrace.c | 23 ++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index acaba1bdd8a6..18a1cc8a6aa3 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -100,6 +100,10 @@ config STACKTRACE_SUPPORT
 config LOCKDEP_SUPPORT
 	def_bool y
 
+config HAVE_LATENCYTOP_SUPPORT
+	def_bool y
+	depends on !SMP
+
 config ARCH_HAS_ILOG2_U32
 	def_bool n
 
diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c
index 54d1f61aa007..1a2a5eb76e41 100644
--- a/arch/sh/kernel/stacktrace.c
+++ b/arch/sh/kernel/stacktrace.c
@@ -3,7 +3,7 @@
  *
  * Stack trace management functions
  *
- *  Copyright (C) 2006  Paul Mundt
+ *  Copyright (C) 2006 - 2008  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -36,3 +36,24 @@ void save_stack_trace(struct stack_trace *trace)
 	}
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	unsigned long *sp = (unsigned long *)tsk->thread.sp;
+
+	while (!kstack_end(sp)) {
+		unsigned long addr = *sp++;
+
+		if (__kernel_text_address(addr)) {
+			if (in_sched_functions(addr))
+				break;
+			if (trace->skip > 0)
+				trace->skip--;
+			else
+				trace->entries[trace->nr_entries++] = addr;
+			if (trace->nr_entries >= trace->max_entries)
+				break;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-- 
GitLab


From fdb0ac80618729e6b12121c66449b8532990eaf3 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 13 Sep 2008 19:57:04 -0700
Subject: [PATCH 109/895] async_tx: make async_tx_run_dependencies() easier to
 read

* Rename 'next' to 'dep'
* Move the channel switch check inside the loop to simplify
  termination

Acked-by: Ilya Yanok <yanok@emcraft.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 crypto/async_tx/async_tx.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index e8362c1efa30..dcbf1be149f3 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -115,34 +115,32 @@ EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
  *	(start) dependent operations on their target channel
  * @tx: transaction with dependencies
  */
-void
-async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
+void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
 {
-	struct dma_async_tx_descriptor *next = tx->next;
+	struct dma_async_tx_descriptor *dep = tx->next;
+	struct dma_async_tx_descriptor *dep_next;
 	struct dma_chan *chan;
 
-	if (!next)
+	if (!dep)
 		return;
 
-	tx->next = NULL;
-	chan = next->chan;
+	chan = dep->chan;
 
 	/* keep submitting up until a channel switch is detected
 	 * in that case we will be called again as a result of
 	 * processing the interrupt from async_tx_channel_switch
 	 */
-	while (next && next->chan == chan) {
-		struct dma_async_tx_descriptor *_next;
-
-		spin_lock_bh(&next->lock);
-		next->parent = NULL;
-		_next = next->next;
-		if (_next && _next->chan == chan)
-			next->next = NULL;
-		spin_unlock_bh(&next->lock);
-
-		next->tx_submit(next);
-		next = _next;
+	for (; dep; dep = dep_next) {
+		spin_lock_bh(&dep->lock);
+		dep->parent = NULL;
+		dep_next = dep->next;
+		if (dep_next && dep_next->chan == chan)
+			dep->next = NULL; /* ->next will be submitted */
+		else
+			dep_next = NULL; /* submit current dep and terminate */
+		spin_unlock_bh(&dep->lock);
+
+		dep->tx_submit(dep);
 	}
 
 	chan->device->device_issue_pending(chan);
-- 
GitLab


From 89f72a0633d1d4f28c4c5c8831ec814523d7671a Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 13 Sep 2008 20:05:34 -0700
Subject: [PATCH 110/895] drivers/dma/ioat_dma.c: drop code after return

The break after the return serves no purpose.

Signed-off-by: Julia Lawall <julia@diku.dk>
Reviewed-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/ioat_dma.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index bc8c6e3470ca..1ef68b315657 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -971,11 +971,9 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
 	switch (ioat_chan->device->version) {
 	case IOAT_VER_1_2:
 		return ioat1_dma_get_next_descriptor(ioat_chan);
-		break;
 	case IOAT_VER_2_0:
 	case IOAT_VER_3_0:
 		return ioat2_dma_get_next_descriptor(ioat_chan);
-		break;
 	}
 	return NULL;
 }
-- 
GitLab


From f06febc96ba8e0af80bcc3eaec0a109e88275fac Mon Sep 17 00:00:00 2001
From: Frank Mayhar <fmayhar@google.com>
Date: Fri, 12 Sep 2008 09:54:39 -0700
Subject: [PATCH 111/895] timers: fix itimer/many thread hang

Overview

This patch reworks the handling of POSIX CPU timers, including the
ITIMER_PROF, ITIMER_VIRT timers and rlimit handling.  It was put together
with the help of Roland McGrath, the owner and original writer of this code.

The problem we ran into, and the reason for this rework, has to do with using
a profiling timer in a process with a large number of threads.  It appears
that the performance of the old implementation of run_posix_cpu_timers() was
at least O(n*3) (where "n" is the number of threads in a process) or worse.
Everything is fine with an increasing number of threads until the time taken
for that routine to run becomes the same as or greater than the tick time, at
which point things degrade rather quickly.

This patch fixes bug 9906, "Weird hang with NPTL and SIGPROF."

Code Changes

This rework corrects the implementation of run_posix_cpu_timers() to make it
run in constant time for a particular machine.  (Performance may vary between
one machine and another depending upon whether the kernel is built as single-
or multiprocessor and, in the latter case, depending upon the number of
running processors.)  To do this, at each tick we now update fields in
signal_struct as well as task_struct.  The run_posix_cpu_timers() function
uses those fields to make its decisions.

We define a new structure, "task_cputime," to contain user, system and
scheduler times and use these in appropriate places:

struct task_cputime {
	cputime_t utime;
	cputime_t stime;
	unsigned long long sum_exec_runtime;
};

This is included in the structure "thread_group_cputime," which is a new
substructure of signal_struct and which varies for uniprocessor versus
multiprocessor kernels.  For uniprocessor kernels, it uses "task_cputime" as
a simple substructure, while for multiprocessor kernels it is a pointer:

struct thread_group_cputime {
	struct task_cputime totals;
};

struct thread_group_cputime {
	struct task_cputime *totals;
};

We also add a new task_cputime substructure directly to signal_struct, to
cache the earliest expiration of process-wide timers, and task_cputime also
replaces the it_*_expires fields of task_struct (used for earliest expiration
of thread timers).  The "thread_group_cputime" structure contains process-wide
timers that are updated via account_user_time() and friends.  In the non-SMP
case the structure is a simple aggregator; unfortunately in the SMP case that
simplicity was not achievable due to cache-line contention between CPUs (in
one measured case performance was actually _worse_ on a 16-cpu system than
the same test on a 4-cpu system, due to this contention).  For SMP, the
thread_group_cputime counters are maintained as a per-cpu structure allocated
using alloc_percpu().  The timer functions update only the timer field in
the structure corresponding to the running CPU, obtained using per_cpu_ptr().

We define a set of inline functions in sched.h that we use to maintain the
thread_group_cputime structure and hide the differences between UP and SMP
implementations from the rest of the kernel.  The thread_group_cputime_init()
function initializes the thread_group_cputime structure for the given task.
The thread_group_cputime_alloc() is a no-op for UP; for SMP it calls the
out-of-line function thread_group_cputime_alloc_smp() to allocate and fill
in the per-cpu structures and fields.  The thread_group_cputime_free()
function, also a no-op for UP, in SMP frees the per-cpu structures.  The
thread_group_cputime_clone_thread() function (also a UP no-op) for SMP calls
thread_group_cputime_alloc() if the per-cpu structures haven't yet been
allocated.  The thread_group_cputime() function fills the task_cputime
structure it is passed with the contents of the thread_group_cputime fields;
in UP it's that simple but in SMP it must also safely check that tsk->signal
is non-NULL (if it is it just uses the appropriate fields of task_struct) and,
if so, sums the per-cpu values for each online CPU.  Finally, the three
functions account_group_user_time(), account_group_system_time() and
account_group_exec_runtime() are used by timer functions to update the
respective fields of the thread_group_cputime structure.

Non-SMP operation is trivial and will not be mentioned further.

The per-cpu structure is always allocated when a task creates its first new
thread, via a call to thread_group_cputime_clone_thread() from copy_signal().
It is freed at process exit via a call to thread_group_cputime_free() from
cleanup_signal().

All functions that formerly summed utime/stime/sum_sched_runtime values from
from all threads in the thread group now use thread_group_cputime() to
snapshot the values in the thread_group_cputime structure or the values in
the task structure itself if the per-cpu structure hasn't been allocated.

Finally, the code in kernel/posix-cpu-timers.c has changed quite a bit.
The run_posix_cpu_timers() function has been split into a fast path and a
slow path; the former safely checks whether there are any expired thread
timers and, if not, just returns, while the slow path does the heavy lifting.
With the dedicated thread group fields, timers are no longer "rebalanced" and
the process_timer_rebalance() function and related code has gone away.  All
summing loops are gone and all code that used them now uses the
thread_group_cputime() inline.  When process-wide timers are set, the new
task_cputime structure in signal_struct is used to cache the earliest
expiration; this is checked in the fast path.

Performance

The fix appears not to add significant overhead to existing operations.  It
generally performs the same as the current code except in two cases, one in
which it performs slightly worse (Case 5 below) and one in which it performs
very significantly better (Case 2 below).  Overall it's a wash except in those
two cases.

I've since done somewhat more involved testing on a dual-core Opteron system.

Case 1: With no itimer running, for a test with 100,000 threads, the fixed
	kernel took 1428.5 seconds, 513 seconds more than the unfixed system,
	all of which was spent in the system.  There were twice as many
	voluntary context switches with the fix as without it.

Case 2: With an itimer running at .01 second ticks and 4000 threads (the most
	an unmodified kernel can handle), the fixed kernel ran the test in
	eight percent of the time (5.8 seconds as opposed to 70 seconds) and
	had better tick accuracy (.012 seconds per tick as opposed to .023
	seconds per tick).

Case 3: A 4000-thread test with an initial timer tick of .01 second and an
	interval of 10,000 seconds (i.e. a timer that ticks only once) had
	very nearly the same performance in both cases:  6.3 seconds elapsed
	for the fixed kernel versus 5.5 seconds for the unfixed kernel.

With fewer threads (eight in these tests), the Case 1 test ran in essentially
the same time on both the modified and unmodified kernels (5.2 seconds versus
5.8 seconds).  The Case 2 test ran in about the same time as well, 5.9 seconds
versus 5.4 seconds but again with much better tick accuracy, .013 seconds per
tick versus .025 seconds per tick for the unmodified kernel.

Since the fix affected the rlimit code, I also tested soft and hard CPU limits.

Case 4: With a hard CPU limit of 20 seconds and eight threads (and an itimer
	running), the modified kernel was very slightly favored in that while
	it killed the process in 19.997 seconds of CPU time (5.002 seconds of
	wall time), only .003 seconds of that was system time, the rest was
	user time.  The unmodified kernel killed the process in 20.001 seconds
	of CPU (5.014 seconds of wall time) of which .016 seconds was system
	time.  Really, though, the results were too close to call.  The results
	were essentially the same with no itimer running.

Case 5: With a soft limit of 20 seconds and a hard limit of 2000 seconds
	(where the hard limit would never be reached) and an itimer running,
	the modified kernel exhibited worse tick accuracy than the unmodified
	kernel: .050 seconds/tick versus .028 seconds/tick.  Otherwise,
	performance was almost indistinguishable.  With no itimer running this
	test exhibited virtually identical behavior and times in both cases.

In times past I did some limited performance testing.  those results are below.

On a four-cpu Opteron system without this fix, a sixteen-thread test executed
in 3569.991 seconds, of which user was 3568.435s and system was 1.556s.  On
the same system with the fix, user and elapsed time were about the same, but
system time dropped to 0.007 seconds.  Performance with eight, four and one
thread were comparable.  Interestingly, the timer ticks with the fix seemed
more accurate:  The sixteen-thread test with the fix received 149543 ticks
for 0.024 seconds per tick, while the same test without the fix received 58720
for 0.061 seconds per tick.  Both cases were configured for an interval of
0.01 seconds.  Again, the other tests were comparable.  Each thread in this
test computed the primes up to 25,000,000.

I also did a test with a large number of threads, 100,000 threads, which is
impossible without the fix.  In this case each thread computed the primes only
up to 10,000 (to make the runtime manageable).  System time dominated, at
1546.968 seconds out of a total 2176.906 seconds (giving a user time of
629.938s).  It received 147651 ticks for 0.015 seconds per tick, still quite
accurate.  There is obviously no comparable test without the fix.

Signed-off-by: Frank Mayhar <fmayhar@google.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/binfmt_elf.c              |  19 +-
 fs/proc/array.c              |   8 +-
 include/linux/posix-timers.h |   2 +
 include/linux/sched.h        | 257 ++++++++++++++++++-
 include/linux/time.h         |   3 +
 kernel/compat.c              |  53 ++--
 kernel/exit.c                |  19 +-
 kernel/fork.c                |  88 ++++---
 kernel/itimer.c              |  33 +--
 kernel/posix-cpu-timers.c    | 471 +++++++++++++++++++----------------
 kernel/sched.c               |  53 +++-
 kernel/sched_fair.c          |   1 +
 kernel/sched_rt.c            |   4 +-
 kernel/signal.c              |   8 +-
 kernel/sys.c                 |  75 ++----
 security/selinux/hooks.c     |   9 +-
 16 files changed, 677 insertions(+), 426 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 655ed8d30a86..a8635f637038 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1333,20 +1333,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
+		struct task_cputime cputime;
+
 		/*
-		 * This is the record for the group leader.  Add in the
-		 * cumulative times of previous dead threads.  This total
-		 * won't include the time of each live thread whose state
-		 * is included in the core dump.  The final total reported
-		 * to our parent process when it calls wait4 will include
-		 * those sums as well as the little bit more time it takes
-		 * this and each other thread to finish dying after the
-		 * core dump synchronization phase.
+		 * This is the record for the group leader.  It shows the
+		 * group-wide total, not its individual thread total.
 		 */
-		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
-				   &prstatus->pr_utime);
-		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
-				   &prstatus->pr_stime);
+		thread_group_cputime(p, &cputime);
+		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
+		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 71c9be59c9c2..933953c4e407 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -395,20 +395,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 		/* add up live thread stats at the group level */
 		if (whole) {
+			struct task_cputime cputime;
 			struct task_struct *t = task;
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				utime = cputime_add(utime, task_utime(t));
-				stime = cputime_add(stime, task_stime(t));
 				gtime = cputime_add(gtime, task_gtime(t));
 				t = next_thread(t);
 			} while (t != task);
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
-			utime = cputime_add(utime, sig->utime);
-			stime = cputime_add(stime, sig->stime);
+			thread_group_cputime(task, &cputime);
+			utime = cputime.utime;
+			stime = cputime.stime;
 			gtime = cputime_add(gtime, sig->gtime);
 		}
 
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a7dd38f30ade..f9d8e9e94e9b 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -115,4 +115,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 
 long clock_nanosleep_restart(struct restart_block *restart_block);
 
+void update_rlimit_cpu(unsigned long rlim_new);
+
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d9120c5ad15..26d7a5f2d0ba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -425,6 +425,45 @@ struct pacct_struct {
 	unsigned long		ac_minflt, ac_majflt;
 };
 
+/**
+ * struct task_cputime - collected CPU time counts
+ * @utime:		time spent in user mode, in &cputime_t units
+ * @stime:		time spent in kernel mode, in &cputime_t units
+ * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
+ * 
+ * This structure groups together three kinds of CPU time that are
+ * tracked for threads and thread groups.  Most things considering
+ * CPU time want to group these counts together and treat all three
+ * of them in parallel.
+ */
+struct task_cputime {
+	cputime_t utime;
+	cputime_t stime;
+	unsigned long long sum_exec_runtime;
+};
+/* Alternate field names when used to cache expirations. */
+#define prof_exp	stime
+#define virt_exp	utime
+#define sched_exp	sum_exec_runtime
+
+/**
+ * struct thread_group_cputime - thread group interval timer counts
+ * @totals:		thread group interval timers; substructure for
+ *			uniprocessor kernel, per-cpu for SMP kernel.
+ *
+ * This structure contains the version of task_cputime, above, that is
+ * used for thread group CPU clock calculations.
+ */
+#ifdef CONFIG_SMP
+struct thread_group_cputime {
+	struct task_cputime *totals;
+};
+#else
+struct thread_group_cputime {
+	struct task_cputime totals;
+};
+#endif
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -470,6 +509,17 @@ struct signal_struct {
 	cputime_t it_prof_expires, it_virt_expires;
 	cputime_t it_prof_incr, it_virt_incr;
 
+	/*
+	 * Thread group totals for process CPU clocks.
+	 * See thread_group_cputime(), et al, for details.
+	 */
+	struct thread_group_cputime cputime;
+
+	/* Earliest-expiration cache. */
+	struct task_cputime cputime_expires;
+
+	struct list_head cpu_timers[3];
+
 	/* job control IDs */
 
 	/*
@@ -500,7 +550,7 @@ struct signal_struct {
 	 * Live threads maintain their own counters and add to these
 	 * in __exit_signal, except for the group leader.
 	 */
-	cputime_t utime, stime, cutime, cstime;
+	cputime_t cutime, cstime;
 	cputime_t gtime;
 	cputime_t cgtime;
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -508,14 +558,6 @@ struct signal_struct {
 	unsigned long inblock, oublock, cinblock, coublock;
 	struct task_io_accounting ioac;
 
-	/*
-	 * Cumulative ns of scheduled CPU time for dead threads in the
-	 * group, not including a zombie group leader.  (This only differs
-	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
-	 * other than jiffies.)
-	 */
-	unsigned long long sum_sched_runtime;
-
 	/*
 	 * We don't bother to synchronize most readers of this at all,
 	 * because there is no reader checking a limit that actually needs
@@ -527,8 +569,6 @@ struct signal_struct {
 	 */
 	struct rlimit rlim[RLIM_NLIMITS];
 
-	struct list_head cpu_timers[3];
-
 	/* keep the process-shared keyrings here so that they do the right
 	 * thing in threads created with CLONE_THREAD */
 #ifdef CONFIG_KEYS
@@ -1134,8 +1174,7 @@ struct task_struct {
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
 	unsigned long min_flt, maj_flt;
 
-  	cputime_t it_prof_expires, it_virt_expires;
-	unsigned long long it_sched_expires;
+	struct task_cputime cputime_expires;
 	struct list_head cpu_timers[3];
 
 /* process credentials */
@@ -1585,6 +1624,7 @@ extern unsigned long long cpu_clock(int cpu);
 
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
+extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
 
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
@@ -2081,6 +2121,197 @@ static inline int spin_needbreak(spinlock_t *lock)
 #endif
 }
 
+/*
+ * Thread group CPU time accounting.
+ */
+#ifdef CONFIG_SMP
+
+extern int thread_group_cputime_alloc_smp(struct task_struct *);
+extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *);
+
+static inline void thread_group_cputime_init(struct signal_struct *sig)
+{
+	sig->cputime.totals = NULL;
+}
+
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
+						    struct task_struct *new)
+{
+	if (curr->signal->cputime.totals)
+		return 0;
+	return thread_group_cputime_alloc_smp(curr);
+}
+
+static inline void thread_group_cputime_free(struct signal_struct *sig)
+{
+	free_percpu(sig->cputime.totals);
+}
+
+/**
+ * thread_group_cputime - Sum the thread group time fields across all CPUs.
+ *
+ * This is a wrapper for the real routine, thread_group_cputime_smp().  See
+ * that routine for details.
+ */
+static inline void thread_group_cputime(
+	struct task_struct *tsk,
+	struct task_cputime *times)
+{
+	thread_group_cputime_smp(tsk, times);
+}
+
+/**
+ * thread_group_cputime_account_user - Maintain utime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the utime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->utime = cputime_add(times->utime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_system - Maintain stime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the stime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->stime = cputime_add(times->stime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
+ *						thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @ns:		Time value by which to increment the sum_exec_runtime field
+ *		of that structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->sum_exec_runtime += ns;
+		put_cpu_no_resched();
+	}
+}
+
+#else /* CONFIG_SMP */
+
+static inline void thread_group_cputime_init(struct signal_struct *sig)
+{
+	sig->cputime.totals.utime = cputime_zero;
+	sig->cputime.totals.stime = cputime_zero;
+	sig->cputime.totals.sum_exec_runtime = 0;
+}
+
+static inline int thread_group_cputime_alloc(struct task_struct *tsk)
+{
+	return 0;
+}
+
+static inline void thread_group_cputime_free(struct signal_struct *sig)
+{
+}
+
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
+						     struct task_struct *tsk)
+{
+}
+
+static inline void thread_group_cputime(struct task_struct *tsk,
+					 struct task_cputime *cputime)
+{
+	*cputime = tsk->signal->cputime.totals;
+}
+
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
+}
+
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
+}
+
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	tgtimes->totals->sum_exec_runtime += ns;
+}
+
+#endif /* CONFIG_SMP */
+
+static inline void account_group_user_time(struct task_struct *tsk,
+					    cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_user(&sig->cputime, cputime);
+}
+
+static inline void account_group_system_time(struct task_struct *tsk,
+					      cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_system(&sig->cputime, cputime);
+}
+
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+					       unsigned long long ns)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+}
+
 /*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
diff --git a/include/linux/time.h b/include/linux/time.h
index e15206a7e82e..1b70b3c293e9 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -125,6 +125,9 @@ extern int timekeeping_valid_for_hres(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
 
+struct tms;
+extern void do_sys_times(struct tms *);
+
 /**
  * timespec_to_ns - Convert timespec to nanoseconds
  * @ts:		pointer to the timespec variable to be converted
diff --git a/kernel/compat.c b/kernel/compat.c
index 32c254a8ab9a..72650e39b3e6 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,6 +23,7 @@
 #include <linux/timex.h>
 #include <linux/migrate.h>
 #include <linux/posix-timers.h>
+#include <linux/times.h>
 
 #include <asm/uaccess.h>
 
@@ -150,49 +151,23 @@ asmlinkage long compat_sys_setitimer(int which,
 	return 0;
 }
 
+static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
+{
+	return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
+}
+
 asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
 {
-	/*
-	 *	In the SMP world we might just be unlucky and have one of
-	 *	the times increment as we use it. Since the value is an
-	 *	atomically safe type this is just fine. Conceptually its
-	 *	as if the syscall took an instant longer to occur.
-	 */
 	if (tbuf) {
+		struct tms tms;
 		struct compat_tms tmp;
-		struct task_struct *tsk = current;
-		struct task_struct *t;
-		cputime_t utime, stime, cutime, cstime;
-
-		read_lock(&tasklist_lock);
-		utime = tsk->signal->utime;
-		stime = tsk->signal->stime;
-		t = tsk;
-		do {
-			utime = cputime_add(utime, t->utime);
-			stime = cputime_add(stime, t->stime);
-			t = next_thread(t);
-		} while (t != tsk);
-
-		/*
-		 * While we have tasklist_lock read-locked, no dying thread
-		 * can be updating current->signal->[us]time.  Instead,
-		 * we got their counts included in the live thread loop.
-		 * However, another thread can come in right now and
-		 * do a wait call that updates current->signal->c[us]time.
-		 * To make sure we always see that pair updated atomically,
-		 * we take the siglock around fetching them.
-		 */
-		spin_lock_irq(&tsk->sighand->siglock);
-		cutime = tsk->signal->cutime;
-		cstime = tsk->signal->cstime;
-		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
-
-		tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime));
-		tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime));
-		tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime));
-		tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime));
+
+		do_sys_times(&tms);
+		/* Convert our struct tms to the compat version. */
+		tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
+		tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
+		tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
+		tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
 		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
 			return -EFAULT;
 	}
diff --git a/kernel/exit.c b/kernel/exit.c
index 16395644a98f..40036ac04271 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -112,8 +112,6 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, task_utime(tsk));
-		sig->stime = cputime_add(sig->stime, task_stime(tsk));
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -122,7 +120,6 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
-		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
 
@@ -1294,6 +1291,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	if (likely(!traced)) {
 		struct signal_struct *psig;
 		struct signal_struct *sig;
+		struct task_cputime cputime;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1309,20 +1307,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		 * need to protect the access to p->parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
+		 *
+		 * We use thread_group_cputime() to get times for the thread
+		 * group, which consolidates times for all threads in the
+		 * group including the group leader.
 		 */
 		spin_lock_irq(&p->parent->sighand->siglock);
 		psig = p->parent->signal;
 		sig = p->signal;
+		thread_group_cputime(p, &cputime);
 		psig->cutime =
 			cputime_add(psig->cutime,
-			cputime_add(p->utime,
-			cputime_add(sig->utime,
-				    sig->cutime)));
+			cputime_add(cputime.utime,
+				    sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
-			cputime_add(p->stime,
-			cputime_add(sig->stime,
-				    sig->cstime)));
+			cputime_add(cputime.stime,
+				    sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 			cputime_add(p->gtime,
diff --git a/kernel/fork.c b/kernel/fork.c
index 7ce2ebe84796..a8ac2efb8e30 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -759,15 +759,44 @@ void __cleanup_sighand(struct sighand_struct *sighand)
 		kmem_cache_free(sighand_cachep, sighand);
 }
 
+
+/*
+ * Initialize POSIX timer handling for a thread group.
+ */
+static void posix_cpu_timers_init_group(struct signal_struct *sig)
+{
+	/* Thread group counters. */
+	thread_group_cputime_init(sig);
+
+	/* Expiration times and increments. */
+	sig->it_virt_expires = cputime_zero;
+	sig->it_virt_incr = cputime_zero;
+	sig->it_prof_expires = cputime_zero;
+	sig->it_prof_incr = cputime_zero;
+
+	/* Cached expiration times. */
+	sig->cputime_expires.prof_exp = cputime_zero;
+	sig->cputime_expires.virt_exp = cputime_zero;
+	sig->cputime_expires.sched_exp = 0;
+
+	/* The timer lists. */
+	INIT_LIST_HEAD(&sig->cpu_timers[0]);
+	INIT_LIST_HEAD(&sig->cpu_timers[1]);
+	INIT_LIST_HEAD(&sig->cpu_timers[2]);
+}
+
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		atomic_inc(&current->signal->count);
-		atomic_inc(&current->signal->live);
-		return 0;
+		ret = thread_group_cputime_clone_thread(current, tsk);
+		if (likely(!ret)) {
+			atomic_inc(&current->signal->count);
+			atomic_inc(&current->signal->live);
+		}
+		return ret;
 	}
 	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
@@ -795,15 +824,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 
-	sig->it_virt_expires = cputime_zero;
-	sig->it_virt_incr = cputime_zero;
-	sig->it_prof_expires = cputime_zero;
-	sig->it_prof_incr = cputime_zero;
-
 	sig->leader = 0;	/* session leadership doesn't inherit */
 	sig->tty_old_pgrp = NULL;
 
-	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+	sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
@@ -820,14 +844,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
-	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-		/*
-		 * New sole thread in the process gets an expiry time
-		 * of the whole CPU time limit.
-		 */
-		tsk->it_prof_expires =
-			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
-	}
+	posix_cpu_timers_init_group(sig);
+
 	acct_init_pacct(&sig->pacct);
 
 	tty_audit_fork(sig);
@@ -837,6 +855,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_signal(struct signal_struct *sig)
 {
+	thread_group_cputime_free(sig);
 	exit_thread_group_keys(sig);
 	kmem_cache_free(signal_cachep, sig);
 }
@@ -885,6 +904,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 }
 #endif /* CONFIG_MM_OWNER */
 
+/*
+ * Initialize POSIX timer handling for a single task.
+ */
+static void posix_cpu_timers_init(struct task_struct *tsk)
+{
+	tsk->cputime_expires.prof_exp = cputime_zero;
+	tsk->cputime_expires.virt_exp = cputime_zero;
+	tsk->cputime_expires.sched_exp = 0;
+	INIT_LIST_HEAD(&tsk->cpu_timers[0]);
+	INIT_LIST_HEAD(&tsk->cpu_timers[1]);
+	INIT_LIST_HEAD(&tsk->cpu_timers[2]);
+}
+
 /*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
@@ -995,12 +1027,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
-	p->it_virt_expires = cputime_zero;
-	p->it_prof_expires = cputime_zero;
-	p->it_sched_expires = 0;
-	INIT_LIST_HEAD(&p->cpu_timers[0]);
-	INIT_LIST_HEAD(&p->cpu_timers[1]);
-	INIT_LIST_HEAD(&p->cpu_timers[2]);
+	posix_cpu_timers_init(p);
 
 	p->lock_depth = -1;		/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
@@ -1201,21 +1228,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (clone_flags & CLONE_THREAD) {
 		p->group_leader = current->group_leader;
 		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
-
-		if (!cputime_eq(current->signal->it_virt_expires,
-				cputime_zero) ||
-		    !cputime_eq(current->signal->it_prof_expires,
-				cputime_zero) ||
-		    current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
-		    !list_empty(&current->signal->cpu_timers[0]) ||
-		    !list_empty(&current->signal->cpu_timers[1]) ||
-		    !list_empty(&current->signal->cpu_timers[2])) {
-			/*
-			 * Have child wake up on its first tick to check
-			 * for process CPU timers.
-			 */
-			p->it_prof_expires = jiffies_to_cputime(1);
-		}
 	}
 
 	if (likely(p->pid)) {
diff --git a/kernel/itimer.c b/kernel/itimer.c
index ab982747d9bd..db7c358b9a02 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -55,17 +55,15 @@ int do_getitimer(int which, struct itimerval *value)
 		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_virt_expires;
 		cinterval = tsk->signal->it_virt_incr;
 		if (!cputime_eq(cval, cputime_zero)) {
-			struct task_struct *t = tsk;
-			cputime_t utime = tsk->signal->utime;
-			do {
-				utime = cputime_add(utime, t->utime);
-				t = next_thread(t);
-			} while (t != tsk);
+			struct task_cputime cputime;
+			cputime_t utime;
+
+			thread_group_cputime(tsk, &cputime);
+			utime = cputime.utime;
 			if (cputime_le(cval, utime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
 			} else {
@@ -73,25 +71,19 @@ int do_getitimer(int which, struct itimerval *value)
 			}
 		}
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		cputime_to_timeval(cval, &value->it_value);
 		cputime_to_timeval(cinterval, &value->it_interval);
 		break;
 	case ITIMER_PROF:
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_prof_expires;
 		cinterval = tsk->signal->it_prof_incr;
 		if (!cputime_eq(cval, cputime_zero)) {
-			struct task_struct *t = tsk;
-			cputime_t ptime = cputime_add(tsk->signal->utime,
-						      tsk->signal->stime);
-			do {
-				ptime = cputime_add(ptime,
-						    cputime_add(t->utime,
-								t->stime));
-				t = next_thread(t);
-			} while (t != tsk);
+			struct task_cputime times;
+			cputime_t ptime;
+
+			thread_group_cputime(tsk, &times);
+			ptime = cputime_add(times.utime, times.stime);
 			if (cputime_le(cval, ptime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
 			} else {
@@ -99,7 +91,6 @@ int do_getitimer(int which, struct itimerval *value)
 			}
 		}
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		cputime_to_timeval(cval, &value->it_value);
 		cputime_to_timeval(cinterval, &value->it_interval);
 		break;
@@ -185,7 +176,6 @@ again:
 	case ITIMER_VIRTUAL:
 		nval = timeval_to_cputime(&value->it_value);
 		ninterval = timeval_to_cputime(&value->it_interval);
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_virt_expires;
 		cinterval = tsk->signal->it_virt_incr;
@@ -200,7 +190,6 @@ again:
 		tsk->signal->it_virt_expires = nval;
 		tsk->signal->it_virt_incr = ninterval;
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		if (ovalue) {
 			cputime_to_timeval(cval, &ovalue->it_value);
 			cputime_to_timeval(cinterval, &ovalue->it_interval);
@@ -209,7 +198,6 @@ again:
 	case ITIMER_PROF:
 		nval = timeval_to_cputime(&value->it_value);
 		ninterval = timeval_to_cputime(&value->it_interval);
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_prof_expires;
 		cinterval = tsk->signal->it_prof_incr;
@@ -224,7 +212,6 @@ again:
 		tsk->signal->it_prof_expires = nval;
 		tsk->signal->it_prof_incr = ninterval;
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		if (ovalue) {
 			cputime_to_timeval(cval, &ovalue->it_value);
 			cputime_to_timeval(cinterval, &ovalue->it_interval);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index c42a03aef36f..dba1c334c3e8 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -8,6 +8,99 @@
 #include <linux/math64.h>
 #include <asm/uaccess.h>
 
+#ifdef CONFIG_SMP
+/*
+ * Allocate the thread_group_cputime structure appropriately for SMP kernels
+ * and fill in the current values of the fields.  Called from copy_signal()
+ * via thread_group_cputime_clone_thread() when adding a second or subsequent
+ * thread to a thread group.  Assumes interrupts are enabled when called.
+ */
+int thread_group_cputime_alloc_smp(struct task_struct *tsk)
+{
+	struct signal_struct *sig = tsk->signal;
+	struct task_cputime *cputime;
+
+	/*
+	 * If we have multiple threads and we don't already have a
+	 * per-CPU task_cputime struct, allocate one and fill it in with
+	 * the times accumulated so far.
+	 */
+	if (sig->cputime.totals)
+		return 0;
+	cputime = alloc_percpu(struct task_cputime);
+	if (cputime == NULL)
+		return -ENOMEM;
+	read_lock(&tasklist_lock);
+	spin_lock_irq(&tsk->sighand->siglock);
+	if (sig->cputime.totals) {
+		spin_unlock_irq(&tsk->sighand->siglock);
+		read_unlock(&tasklist_lock);
+		free_percpu(cputime);
+		return 0;
+	}
+	sig->cputime.totals = cputime;
+	cputime = per_cpu_ptr(sig->cputime.totals, get_cpu());
+	cputime->utime = tsk->utime;
+	cputime->stime = tsk->stime;
+	cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
+	put_cpu_no_resched();
+	spin_unlock_irq(&tsk->sighand->siglock);
+	read_unlock(&tasklist_lock);
+	return 0;
+}
+
+/**
+ * thread_group_cputime_smp - Sum the thread group time fields across all CPUs.
+ *
+ * @tsk:	The task we use to identify the thread group.
+ * @times:	task_cputime structure in which we return the summed fields.
+ *
+ * Walk the list of CPUs to sum the per-CPU time fields in the thread group
+ * time structure.
+ */
+void thread_group_cputime_smp(
+	struct task_struct *tsk,
+	struct task_cputime *times)
+{
+	struct signal_struct *sig;
+	int i;
+	struct task_cputime *tot;
+
+	sig = tsk->signal;
+	if (unlikely(!sig) || !sig->cputime.totals) {
+		times->utime = tsk->utime;
+		times->stime = tsk->stime;
+		times->sum_exec_runtime = tsk->se.sum_exec_runtime;
+		return;
+	}
+	times->stime = times->utime = cputime_zero;
+	times->sum_exec_runtime = 0;
+	for_each_possible_cpu(i) {
+		tot = per_cpu_ptr(tsk->signal->cputime.totals, i);
+		times->utime = cputime_add(times->utime, tot->utime);
+		times->stime = cputime_add(times->stime, tot->stime);
+		times->sum_exec_runtime += tot->sum_exec_runtime;
+	}
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Called after updating RLIMIT_CPU to set timer expiration if necessary.
+ */
+void update_rlimit_cpu(unsigned long rlim_new)
+{
+	cputime_t cputime;
+
+	cputime = secs_to_cputime(rlim_new);
+	if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
+            cputime_lt(current->signal->it_prof_expires, cputime)) {
+		spin_lock_irq(&current->sighand->siglock);
+		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+}
+
 static int check_clock(const clockid_t which_clock)
 {
 	int error = 0;
@@ -158,10 +251,6 @@ static inline cputime_t virt_ticks(struct task_struct *p)
 {
 	return p->utime;
 }
-static inline unsigned long long sched_ns(struct task_struct *p)
-{
-	return task_sched_runtime(p);
-}
 
 int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
 {
@@ -211,7 +300,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = sched_ns(p);
+		cpu->sched = task_sched_runtime(p);
 		break;
 	}
 	return 0;
@@ -226,31 +315,20 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx,
 					 struct task_struct *p,
 					 union cpu_time_count *cpu)
 {
-	struct task_struct *t = p;
- 	switch (clock_idx) {
+	struct task_cputime cputime;
+
+	thread_group_cputime(p, &cputime);
+	switch (clock_idx) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
-		cpu->cpu = cputime_add(p->signal->utime, p->signal->stime);
-		do {
-			cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t));
-			t = next_thread(t);
-		} while (t != p);
+		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
 		break;
 	case CPUCLOCK_VIRT:
-		cpu->cpu = p->signal->utime;
-		do {
-			cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t));
-			t = next_thread(t);
-		} while (t != p);
+		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = p->signal->sum_sched_runtime;
-		/* Add in each other live thread.  */
-		while ((t = next_thread(t)) != p) {
-			cpu->sched += t->se.sum_exec_runtime;
-		}
-		cpu->sched += sched_ns(p);
+		cpu->sched = thread_group_sched_runtime(p);
 		break;
 	}
 	return 0;
@@ -471,80 +549,11 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
-	cleanup_timers(tsk->signal->cpu_timers,
-		       cputime_add(tsk->utime, tsk->signal->utime),
-		       cputime_add(tsk->stime, tsk->signal->stime),
-		     tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime);
-}
-
-
-/*
- * Set the expiry times of all the threads in the process so one of them
- * will go off before the process cumulative expiry total is reached.
- */
-static void process_timer_rebalance(struct task_struct *p,
-				    unsigned int clock_idx,
-				    union cpu_time_count expires,
-				    union cpu_time_count val)
-{
-	cputime_t ticks, left;
-	unsigned long long ns, nsleft;
- 	struct task_struct *t = p;
-	unsigned int nthreads = atomic_read(&p->signal->live);
-
-	if (!nthreads)
-		return;
+	struct task_cputime cputime;
 
-	switch (clock_idx) {
-	default:
-		BUG();
-		break;
-	case CPUCLOCK_PROF:
-		left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
-				       nthreads);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ticks = cputime_add(prof_ticks(t), left);
-				if (cputime_eq(t->it_prof_expires,
-					       cputime_zero) ||
-				    cputime_gt(t->it_prof_expires, ticks)) {
-					t->it_prof_expires = ticks;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	case CPUCLOCK_VIRT:
-		left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
-				       nthreads);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ticks = cputime_add(virt_ticks(t), left);
-				if (cputime_eq(t->it_virt_expires,
-					       cputime_zero) ||
-				    cputime_gt(t->it_virt_expires, ticks)) {
-					t->it_virt_expires = ticks;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	case CPUCLOCK_SCHED:
-		nsleft = expires.sched - val.sched;
-		do_div(nsleft, nthreads);
-		nsleft = max_t(unsigned long long, nsleft, 1);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ns = t->se.sum_exec_runtime + nsleft;
-				if (t->it_sched_expires == 0 ||
-				    t->it_sched_expires > ns) {
-					t->it_sched_expires = ns;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	}
+	thread_group_cputime(tsk, &cputime);
+	cleanup_timers(tsk->signal->cpu_timers,
+		       cputime.utime, cputime.stime, cputime.sum_exec_runtime);
 }
 
 static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
@@ -608,29 +617,32 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 			default:
 				BUG();
 			case CPUCLOCK_PROF:
-				if (cputime_eq(p->it_prof_expires,
+				if (cputime_eq(p->cputime_expires.prof_exp,
 					       cputime_zero) ||
-				    cputime_gt(p->it_prof_expires,
+				    cputime_gt(p->cputime_expires.prof_exp,
 					       nt->expires.cpu))
-					p->it_prof_expires = nt->expires.cpu;
+					p->cputime_expires.prof_exp =
+						nt->expires.cpu;
 				break;
 			case CPUCLOCK_VIRT:
-				if (cputime_eq(p->it_virt_expires,
+				if (cputime_eq(p->cputime_expires.virt_exp,
 					       cputime_zero) ||
-				    cputime_gt(p->it_virt_expires,
+				    cputime_gt(p->cputime_expires.virt_exp,
 					       nt->expires.cpu))
-					p->it_virt_expires = nt->expires.cpu;
+					p->cputime_expires.virt_exp =
+						nt->expires.cpu;
 				break;
 			case CPUCLOCK_SCHED:
-				if (p->it_sched_expires == 0 ||
-				    p->it_sched_expires > nt->expires.sched)
-					p->it_sched_expires = nt->expires.sched;
+				if (p->cputime_expires.sched_exp == 0 ||
+				    p->cputime_expires.sched_exp >
+							nt->expires.sched)
+					p->cputime_expires.sched_exp =
+						nt->expires.sched;
 				break;
 			}
 		} else {
 			/*
-			 * For a process timer, we must balance
-			 * all the live threads' expirations.
+			 * For a process timer, set the cached expiration time.
 			 */
 			switch (CPUCLOCK_WHICH(timer->it_clock)) {
 			default:
@@ -641,7 +653,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 				    cputime_lt(p->signal->it_virt_expires,
 					       timer->it.cpu.expires.cpu))
 					break;
-				goto rebalance;
+				p->signal->cputime_expires.virt_exp =
+					timer->it.cpu.expires.cpu;
+				break;
 			case CPUCLOCK_PROF:
 				if (!cputime_eq(p->signal->it_prof_expires,
 						cputime_zero) &&
@@ -652,13 +666,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 				if (i != RLIM_INFINITY &&
 				    i <= cputime_to_secs(timer->it.cpu.expires.cpu))
 					break;
-				goto rebalance;
+				p->signal->cputime_expires.prof_exp =
+					timer->it.cpu.expires.cpu;
+				break;
 			case CPUCLOCK_SCHED:
-			rebalance:
-				process_timer_rebalance(
-					timer->it.cpu.task,
-					CPUCLOCK_WHICH(timer->it_clock),
-					timer->it.cpu.expires, now);
+				p->signal->cputime_expires.sched_exp =
+					timer->it.cpu.expires.sched;
 				break;
 			}
 		}
@@ -969,13 +982,13 @@ static void check_thread_timers(struct task_struct *tsk,
 	struct signal_struct *const sig = tsk->signal;
 
 	maxfire = 20;
-	tsk->it_prof_expires = cputime_zero;
+	tsk->cputime_expires.prof_exp = cputime_zero;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
-			tsk->it_prof_expires = t->expires.cpu;
+			tsk->cputime_expires.prof_exp = t->expires.cpu;
 			break;
 		}
 		t->firing = 1;
@@ -984,13 +997,13 @@ static void check_thread_timers(struct task_struct *tsk,
 
 	++timers;
 	maxfire = 20;
-	tsk->it_virt_expires = cputime_zero;
+	tsk->cputime_expires.virt_exp = cputime_zero;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
-			tsk->it_virt_expires = t->expires.cpu;
+			tsk->cputime_expires.virt_exp = t->expires.cpu;
 			break;
 		}
 		t->firing = 1;
@@ -999,13 +1012,13 @@ static void check_thread_timers(struct task_struct *tsk,
 
 	++timers;
 	maxfire = 20;
-	tsk->it_sched_expires = 0;
+	tsk->cputime_expires.sched_exp = 0;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
-			tsk->it_sched_expires = t->expires.sched;
+			tsk->cputime_expires.sched_exp = t->expires.sched;
 			break;
 		}
 		t->firing = 1;
@@ -1055,10 +1068,10 @@ static void check_process_timers(struct task_struct *tsk,
 {
 	int maxfire;
 	struct signal_struct *const sig = tsk->signal;
-	cputime_t utime, stime, ptime, virt_expires, prof_expires;
+	cputime_t utime, ptime, virt_expires, prof_expires;
 	unsigned long long sum_sched_runtime, sched_expires;
-	struct task_struct *t;
 	struct list_head *timers = sig->cpu_timers;
+	struct task_cputime cputime;
 
 	/*
 	 * Don't sample the current process CPU clocks if there are no timers.
@@ -1074,18 +1087,10 @@ static void check_process_timers(struct task_struct *tsk,
 	/*
 	 * Collect the current process totals.
 	 */
-	utime = sig->utime;
-	stime = sig->stime;
-	sum_sched_runtime = sig->sum_sched_runtime;
-	t = tsk;
-	do {
-		utime = cputime_add(utime, t->utime);
-		stime = cputime_add(stime, t->stime);
-		sum_sched_runtime += t->se.sum_exec_runtime;
-		t = next_thread(t);
-	} while (t != tsk);
-	ptime = cputime_add(utime, stime);
-
+	thread_group_cputime(tsk, &cputime);
+	utime = cputime.utime;
+	ptime = cputime_add(utime, cputime.stime);
+	sum_sched_runtime = cputime.sum_exec_runtime;
 	maxfire = 20;
 	prof_expires = cputime_zero;
 	while (!list_empty(timers)) {
@@ -1193,60 +1198,18 @@ static void check_process_timers(struct task_struct *tsk,
 		}
 	}
 
-	if (!cputime_eq(prof_expires, cputime_zero) ||
-	    !cputime_eq(virt_expires, cputime_zero) ||
-	    sched_expires != 0) {
-		/*
-		 * Rebalance the threads' expiry times for the remaining
-		 * process CPU timers.
-		 */
-
-		cputime_t prof_left, virt_left, ticks;
-		unsigned long long sched_left, sched;
-		const unsigned int nthreads = atomic_read(&sig->live);
-
-		if (!nthreads)
-			return;
-
-		prof_left = cputime_sub(prof_expires, utime);
-		prof_left = cputime_sub(prof_left, stime);
-		prof_left = cputime_div_non_zero(prof_left, nthreads);
-		virt_left = cputime_sub(virt_expires, utime);
-		virt_left = cputime_div_non_zero(virt_left, nthreads);
-		if (sched_expires) {
-			sched_left = sched_expires - sum_sched_runtime;
-			do_div(sched_left, nthreads);
-			sched_left = max_t(unsigned long long, sched_left, 1);
-		} else {
-			sched_left = 0;
-		}
-		t = tsk;
-		do {
-			if (unlikely(t->flags & PF_EXITING))
-				continue;
-
-			ticks = cputime_add(cputime_add(t->utime, t->stime),
-					    prof_left);
-			if (!cputime_eq(prof_expires, cputime_zero) &&
-			    (cputime_eq(t->it_prof_expires, cputime_zero) ||
-			     cputime_gt(t->it_prof_expires, ticks))) {
-				t->it_prof_expires = ticks;
-			}
-
-			ticks = cputime_add(t->utime, virt_left);
-			if (!cputime_eq(virt_expires, cputime_zero) &&
-			    (cputime_eq(t->it_virt_expires, cputime_zero) ||
-			     cputime_gt(t->it_virt_expires, ticks))) {
-				t->it_virt_expires = ticks;
-			}
-
-			sched = t->se.sum_exec_runtime + sched_left;
-			if (sched_expires && (t->it_sched_expires == 0 ||
-					      t->it_sched_expires > sched)) {
-				t->it_sched_expires = sched;
-			}
-		} while ((t = next_thread(t)) != tsk);
-	}
+	if (!cputime_eq(prof_expires, cputime_zero) &&
+	    (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
+	     cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
+		sig->cputime_expires.prof_exp = prof_expires;
+	if (!cputime_eq(virt_expires, cputime_zero) &&
+	    (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
+	     cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
+		sig->cputime_expires.virt_exp = virt_expires;
+	if (sched_expires != 0 &&
+	    (sig->cputime_expires.sched_exp == 0 ||
+	     sig->cputime_expires.sched_exp > sched_expires))
+		sig->cputime_expires.sched_exp = sched_expires;
 }
 
 /*
@@ -1314,6 +1277,78 @@ out:
 	++timer->it_requeue_pending;
 }
 
+/**
+ * task_cputime_zero - Check a task_cputime struct for all zero fields.
+ *
+ * @cputime:	The struct to compare.
+ *
+ * Checks @cputime to see if all fields are zero.  Returns true if all fields
+ * are zero, false if any field is nonzero.
+ */
+static inline int task_cputime_zero(const struct task_cputime *cputime)
+{
+	if (cputime_eq(cputime->utime, cputime_zero) &&
+	    cputime_eq(cputime->stime, cputime_zero) &&
+	    cputime->sum_exec_runtime == 0)
+		return 1;
+	return 0;
+}
+
+/**
+ * task_cputime_expired - Compare two task_cputime entities.
+ *
+ * @sample:	The task_cputime structure to be checked for expiration.
+ * @expires:	Expiration times, against which @sample will be checked.
+ *
+ * Checks @sample against @expires to see if any field of @sample has expired.
+ * Returns true if any field of the former is greater than the corresponding
+ * field of the latter if the latter field is set.  Otherwise returns false.
+ */
+static inline int task_cputime_expired(const struct task_cputime *sample,
+					const struct task_cputime *expires)
+{
+	if (!cputime_eq(expires->utime, cputime_zero) &&
+	    cputime_ge(sample->utime, expires->utime))
+		return 1;
+	if (!cputime_eq(expires->stime, cputime_zero) &&
+	    cputime_ge(cputime_add(sample->utime, sample->stime),
+		       expires->stime))
+		return 1;
+	if (expires->sum_exec_runtime != 0 &&
+	    sample->sum_exec_runtime >= expires->sum_exec_runtime)
+		return 1;
+	return 0;
+}
+
+/**
+ * fastpath_timer_check - POSIX CPU timers fast path.
+ *
+ * @tsk:	The task (thread) being checked.
+ * @sig:	The signal pointer for that task.
+ *
+ * If there are no timers set return false.  Otherwise snapshot the task and
+ * thread group timers, then compare them with the corresponding expiration
+ # times.  Returns true if a timer has expired, else returns false.
+ */
+static inline int fastpath_timer_check(struct task_struct *tsk,
+					struct signal_struct *sig)
+{
+	struct task_cputime task_sample = {
+		.utime = tsk->utime,
+		.stime = tsk->stime,
+		.sum_exec_runtime = tsk->se.sum_exec_runtime
+	};
+	struct task_cputime group_sample;
+
+	if (task_cputime_zero(&tsk->cputime_expires) &&
+	    task_cputime_zero(&sig->cputime_expires))
+		return 0;
+	if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+		return 1;
+	thread_group_cputime(tsk, &group_sample);
+	return task_cputime_expired(&group_sample, &sig->cputime_expires);
+}
+
 /*
  * This is called from the timer interrupt handler.  The irq handler has
  * already updated our counts.  We need to check if any timers fire now.
@@ -1323,30 +1358,29 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 {
 	LIST_HEAD(firing);
 	struct k_itimer *timer, *next;
+	struct signal_struct *sig;
+	struct sighand_struct *sighand;
+	unsigned long flags;
 
 	BUG_ON(!irqs_disabled());
 
-#define UNEXPIRED(clock) \
-		(cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \
-		 cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires))
-
-	if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
-	    (tsk->it_sched_expires == 0 ||
-	     tsk->se.sum_exec_runtime < tsk->it_sched_expires))
-		return;
-
-#undef	UNEXPIRED
-
+	/* Pick up tsk->signal and make sure it's valid. */
+	sig = tsk->signal;
 	/*
-	 * Double-check with locks held.
+	 * The fast path checks that there are no expired thread or thread
+	 * group timers.  If that's so, just return.  Also check that
+	 * tsk->signal is non-NULL; this probably can't happen but cover the
+	 * possibility anyway.
 	 */
-	read_lock(&tasklist_lock);
-	if (likely(tsk->signal != NULL)) {
-		spin_lock(&tsk->sighand->siglock);
-
+	if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) {
+		return;
+	}
+	sighand = lock_task_sighand(tsk, &flags);
+	if (likely(sighand)) {
 		/*
-		 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
-		 * all the timers that are firing, and put them on the firing list.
+		 * Here we take off tsk->signal->cpu_timers[N] and
+		 * tsk->cpu_timers[N] all the timers that are firing, and
+		 * put them on the firing list.
 		 */
 		check_thread_timers(tsk, &firing);
 		check_process_timers(tsk, &firing);
@@ -1359,9 +1393,8 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 		 * that gets the timer lock before we do will give it up and
 		 * spin until we've taken care of that timer below.
 		 */
-		spin_unlock(&tsk->sighand->siglock);
 	}
-	read_unlock(&tasklist_lock);
+	unlock_task_sighand(tsk, &flags);
 
 	/*
 	 * Now that all the timers on our list have the firing flag,
@@ -1389,10 +1422,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 
 /*
  * Set one of the process-wide special case CPU timers.
- * The tasklist_lock and tsk->sighand->siglock must be held by the caller.
- * The oldval argument is null for the RLIMIT_CPU timer, where *newval is
- * absolute; non-null for ITIMER_*, where *newval is relative and we update
- * it to be absolute, *oldval is absolute and we update it to be relative.
+ * The tsk->sighand->siglock must be held by the caller.
+ * The *newval argument is relative and we update it to be absolute, *oldval
+ * is absolute and we update it to be relative.
  */
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval)
@@ -1435,13 +1467,14 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	    cputime_ge(list_first_entry(head,
 				  struct cpu_timer_list, entry)->expires.cpu,
 		       *newval)) {
-		/*
-		 * Rejigger each thread's expiry time so that one will
-		 * notice before we hit the process-cumulative expiry time.
-		 */
-		union cpu_time_count expires = { .sched = 0 };
-		expires.cpu = *newval;
-		process_timer_rebalance(tsk, clock_idx, expires, now);
+		switch (clock_idx) {
+		case CPUCLOCK_PROF:
+			tsk->signal->cputime_expires.prof_exp = *newval;
+			break;
+		case CPUCLOCK_VIRT:
+			tsk->signal->cputime_expires.virt_exp = *newval;
+			break;
+		}
 	}
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index cc1f81b50b82..c51b5d276665 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4036,6 +4036,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 
 EXPORT_PER_CPU_SYMBOL(kstat);
 
+/*
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running.
+ *
+ * Called with task_rq_lock() held on @rq.
+ */
+static unsigned long long task_delta_exec(struct task_struct *p, struct rq *rq)
+{
+	if (task_current(rq, p)) {
+		u64 delta_exec;
+
+		update_rq_clock(rq);
+		delta_exec = rq->clock - p->se.exec_start;
+		if ((s64)delta_exec > 0)
+			return delta_exec;
+	}
+	return 0;
+}
+
 /*
  * Return p->sum_exec_runtime plus any more ns on the sched_clock
  * that have not yet been banked in case the task is currently running.
@@ -4043,17 +4062,31 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 unsigned long long task_sched_runtime(struct task_struct *p)
 {
 	unsigned long flags;
-	u64 ns, delta_exec;
+	u64 ns;
 	struct rq *rq;
 
 	rq = task_rq_lock(p, &flags);
-	ns = p->se.sum_exec_runtime;
-	if (task_current(rq, p)) {
-		update_rq_clock(rq);
-		delta_exec = rq->clock - p->se.exec_start;
-		if ((s64)delta_exec > 0)
-			ns += delta_exec;
-	}
+	ns = p->se.sum_exec_runtime + task_delta_exec(p, rq);
+	task_rq_unlock(rq, &flags);
+
+	return ns;
+}
+
+/*
+ * Return sum_exec_runtime for the thread group plus any more ns on the
+ * sched_clock that have not yet been banked in case the task is currently
+ * running.
+ */
+unsigned long long thread_group_sched_runtime(struct task_struct *p)
+{
+	unsigned long flags;
+	u64 ns;
+	struct rq *rq;
+	struct task_cputime totals;
+
+	rq = task_rq_lock(p, &flags);
+	thread_group_cputime(p, &totals);
+	ns = totals.sum_exec_runtime + task_delta_exec(p, rq);
 	task_rq_unlock(rq, &flags);
 
 	return ns;
@@ -4070,6 +4103,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
 	cputime64_t tmp;
 
 	p->utime = cputime_add(p->utime, cputime);
+	account_group_user_time(p, cputime);
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
@@ -4094,6 +4128,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
 	tmp = cputime_to_cputime64(cputime);
 
 	p->utime = cputime_add(p->utime, cputime);
+	account_group_user_time(p, cputime);
 	p->gtime = cputime_add(p->gtime, cputime);
 
 	cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -4129,6 +4164,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	}
 
 	p->stime = cputime_add(p->stime, cputime);
+	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
@@ -4170,6 +4206,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 
 	if (p == rq->idle) {
 		p->stime = cputime_add(p->stime, steal);
+		account_group_system_time(p, steal);
 		if (atomic_read(&rq->nr_iowait) > 0)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 		else
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fb8994c6d4bb..99aa31acc544 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -507,6 +507,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 		struct task_struct *curtask = task_of(curr);
 
 		cpuacct_charge(curtask, delta_exec);
+		account_group_exec_runtime(curtask, delta_exec);
 	}
 }
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 552310798dad..8375e69af36a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -483,6 +483,8 @@ static void update_curr_rt(struct rq *rq)
 	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
@@ -1412,7 +1414,7 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 		p->rt.timeout++;
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		if (p->rt.timeout > next)
-			p->it_sched_expires = p->se.sum_exec_runtime;
+			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
 	}
 }
 
diff --git a/kernel/signal.c b/kernel/signal.c
index e661b01d340f..6eea5826d618 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1338,6 +1338,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 	struct siginfo info;
 	unsigned long flags;
 	struct sighand_struct *psig;
+	struct task_cputime cputime;
 	int ret = sig;
 
 	BUG_ON(sig == -1);
@@ -1368,10 +1369,9 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 
 	info.si_uid = tsk->uid;
 
-	info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
-						       tsk->signal->utime));
-	info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
-						       tsk->signal->stime));
+	thread_group_cputime(tsk, &cputime);
+	info.si_utime = cputime_to_jiffies(cputime.utime);
+	info.si_stime = cputime_to_jiffies(cputime.stime);
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)
diff --git a/kernel/sys.c b/kernel/sys.c
index 038a7bc0901d..d046a7a055c2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -853,38 +853,28 @@ asmlinkage long sys_setfsgid(gid_t gid)
 	return old_fsgid;
 }
 
+void do_sys_times(struct tms *tms)
+{
+	struct task_cputime cputime;
+	cputime_t cutime, cstime;
+
+	spin_lock_irq(&current->sighand->siglock);
+	thread_group_cputime(current, &cputime);
+	cutime = current->signal->cutime;
+	cstime = current->signal->cstime;
+	spin_unlock_irq(&current->sighand->siglock);
+	tms->tms_utime = cputime_to_clock_t(cputime.utime);
+	tms->tms_stime = cputime_to_clock_t(cputime.stime);
+	tms->tms_cutime = cputime_to_clock_t(cutime);
+	tms->tms_cstime = cputime_to_clock_t(cstime);
+}
+
 asmlinkage long sys_times(struct tms __user * tbuf)
 {
-	/*
-	 *	In the SMP world we might just be unlucky and have one of
-	 *	the times increment as we use it. Since the value is an
-	 *	atomically safe type this is just fine. Conceptually its
-	 *	as if the syscall took an instant longer to occur.
-	 */
 	if (tbuf) {
 		struct tms tmp;
-		struct task_struct *tsk = current;
-		struct task_struct *t;
-		cputime_t utime, stime, cutime, cstime;
-
-		spin_lock_irq(&tsk->sighand->siglock);
-		utime = tsk->signal->utime;
-		stime = tsk->signal->stime;
-		t = tsk;
-		do {
-			utime = cputime_add(utime, t->utime);
-			stime = cputime_add(stime, t->stime);
-			t = next_thread(t);
-		} while (t != tsk);
-
-		cutime = tsk->signal->cutime;
-		cstime = tsk->signal->cstime;
-		spin_unlock_irq(&tsk->sighand->siglock);
-
-		tmp.tms_utime = cputime_to_clock_t(utime);
-		tmp.tms_stime = cputime_to_clock_t(stime);
-		tmp.tms_cutime = cputime_to_clock_t(cutime);
-		tmp.tms_cstime = cputime_to_clock_t(cstime);
+
+		do_sys_times(&tmp);
 		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 			return -EFAULT;
 	}
@@ -1445,7 +1435,6 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 {
 	struct rlimit new_rlim, *old_rlim;
-	unsigned long it_prof_secs;
 	int retval;
 
 	if (resource >= RLIM_NLIMITS)
@@ -1491,18 +1480,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 	if (new_rlim.rlim_cur == RLIM_INFINITY)
 		goto out;
 
-	it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
-	if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
-		unsigned long rlim_cur = new_rlim.rlim_cur;
-		cputime_t cputime;
-
-		cputime = secs_to_cputime(rlim_cur);
-		read_lock(&tasklist_lock);
-		spin_lock_irq(&current->sighand->siglock);
-		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
-		spin_unlock_irq(&current->sighand->siglock);
-		read_unlock(&tasklist_lock);
-	}
+	update_rlimit_cpu(new_rlim.rlim_cur);
 out:
 	return 0;
 }
@@ -1540,11 +1518,8 @@ out:
  *
  */
 
-static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r,
-				     cputime_t *utimep, cputime_t *stimep)
+static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
 {
-	*utimep = cputime_add(*utimep, t->utime);
-	*stimep = cputime_add(*stimep, t->stime);
 	r->ru_nvcsw += t->nvcsw;
 	r->ru_nivcsw += t->nivcsw;
 	r->ru_minflt += t->min_flt;
@@ -1558,12 +1533,13 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	struct task_struct *t;
 	unsigned long flags;
 	cputime_t utime, stime;
+	struct task_cputime cputime;
 
 	memset((char *) r, 0, sizeof *r);
 	utime = stime = cputime_zero;
 
 	if (who == RUSAGE_THREAD) {
-		accumulate_thread_rusage(p, r, &utime, &stime);
+		accumulate_thread_rusage(p, r);
 		goto out;
 	}
 
@@ -1586,8 +1562,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 				break;
 
 		case RUSAGE_SELF:
-			utime = cputime_add(utime, p->signal->utime);
-			stime = cputime_add(stime, p->signal->stime);
+			thread_group_cputime(p, &cputime);
+			utime = cputime_add(utime, cputime.utime);
+			stime = cputime_add(stime, cputime.stime);
 			r->ru_nvcsw += p->signal->nvcsw;
 			r->ru_nivcsw += p->signal->nivcsw;
 			r->ru_minflt += p->signal->min_flt;
@@ -1596,7 +1573,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 			r->ru_oublock += p->signal->oublock;
 			t = p;
 			do {
-				accumulate_thread_rusage(t, r, &utime, &stime);
+				accumulate_thread_rusage(t, r);
 				t = next_thread(t);
 			} while (t != p);
 			break;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 03fc6a81ae32..69649783c266 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -75,6 +75,7 @@
 #include <linux/string.h>
 #include <linux/selinux.h>
 #include <linux/mutex.h>
+#include <linux/posix-timers.h>
 
 #include "avc.h"
 #include "objsec.h"
@@ -2321,13 +2322,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
 			initrlim = init_task.signal->rlim+i;
 			rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
 		}
-		if (current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-			/*
-			 * This will cause RLIMIT_CPU calculations
-			 * to be refigured.
-			 */
-			current->it_prof_expires = jiffies_to_cputime(1);
-		}
+		update_rlimit_cpu(rlim->rlim_cur);
 	}
 
 	/* Wake up the parent if it is waiting so that it can
-- 
GitLab


From 430b5294bd72c085c730e1e4b86580f164d976bf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Sep 2008 16:33:01 +0200
Subject: [PATCH 112/895] timers: fix itimer/many thread hang, fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix:

 kernel/fork.c:843: error: â€˜struct signal_structâ€™ has no member named â€˜sum_sched_runtimeâ€™
 kernel/irq/handle.c:117: warning: â€˜sparse_irq_lockâ€™ defined but not used

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/fork.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index a8ac2efb8e30..1181b9aac48e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -834,7 +834,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	task_io_accounting_init(&sig->ioac);
-	sig->sum_sched_runtime = 0;
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
 	INIT_LIST_HEAD(&sig->cpu_timers[2]);
-- 
GitLab


From 0a8eaa4f9b58759595a1bfe13a1295fdc25ba026 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Sep 2008 17:03:52 +0200
Subject: [PATCH 113/895] timers: fix itimer/many thread hang, fix #2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix the UP build:

In file included from arch/x86/kernel/asm-offsets_32.c:9,
                 from arch/x86/kernel/asm-offsets.c:3:
include/linux/sched.h: In function â€˜thread_group_cputime_clone_threadâ€™:
include/linux/sched.h:2272: warning: no return statement in function returning non-void
include/linux/sched.h: In function â€˜thread_group_cputime_account_userâ€™:
include/linux/sched.h:2284: error: invalid type argument of â€˜->â€™ (have â€˜struct task_cputimeâ€™)
include/linux/sched.h:2284: error: invalid type argument of â€˜->â€™ (have â€˜struct task_cputimeâ€™)
include/linux/sched.h: In function â€˜thread_group_cputime_account_systemâ€™:
include/linux/sched.h:2291: error: invalid type argument of â€˜->â€™ (have â€˜struct task_cputimeâ€™)
include/linux/sched.h:2291: error: invalid type argument of â€˜->â€™ (have â€˜struct task_cputimeâ€™)
include/linux/sched.h: In function â€˜thread_group_cputime_account_exec_runtimeâ€™:
include/linux/sched.h:2298: error: invalid type argument of â€˜->â€™ (have â€˜struct task_cputimeâ€™)
distcc[14501] ERROR: compile arch/x86/kernel/asm-offsets.c on a/30 failed
make[1]: *** [arch/x86/kernel/asm-offsets.s] Error 1

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26d7a5f2d0ba..ed355f02d329 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2251,6 +2251,7 @@ static inline void thread_group_cputime_free(struct signal_struct *sig)
 static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
 						     struct task_struct *tsk)
 {
+	return 0;
 }
 
 static inline void thread_group_cputime(struct task_struct *tsk,
@@ -2263,21 +2264,21 @@ static inline void thread_group_cputime_account_user(
 	struct thread_group_cputime *tgtimes,
 	cputime_t cputime)
 {
-	tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
+	tgtimes->totals.utime = cputime_add(tgtimes->totals.utime, cputime);
 }
 
 static inline void thread_group_cputime_account_system(
 	struct thread_group_cputime *tgtimes,
 	cputime_t cputime)
 {
-	tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
+	tgtimes->totals.stime = cputime_add(tgtimes->totals.stime, cputime);
 }
 
 static inline void thread_group_cputime_account_exec_runtime(
 	struct thread_group_cputime *tgtimes,
 	unsigned long long ns)
 {
-	tgtimes->totals->sum_exec_runtime += ns;
+	tgtimes->totals.sum_exec_runtime += ns;
 }
 
 #endif /* CONFIG_SMP */
-- 
GitLab


From 5ce73a4a5a4893a1aa4cdeed1b1a5a6de42c43b6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Sep 2008 17:11:46 +0200
Subject: [PATCH 114/895] timers: fix itimer/many thread hang, cleanups

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h     | 2 +-
 kernel/posix-cpu-timers.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ed355f02d329..7ce8d4e53565 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -430,7 +430,7 @@ struct pacct_struct {
  * @utime:		time spent in user mode, in &cputime_t units
  * @stime:		time spent in kernel mode, in &cputime_t units
  * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
- * 
+ *
  * This structure groups together three kinds of CPU time that are
  * tracked for threads and thread groups.  Most things considering
  * CPU time want to group these counts together and treat all three
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index dba1c334c3e8..9a7ea049fcdc 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -94,7 +94,7 @@ void update_rlimit_cpu(unsigned long rlim_new)
 
 	cputime = secs_to_cputime(rlim_new);
 	if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
-            cputime_lt(current->signal->it_prof_expires, cputime)) {
+	    cputime_lt(current->signal->it_prof_expires, cputime)) {
 		spin_lock_irq(&current->sighand->siglock);
 		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
 		spin_unlock_irq(&current->sighand->siglock);
@@ -1372,9 +1372,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 	 * tsk->signal is non-NULL; this probably can't happen but cover the
 	 * possibility anyway.
 	 */
-	if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) {
+	if (unlikely(!sig) || !fastpath_timer_check(tsk, sig))
 		return;
-	}
+
 	sighand = lock_task_sighand(tsk, &flags);
 	if (likely(sighand)) {
 		/*
-- 
GitLab


From 8afbc114542a6810b0a2e658abda6e911121cd22 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Mon, 25 Aug 2008 12:01:31 +0300
Subject: [PATCH 115/895] [MTD] [NAND] OMAP2: add retry after read timeout

Very occasionally, (about one in a million) read operations are
ongoing after the timeout has expired.  So, retry three times
while the ongoing bit remains set.

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/omap2.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index 34b42533f4bb..8387e05daae2 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -187,16 +187,36 @@ retry:
 			}
 		}
 	} else {
+		int retry_cnt = 0;
+
 		/* Turn interrupts off */
 		syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
 		syscfg &= ~ONENAND_SYS_CFG1_IOBE;
 		write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
 
 		timeout = jiffies + msecs_to_jiffies(20);
-		while (time_before(jiffies, timeout)) {
-			intr = read_reg(c, ONENAND_REG_INTERRUPT);
-			if (intr & ONENAND_INT_MASTER)
+		while (1) {
+			if (time_before(jiffies, timeout)) {
+				intr = read_reg(c, ONENAND_REG_INTERRUPT);
+				if (intr & ONENAND_INT_MASTER)
+					break;
+			} else {
+				/* Timeout after 20ms */
+				ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+				if (ctrl & ONENAND_CTRL_ONGO) {
+					/*
+					 * The operation seems to be still going
+					 * so give it some more time.
+					 */
+					retry_cnt += 1;
+					if (retry_cnt < 3) {
+						timeout = jiffies +
+							  msecs_to_jiffies(20);
+						continue;
+					}
+				}
 				break;
+			}
 		}
 	}
 
-- 
GitLab


From ef89a8801321e0d0665c327c9d77d602ef764c87 Mon Sep 17 00:00:00 2001
From: Karl Beldan <karl.beldan@gmail.com>
Date: Mon, 15 Sep 2008 14:37:29 +0200
Subject: [PATCH 116/895] [MTD] [NAND] nand_base.c: reset chip first

Some chips require a RESET after power-up (e.g. Micron MT29FxGxxxxx).
The first command sent is NAND_CMD_READID.
Issue a NAND_CMD_RESET in nand_scan_ident before reading the device id.
Tested with an MT29F4G08AAC.

Signed-off-by: Karl Beldan <karl.beldan@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_base.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index d303db39c48d..0a9c9cd33f96 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2318,6 +2318,12 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 	/* Select the device */
 	chip->select_chip(mtd, 0);
 
+	/*
+	 * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx)
+	 * after power-up
+	 */
+	chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+
 	/* Send the command for reading device ID */
 	chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
 
@@ -2488,6 +2494,8 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips)
 	/* Check for a chip array */
 	for (i = 1; i < maxchips; i++) {
 		chip->select_chip(mtd, i);
+		/* See comment in nand_get_flash_type for reset */
+		chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
 		/* Send the command for reading device ID */
 		chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
 		/* Read manufacturer and device IDs */
-- 
GitLab


From b3d765f5df5707e2b3676768b6877db5d8db76a2 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 17 Sep 2008 23:12:11 +0900
Subject: [PATCH 117/895] sh: Fix up fpu emu build.

The addition of the kprobes code pushed down a variable declaration,
clean it up.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/traps_32.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 862667a341fd..35b901ed6de3 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -742,15 +742,13 @@ asmlinkage void do_illegal_slot_inst(unsigned long r4, unsigned long r5,
 				struct pt_regs __regs)
 {
 	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
-	unsigned long error_code;
+	unsigned long inst;
 	struct task_struct *tsk = current;
 
 	if (kprobe_handle_illslot(regs->pc) == 0)
 		return;
 
 #ifdef CONFIG_SH_FPU_EMU
-	unsigned short inst = 0;
-
 	get_user(inst, (unsigned short *)regs->pc + 1);
 	if (!do_fpu_inst(inst, regs)) {
 		get_user(inst, (unsigned short *)regs->pc);
@@ -761,12 +759,12 @@ asmlinkage void do_illegal_slot_inst(unsigned long r4, unsigned long r5,
 	/* not a FPU inst. */
 #endif
 
-	lookup_exception_vector(error_code);
+	lookup_exception_vector(inst);
 
 	local_irq_enable();
 	CHK_REMOTE_DEBUG(regs);
 	force_sig(SIGILL, tsk);
-	die_if_no_fixup("illegal slot instruction", regs, error_code);
+	die_if_no_fixup("illegal slot instruction", regs, inst);
 }
 
 asmlinkage void do_exception_error(unsigned long r4, unsigned long r5,
-- 
GitLab


From b85641bdde340f683e5baa7688832e185548c9bd Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 17 Sep 2008 23:13:27 +0900
Subject: [PATCH 118/895] sh: Make memory hot-add and hot-remove depend on MMU.

Cleans up link numerous build issues with page migration and so on when
enabled on nommu builds.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/mm/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index f8e6dc5e056f..555ec9714b9e 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -132,11 +132,11 @@ config ARCH_SELECT_MEMORY_MODEL
 
 config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
-	depends on SPARSEMEM
+	depends on SPARSEMEM && MMU
 
 config ARCH_ENABLE_MEMORY_HOTREMOVE
 	def_bool y
-	depends on SPARSEMEM
+	depends on SPARSEMEM && MMU
 
 config ARCH_MEMORY_PROBE
 	def_bool y
-- 
GitLab


From 8a80a5e9e89cf3aacf8165dd34b40c7c3fe91b4d Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 17 Sep 2008 23:14:36 +0900
Subject: [PATCH 119/895] sh: Fix up signal_64 conflicting handle_signal()
 definition.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/signal_64.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 1d62dfef77f1..37bd381a7aa3 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -43,6 +43,10 @@
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
+static void
+handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
+		sigset_t *oldset, struct pt_regs * regs);
+
 /*
  * Note that 'init' is a special process: it doesn't get signals it doesn't
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
-- 
GitLab


From 81b669952ed5fe0d6f65f8b9a97d1fdeac93ff10 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 17 Sep 2008 23:24:02 +0900
Subject: [PATCH 120/895] sh: Consolidate struct sh_cpuinfo definitions across
 _32/_64 split.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/processor.h    | 40 ++++++++++++++++++++++++++++-
 arch/sh/include/asm/processor_32.h | 19 --------------
 arch/sh/include/asm/processor_64.h | 41 ------------------------------
 3 files changed, 39 insertions(+), 61 deletions(-)

diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 58e2be55ab93..693364a20ad7 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -3,6 +3,7 @@
 
 #include <asm/cpu-features.h>
 #include <asm/segment.h>
+#include <asm/cache.h>
 
 #ifndef __ASSEMBLY__
 /*
@@ -43,8 +44,45 @@ enum cpu_type {
 	CPU_SH_NONE
 };
 
+/*
+ * TLB information structure
+ *
+ * Defined for both I and D tlb, per-processor.
+ */
+struct tlb_info {
+	unsigned long long next;
+	unsigned long long first;
+	unsigned long long last;
+
+	unsigned int entries;
+	unsigned int step;
+
+	unsigned long flags;
+};
+
+struct sh_cpuinfo {
+	unsigned int type;
+	int cut_major, cut_minor;
+	unsigned long loops_per_jiffy;
+	unsigned long asid_cache;
+
+	struct cache_info icache;	/* Primary I-cache */
+	struct cache_info dcache;	/* Primary D-cache */
+	struct cache_info scache;	/* Secondary cache */
+
+	/* TLB info */
+	struct tlb_info itlb;
+	struct tlb_info dtlb;
+
+	unsigned long flags;
+} __attribute__ ((aligned(L1_CACHE_BYTES)));
+
+extern struct sh_cpuinfo cpu_data[];
+#define boot_cpu_data cpu_data[0]
+#define current_cpu_data cpu_data[smp_processor_id()]
+#define raw_current_cpu_data cpu_data[raw_smp_processor_id()]
+
 /* Forward decl */
-struct sh_cpuinfo;
 struct seq_operations;
 
 extern struct pt_regs fake_swapper_regs;
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index 1cd3a144c85c..a46a0207e977 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -13,7 +13,6 @@
 #include <linux/linkage.h>
 #include <asm/page.h>
 #include <asm/types.h>
-#include <asm/cache.h>
 #include <asm/ptrace.h>
 
 /*
@@ -27,24 +26,6 @@
 #define CCN_CVR		0xff000040
 #define CCN_PRR		0xff000044
 
-struct sh_cpuinfo {
-	unsigned int type;
-	int cut_major, cut_minor;
-	unsigned long loops_per_jiffy;
-	unsigned long asid_cache;
-
-	struct cache_info icache;	/* Primary I-cache */
-	struct cache_info dcache;	/* Primary D-cache */
-	struct cache_info scache;	/* Secondary cache */
-
-	unsigned long flags;
-} __attribute__ ((aligned(L1_CACHE_BYTES)));
-
-extern struct sh_cpuinfo cpu_data[];
-#define boot_cpu_data cpu_data[0]
-#define current_cpu_data cpu_data[smp_processor_id()]
-#define raw_current_cpu_data cpu_data[raw_smp_processor_id()]
-
 asmlinkage void __init sh_cpu_init(void);
 
 /*
diff --git a/arch/sh/include/asm/processor_64.h b/arch/sh/include/asm/processor_64.h
index ae19839f1d97..b0b4824dfc4c 100644
--- a/arch/sh/include/asm/processor_64.h
+++ b/arch/sh/include/asm/processor_64.h
@@ -17,7 +17,6 @@
 #include <linux/compiler.h>
 #include <asm/page.h>
 #include <asm/types.h>
-#include <asm/cache.h>
 #include <asm/ptrace.h>
 #include <cpu/registers.h>
 
@@ -36,46 +35,6 @@ __asm__("gettr	tr0, %1\n\t" \
 	: "1" (__dummy)); \
 pc; })
 
-/*
- * TLB information structure
- *
- * Defined for both I and D tlb, per-processor.
- */
-struct tlb_info {
-	unsigned long long next;
-	unsigned long long first;
-	unsigned long long last;
-
-	unsigned int entries;
-	unsigned int step;
-
-	unsigned long flags;
-};
-
-struct sh_cpuinfo {
-	enum cpu_type type;
-	unsigned long loops_per_jiffy;
-	unsigned long asid_cache;
-
-	unsigned int cpu_clock, master_clock, bus_clock, module_clock;
-
-	/* Cache info */
-	struct cache_info icache;
-	struct cache_info dcache;
-	struct cache_info scache;
-
-	/* TLB info */
-	struct tlb_info itlb;
-	struct tlb_info dtlb;
-
-	unsigned long flags;
-};
-
-extern struct sh_cpuinfo cpu_data[];
-#define boot_cpu_data cpu_data[0]
-#define current_cpu_data cpu_data[smp_processor_id()]
-#define raw_current_cpu_data cpu_data[raw_smp_processor_id()]
-
 #endif
 
 /*
-- 
GitLab


From b406efefd5246a67c691fd79871e65ce8f3f91ff Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 17 Sep 2008 23:24:59 +0900
Subject: [PATCH 121/895] sh: Fix up headers_check regression.

linux/mmzone.h isn't exported, kill it off from asm/setup.h and simply
deal with it in the places that have a dependency instead.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/setup.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/sh/include/asm/setup.h b/arch/sh/include/asm/setup.h
index 1b7856f5924c..d450bcf59ee2 100644
--- a/arch/sh/include/asm/setup.h
+++ b/arch/sh/include/asm/setup.h
@@ -1,12 +1,9 @@
 #ifndef _SH_SETUP_H
 #define _SH_SETUP_H
 
-#include <linux/mmzone.h>
-
 #define COMMAND_LINE_SIZE 256
 
 #ifdef __KERNEL__
-
 /*
  * This is set up by the setup-routine at boot-time
  */
-- 
GitLab


From 2194478157127d52338be96ac9436dc54005816a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 17 Sep 2008 23:26:44 +0900
Subject: [PATCH 122/895] sh: HAVE_IOREMAP_PROT depends on MMU.

HAVE_IOREMAP_PROT enables an unconditional reference to
generic_access_phys(), which remains undefined in the nommu case.
As there's no point in supporting this there anyways, simply fix
up the dependency.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 18a1cc8a6aa3..adef42cd507f 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -12,7 +12,7 @@ config SUPERH
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_GENERIC_DMA_COHERENT
-	select HAVE_IOREMAP_PROT
+	select HAVE_IOREMAP_PROT if MMU
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
-- 
GitLab


From 6b3141962dc82cfe1c30afdf91d564b309859cbe Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Fri, 19 Sep 2008 04:16:19 -0700
Subject: [PATCH 123/895] dmatest: properly handle duplicate DMA channels

Update the the dmatest driver so that it handles duplicate DMA channels
properly.

When a DMA client is notified of an available DMA channel, it must check if it
has already allocated resources for that channel.  If so, it should return
DMA_DUP.  This can happen, for example, if a DMA driver calls
dma_async_device_register() more than once.

Acked-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmatest.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index a08d19704743..422500c6c163 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -325,6 +325,11 @@ static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
 	struct dmatest_thread	*thread;
 	unsigned int		i;
 
+	/* Have we already been told about this channel? */
+	list_for_each_entry(dtc, &dmatest_channels, node)
+		if (dtc->chan == chan)
+			return DMA_DUP;
+
 	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_ATOMIC);
 	if (!dtc) {
 		pr_warning("dmatest: No memory for %s\n", chan->dev.bus_id);
-- 
GitLab


From 6fdb8bd47111d3f94be221082b725ec2dec1d5c7 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 19 Sep 2008 04:16:23 -0700
Subject: [PATCH 124/895] drivers/dma/dmatest.c: switch a GFP_ATOMIC to
 GFP_KERNEL

It was needlessly using the unreliable GFP_ATOMIC.

Cc: Timur Tabi <timur@freescale.com>
Acked-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmatest.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 422500c6c163..d1e381e35a9e 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -330,7 +330,7 @@ static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
 		if (dtc->chan == chan)
 			return DMA_DUP;
 
-	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_ATOMIC);
+	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
 	if (!dtc) {
 		pr_warning("dmatest: No memory for %s\n", chan->dev.bus_id);
 		return DMA_NAK;
-- 
GitLab


From b817f7e020958c8f79842076c137daa6f72eb366 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 20 Sep 2008 20:16:35 +0900
Subject: [PATCH 125/895] sh: Disable 4kB stacks when using PAGE_SIZE_64KB.

This combination triggers a divide by zero in kernel/fork.c when
calculating the initial max_threads value:

	max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);

Simply disable 4K stacks on 64kB PAGE_SIZE to work around this,
as it's not a terribly useful combination to begin with.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index 4d2d102e00d5..e6d2c8b11abd 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -82,7 +82,7 @@ config DEBUG_STACK_USAGE
 
 config 4KSTACKS
 	bool "Use 4Kb for kernel stacks instead of 8Kb"
-	depends on DEBUG_KERNEL && (MMU || BROKEN)
+	depends on DEBUG_KERNEL && (MMU || BROKEN) && !PAGE_SIZE_64KB
 	help
 	  If you say Y here the kernel will use a 4Kb stacksize for the
 	  kernel stack attached to each process/thread. This facilitates
-- 
GitLab


From c15c5f8c2bf0b00d036c5c6b67264764a6e5dffc Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 20 Sep 2008 20:21:33 +0900
Subject: [PATCH 126/895] sh: Support kernel stacks smaller than a page.

This follows the powerpc commit f6a616800e68b61807d0f7bb0d5dc70665ef8046
'[POWERPC] Fix kernel stack allocation alignment'.

SH has traditionally forced the thread order to be relative to the page
size, so there were never any situations where the same bug was
triggered by slub. Regardless, the usage of > 8kB stacks for the larger
page sizes is overkill, so we switch to using slab allocations there,
as per the powerpc change.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/thread_info.h | 32 ++++++++++++++-----------------
 arch/sh/mm/init.c                 | 29 ++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 0a894cafb1dd..f09ac4806294 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -33,20 +33,12 @@ struct thread_info {
 #define PREEMPT_ACTIVE		0x10000000
 
 #if defined(CONFIG_4KSTACKS)
-#define THREAD_SIZE_ORDER	(0)
-#elif defined(CONFIG_PAGE_SIZE_4KB)
-#define THREAD_SIZE_ORDER	(1)
-#elif defined(CONFIG_PAGE_SIZE_8KB)
-#define THREAD_SIZE_ORDER	(1)
-#elif defined(CONFIG_PAGE_SIZE_16KB)
-#define THREAD_SIZE_ORDER	(0)
-#elif defined(CONFIG_PAGE_SIZE_64KB)
-#define THREAD_SIZE_ORDER	(0)
+#define THREAD_SHIFT	12
 #else
-#error "Unknown thread size"
+#define THREAD_SHIFT	13
 #endif
 
-#define THREAD_SIZE	(PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SIZE	(1 << THREAD_SHIFT)
 #define STACK_WARN	(THREAD_SIZE >> 3)
 
 /*
@@ -94,15 +86,19 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
+/* thread information allocation */
+#if THREAD_SHIFT >= PAGE_SHIFT
+
+#define THREAD_SIZE_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
+
+#else /* THREAD_SHIFT < PAGE_SHIFT */
+
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
 
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(ti)	kzalloc(THREAD_SIZE, GFP_KERNEL)
-#else
-#define alloc_thread_info(ti)	kmalloc(THREAD_SIZE, GFP_KERNEL)
-#endif
-#define free_thread_info(ti)	kfree(ti)
+extern struct thread_info *alloc_thread_info(struct task_struct *tsk);
+extern void free_thread_info(struct thread_info *ti);
+ 
+#endif /* THREAD_SHIFT < PAGE_SHIFT */
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 31211bfdc6d8..2a53943924b2 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -265,6 +265,35 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
+#if THREAD_SHIFT < PAGE_SHIFT
+static struct kmem_cache *thread_info_cache;
+
+struct thread_info *alloc_thread_info(struct task_struct *tsk)
+{
+	struct thread_info *ti;
+
+	ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL);
+	if (unlikely(ti == NULL))
+		return NULL;
+#ifdef CONFIG_DEBUG_STACK_USAGE
+	memset(ti, 0, THREAD_SIZE);
+#endif
+	return ti;
+}
+
+void free_thread_info(struct thread_info *ti)
+{
+	kmem_cache_free(thread_info_cache, ti);
+}
+
+void thread_info_cache_init(void)
+{
+	thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+					      THREAD_SIZE, 0, NULL);
+	BUG_ON(thread_info_cache == NULL);
+}
+#endif /* THREAD_SHIFT < PAGE_SHIFT */
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size)
 {
-- 
GitLab


From 837c946aad480d4773619707f115ed4c15738f77 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 21 Sep 2008 10:24:18 +0900
Subject: [PATCH 127/895] sh: Copy in asm/sizes.h helper from ARM.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/sizes.h | 56 +++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 arch/sh/include/asm/sizes.h

diff --git a/arch/sh/include/asm/sizes.h b/arch/sh/include/asm/sizes.h
new file mode 100644
index 000000000000..503843db1565
--- /dev/null
+++ b/arch/sh/include/asm/sizes.h
@@ -0,0 +1,56 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/* DO NOT EDIT!! - this file automatically generated
+ *                 from .s file by awk -f s2h.awk
+ */
+/*  Size definitions
+ *  Copyright (C) ARM Limited 1998. All rights reserved.
+ */
+
+#ifndef __sizes_h
+#define __sizes_h                       1
+
+/* handy sizes */
+#define SZ_16				0x00000010
+#define SZ_256				0x00000100
+#define SZ_512				0x00000200
+
+#define SZ_1K                           0x00000400
+#define SZ_4K                           0x00001000
+#define SZ_8K                           0x00002000
+#define SZ_16K                          0x00004000
+#define SZ_64K                          0x00010000
+#define SZ_128K                         0x00020000
+#define SZ_256K                         0x00040000
+#define SZ_512K                         0x00080000
+
+#define SZ_1M                           0x00100000
+#define SZ_2M                           0x00200000
+#define SZ_4M                           0x00400000
+#define SZ_8M                           0x00800000
+#define SZ_16M                          0x01000000
+#define SZ_32M                          0x02000000
+#define SZ_64M                          0x04000000
+#define SZ_128M                         0x08000000
+#define SZ_256M                         0x10000000
+#define SZ_512M                         0x20000000
+
+#define SZ_1G                           0x40000000
+#define SZ_2G                           0x80000000
+
+#endif
+
+/*         END */
-- 
GitLab


From d3ea00a36da2e715217f0e31944dd220deefa38c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 21 Sep 2008 10:31:57 +0900
Subject: [PATCH 128/895] sh: Add a few more definitions to asm/sizes.h.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/sizes.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/sh/include/asm/sizes.h b/arch/sh/include/asm/sizes.h
index 503843db1565..3a1fb97770f1 100644
--- a/arch/sh/include/asm/sizes.h
+++ b/arch/sh/include/asm/sizes.h
@@ -25,6 +25,9 @@
 
 /* handy sizes */
 #define SZ_16				0x00000010
+#define SZ_32				0x00000020
+#define SZ_64				0x00000040
+#define SZ_128				0x00000080
 #define SZ_256				0x00000100
 #define SZ_512				0x00000200
 
@@ -32,6 +35,7 @@
 #define SZ_4K                           0x00001000
 #define SZ_8K                           0x00002000
 #define SZ_16K                          0x00004000
+#define SZ_32K				0x00008000
 #define SZ_64K                          0x00010000
 #define SZ_128K                         0x00020000
 #define SZ_256K                         0x00040000
@@ -42,6 +46,7 @@
 #define SZ_4M                           0x00400000
 #define SZ_8M                           0x00800000
 #define SZ_16M                          0x01000000
+#define SZ_26M				0x01a00000
 #define SZ_32M                          0x02000000
 #define SZ_64M                          0x04000000
 #define SZ_128M                         0x08000000
-- 
GitLab


From 347cd34f4b32be30d2a6d92fe4d6eac04b00a637 Mon Sep 17 00:00:00 2001
From: Luca Santini <luca.santini@spesonline.com>
Date: Sun, 21 Sep 2008 10:32:29 +0900
Subject: [PATCH 129/895] sh: edosk7760: Correct size of bootloader flash
 partition.

This is 256K instead of 1M.

[ Converted to use asm/sizes.h. -- PFM ]

Signed-off-by: Luca Santini <luca.santini@spesonline.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/board-edosk7760.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/sh/boards/board-edosk7760.c b/arch/sh/boards/board-edosk7760.c
index 4890ba7961ae..35dc0994875d 100644
--- a/arch/sh/boards/board-edosk7760.c
+++ b/arch/sh/boards/board-edosk7760.c
@@ -30,6 +30,7 @@
 #include <asm/addrspace.h>
 #include <asm/delay.h>
 #include <asm/i2c-sh7760.h>
+#include <asm/sizes.h>
 
 /* Bus state controller registers for CS4 area */
 #define BSC_CS4BCR	0xA4FD0010
@@ -46,16 +47,16 @@ static struct mtd_partition edosk7760_nor_flash_partitions[] = {
 	{
 		.name = "bootloader",
 		.offset = 0,
-		.size = (1 * 1024 * 1024), /*1MB*/
+		.size = SZ_256K,
 		.mask_flags = MTD_WRITEABLE,	/* Read-only */
 	}, {
 		.name = "kernel",
 		.offset = MTDPART_OFS_APPEND,
-		.size = (2 * 1024 * 1024), /*2MB*/
+		.size = SZ_2M,
 	}, {
 		.name = "fs",
 		.offset = MTDPART_OFS_APPEND,
-		.size = (26 * 1024 * 1024),
+		.size = SZ_26M,
 	}, {
 		.name = "other",
 		.offset = MTDPART_OFS_APPEND,
@@ -73,7 +74,7 @@ static struct resource edosk7760_nor_flash_resources[] = {
 	[0] = {
 		.name	= "NOR Flash",
 		.start	= 0x00000000,
-		.end	= (32 * 1024 * 1024) -1,	/* 32MB*/
+		.end	= 0x00000000 + SZ_32M - 1,
 		.flags	= IORESOURCE_MEM,
 	}
 };
@@ -145,7 +146,7 @@ static struct smc91x_platdata smc91x_info = {
 static struct resource smc91x_res[] = {
 	[0] = {
 		.start	= SMC_IOADDR,
-		.end	= SMC_IOADDR + 0x1f,
+		.end	= SMC_IOADDR + SZ_32 - 1,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
-- 
GitLab


From 4c59e2942e92d2d776bcd038604a5c3c1d56d3ac Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 21 Sep 2008 12:00:23 +0900
Subject: [PATCH 130/895] sh: Move lookup_exception_vector() out to
 asm/system_32.h.

There are other places where we want to have access to the trap/exception
number, so move out the lookup_exception_vector() helper. While we're at
it, refactor it slightly to return the vector instead.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/system_32.h | 25 +++++++++++++++++++++++++
 arch/sh/kernel/traps_32.c       | 16 ++++------------
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h
index f7f105627fd9..a726d5d07277 100644
--- a/arch/sh/include/asm/system_32.h
+++ b/arch/sh/include/asm/system_32.h
@@ -97,6 +97,31 @@ do {							\
 		: "=&r" (__dummy));			\
 } while (0)
 
+#ifdef CONFIG_CPU_HAS_SR_RB
+#define lookup_exception_vector()	\
+({					\
+	unsigned long _vec;		\
+					\
+	__asm__ __volatile__ (		\
+		"stc r2_bank, %0\n\t"	\
+		: "=r" (_vec)		\
+	);				\
+					\
+	_vec;				\
+})
+#else
+#define lookup_exception_vector()	\
+({					\
+	unsigned long _vec;		\
+	__asm__ __volatile__ (		\
+		"mov r4, %0\n\t"	\
+		: "=r" (_vec)		\
+	);				\
+					\
+	_vec;				\
+})
+#endif
+
 int handle_unaligned_access(opcode_t instruction, struct pt_regs *regs,
 			    struct mem_access *ma);
 
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 35b901ed6de3..b359b08a8e33 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -514,14 +514,6 @@ int handle_unaligned_access(opcode_t instruction, struct pt_regs *regs,
 	return ret;
 }
 
-#ifdef CONFIG_CPU_HAS_SR_RB
-#define lookup_exception_vector(x)	\
-	__asm__ __volatile__ ("stc r2_bank, %0\n\t" : "=r" ((x)))
-#else
-#define lookup_exception_vector(x)	\
-	__asm__ __volatile__ ("mov r4, %0\n\t" : "=r" ((x)))
-#endif
-
 /*
  * Handle various address error exceptions:
  *  - instruction address error:
@@ -545,7 +537,7 @@ asmlinkage void do_address_error(struct pt_regs *regs,
 
 	/* Intentional ifdef */
 #ifdef CONFIG_CPU_HAS_SR_RB
-	lookup_exception_vector(error_code);
+	error_code = lookup_exception_vector();
 #endif
 
 	oldfs = get_fs();
@@ -686,7 +678,7 @@ asmlinkage void do_reserved_inst(unsigned long r4, unsigned long r5,
 	}
 #endif
 
-	lookup_exception_vector(error_code);
+	error_code = lookup_exception_vector();
 
 	local_irq_enable();
 	CHK_REMOTE_DEBUG(regs);
@@ -759,7 +751,7 @@ asmlinkage void do_illegal_slot_inst(unsigned long r4, unsigned long r5,
 	/* not a FPU inst. */
 #endif
 
-	lookup_exception_vector(inst);
+	inst = lookup_exception_vector();
 
 	local_irq_enable();
 	CHK_REMOTE_DEBUG(regs);
@@ -774,7 +766,7 @@ asmlinkage void do_exception_error(unsigned long r4, unsigned long r5,
 	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
 	long ex;
 
-	lookup_exception_vector(ex);
+	ex = lookup_exception_vector();
 	die_if_kernel("exception", regs, ex);
 }
 
-- 
GitLab


From 887f1ae3bc1701604a7b5ef145e1021072675444 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 21 Sep 2008 12:06:43 +0900
Subject: [PATCH 131/895] sh: Look up the trap vector for the page fault
 notifier.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/mm/fault_32.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index 659811c179e6..ef01f45daa8a 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -21,26 +21,21 @@
 #include <asm/tlbflush.h>
 #include <asm/kgdb.h>
 
-#ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs, int trap)
 {
 	int ret = 0;
 
+#ifdef CONFIG_KPROBES
 	if (!user_mode(regs)) {
 		preempt_disable();
 		if (kprobe_running() && kprobe_fault_handler(regs, trap))
 			ret = 1;
 		preempt_enable();
 	}
+#endif
 
 	return ret;
 }
-#else
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
-	return 0;
-}
-#endif
 
 /*
  * This routine handles page faults.  It determines the address,
@@ -58,7 +53,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	int fault;
 	siginfo_t info;
 
-	if (notify_page_fault(regs, writeaccess))
+	if (notify_page_fault(regs, lookup_exception_vector()))
 		return;
 
 #ifdef CONFIG_SH_KGDB
@@ -293,7 +288,7 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
 	pte_t *pte;
 	pte_t entry;
 
-	if (notify_page_fault(regs, writeaccess))
+	if (notify_page_fault(regs, lookup_exception_vector()))
 		return 0;
 
 #ifdef CONFIG_SH_KGDB
-- 
GitLab


From 8f2baee28093ea77c7cc8da45049fd94cc76998e Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 21 Sep 2008 12:11:25 +0900
Subject: [PATCH 132/895] sh: Kill off duplicate page fault notifiers in slow
 path.

We already have hooks in place in the __do_page_fault() fast-path,
so kill them off in the slow path.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/mm/fault_32.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index ef01f45daa8a..08a08ea5d69f 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -53,13 +53,10 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	int fault;
 	siginfo_t info;
 
-	if (notify_page_fault(regs, lookup_exception_vector()))
-		return;
-
-#ifdef CONFIG_SH_KGDB
-	if (kgdb_nofault && kgdb_bus_err_hook)
-		kgdb_bus_err_hook();
-#endif
+	/*
+	 * We don't bother with any notifier callbacks here, as they are
+	 * all handled through the __do_page_fault() fast-path.
+	 */
 
 	tsk = current;
 	si_code = SEGV_MAPERR;
-- 
GitLab


From 3d58695edbfac785161bf282dc11fd42a483d6c9 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 21 Sep 2008 13:56:39 +0900
Subject: [PATCH 133/895] sh: Trivial trace_mark() instrumentation for core
 events.

This implements a few trace points across events that are deemed
interesting. This implements a number of trace points:

	- The page fault handler / TLB miss
	- IPC calls
	- Kernel thread creation

The original LTTng patch had the slow-path instrumented, which
fails to account for the vast majority of events. In general
placing this in the fast-path is not a huge performance hit, as
we don't take page faults for kernel addresses.

The other bits of interest are some of the other trap handlers, as
well as the syscall entry/exit (which is better off being handled
through the tracehook API). Most of the other trap handlers are corner
cases where alternate means of notification exist, so there is little
value in placing extra trace points in these locations.

Based on top of the points provided both by the LTTng instrumentation
patch as well as the patch shipping in the ST-Linux tree, albeit in a
stripped down form.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/process_32.c |  9 ++++--
 arch/sh/kernel/process_64.c | 10 +++++--
 arch/sh/kernel/sys_sh.c     |  2 ++
 arch/sh/mm/fault_32.c       | 57 +++++++++++++++++++++----------------
 4 files changed, 50 insertions(+), 28 deletions(-)

diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 914e543102df..7b013aa8c43f 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -169,6 +169,7 @@ __asm__(".align 5\n"
 int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 {
 	struct pt_regs regs;
+	int pid;
 
 	memset(&regs, 0, sizeof(regs));
 	regs.regs[4] = (unsigned long)arg;
@@ -178,8 +179,12 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 	regs.sr = (1 << 30);
 
 	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
-		       &regs, 0, NULL, NULL);
+	pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
+		      &regs, 0, NULL, NULL);
+
+	trace_mark(kernel_arch_kthread_create, "pid %d fn %p", pid, fn);
+
+	return pid;
 }
 
 /*
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index d0dddc438c0c..b7aa09235b51 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -396,6 +396,7 @@ ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
 int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 {
 	struct pt_regs regs;
+	int pid;
 
 	memset(&regs, 0, sizeof(regs));
 	regs.regs[2] = (unsigned long)arg;
@@ -404,8 +405,13 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 	regs.pc = (unsigned long)kernel_thread_helper;
 	regs.sr = (1 << 30);
 
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
-		       &regs, 0, NULL, NULL);
+	/* Ok, create the new process.. */
+	pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
+		      &regs, 0, NULL, NULL);
+
+	trace_mark(kernel_arch_kthread_create, "pid %d fn %p", pid, fn);
+
+	return pid;
 }
 
 /*
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index 0dfb88925add..38f098c9c72d 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -171,6 +171,8 @@ asmlinkage int sys_ipc(uint call, int first, int second,
 	version = call >> 16; /* hack for backward compatibility */
 	call &= 0xffff;
 
+	trace_mark(kernel_arch_ipc_call, "call %u first %d", call, first);
+
 	if (call <= SEMTIMEDOP)
 		switch (call) {
 		case SEMOP:
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index 08a08ea5d69f..898d477e47c1 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -15,28 +15,13 @@
 #include <linux/mm.h>
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
+#include <linux/marker.h>
 #include <asm/io_trapped.h>
 #include <asm/system.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
 #include <asm/kgdb.h>
 
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
-	int ret = 0;
-
-#ifdef CONFIG_KPROBES
-	if (!user_mode(regs)) {
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, trap))
-			ret = 1;
-		preempt_enable();
-	}
-#endif
-
-	return ret;
-}
-
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -261,6 +246,25 @@ do_sigbus:
 		goto no_context;
 }
 
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
+{
+	int ret = 0;
+
+	trace_mark(kernel_arch_trap_entry, "trap_id %d ip #p%ld",
+		   trap >> 5, instruction_pointer(regs));
+
+#ifdef CONFIG_KPROBES
+	if (!user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, trap))
+			ret = 1;
+		preempt_enable();
+	}
+#endif
+
+	return ret;
+}
+
 #ifdef CONFIG_SH_STORE_QUEUES
 /*
  * This is a special case for the SH-4 store queues, as pages for this
@@ -284,15 +288,18 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t entry;
+	int ret = 0;
 
 	if (notify_page_fault(regs, lookup_exception_vector()))
-		return 0;
+		goto out;
 
 #ifdef CONFIG_SH_KGDB
 	if (kgdb_nofault && kgdb_bus_err_hook)
 		kgdb_bus_err_hook();
 #endif
 
+	ret = 1;
+
 	/*
 	 * We don't take page faults for P1, P2, and parts of P4, these
 	 * are always mapped, whether it be due to legacy behaviour in
@@ -302,24 +309,23 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
 		pgd = pgd_offset_k(address);
 	} else {
 		if (unlikely(address >= TASK_SIZE || !current->mm))
-			return 1;
+			goto out;
 
 		pgd = pgd_offset(current->mm, address);
 	}
 
 	pud = pud_offset(pgd, address);
 	if (pud_none_or_clear_bad(pud))
-		return 1;
+		goto out;
 	pmd = pmd_offset(pud, address);
 	if (pmd_none_or_clear_bad(pmd))
-		return 1;
-
+		goto out;
 	pte = pte_offset_kernel(pmd, address);
 	entry = *pte;
 	if (unlikely(pte_none(entry) || pte_not_present(entry)))
-		return 1;
+		goto out;
 	if (unlikely(writeaccess && !pte_write(entry)))
-		return 1;
+		goto out;
 
 	if (writeaccess)
 		entry = pte_mkdirty(entry);
@@ -336,5 +342,8 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
 	set_pte(pte, entry);
 	update_mmu_cache(NULL, address, entry);
 
-	return 0;
+	ret = 0;
+out:
+	trace_mark(kernel_arch_trap_exit, MARK_NOARGS);
+	return ret;
 }
-- 
GitLab


From 9d2b1f81dd93b198e12bca8120afec4a7b609b06 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 21 Sep 2008 16:43:45 +0900
Subject: [PATCH 134/895] sh: ftrace support.

This adds support for ftrace to SH. This only includes CONFIG_FTRACE,
and does not handle dynamic ftrace presently.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig                     |  1 +
 arch/sh/boot/compressed/Makefile_32 |  5 ++++
 arch/sh/kernel/entry-common.S       | 44 +++++++++++++++++++++++++++++
 arch/sh/kernel/sh_ksyms_32.c        |  4 +++
 4 files changed, 54 insertions(+)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index adef42cd507f..38a5a9edb677 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -24,6 +24,7 @@ config SUPERH32
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_ARCH_TRACEHOOK if !SH_FPU
+	select HAVE_FTRACE
 
 config SUPERH64
 	def_bool y if CPU_SH5
diff --git a/arch/sh/boot/compressed/Makefile_32 b/arch/sh/boot/compressed/Makefile_32
index 47685f618ae7..301e6d503256 100644
--- a/arch/sh/boot/compressed/Makefile_32
+++ b/arch/sh/boot/compressed/Makefile_32
@@ -23,6 +23,11 @@ IMAGE_OFFSET	:= $(shell /bin/bash -c 'printf "0x%08x" \
 
 LIBGCC	:= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 
+ifeq ($(CONFIG_FTRACE),y)
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
+endif
+
 LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup -T $(obj)/../../kernel/vmlinux.lds
 
 $(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(LIBGCC) FORCE
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index efbb4268875e..1a5cf9dd82de 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -371,3 +371,47 @@ syscall_exit:
 #endif
 7:	.long	do_syscall_trace_enter
 8:	.long	do_syscall_trace_leave
+
+#ifdef CONFIG_FTRACE
+	.align 2
+	.globl	_mcount
+	.type	_mcount,@function
+	.globl	mcount
+	.type	mcount,@function
+_mcount:
+mcount:
+	mov.l	r4, @-r15
+	mov.l	r5, @-r15
+	mov.l	r6, @-r15
+	mov.l	r7, @-r15
+	sts.l	pr, @-r15
+
+	mov.l	@(20,r15),r4
+	sts	pr, r5
+
+	mov.l	1f, r6
+	mov.l	ftrace_stub, r7	
+	cmp/eq	r6, r7
+	bt	skip_trace
+
+	mov.l	@r6, r6
+	jsr	@r6
+	 nop
+
+skip_trace:
+
+	lds.l	@r15+, pr
+	mov.l	@r15+, r7
+	mov.l	@r15+, r6
+	mov.l	@r15+, r5
+	rts
+	 mov.l	@r15+, r4
+
+	.align 2
+1:	.long	ftrace_trace_function
+
+	.globl	ftrace_stub
+ftrace_stub:
+	rts
+	 nop
+#endif /* CONFIG_FTRACE */
diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index 6e1b1c271658..d917b7b4042b 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -16,6 +16,7 @@
 #include <asm/delay.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
+#include <asm/ftrace.h>
 
 extern int dump_fpu(struct pt_regs *, elf_fpregset_t *);
 extern struct hw_interrupt_type no_irq_type;
@@ -133,6 +134,9 @@ EXPORT_SYMBOL(__flush_purge_region);
 EXPORT_SYMBOL(clear_user_page);
 #endif
 
+#ifdef CONFIG_FTRACE
+EXPORT_SYMBOL(mcount);
+#endif
 EXPORT_SYMBOL(csum_partial);
 EXPORT_SYMBOL(csum_partial_copy_generic);
 #ifdef CONFIG_IPV6
-- 
GitLab


From 6902aa84f565153ce05f3438ecb8e445d4f468d8 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 21 Sep 2008 17:14:42 +0900
Subject: [PATCH 135/895] doc: Add remaining SH parameters to
 kernel-parameters.txt.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 Documentation/kernel-parameters.txt | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1150444a21ab..abeff96e9d46 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -796,6 +796,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			Defaults to the default architecture's huge page size
 			if not specified.
 
+	hlt		[BUGS=ARM,SH]
+
 	i8042.direct	[HW] Put keyboard port into non-translated mode
 	i8042.dumbkbd	[HW] Pretend that controller can only read data from
 			     keyboard and cannot control its state
@@ -1206,6 +1208,10 @@ and is between 256 and 4096 characters. It is defined in the file
 	mem=nopentium	[BUGS=X86-32] Disable usage of 4MB pages for kernel
 			memory.
 
+	memchunk=nn[KMG]
+			[KNL,SH] Allow user to override the default size for
+			per-device physically contiguous DMA buffers.
+
 	memmap=exactmap	[KNL,X86-32,X86_64] Enable setting of an exact
 			E820 memory map, as specified by the user.
 			Such memmap=exactmap lines can be constructed based on
@@ -1365,6 +1371,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nodisconnect	[HW,SCSI,M68K] Disables SCSI disconnects.
 
+	nodsp		[SH] Disable hardware DSP at boot time.
+
 	noefi		[X86-32,X86-64] Disable EFI runtime services support.
 
 	noexec		[IA-64]
@@ -1381,13 +1389,15 @@ and is between 256 and 4096 characters. It is defined in the file
 			noexec32=off: disable non-executable mappings
 				read implies executable mappings
 
+	nofpu		[SH] Disable hardware FPU at boot time.
+
 	nofxsr		[BUGS=X86-32] Disables x86 floating point extended
 			register save and restore. The kernel will only save
 			legacy floating-point registers on task switch.
 
 	noclflush	[BUGS=X86] Don't use the CLFLUSH instruction
 
-	nohlt		[BUGS=ARM]
+	nohlt		[BUGS=ARM,SH]
 
 	no-hlt		[BUGS=X86-32] Tells the kernel that the hlt
 			instruction doesn't work correctly and not to
-- 
GitLab


From 4b4cf7595a8bce9b4dd64c241a8cb7336ecb9489 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 21 Sep 2008 17:17:53 +0900
Subject: [PATCH 136/895] sh: Add missing asm/ftrace.h.

This was missed with the ftrace support commit.. check it in now.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/ftrace.h | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 arch/sh/include/asm/ftrace.h

diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
new file mode 100644
index 000000000000..3aed362c9463
--- /dev/null
+++ b/arch/sh/include/asm/ftrace.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_SH_FTRACE_H
+#define __ASM_SH_FTRACE_H
+
+#ifndef __ASSEMBLY__
+extern void mcount(void);
+#endif
+
+#endif /* __ASM_SH_FTRACE_H */
-- 
GitLab


From e7ab3cd251926d57ee11d7d320e8fb42c882ad22 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 21 Sep 2008 19:04:55 +0900
Subject: [PATCH 137/895] sh: Add FPU registers to regset interface.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig             |  2 +-
 arch/sh/include/asm/elf.h   |  6 +--
 arch/sh/include/asm/fpu.h   | 19 ++++++++++
 arch/sh/kernel/process_32.c | 14 ++++---
 arch/sh/kernel/ptrace_32.c  | 76 +++++++++++++++++++++++++++++++++++++
 5 files changed, 108 insertions(+), 9 deletions(-)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 38a5a9edb677..71be7ff5c771 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -23,7 +23,7 @@ config SUPERH32
 	def_bool !SUPERH64
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
-	select HAVE_ARCH_TRACEHOOK if !SH_FPU
+	select HAVE_ARCH_TRACEHOOK
 	select HAVE_FTRACE
 
 config SUPERH64
diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h
index 6b2cec80fd15..4da3a0b10911 100644
--- a/arch/sh/include/asm/elf.h
+++ b/arch/sh/include/asm/elf.h
@@ -108,10 +108,10 @@ typedef struct user_fpu_struct elf_fpregset_t;
 #define elf_check_fdpic(x)		((x)->e_flags & EF_SH_FDPIC)
 #define elf_check_const_displacement(x)	((x)->e_flags & EF_SH_PIC)
 
-#if defined(CONFIG_SUPERH32) && !defined(CONFIG_SH_FPU)
+#ifdef CONFIG_SUPERH32
 /*
- * Enable dump using regset for general purpose registers, use this as
- * the default once the FPU registers are moved over also.
+ * Enable dump using regset.
+ * This covers all of general/DSP/FPU regs.
  */
 #define CORE_DUMP_USE_REGSET
 #endif
diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h
index 91462fea1507..1d3aee04b5cc 100644
--- a/arch/sh/include/asm/fpu.h
+++ b/arch/sh/include/asm/fpu.h
@@ -30,8 +30,15 @@ static inline void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
 }
 #endif
 
+struct user_regset;
+
 extern int do_fpu_inst(unsigned short, struct pt_regs *);
 
+extern int fpregs_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf);
+
 static inline void unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs)
 {
 	preempt_disable();
@@ -50,6 +57,18 @@ static inline void clear_fpu(struct task_struct *tsk, struct pt_regs *regs)
 	preempt_enable();
 }
 
+static inline int init_fpu(struct task_struct *tsk)
+{
+	if (tsk_used_math(tsk)) {
+		if ((boot_cpu_data.flags & CPU_HAS_FPU) && tsk == current)
+			unlazy_fpu(tsk, task_pt_regs(tsk));
+		return 0;
+	}
+
+	set_stopped_child_used_math(tsk);
+	return 0;
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __ASM_SH_FPU_H */
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 7b013aa8c43f..b965f0282c7d 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -7,7 +7,11 @@
  *
  *  SuperH version:  Copyright (C) 1999, 2000  Niibe Yutaka & Kaz Kojima
  *		     Copyright (C) 2006 Lineo Solutions Inc. support SH4A UBC
- *		     Copyright (C) 2002 - 2007  Paul Mundt
+ *		     Copyright (C) 2002 - 2008  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  */
 #include <linux/module.h>
 #include <linux/mm.h>
@@ -222,10 +226,10 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
 	struct task_struct *tsk = current;
 
 	fpvalid = !!tsk_used_math(tsk);
-	if (fpvalid) {
-		unlazy_fpu(tsk, regs);
-		memcpy(fpu, &tsk->thread.fpu.hard, sizeof(*fpu));
-	}
+	if (fpvalid)
+		fpvalid = !fpregs_get(tsk, NULL, 0,
+				      sizeof(struct user_fpu_struct),
+				      fpu, NULL);
 #endif
 
 	return fpvalid;
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 0f44f2b51a60..29ca09d24ef8 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -32,6 +32,7 @@
 #include <asm/processor.h>
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
+#include <asm/fpu.h>
 
 /*
  * This routine will get a word off of the process kernel stack.
@@ -145,6 +146,54 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
+#ifdef CONFIG_SH_FPU
+int fpregs_get(struct task_struct *target,
+	       const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
+
+	if ((boot_cpu_data.flags & CPU_HAS_FPU))
+		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					   &target->thread.fpu.hard, 0, -1);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &target->thread.fpu.soft, 0, -1);
+}
+
+static int fpregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
+
+	set_stopped_child_used_math(target);
+
+	if ((boot_cpu_data.flags & CPU_HAS_FPU))
+		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					  &target->thread.fpu.hard, 0, -1);
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.fpu.soft, 0, -1);
+}
+
+static int fpregs_active(struct task_struct *target,
+			 const struct user_regset *regset)
+{
+	return tsk_used_math(target) ? regset->n : 0;
+}
+#endif
+
 #ifdef CONFIG_SH_DSP
 static int dspregs_get(struct task_struct *target,
 		       const struct user_regset *regset,
@@ -194,6 +243,9 @@ static int dspregs_active(struct task_struct *target,
  */
 enum sh_regset {
 	REGSET_GENERAL,
+#ifdef CONFIG_SH_FPU
+	REGSET_FPU,
+#endif
 #ifdef CONFIG_SH_DSP
 	REGSET_DSP,
 #endif
@@ -214,6 +266,18 @@ static const struct user_regset sh_regsets[] = {
 		.set		= genregs_set,
 	},
 
+#ifdef CONFIG_SH_FPU
+	[REGSET_FPU] = {
+		.core_note_type	= NT_PRFPREG,
+		.n		= sizeof(struct user_fpu_struct) / sizeof(long),
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= fpregs_get,
+		.set		= fpregs_set,
+		.active		= fpregs_active,
+	},
+#endif
+
 #ifdef CONFIG_SH_DSP
 	[REGSET_DSP] = {
 		.n		= sizeof(struct pt_dspregs) / sizeof(long),
@@ -304,6 +368,18 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 					     REGSET_GENERAL,
 					     0, sizeof(struct pt_regs),
 					     (const void __user *)data);
+#ifdef CONFIG_SH_FPU
+	case PTRACE_GETFPREGS:
+		return copy_regset_to_user(child, &user_sh_native_view,
+					   REGSET_FPU,
+					   0, sizeof(struct user_fpu_struct),
+					   (void __user *)data);
+	case PTRACE_SETFPREGS:
+		return copy_regset_from_user(child, &user_sh_native_view,
+					     REGSET_FPU,
+					     0, sizeof(struct user_fpu_struct),
+					     (const void __user *)data);
+#endif
 #ifdef CONFIG_SH_DSP
 	case PTRACE_GETDSPREGS:
 		return copy_regset_to_user(child, &user_sh_native_view,
-- 
GitLab


From d7cfb60c5cf904ecf1e0ae23ec178175b86f0d4a Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Fri, 19 Sep 2008 13:13:44 +0100
Subject: [PATCH 138/895] hrtimer: remove
 hrtimer_clock_base::get_softirq_time()

Peter Zijlstra noticed this 8 months ago and I just noticed
it again.

hrtimer_clock_base::get_softirq_time() is currently unused
in the entire tree. In fact, looking at the logs, it appears
as if it was never used. Remove it.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hrtimer.h | 2 --
 kernel/hrtimer.c        | 4 +---
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 6d93dce61cbb..1b079bd29c35 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -145,7 +145,6 @@ struct hrtimer_sleeper {
  * @first:		pointer to the timer node which expires first
  * @resolution:		the resolution of the clock, in nanoseconds
  * @get_time:		function to retrieve the current time of the clock
- * @get_softirq_time:	function to retrieve the current time from the softirq
  * @softirq_time:	the time when running the hrtimer queue in the softirq
  * @offset:		offset of this clock to the monotonic base
  * @reprogram:		function to reprogram the timer event
@@ -157,7 +156,6 @@ struct hrtimer_clock_base {
 	struct rb_node		*first;
 	ktime_t			resolution;
 	ktime_t			(*get_time)(void);
-	ktime_t			(*get_softirq_time)(void);
 	ktime_t			softirq_time;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t			offset;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 03ea1378c43b..4d761d50c529 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1401,9 +1401,7 @@ void hrtimer_run_queues(void)
 		if (!base->first)
 			continue;
 
-		if (base->get_softirq_time)
-			base->softirq_time = base->get_softirq_time();
-		else if (gettime) {
+		if (gettime) {
 			hrtimer_get_softirq_time(cpu_base);
 			gettime = 0;
 		}
-- 
GitLab


From b91c4996df56fcd201f85c392a1de7bc3f6641f5 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Fri, 19 Sep 2008 13:13:48 +0100
Subject: [PATCH 139/895] hrtimer: remove hrtimer_clock_base::reprogram()

hrtimer_clock_base::reprogram() also appears to never
have been used, so remove it.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hrtimer.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 1b079bd29c35..68b0196d8696 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -147,7 +147,6 @@ struct hrtimer_sleeper {
  * @get_time:		function to retrieve the current time of the clock
  * @softirq_time:	the time when running the hrtimer queue in the softirq
  * @offset:		offset of this clock to the monotonic base
- * @reprogram:		function to reprogram the timer event
  */
 struct hrtimer_clock_base {
 	struct hrtimer_cpu_base	*cpu_base;
@@ -159,9 +158,6 @@ struct hrtimer_clock_base {
 	ktime_t			softirq_time;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t			offset;
-	int			(*reprogram)(struct hrtimer *t,
-					     struct hrtimer_clock_base *b,
-					     ktime_t n);
 #endif
 };
 
-- 
GitLab


From bb34d92f643086d546b49cef680f6f305ed84414 Mon Sep 17 00:00:00 2001
From: Frank Mayhar <fmayhar@google.com>
Date: Fri, 12 Sep 2008 09:54:39 -0700
Subject: [PATCH 140/895] timers: fix itimer/many thread hang, v2

This is the second resubmission of the posix timer rework patch, posted
a few days ago.

This includes the changes from the previous resubmittion, which addressed
Oleg Nesterov's comments, removing the RCU stuff from the patch and
un-inlining the thread_group_cputime() function for SMP.

In addition, per Ingo Molnar it simplifies the UP code, consolidating much
of it with the SMP version and depending on lower-level SMP/UP handling to
take care of the differences.

It also cleans up some UP compile errors, moves the scheduler stats-related
macros into kernel/sched_stats.h, cleans up a merge error in
kernel/fork.c and has a few other minor fixes and cleanups as suggested
by Oleg and Ingo. Thanks for the review, guys.

Signed-off-by: Frank Mayhar <fmayhar@google.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel_stat.h |   1 +
 include/linux/sched.h       | 183 +-----------------------------------
 kernel/fork.c               |   5 +-
 kernel/posix-cpu-timers.c   | 153 +++++++++++++-----------------
 kernel/sched.c              |  47 ++-------
 kernel/sched_stats.h        | 136 +++++++++++++++++++++++++++
 6 files changed, 214 insertions(+), 311 deletions(-)

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index cf9f40a91c9c..cac3750cd65e 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -52,6 +52,7 @@ static inline int kstat_irqs(int irq)
 	return sum;
 }
 
+extern unsigned long long task_delta_exec(struct task_struct *);
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7ce8d4e53565..b982fb48c8f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -454,15 +454,9 @@ struct task_cputime {
  * This structure contains the version of task_cputime, above, that is
  * used for thread group CPU clock calculations.
  */
-#ifdef CONFIG_SMP
 struct thread_group_cputime {
 	struct task_cputime *totals;
 };
-#else
-struct thread_group_cputime {
-	struct task_cputime totals;
-};
-#endif
 
 /*
  * NOTE! "signal_struct" does not have it's own
@@ -2124,193 +2118,26 @@ static inline int spin_needbreak(spinlock_t *lock)
 /*
  * Thread group CPU time accounting.
  */
-#ifdef CONFIG_SMP
 
-extern int thread_group_cputime_alloc_smp(struct task_struct *);
-extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *);
+extern int thread_group_cputime_alloc(struct task_struct *);
+extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
 
 static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
 	sig->cputime.totals = NULL;
 }
 
-static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
-						    struct task_struct *new)
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
 {
 	if (curr->signal->cputime.totals)
 		return 0;
-	return thread_group_cputime_alloc_smp(curr);
+	return thread_group_cputime_alloc(curr);
 }
 
-static inline void thread_group_cputime_free(struct signal_struct *sig)
-{
-	free_percpu(sig->cputime.totals);
-}
-
-/**
- * thread_group_cputime - Sum the thread group time fields across all CPUs.
- *
- * This is a wrapper for the real routine, thread_group_cputime_smp().  See
- * that routine for details.
- */
-static inline void thread_group_cputime(
-	struct task_struct *tsk,
-	struct task_cputime *times)
-{
-	thread_group_cputime_smp(tsk, times);
-}
-
-/**
- * thread_group_cputime_account_user - Maintain utime for a thread group.
- *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @cputime:	Time value by which to increment the utime field of that
- *		structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the utime field there.
- */
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->utime = cputime_add(times->utime, cputime);
-		put_cpu_no_resched();
-	}
-}
-
-/**
- * thread_group_cputime_account_system - Maintain stime for a thread group.
- *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @cputime:	Time value by which to increment the stime field of that
- *		structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the stime field there.
- */
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->stime = cputime_add(times->stime, cputime);
-		put_cpu_no_resched();
-	}
-}
-
-/**
- * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
- *						thread group.
- *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @ns:		Time value by which to increment the sum_exec_runtime field
- *		of that structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the sum_exec_runtime field there.
- */
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->sum_exec_runtime += ns;
-		put_cpu_no_resched();
-	}
-}
-
-#else /* CONFIG_SMP */
-
-static inline void thread_group_cputime_init(struct signal_struct *sig)
-{
-	sig->cputime.totals.utime = cputime_zero;
-	sig->cputime.totals.stime = cputime_zero;
-	sig->cputime.totals.sum_exec_runtime = 0;
-}
-
-static inline int thread_group_cputime_alloc(struct task_struct *tsk)
-{
-	return 0;
-}
 
 static inline void thread_group_cputime_free(struct signal_struct *sig)
 {
-}
-
-static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
-						     struct task_struct *tsk)
-{
-	return 0;
-}
-
-static inline void thread_group_cputime(struct task_struct *tsk,
-					 struct task_cputime *cputime)
-{
-	*cputime = tsk->signal->cputime.totals;
-}
-
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals.utime = cputime_add(tgtimes->totals.utime, cputime);
-}
-
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals.stime = cputime_add(tgtimes->totals.stime, cputime);
-}
-
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
-{
-	tgtimes->totals.sum_exec_runtime += ns;
-}
-
-#endif /* CONFIG_SMP */
-
-static inline void account_group_user_time(struct task_struct *tsk,
-					    cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_user(&sig->cputime, cputime);
-}
-
-static inline void account_group_system_time(struct task_struct *tsk,
-					      cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_system(&sig->cputime, cputime);
-}
-
-static inline void account_group_exec_runtime(struct task_struct *tsk,
-					       unsigned long long ns)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+	free_percpu(sig->cputime.totals);
 }
 
 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index 1181b9aac48e..021ae012cc75 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -791,7 +791,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		ret = thread_group_cputime_clone_thread(current, tsk);
+		ret = thread_group_cputime_clone_thread(current);
 		if (likely(!ret)) {
 			atomic_inc(&current->signal->count);
 			atomic_inc(&current->signal->live);
@@ -834,9 +834,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	task_io_accounting_init(&sig->ioac);
-	INIT_LIST_HEAD(&sig->cpu_timers[0]);
-	INIT_LIST_HEAD(&sig->cpu_timers[1]);
-	INIT_LIST_HEAD(&sig->cpu_timers[2]);
 	taskstats_tgid_init(sig);
 
 	task_lock(current->group_leader);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9a7ea049fcdc..153dcb2639c3 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -7,50 +7,46 @@
 #include <linux/errno.h>
 #include <linux/math64.h>
 #include <asm/uaccess.h>
+#include <linux/kernel_stat.h>
 
-#ifdef CONFIG_SMP
 /*
- * Allocate the thread_group_cputime structure appropriately for SMP kernels
- * and fill in the current values of the fields.  Called from copy_signal()
- * via thread_group_cputime_clone_thread() when adding a second or subsequent
+ * Allocate the thread_group_cputime structure appropriately and fill in the
+ * current values of the fields.  Called from copy_signal() via
+ * thread_group_cputime_clone_thread() when adding a second or subsequent
  * thread to a thread group.  Assumes interrupts are enabled when called.
  */
-int thread_group_cputime_alloc_smp(struct task_struct *tsk)
+int thread_group_cputime_alloc(struct task_struct *tsk)
 {
 	struct signal_struct *sig = tsk->signal;
 	struct task_cputime *cputime;
 
 	/*
 	 * If we have multiple threads and we don't already have a
-	 * per-CPU task_cputime struct, allocate one and fill it in with
-	 * the times accumulated so far.
+	 * per-CPU task_cputime struct (checked in the caller), allocate
+	 * one and fill it in with the times accumulated so far.  We may
+	 * race with another thread so recheck after we pick up the sighand
+	 * lock.
 	 */
-	if (sig->cputime.totals)
-		return 0;
 	cputime = alloc_percpu(struct task_cputime);
 	if (cputime == NULL)
 		return -ENOMEM;
-	read_lock(&tasklist_lock);
 	spin_lock_irq(&tsk->sighand->siglock);
 	if (sig->cputime.totals) {
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		free_percpu(cputime);
 		return 0;
 	}
 	sig->cputime.totals = cputime;
-	cputime = per_cpu_ptr(sig->cputime.totals, get_cpu());
+	cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
 	cputime->utime = tsk->utime;
 	cputime->stime = tsk->stime;
 	cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
-	put_cpu_no_resched();
 	spin_unlock_irq(&tsk->sighand->siglock);
-	read_unlock(&tasklist_lock);
 	return 0;
 }
 
 /**
- * thread_group_cputime_smp - Sum the thread group time fields across all CPUs.
+ * thread_group_cputime - Sum the thread group time fields across all CPUs.
  *
  * @tsk:	The task we use to identify the thread group.
  * @times:	task_cputime structure in which we return the summed fields.
@@ -58,7 +54,7 @@ int thread_group_cputime_alloc_smp(struct task_struct *tsk)
  * Walk the list of CPUs to sum the per-CPU time fields in the thread group
  * time structure.
  */
-void thread_group_cputime_smp(
+void thread_group_cputime(
 	struct task_struct *tsk,
 	struct task_cputime *times)
 {
@@ -83,8 +79,6 @@ void thread_group_cputime_smp(
 	}
 }
 
-#endif /* CONFIG_SMP */
-
 /*
  * Called after updating RLIMIT_CPU to set timer expiration if necessary.
  */
@@ -300,7 +294,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = task_sched_runtime(p);
+		cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
 		break;
 	}
 	return 0;
@@ -309,16 +303,15 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 /*
  * Sample a process (thread group) clock for the given group_leader task.
  * Must be called with tasklist_lock held for reading.
- * Must be called with tasklist_lock held for reading, and p->sighand->siglock.
  */
-static int cpu_clock_sample_group_locked(unsigned int clock_idx,
-					 struct task_struct *p,
-					 union cpu_time_count *cpu)
+static int cpu_clock_sample_group(const clockid_t which_clock,
+				  struct task_struct *p,
+				  union cpu_time_count *cpu)
 {
 	struct task_cputime cputime;
 
 	thread_group_cputime(p, &cputime);
-	switch (clock_idx) {
+	switch (which_clock) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
@@ -328,29 +321,12 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx,
 		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = thread_group_sched_runtime(p);
+		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
 		break;
 	}
 	return 0;
 }
 
-/*
- * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
- */
-static int cpu_clock_sample_group(const clockid_t which_clock,
-				  struct task_struct *p,
-				  union cpu_time_count *cpu)
-{
-	int ret;
-	unsigned long flags;
-	spin_lock_irqsave(&p->sighand->siglock, flags);
-	ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p,
-					    cpu);
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
-	return ret;
-}
-
 
 int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
@@ -1324,29 +1300,37 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
  * fastpath_timer_check - POSIX CPU timers fast path.
  *
  * @tsk:	The task (thread) being checked.
- * @sig:	The signal pointer for that task.
  *
- * If there are no timers set return false.  Otherwise snapshot the task and
- * thread group timers, then compare them with the corresponding expiration
- # times.  Returns true if a timer has expired, else returns false.
+ * Check the task and thread group timers.  If both are zero (there are no
+ * timers set) return false.  Otherwise snapshot the task and thread group
+ * timers and compare them with the corresponding expiration times.  Return
+ * true if a timer has expired, else return false.
  */
-static inline int fastpath_timer_check(struct task_struct *tsk,
-					struct signal_struct *sig)
+static inline int fastpath_timer_check(struct task_struct *tsk)
 {
-	struct task_cputime task_sample = {
-		.utime = tsk->utime,
-		.stime = tsk->stime,
-		.sum_exec_runtime = tsk->se.sum_exec_runtime
-	};
-	struct task_cputime group_sample;
+	struct signal_struct *sig = tsk->signal;
 
-	if (task_cputime_zero(&tsk->cputime_expires) &&
-	    task_cputime_zero(&sig->cputime_expires))
+	if (unlikely(!sig))
 		return 0;
-	if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
-		return 1;
-	thread_group_cputime(tsk, &group_sample);
-	return task_cputime_expired(&group_sample, &sig->cputime_expires);
+
+	if (!task_cputime_zero(&tsk->cputime_expires)) {
+		struct task_cputime task_sample = {
+			.utime = tsk->utime,
+			.stime = tsk->stime,
+			.sum_exec_runtime = tsk->se.sum_exec_runtime
+		};
+
+		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+			return 1;
+	}
+	if (!task_cputime_zero(&sig->cputime_expires)) {
+		struct task_cputime group_sample;
+
+		thread_group_cputime(tsk, &group_sample);
+		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+			return 1;
+	}
+	return 0;
 }
 
 /*
@@ -1358,43 +1342,34 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 {
 	LIST_HEAD(firing);
 	struct k_itimer *timer, *next;
-	struct signal_struct *sig;
-	struct sighand_struct *sighand;
-	unsigned long flags;
 
 	BUG_ON(!irqs_disabled());
 
-	/* Pick up tsk->signal and make sure it's valid. */
-	sig = tsk->signal;
 	/*
 	 * The fast path checks that there are no expired thread or thread
-	 * group timers.  If that's so, just return.  Also check that
-	 * tsk->signal is non-NULL; this probably can't happen but cover the
-	 * possibility anyway.
+	 * group timers.  If that's so, just return.
 	 */
-	if (unlikely(!sig) || !fastpath_timer_check(tsk, sig))
+	if (!fastpath_timer_check(tsk))
 		return;
 
-	sighand = lock_task_sighand(tsk, &flags);
-	if (likely(sighand)) {
-		/*
-		 * Here we take off tsk->signal->cpu_timers[N] and
-		 * tsk->cpu_timers[N] all the timers that are firing, and
-		 * put them on the firing list.
-		 */
-		check_thread_timers(tsk, &firing);
-		check_process_timers(tsk, &firing);
+	spin_lock(&tsk->sighand->siglock);
+	/*
+	 * Here we take off tsk->signal->cpu_timers[N] and
+	 * tsk->cpu_timers[N] all the timers that are firing, and
+	 * put them on the firing list.
+	 */
+	check_thread_timers(tsk, &firing);
+	check_process_timers(tsk, &firing);
 
-		/*
-		 * We must release these locks before taking any timer's lock.
-		 * There is a potential race with timer deletion here, as the
-		 * siglock now protects our private firing list.  We have set
-		 * the firing flag in each timer, so that a deletion attempt
-		 * that gets the timer lock before we do will give it up and
-		 * spin until we've taken care of that timer below.
-		 */
-	}
-	unlock_task_sighand(tsk, &flags);
+	/*
+	 * We must release these locks before taking any timer's lock.
+	 * There is a potential race with timer deletion here, as the
+	 * siglock now protects our private firing list.  We have set
+	 * the firing flag in each timer, so that a deletion attempt
+	 * that gets the timer lock before we do will give it up and
+	 * spin until we've taken care of that timer below.
+	 */
+	spin_unlock(&tsk->sighand->siglock);
 
 	/*
 	 * Now that all the timers on our list have the firing flag,
@@ -1433,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	struct list_head *head;
 
 	BUG_ON(clock_idx == CPUCLOCK_SCHED);
-	cpu_clock_sample_group_locked(clock_idx, tsk, &now);
+	cpu_clock_sample_group(clock_idx, tsk, &now);
 
 	if (oldval) {
 		if (!cputime_eq(*oldval, cputime_zero)) {
diff --git a/kernel/sched.c b/kernel/sched.c
index c51b5d276665..260c22cc530a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4039,55 +4039,22 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 /*
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
- *
- * Called with task_rq_lock() held on @rq.
  */
-static unsigned long long task_delta_exec(struct task_struct *p, struct rq *rq)
+unsigned long long task_delta_exec(struct task_struct *p)
 {
+	struct rq *rq;
+	unsigned long flags;
+	u64 ns = 0;
+
+	rq = task_rq_lock(p, &flags);
 	if (task_current(rq, p)) {
 		u64 delta_exec;
 
 		update_rq_clock(rq);
 		delta_exec = rq->clock - p->se.exec_start;
 		if ((s64)delta_exec > 0)
-			return delta_exec;
+			ns = delta_exec;
 	}
-	return 0;
-}
-
-/*
- * Return p->sum_exec_runtime plus any more ns on the sched_clock
- * that have not yet been banked in case the task is currently running.
- */
-unsigned long long task_sched_runtime(struct task_struct *p)
-{
-	unsigned long flags;
-	u64 ns;
-	struct rq *rq;
-
-	rq = task_rq_lock(p, &flags);
-	ns = p->se.sum_exec_runtime + task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
-
-	return ns;
-}
-
-/*
- * Return sum_exec_runtime for the thread group plus any more ns on the
- * sched_clock that have not yet been banked in case the task is currently
- * running.
- */
-unsigned long long thread_group_sched_runtime(struct task_struct *p)
-{
-	unsigned long flags;
-	u64 ns;
-	struct rq *rq;
-	struct task_cputime totals;
-
-	rq = task_rq_lock(p, &flags);
-	thread_group_cputime(p, &totals);
-	ns = totals.sum_exec_runtime + task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
 
 	return ns;
 }
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8385d43987e2..d6903bd0c7a8 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -270,3 +270,139 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 #define sched_info_switch(t, next)		do { } while (0)
 #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
+/*
+ * The following are functions that support scheduler-internal time accounting.
+ * These functions are generally called at the timer tick.  None of this depends
+ * on CONFIG_SCHEDSTATS.
+ */
+
+#ifdef CONFIG_SMP
+
+/**
+ * thread_group_cputime_account_user - Maintain utime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the utime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->utime = cputime_add(times->utime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_system - Maintain stime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the stime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->stime = cputime_add(times->stime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
+ *						thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @ns:		Time value by which to increment the sum_exec_runtime field
+ *		of that structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->sum_exec_runtime += ns;
+		put_cpu_no_resched();
+	}
+}
+
+#else /* CONFIG_SMP */
+
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
+}
+
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
+}
+
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	tgtimes->totals->sum_exec_runtime += ns;
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * These are the generic time-accounting routines that use the above
+ * functions.  They are the functions actually called by the scheduler.
+ */
+static inline void account_group_user_time(struct task_struct *tsk,
+					    cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_user(&sig->cputime, cputime);
+}
+
+static inline void account_group_system_time(struct task_struct *tsk,
+					      cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_system(&sig->cputime, cputime);
+}
+
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+					       unsigned long long ns)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+}
-- 
GitLab


From 59f647c25a4f27c1e5c84710e0608b36303089f9 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Tue, 23 Sep 2008 15:55:56 -0700
Subject: [PATCH 141/895] fsldma: remove internal self-test from Freescale Elo
 DMA driver

The Freescale Elo DMA driver runs an internal self-test before registering
the channels with the DMA engine.  This self-test has a fundemental flaw in
that it calls the DMA engine's callback functions directly before the
registration.  However, the registration initializes some variables that the
callback functions uses, namely the device struct.

The code works today because there are two device structs: the one created
by the DMA engine, and one created by the Open Firmware (OF) subsystem.  The
self-test currently uses the device struct created by OF.  However, in the
future, some of the device structs created by OF will be eliminated.
This means that the self-test will only have access to the device struct
created by the DMA engine.  But this device struct isn't initialized when
the self-test runs, and this causes a kernel panic.

Since there is already a DMA test module (dmatest), the internal self-test
code is not useful anyway.  It is extremely unlikely that the test will fail
in normal usage.  It may have been helpful during development, but not any more.

Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: Li Yang <leoli@freescale.com>
Cc: Scott Wood <scottwood@freescale.com>
Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/fsldma.c | 132 -------------------------------------------
 1 file changed, 132 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index c0059ca58340..e9b263897c03 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -786,132 +786,6 @@ static void dma_do_tasklet(unsigned long data)
 	fsl_chan_ld_cleanup(fsl_chan);
 }
 
-static void fsl_dma_callback_test(void *param)
-{
-	struct fsl_dma_chan *fsl_chan = param;
-	if (fsl_chan)
-		dev_dbg(fsl_chan->dev, "selftest: callback is ok!\n");
-}
-
-static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan)
-{
-	struct dma_chan *chan;
-	int err = 0;
-	dma_addr_t dma_dest, dma_src;
-	dma_cookie_t cookie;
-	u8 *src, *dest;
-	int i;
-	size_t test_size;
-	struct dma_async_tx_descriptor *tx1, *tx2, *tx3;
-
-	test_size = 4096;
-
-	src = kmalloc(test_size * 2, GFP_KERNEL);
-	if (!src) {
-		dev_err(fsl_chan->dev,
-				"selftest: Cannot alloc memory for test!\n");
-		return -ENOMEM;
-	}
-
-	dest = src + test_size;
-
-	for (i = 0; i < test_size; i++)
-		src[i] = (u8) i;
-
-	chan = &fsl_chan->common;
-
-	if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
-		dev_err(fsl_chan->dev,
-				"selftest: Cannot alloc resources for DMA\n");
-		err = -ENODEV;
-		goto out;
-	}
-
-	/* TX 1 */
-	dma_src = dma_map_single(fsl_chan->dev, src, test_size / 2,
-				 DMA_TO_DEVICE);
-	dma_dest = dma_map_single(fsl_chan->dev, dest, test_size / 2,
-				  DMA_FROM_DEVICE);
-	tx1 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 2, 0);
-	async_tx_ack(tx1);
-
-	cookie = fsl_dma_tx_submit(tx1);
-	fsl_dma_memcpy_issue_pending(chan);
-	msleep(2);
-
-	if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
-		dev_err(fsl_chan->dev, "selftest: Time out!\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	/* Test free and re-alloc channel resources */
-	fsl_dma_free_chan_resources(chan);
-
-	if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
-		dev_err(fsl_chan->dev,
-				"selftest: Cannot alloc resources for DMA\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	/* Continue to test
-	 * TX 2
-	 */
-	dma_src = dma_map_single(fsl_chan->dev, src + test_size / 2,
-					test_size / 4, DMA_TO_DEVICE);
-	dma_dest = dma_map_single(fsl_chan->dev, dest + test_size / 2,
-					test_size / 4, DMA_FROM_DEVICE);
-	tx2 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 4, 0);
-	async_tx_ack(tx2);
-
-	/* TX 3 */
-	dma_src = dma_map_single(fsl_chan->dev, src + test_size * 3 / 4,
-					test_size / 4, DMA_TO_DEVICE);
-	dma_dest = dma_map_single(fsl_chan->dev, dest + test_size * 3 / 4,
-					test_size / 4, DMA_FROM_DEVICE);
-	tx3 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 4, 0);
-	async_tx_ack(tx3);
-
-	/* Interrupt tx test */
-	tx1 = fsl_dma_prep_interrupt(chan, 0);
-	async_tx_ack(tx1);
-	cookie = fsl_dma_tx_submit(tx1);
-
-	/* Test exchanging the prepared tx sort */
-	cookie = fsl_dma_tx_submit(tx3);
-	cookie = fsl_dma_tx_submit(tx2);
-
-	if (dma_has_cap(DMA_INTERRUPT, ((struct fsl_dma_device *)
-	    dev_get_drvdata(fsl_chan->dev->parent))->common.cap_mask)) {
-		tx3->callback = fsl_dma_callback_test;
-		tx3->callback_param = fsl_chan;
-	}
-	fsl_dma_memcpy_issue_pending(chan);
-	msleep(2);
-
-	if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
-		dev_err(fsl_chan->dev, "selftest: Time out!\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	err = memcmp(src, dest, test_size);
-	if (err) {
-		for (i = 0; (*(src + i) == *(dest + i)) && (i < test_size);
-				i++);
-		dev_err(fsl_chan->dev, "selftest: Test failed, data %d/%ld is "
-				"error! src 0x%x, dest 0x%x\n",
-				i, (long)test_size, *(src + i), *(dest + i));
-	}
-
-free_resources:
-	fsl_dma_free_chan_resources(chan);
-out:
-	kfree(src);
-	return err;
-}
-
 static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
 			const struct of_device_id *match)
 {
@@ -1000,17 +874,11 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
 		}
 	}
 
-	err = fsl_dma_self_test(new_fsl_chan);
-	if (err)
-		goto err_self_test;
-
 	dev_info(&dev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
 				match->compatible, new_fsl_chan->irq);
 
 	return 0;
 
-err_self_test:
-	free_irq(new_fsl_chan->irq, new_fsl_chan);
 err_no_irq:
 	list_del(&new_fsl_chan->common.device_node);
 err_no_chan:
-- 
GitLab


From aa88f169d6fc4305125b6917d9d5f2e08211f011 Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
Date: Wed, 24 Sep 2008 11:46:48 +0900
Subject: [PATCH 142/895] sh: ap325rxa: create CPLD data area in mtd

AP320 and AP325RXA has CPLD data in NOR Flash.
If this area erased, this board can not boot.
This patch create CPLD data area and set writeable mask bit.

Signed-off-by: Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/board-ap325rxa.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index fd1612590bf4..00e632fc0688 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -52,20 +52,33 @@ static struct platform_device smc9118_device = {
 	},
 };
 
+/*
+ * AP320 and AP325RXA has CPLD data in NOR Flash(0xA80000-0xABFFFF).
+ * If this area erased, this board can not boot.
+ */
 static struct mtd_partition ap325rxa_nor_flash_partitions[] = {
 	{
-		 .name = "uboot",
-		 .offset = 0,
-		 .size = (1 * 1024 * 1024),
-		 .mask_flags = MTD_WRITEABLE,	/* Read-only */
+		.name = "uboot",
+		.offset = 0,
+		.size = (1 * 1024 * 1024),
+		.mask_flags = MTD_WRITEABLE,	/* Read-only */
+	}, {
+		.name = "kernel",
+		.offset = MTDPART_OFS_APPEND,
+		.size = (2 * 1024 * 1024),
+	}, {
+		.name = "free-area0",
+		.offset = MTDPART_OFS_APPEND,
+		.size = ((7 * 1024 * 1024) + (512 * 1024)),
 	}, {
-		 .name = "kernel",
-		 .offset = MTDPART_OFS_APPEND,
-		 .size = (2 * 1024 * 1024),
+		.name = "CPLD-Data",
+		.offset = MTDPART_OFS_APPEND,
+		.mask_flags = MTD_WRITEABLE,	/* Read-only */
+		.size = (1024 * 128 * 2),
 	}, {
-		 .name = "other",
-		 .offset = MTDPART_OFS_APPEND,
-		 .size = MTDPART_SIZ_FULL,
+		.name = "free-area1",
+		.offset = MTDPART_OFS_APPEND,
+		.size = MTDPART_SIZ_FULL,
 	},
 };
 
-- 
GitLab


From 1bec157a1f747d038026efabebdee4c929147b63 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 24 Sep 2008 14:37:35 +0900
Subject: [PATCH 143/895] sh: Force pending restarted system calls to return
 -EINTR.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/signal_32.c |  6 ++++++
 arch/sh/kernel/signal_64.c | 18 ++++++++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index be194d0bd7d2..69d09c0b3498 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -216,6 +216,9 @@ asmlinkage int sys_sigreturn(unsigned long r4, unsigned long r5,
 	sigset_t set;
 	int r0;
 
+        /* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 
@@ -250,6 +253,9 @@ asmlinkage int sys_rt_sigreturn(unsigned long r4, unsigned long r5,
 	sigset_t set;
 	int r0;
 
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 37bd381a7aa3..0582ae4739ba 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -375,6 +375,9 @@ asmlinkage int sys_sigreturn(unsigned long r2, unsigned long r3,
 	sigset_t set;
 	long long ret;
 
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 
@@ -412,6 +415,9 @@ asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3,
 	stack_t __user st;
 	long long ret;
 
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 
@@ -539,7 +545,7 @@ static void setup_frame(int sig, struct k_sigaction *ka,
 		 * On SH5 all edited pointers are subject to NEFF
 		 */
 		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
 	} else {
 		/*
 		 * Different approach on SH5.
@@ -554,7 +560,7 @@ static void setup_frame(int sig, struct k_sigaction *ka,
 		 */
 		DEREF_REG_PR = (unsigned long) frame->retcode | 0x01;
 		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
 
 		if (__copy_to_user(frame->retcode,
 			(unsigned long long)sa_default_restorer & (~1), 16) != 0)
@@ -570,7 +576,7 @@ static void setup_frame(int sig, struct k_sigaction *ka,
 	 */
 	regs->regs[REG_SP] = (unsigned long) frame;
 	regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ?
-        		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
+		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
 	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
 
         /* FIXME:
@@ -656,7 +662,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		 * On SH5 all edited pointers are subject to NEFF
 		 */
 		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
 	} else {
 		/*
 		 * Different approach on SH5.
@@ -672,7 +678,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 
 		DEREF_REG_PR = (unsigned long) frame->retcode | 0x01;
 		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
 
 		if (__copy_to_user(frame->retcode,
 			(unsigned long long)sa_default_rt_restorer & (~1), 16) != 0)
@@ -687,7 +693,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	 */
 	regs->regs[REG_SP] = (unsigned long) frame;
 	regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ?
-        		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
+		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
 	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
 	regs->regs[REG_ARG2] = (unsigned long long)(unsigned long)(signed long)&frame->info;
 	regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->uc.uc_mcontext;
-- 
GitLab


From 4aa7361179bed905fd0f35b236a5c65db683b9e0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 22 Sep 2008 14:42:46 -0700
Subject: [PATCH 144/895] posix-timers: don't switch to ->group_leader if
 ->it_process dies

posix_timer_event() drops SIGEV_THREAD_ID and switches to ->group_leader
if send_sigqueue() fails.

This is not very useful and doesn't work reliably.  send_sigqueue() can
only fail if ->it_process is dead.  But it can die before it dequeues the
SI_TIMER signal, in that case the timer stops anyway.

Remove this code.  I guess it was needed a long ago to ensure that the
timer is not destroyed when when its creator thread dies.

Q: perhaps it makes sense to change sys_timer_settime() to return an error
if ->it_process is dead?

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index e36d5798cbff..3dfd15aecc60 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -298,6 +298,7 @@ void do_schedule_next_timer(struct siginfo *info)
 
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
+	int shared, ret;
 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->do_schedule_next_timer().
@@ -316,20 +317,10 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	timr->sigq->info.si_tid = timr->it_id;
 	timr->sigq->info.si_value = timr->it_sigev_value;
 
-	if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
-		struct task_struct *leader;
-		int ret = send_sigqueue(timr->sigq, timr->it_process, 0);
-
-		if (likely(ret >= 0))
-			return ret;
-
-		timr->it_sigev_notify = SIGEV_SIGNAL;
-		leader = timr->it_process->group_leader;
-		put_task_struct(timr->it_process);
-		timr->it_process = leader;
-	}
-
-	return send_sigqueue(timr->sigq, timr->it_process, 1);
+	shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
+	ret = send_sigqueue(timr->sigq, timr->it_process, shared);
+	/* If we failed to send the signal the timer stops. */
+	return ret > 0;
 }
 EXPORT_SYMBOL_GPL(posix_timer_event);
 
-- 
GitLab


From 918fc0372831dca73039e1577bfea0c2ce49bdb6 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 22 Sep 2008 14:42:46 -0700
Subject: [PATCH 145/895] posix-timers: always do
 get_task_struct(timer->it_process)

Change the code to get/put timer->it_process regardless of
SIGEV_THREAD_ID.  This streamlines the create/destroy paths and allows us
to simplify the usage of exit_itimers() in de_thread().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 3dfd15aecc60..bd9c931b3659 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -540,11 +540,10 @@ sys_timer_create(const clockid_t which_clock,
 			 */
 			spin_lock_irqsave(&process->sighand->siglock, flags);
 			if (!(process->flags & PF_EXITING)) {
+				get_task_struct(process);
 				new_timer->it_process = process;
 				list_add(&new_timer->list,
 					 &process->signal->posix_timers);
-				if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-					get_task_struct(process);
 				spin_unlock_irqrestore(&process->sighand->siglock, flags);
 			} else {
 				spin_unlock_irqrestore(&process->sighand->siglock, flags);
@@ -561,6 +560,7 @@ sys_timer_create(const clockid_t which_clock,
 		new_timer->it_sigev_signo = SIGALRM;
 		new_timer->it_sigev_value.sival_int = new_timer->it_id;
 		process = current->group_leader;
+		get_task_struct(process);
 		spin_lock_irqsave(&process->sighand->siglock, flags);
 		new_timer->it_process = process;
 		list_add(&new_timer->list, &process->signal->posix_timers);
@@ -853,8 +853,7 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-		put_task_struct(timer->it_process);
+	put_task_struct(timer->it_process);
 	timer->it_process = NULL;
 
 	unlock_timer(timer, flags);
@@ -881,8 +880,7 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-		put_task_struct(timer->it_process);
+	put_task_struct(timer->it_process);
 	timer->it_process = NULL;
 
 	unlock_timer(timer, flags);
-- 
GitLab


From 2cd499e38ec241691e4bce50bddc8f57e4cc9bd0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 22 Sep 2008 14:42:47 -0700
Subject: [PATCH 146/895] posix-timers: sys_timer_create: remove the buggy
 PF_EXITING check

sys_timer_create() return -EINVAL if the target thread has PF_EXITING.

This doesn't really make sense, the sub-thread can die right after unlock.
And in fact, this is just wrong.  Without SIGEV_THREAD_ID good_sigevent()
returns ->group_leader, and it is very possible that the leader is already
dead.  This is OK, we shouldn't return the error in this case.

Remove this check and the comment.  Note that the "process" was found
under tasklist_lock, it must have ->sighand != NULL.

Also, remove a couple of unneeded initializations.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 34 +++++++---------------------------
 1 file changed, 7 insertions(+), 27 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index bd9c931b3659..60b262051d1d 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -460,9 +460,9 @@ sys_timer_create(const clockid_t which_clock,
 		 timer_t __user * created_timer_id)
 {
 	int error = 0;
-	struct k_itimer *new_timer = NULL;
+	struct k_itimer *new_timer;
 	int new_timer_id;
-	struct task_struct *process = NULL;
+	struct task_struct *process;
 	unsigned long flags;
 	sigevent_t event;
 	int it_id_set = IT_ID_NOT_SET;
@@ -523,32 +523,12 @@ sys_timer_create(const clockid_t which_clock,
 
 		read_lock(&tasklist_lock);
 		if ((process = good_sigevent(&event))) {
-			/*
-			 * We may be setting up this process for another
-			 * thread.  It may be exiting.  To catch this
-			 * case the we check the PF_EXITING flag.  If
-			 * the flag is not set, the siglock will catch
-			 * him before it is too late (in exit_itimers).
-			 *
-			 * The exec case is a bit more invloved but easy
-			 * to code.  If the process is in our thread
-			 * group (and it must be or we would not allow
-			 * it here) and is doing an exec, it will cause
-			 * us to be killed.  In this case it will wait
-			 * for us to die which means we can finish this
-			 * linkage with our last gasp. I.e. no code :)
-			 */
+			get_task_struct(process);
 			spin_lock_irqsave(&process->sighand->siglock, flags);
-			if (!(process->flags & PF_EXITING)) {
-				get_task_struct(process);
-				new_timer->it_process = process;
-				list_add(&new_timer->list,
-					 &process->signal->posix_timers);
-				spin_unlock_irqrestore(&process->sighand->siglock, flags);
-			} else {
-				spin_unlock_irqrestore(&process->sighand->siglock, flags);
-				process = NULL;
-			}
+			new_timer->it_process = process;
+			list_add(&new_timer->list,
+				&process->signal->posix_timers);
+			spin_unlock_irqrestore(&process->sighand->siglock, flags);
 		}
 		read_unlock(&tasklist_lock);
 		if (!process) {
-- 
GitLab


From 36b2f046000b358b62b9d116cb10a2b1c5be5cbf Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 22 Sep 2008 14:42:48 -0700
Subject: [PATCH 147/895] posix-timers: sys_timer_create: simplify and
 s/tasklist/rcu/

- Change the code to do rcu_read_lock() instead of taking tasklist_lock,
  it is safe to get_task_struct(p) if p was found under RCU.

  However, now we must not use process's sighand/signal, they may be NULL.
  We can use current->sighand/signal instead, this "process" must belong
  to the current's thread-group.

- Factor out the common code for 2 "if (timer_event_spec)" branches, the
  !timer_event_spec case can use current too.

- use spin_lock_irq() instead of _irqsave(), kill "flags".

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 60b262051d1d..5b761903b49a 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -463,7 +463,6 @@ sys_timer_create(const clockid_t which_clock,
 	struct k_itimer *new_timer;
 	int new_timer_id;
 	struct task_struct *process;
-	unsigned long flags;
 	sigevent_t event;
 	int it_id_set = IT_ID_NOT_SET;
 
@@ -521,16 +520,11 @@ sys_timer_create(const clockid_t which_clock,
 		new_timer->it_sigev_signo = event.sigev_signo;
 		new_timer->it_sigev_value = event.sigev_value;
 
-		read_lock(&tasklist_lock);
-		if ((process = good_sigevent(&event))) {
+		rcu_read_lock();
+		process = good_sigevent(&event);
+		if (process)
 			get_task_struct(process);
-			spin_lock_irqsave(&process->sighand->siglock, flags);
-			new_timer->it_process = process;
-			list_add(&new_timer->list,
-				&process->signal->posix_timers);
-			spin_unlock_irqrestore(&process->sighand->siglock, flags);
-		}
-		read_unlock(&tasklist_lock);
+		rcu_read_unlock();
 		if (!process) {
 			error = -EINVAL;
 			goto out;
@@ -541,19 +535,18 @@ sys_timer_create(const clockid_t which_clock,
 		new_timer->it_sigev_value.sival_int = new_timer->it_id;
 		process = current->group_leader;
 		get_task_struct(process);
-		spin_lock_irqsave(&process->sighand->siglock, flags);
-		new_timer->it_process = process;
-		list_add(&new_timer->list, &process->signal->posix_timers);
-		spin_unlock_irqrestore(&process->sighand->siglock, flags);
 	}
 
+	spin_lock_irq(&current->sighand->siglock);
+	new_timer->it_process = process;
+	list_add(&new_timer->list, &current->signal->posix_timers);
+	spin_unlock_irq(&current->sighand->siglock);
  	/*
 	 * In the case of the timer belonging to another task, after
 	 * the task is unlocked, the timer is owned by the other task
 	 * and may cease to exist at any time.  Don't use or modify
 	 * new_timer after the unlock call.
 	 */
-
 out:
 	if (error)
 		release_posix_timer(new_timer, it_id_set);
-- 
GitLab


From 717835d94d3e3d343a302df0a3cb9405887c3e2a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 22 Sep 2008 14:42:49 -0700
Subject: [PATCH 148/895] posix-timers: move the initialization of timer->sigq
 from send to create path

posix_timer_event() always populates timer->sigq with the same numbers,
move this code into sys_timer_create().

Note that with this patch we can kill it_sigev_signo and it_sigev_value.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 5b761903b49a..c459b29efdd4 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -312,11 +312,6 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	 */
 	timr->sigq->info.si_sys_private = si_private;
 
-	timr->sigq->info.si_signo = timr->it_sigev_signo;
-	timr->sigq->info.si_code = SI_TIMER;
-	timr->sigq->info.si_tid = timr->it_id;
-	timr->sigq->info.si_value = timr->it_sigev_value;
-
 	shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
 	ret = send_sigqueue(timr->sigq, timr->it_process, shared);
 	/* If we failed to send the signal the timer stops. */
@@ -537,6 +532,11 @@ sys_timer_create(const clockid_t which_clock,
 		get_task_struct(process);
 	}
 
+	new_timer->sigq->info.si_code  = SI_TIMER;
+	new_timer->sigq->info.si_tid   = new_timer->it_id;
+	new_timer->sigq->info.si_signo = new_timer->it_sigev_signo;
+	new_timer->sigq->info.si_value = new_timer->it_sigev_value;
+
 	spin_lock_irq(&current->sighand->siglock);
 	new_timer->it_process = process;
 	list_add(&new_timer->list, &current->signal->posix_timers);
-- 
GitLab


From ef864c958801768fb28bd3603cd0b098b394671c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 22 Sep 2008 14:42:49 -0700
Subject: [PATCH 149/895] posix-timers: sys_timer_create: cleanup the error
 handling

Cleanup.

- sys_timer_create() is big and complicated. The code above the "out:"
  label relies on the fact that "error" must be == 0. This is not very
  robust, make the code more explicit. Remove the unneeded initialization
  of error.

- If idr_get_new() succeeds (as it normally should), we check the returned
  value twice. Move the "-EAGAIN" check under "if (error)".

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index c459b29efdd4..7be385fe4eca 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -454,9 +454,8 @@ sys_timer_create(const clockid_t which_clock,
 		 struct sigevent __user *timer_event_spec,
 		 timer_t __user * created_timer_id)
 {
-	int error = 0;
 	struct k_itimer *new_timer;
-	int new_timer_id;
+	int error, new_timer_id;
 	struct task_struct *process;
 	sigevent_t event;
 	int it_id_set = IT_ID_NOT_SET;
@@ -478,9 +477,9 @@ sys_timer_create(const clockid_t which_clock,
 	error = idr_get_new(&posix_timers_id, (void *) new_timer,
 			    &new_timer_id);
 	spin_unlock_irq(&idr_lock);
-	if (error == -EAGAIN)
-		goto retry;
-	else if (error) {
+	if (error) {
+		if (error == -EAGAIN)
+			goto retry;
 		/*
 		 * Weird looking, but we return EAGAIN if the IDR is
 		 * full (proper POSIX return value for this)
@@ -541,6 +540,8 @@ sys_timer_create(const clockid_t which_clock,
 	new_timer->it_process = process;
 	list_add(&new_timer->list, &current->signal->posix_timers);
 	spin_unlock_irq(&current->sighand->siglock);
+
+	return 0;
  	/*
 	 * In the case of the timer belonging to another task, after
 	 * the task is unlocked, the timer is owned by the other task
@@ -548,9 +549,7 @@ sys_timer_create(const clockid_t which_clock,
 	 * new_timer after the unlock call.
 	 */
 out:
-	if (error)
-		release_posix_timer(new_timer, it_id_set);
-
+	release_posix_timer(new_timer, it_id_set);
 	return error;
 }
 
-- 
GitLab


From 5a9fa73072854981a5c05eb7ba18a96d49c2804f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 22 Sep 2008 14:42:50 -0700
Subject: [PATCH 150/895] posix-timers: kill ->it_sigev_signo and
 ->it_sigev_value

With the recent changes ->it_sigev_signo and ->it_sigev_value are only
used in sys_timer_create(), kill them.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/posix-timers.h |  2 --
 kernel/posix-timers.c        | 17 +++++++----------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index f9d8e9e94e9b..a7c721355549 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -45,8 +45,6 @@ struct k_itimer {
 	int it_requeue_pending;		/* waiting to requeue this timer */
 #define REQUEUE_PENDING 1
 	int it_sigev_notify;		/* notify word of sigevent struct */
-	int it_sigev_signo;		/* signo word of sigevent struct */
-	sigval_t it_sigev_value;	/* value word of sigevent struct */
 	struct task_struct *it_process;	/* process to send signal to */
 	struct sigqueue *sigq;		/* signal queue entry. */
 	union {
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 7be385fe4eca..3eff47b0d8d5 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -510,10 +510,6 @@ sys_timer_create(const clockid_t which_clock,
 			error = -EFAULT;
 			goto out;
 		}
-		new_timer->it_sigev_notify = event.sigev_notify;
-		new_timer->it_sigev_signo = event.sigev_signo;
-		new_timer->it_sigev_value = event.sigev_value;
-
 		rcu_read_lock();
 		process = good_sigevent(&event);
 		if (process)
@@ -524,17 +520,18 @@ sys_timer_create(const clockid_t which_clock,
 			goto out;
 		}
 	} else {
-		new_timer->it_sigev_notify = SIGEV_SIGNAL;
-		new_timer->it_sigev_signo = SIGALRM;
-		new_timer->it_sigev_value.sival_int = new_timer->it_id;
+		event.sigev_notify = SIGEV_SIGNAL;
+		event.sigev_signo = SIGALRM;
+		event.sigev_value.sival_int = new_timer->it_id;
 		process = current->group_leader;
 		get_task_struct(process);
 	}
 
-	new_timer->sigq->info.si_code  = SI_TIMER;
+	new_timer->it_sigev_notify     = event.sigev_notify;
+	new_timer->sigq->info.si_signo = event.sigev_signo;
+	new_timer->sigq->info.si_value = event.sigev_value;
 	new_timer->sigq->info.si_tid   = new_timer->it_id;
-	new_timer->sigq->info.si_signo = new_timer->it_sigev_signo;
-	new_timer->sigq->info.si_value = new_timer->it_sigev_value;
+	new_timer->sigq->info.si_code  = SI_TIMER;
 
 	spin_lock_irq(&current->sighand->siglock);
 	new_timer->it_process = process;
-- 
GitLab


From 5a51b713ccf8835d5adf7217e2f86eb12b1ca851 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 22 Sep 2008 14:42:51 -0700
Subject: [PATCH 151/895] posix-timers: lock_timer: kill the bogus ->it_id
 check

lock_timer() checks that the timer found by idr_find(timer_id) has ->it_id
== timer_id.  This buys nothing.  This check can fail only if
sys_timer_create() unlocked idr_lock after idr_get_new(), but didn't set
->it_id = new_timer_id yet.  But in that case ->it_process == NULL so
lock_timer() can't succeed anyway.

Also remove a couple of unneeded typecasts.

Note that with or without this patch we have a small problem.
sys_timer_create() doesn't ensure that the result of setting (say)
->it_sigev_notify must be visible if lock_timer() succeeds.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 3eff47b0d8d5..7185f05d53a9 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -474,8 +474,7 @@ sys_timer_create(const clockid_t which_clock,
 		goto out;
 	}
 	spin_lock_irq(&idr_lock);
-	error = idr_get_new(&posix_timers_id, (void *) new_timer,
-			    &new_timer_id);
+	error = idr_get_new(&posix_timers_id, new_timer, &new_timer_id);
 	spin_unlock_irq(&idr_lock);
 	if (error) {
 		if (error == -EAGAIN)
@@ -567,12 +566,12 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
 	 */
 
 	spin_lock_irqsave(&idr_lock, *flags);
-	timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id);
+	timr = idr_find(&posix_timers_id, (int) timer_id);
 	if (timr) {
 		spin_lock(&timr->it_lock);
 
-		if ((timr->it_id != timer_id) || !(timr->it_process) ||
-				!same_thread_group(timr->it_process, current)) {
+		if (!timr->it_process ||
+		    !same_thread_group(timr->it_process, current)) {
 			spin_unlock(&timr->it_lock);
 			spin_unlock_irqrestore(&idr_lock, *flags);
 			timr = NULL;
-- 
GitLab


From 31d9284569e38fb97117497af3e8047a6a3c86f0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 22 Sep 2008 14:42:51 -0700
Subject: [PATCH 152/895] posix-timers: lock_timer: make it readable

Cleanup.  Imho makes the code much more understandable.  At least this
patch lessens both the source and compiled code.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 7185f05d53a9..95451bf7d2eb 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -556,7 +556,7 @@ out:
  * the find to the timer lock.  To avoid a dead lock, the timer id MUST
  * be release with out holding the timer lock.
  */
-static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
+static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags)
 {
 	struct k_itimer *timr;
 	/*
@@ -564,23 +564,20 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
 	 * flags part over to the timer lock.  Must not let interrupts in
 	 * while we are moving the lock.
 	 */
-
 	spin_lock_irqsave(&idr_lock, *flags);
-	timr = idr_find(&posix_timers_id, (int) timer_id);
+	timr = idr_find(&posix_timers_id, (int)timer_id);
 	if (timr) {
 		spin_lock(&timr->it_lock);
-
-		if (!timr->it_process ||
-		    !same_thread_group(timr->it_process, current)) {
-			spin_unlock(&timr->it_lock);
-			spin_unlock_irqrestore(&idr_lock, *flags);
-			timr = NULL;
-		} else
+		if (timr->it_process &&
+		    same_thread_group(timr->it_process, current)) {
 			spin_unlock(&idr_lock);
-	} else
-		spin_unlock_irqrestore(&idr_lock, *flags);
+			return timr;
+		}
+		spin_unlock(&timr->it_lock);
+	}
+	spin_unlock_irqrestore(&idr_lock, *flags);
 
-	return timr;
+	return NULL;
 }
 
 /*
-- 
GitLab


From 1b02469088ac7a13d7e622b618b7410d0f1ce5ec Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Mon, 22 Sep 2008 14:42:43 -0700
Subject: [PATCH 153/895] hrtimer: reorder struct hrtimer to save 8 bytes on
 64bit builds

reorder struct hrtimer to save 8 bytes on 64 bit builds when
CONFIG_TIMER_STATS selected.  (also removes 8 bytes from signal_struct)

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 68b0196d8696..8730b60c9432 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -115,12 +115,12 @@ struct hrtimer {
 	enum hrtimer_restart		(*function)(struct hrtimer *);
 	struct hrtimer_clock_base	*base;
 	unsigned long			state;
-	enum hrtimer_cb_mode		cb_mode;
 	struct list_head		cb_entry;
+	enum hrtimer_cb_mode		cb_mode;
 #ifdef CONFIG_TIMER_STATS
+	int				start_pid;
 	void				*start_site;
 	char				start_comm[16];
-	int				start_pid;
 #endif
 };
 
-- 
GitLab


From eb3f938fd6292dc79f43a5fe14784b044776e9f0 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Mon, 22 Sep 2008 14:42:40 -0700
Subject: [PATCH 154/895] ntp: let update_persistent_clock() sleep

This is a change that makes the 11-minute RTC update be run in the process
context.  This is so that update_persistent_clock() can sleep, which may
be required for certain types of RTC hardware -- most notably I2C devices.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: David Brownell <david-b@pacbell.net>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/ntp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index c6921aa1a42a..450a45cb01c1 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -10,13 +10,13 @@
 
 #include <linux/mm.h>
 #include <linux/time.h>
-#include <linux/timer.h>
 #include <linux/timex.h>
 #include <linux/jiffies.h>
 #include <linux/hrtimer.h>
 #include <linux/capability.h>
 #include <linux/math64.h>
 #include <linux/clocksource.h>
+#include <linux/workqueue.h>
 #include <asm/timex.h>
 
 /*
@@ -218,11 +218,11 @@ void second_overflow(void)
 /* Disable the cmos update - used by virtualization and embedded */
 int no_sync_cmos_clock  __read_mostly;
 
-static void sync_cmos_clock(unsigned long dummy);
+static void sync_cmos_clock(struct work_struct *work);
 
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
+static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
 
-static void sync_cmos_clock(unsigned long dummy)
+static void sync_cmos_clock(struct work_struct *work)
 {
 	struct timespec now, next;
 	int fail = 1;
@@ -258,13 +258,13 @@ static void sync_cmos_clock(unsigned long dummy)
 		next.tv_sec++;
 		next.tv_nsec -= NSEC_PER_SEC;
 	}
-	mod_timer(&sync_cmos_timer, jiffies + timespec_to_jiffies(&next));
+	schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
 }
 
 static void notify_cmos_timer(void)
 {
 	if (!no_sync_cmos_clock)
-		mod_timer(&sync_cmos_timer, jiffies + 1);
+		schedule_delayed_work(&sync_cmos_work, 0);
 }
 
 #else
-- 
GitLab


From 5cd1c9c5cf30d4b33df3d3f74d8142f278d536b7 Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Mon, 22 Sep 2008 14:42:43 -0700
Subject: [PATCH 155/895] timekeeping: fix rounding problem during clock update

Due to a rounding problem during a clock update it's possible for readers
to observe the clock jumping back by 1nsec.  The following simplified
example demonstrates the problem:

cycle	xtime
0	0
1000	999999.6
2000	1999999.2
3000	2999998.8
...

1500 =	1499999.4
=	0.0 + 1499999.4
=	999999.6 + 499999.8

When reading the clock only the full nanosecond part is used, while
timekeeping internally keeps nanosecond fractions.  If the clock is now
updated at cycle 1500 here, a nanosecond is missing due to the truncation.

The simple fix is to round up the xtime value during the update, this also
changes the distance to the reference time, but the adjustment will
automatically take care that it stays under control.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timekeeping.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e91c29f961c9..5ecbfc39a268 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -454,7 +454,7 @@ void update_wall_time(void)
 #else
 	offset = clock->cycle_interval;
 #endif
-	clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
+	clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift;
 
 	/* normally this loop will run just once, however in the
 	 * case of lost or late ticks, it will accumulate correctly.
@@ -479,9 +479,12 @@ void update_wall_time(void)
 	/* correct the clock when NTP error is too big */
 	clocksource_adjust(offset);
 
-	/* store full nanoseconds into xtime */
-	xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift;
+	/* store full nanoseconds into xtime after rounding it up and
+	 * add the remainder to the error difference.
+	 */
+	xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1;
 	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
+	clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift);
 
 	update_xtime_cache(cyc2ns(clock, offset));
 
-- 
GitLab


From d40e944c25fb4642adb2a4c580a48218a9f3f824 Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Mon, 22 Sep 2008 14:42:44 -0700
Subject: [PATCH 156/895] ntp: improve adjtimex frequency rounding

Change PPM_SCALE_INV_SHIFT so that it doesn't throw away any input bits
(19 is the amount of the factor 2 in PPM_SCALE), the output frequency
can then be calculated back to its input value, as the inverse divide
produce a slightly larger value, which is then correctly rounded by the
final shift.

Reported-by: Martin Ziegler <ziegler@uni-freiburg.de>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Cc: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timex.h | 2 +-
 kernel/time/ntp.c     | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/include/linux/timex.h b/include/linux/timex.h
index c00bcdd3ae42..9007313b5b71 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -82,7 +82,7 @@
  */
 #define SHIFT_USEC 16		/* frequency offset scale (shift) */
 #define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
-#define PPM_SCALE_INV_SHIFT 20
+#define PPM_SCALE_INV_SHIFT 19
 #define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
 		       PPM_SCALE + 1)
 
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 450a45cb01c1..ddb0465a6baa 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -406,9 +406,8 @@ adj_done:
 	if (time_status & (STA_UNSYNC|STA_CLOCKERR))
 		result = TIME_ERROR;
 
-	txc->freq	   = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) *
-					 (s64)PPM_SCALE_INV,
-					 NTP_SCALE_SHIFT);
+	txc->freq	   = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
+					 (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT);
 	txc->maxerror	   = time_maxerror;
 	txc->esterror	   = time_esterror;
 	txc->status	   = time_status;
-- 
GitLab


From 6a9037887ccea92152b034edeb15d453d1a98555 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Mon, 22 Sep 2008 14:06:01 -0700
Subject: [PATCH 157/895] power_supply: fix dependency of tosa_battery

tosa_battery should also depend on wm97xx_ts as it uses dac-accessing
functions from that module.

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index b2bd104b9869..ae095a498db4 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -51,7 +51,7 @@ config BATTERY_OLPC
 
 config BATTERY_TOSA
 	tristate "Sharp SL-6000 (tosa) battery"
-	depends on MACH_TOSA && MFD_TC6393XB
+	depends on MACH_TOSA && MFD_TC6393XB && TOUCHSCREEN_WM97XX
 	help
 	  Say Y to enable support for the battery on the Sharp Zaurus
 	  SL-6000 (tosa) models.
-- 
GitLab


From 8aef7e8f8de2d900da892085edbf14ea35fe6881 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 22 Sep 2008 14:53:50 -0700
Subject: [PATCH 158/895] bq27x00_battery: use unaligned access helper

Remove hand-rolled get_unaligned_be16, this points to a possible bug as
bq27x00_read does another endian byteswap which sparse notices:

drivers/power/bq27x00_battery.c:81:14: warning: cast to restricted __be16
Which should probably be checked.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: Rodolfo Giometti <giometti@linux.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/bq27x00_battery.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/bq27x00_battery.c b/drivers/power/bq27x00_battery.c
index 62d4948e8206..0c056fcc01ce 100644
--- a/drivers/power/bq27x00_battery.c
+++ b/drivers/power/bq27x00_battery.c
@@ -23,8 +23,8 @@
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/idr.h>
-
 #include <linux/i2c.h>
+#include <asm/unaligned.h>
 
 #define DRIVER_VERSION			"1.0.0"
 
@@ -33,7 +33,6 @@
 #define BQ27x00_REG_RSOC		0x0B /* Relative State-of-Charge */
 #define BQ27x00_REG_AI			0x14
 #define BQ27x00_REG_FLAGS		0x0A
-#define HIGH_BYTE(A)			((A) << 8)
 
 /* If the system has several batteries we need a different name for each
  * of them...
@@ -239,7 +238,7 @@ static int bq27200_read(u8 reg, int *rt_value, int b_single,
 		err = i2c_transfer(client->adapter, msg, 1);
 		if (err >= 0) {
 			if (!b_single)
-				*rt_value = data[1] | HIGH_BYTE(data[0]);
+				*rt_value = get_unaligned_be16(data);
 			else
 				*rt_value = data[0];
 
-- 
GitLab


From 0e4a008a4f389b468cfe8b58c7d77882a6e25695 Mon Sep 17 00:00:00 2001
From: Julien Brunel <brunel@diku.dk>
Date: Fri, 26 Sep 2008 15:27:25 +0200
Subject: [PATCH 159/895] UBI: fix IS_ERR test

In case of error, the function add_volume returns an ERR pointer. The
result of IS_ERR, which is supposed to be used in a test as it is, is
here checked to be less than zero, which seems odd. We suggest to
replace this test by a simple IS_ERR test.

A simplified version of the semantic match that finds this problem is
as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@def0@
expression x;
position p0;
@@
x@p0 = add_volume(...)

@protected@
expression def0.x,E;
position def0.p0;
position p;
statement S;
@@
x@p0
... when != x = E
if (!IS_ERR(x) && ...) {<... x@p ...>} else S

@unprotected@
expression def0.x,E;
identifier fld;
position def0.p0;
position p != protected.p;
@@
x@p0
... when != x = E
* x@p->fld
// </smpl>

Signed-off-by:  Julien Brunel <brunel@diku.dk>
Signed-off-by:  Julia Lawall <julia@diku.dk>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 967bb4406df9..4f2daa5bbecf 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -387,7 +387,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
 		pnum, vol_id, lnum, ec, sqnum, bitflips);
 
 	sv = add_volume(si, vol_id, pnum, vid_hdr);
-	if (IS_ERR(sv) < 0)
+	if (IS_ERR(sv))
 		return PTR_ERR(sv);
 
 	if (si->max_sqnum < sqnum)
-- 
GitLab


From 77cd62e8082b9743b59ee1946a4c3ee2e3cd2bce Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Fri, 26 Sep 2008 17:00:11 -0700
Subject: [PATCH 160/895] fsldma: allow Freescale Elo DMA driver to be compiled
 as a module

Modify the Freescale Elo / Elo Plus DMA driver so that it can be compiled as
a module.

The primary change is to stop treating the DMA controller as a bus, and the
DMA channels as devices on the bus.  This is because the Open Firmware (OF)
kernel code does not allow busses to be removed, so although we can call
of_platform_bus_probe() to probe the DMA channels, there is no
of_platform_bus_remove().  Instead, the DMA channels are manually probed,
similar to what fsl_elbc_nand.c does.

Cc: Scott Wood <scottwood@freescale.com>
Acked-by: Li Yang <leoli@freescale.com>
Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/Kconfig  |  10 ++--
 drivers/dma/fsldma.c | 138 +++++++++++++++++++++++++++----------------
 drivers/dma/fsldma.h |   1 +
 3 files changed, 94 insertions(+), 55 deletions(-)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index cd303901eb5b..904e57558bb5 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -48,13 +48,13 @@ config DW_DMAC
 	  can be integrated in chips such as the Atmel AT32ap7000.
 
 config FSL_DMA
-	bool "Freescale MPC85xx/MPC83xx DMA support"
-	depends on PPC
+	tristate "Freescale Elo and Elo Plus DMA support"
+	depends on FSL_SOC
 	select DMA_ENGINE
 	---help---
-	  Enable support for the Freescale DMA engine. Now, it support
-	  MPC8560/40, MPC8555, MPC8548 and MPC8641 processors.
-	  The MPC8349, MPC8360 is also supported.
+	  Enable support for the Freescale Elo and Elo Plus DMA controllers.
+	  The Elo is the DMA controller on some 82xx and 83xx parts, and the
+	  Elo Plus is the DMA controller on 85xx and 86xx parts.
 
 config MV_XOR
 	bool "Marvell XOR engine support"
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index e9b263897c03..0b95dcce447e 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -370,7 +370,10 @@ static int fsl_dma_alloc_chan_resources(struct dma_chan *chan,
 					struct dma_client *client)
 {
 	struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
-	LIST_HEAD(tmp_list);
+
+	/* Has this channel already been allocated? */
+	if (fsl_chan->desc_pool)
+		return 1;
 
 	/* We need the descriptor to be aligned to 32bytes
 	 * for meeting FSL DMA specification requirement.
@@ -410,6 +413,8 @@ static void fsl_dma_free_chan_resources(struct dma_chan *chan)
 	}
 	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
 	dma_pool_destroy(fsl_chan->desc_pool);
+
+	fsl_chan->desc_pool = NULL;
 }
 
 static struct dma_async_tx_descriptor *
@@ -786,33 +791,29 @@ static void dma_do_tasklet(unsigned long data)
 	fsl_chan_ld_cleanup(fsl_chan);
 }
 
-static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
-			const struct of_device_id *match)
+static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
+	struct device_node *node, u32 feature, const char *compatible)
 {
-	struct fsl_dma_device *fdev;
 	struct fsl_dma_chan *new_fsl_chan;
 	int err;
 
-	fdev = dev_get_drvdata(dev->dev.parent);
-	BUG_ON(!fdev);
-
 	/* alloc channel */
 	new_fsl_chan = kzalloc(sizeof(struct fsl_dma_chan), GFP_KERNEL);
 	if (!new_fsl_chan) {
-		dev_err(&dev->dev, "No free memory for allocating "
+		dev_err(fdev->dev, "No free memory for allocating "
 				"dma channels!\n");
 		return -ENOMEM;
 	}
 
 	/* get dma channel register base */
-	err = of_address_to_resource(dev->node, 0, &new_fsl_chan->reg);
+	err = of_address_to_resource(node, 0, &new_fsl_chan->reg);
 	if (err) {
-		dev_err(&dev->dev, "Can't get %s property 'reg'\n",
-				dev->node->full_name);
+		dev_err(fdev->dev, "Can't get %s property 'reg'\n",
+				node->full_name);
 		goto err_no_reg;
 	}
 
-	new_fsl_chan->feature = *(u32 *)match->data;
+	new_fsl_chan->feature = feature;
 
 	if (!fdev->feature)
 		fdev->feature = new_fsl_chan->feature;
@@ -822,13 +823,13 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
 	 */
 	WARN_ON(fdev->feature != new_fsl_chan->feature);
 
-	new_fsl_chan->dev = &dev->dev;
+	new_fsl_chan->dev = &new_fsl_chan->common.dev;
 	new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start,
 			new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
 
 	new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7;
 	if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) {
-		dev_err(&dev->dev, "There is no %d channel!\n",
+		dev_err(fdev->dev, "There is no %d channel!\n",
 				new_fsl_chan->id);
 		err = -EINVAL;
 		goto err_no_chan;
@@ -862,20 +863,20 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
 			&fdev->common.channels);
 	fdev->common.chancnt++;
 
-	new_fsl_chan->irq = irq_of_parse_and_map(dev->node, 0);
+	new_fsl_chan->irq = irq_of_parse_and_map(node, 0);
 	if (new_fsl_chan->irq != NO_IRQ) {
 		err = request_irq(new_fsl_chan->irq,
 					&fsl_dma_chan_do_interrupt, IRQF_SHARED,
 					"fsldma-channel", new_fsl_chan);
 		if (err) {
-			dev_err(&dev->dev, "DMA channel %s request_irq error "
-				"with return %d\n", dev->node->full_name, err);
+			dev_err(fdev->dev, "DMA channel %s request_irq error "
+				"with return %d\n", node->full_name, err);
 			goto err_no_irq;
 		}
 	}
 
-	dev_info(&dev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
-				match->compatible, new_fsl_chan->irq);
+	dev_info(fdev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
+				compatible, new_fsl_chan->irq);
 
 	return 0;
 
@@ -888,38 +889,20 @@ err_no_reg:
 	return err;
 }
 
-const u32 mpc8540_dma_ip_feature = FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN;
-const u32 mpc8349_dma_ip_feature = FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN;
-
-static struct of_device_id of_fsl_dma_chan_ids[] = {
-	{
-		.compatible = "fsl,eloplus-dma-channel",
-		.data = (void *)&mpc8540_dma_ip_feature,
-	},
-	{
-		.compatible = "fsl,elo-dma-channel",
-		.data = (void *)&mpc8349_dma_ip_feature,
-	},
-	{}
-};
-
-static struct of_platform_driver of_fsl_dma_chan_driver = {
-	.name = "of-fsl-dma-channel",
-	.match_table = of_fsl_dma_chan_ids,
-	.probe = of_fsl_dma_chan_probe,
-};
-
-static __init int of_fsl_dma_chan_init(void)
+static void fsl_dma_chan_remove(struct fsl_dma_chan *fchan)
 {
-	return of_register_platform_driver(&of_fsl_dma_chan_driver);
+	free_irq(fchan->irq, fchan);
+	list_del(&fchan->common.device_node);
+	iounmap(fchan->reg_base);
+	kfree(fchan);
 }
 
 static int __devinit of_fsl_dma_probe(struct of_device *dev,
 			const struct of_device_id *match)
 {
 	int err;
-	unsigned int irq;
 	struct fsl_dma_device *fdev;
+	struct device_node *child;
 
 	fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL);
 	if (!fdev) {
@@ -953,9 +936,9 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 	fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
 	fdev->common.dev = &dev->dev;
 
-	irq = irq_of_parse_and_map(dev->node, 0);
-	if (irq != NO_IRQ) {
-		err = request_irq(irq, &fsl_dma_do_interrupt, IRQF_SHARED,
+	fdev->irq = irq_of_parse_and_map(dev->node, 0);
+	if (fdev->irq != NO_IRQ) {
+		err = request_irq(fdev->irq, &fsl_dma_do_interrupt, IRQF_SHARED,
 					"fsldma-device", fdev);
 		if (err) {
 			dev_err(&dev->dev, "DMA device request_irq error "
@@ -965,7 +948,21 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 	}
 
 	dev_set_drvdata(&(dev->dev), fdev);
-	of_platform_bus_probe(dev->node, of_fsl_dma_chan_ids, &dev->dev);
+
+	/* We cannot use of_platform_bus_probe() because there is no
+	 * of_platform_bus_remove.  Instead, we manually instantiate every DMA
+	 * channel object.
+	 */
+	for_each_child_of_node(dev->node, child) {
+		if (of_device_is_compatible(child, "fsl,eloplus-dma-channel"))
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
+				"fsl,eloplus-dma-channel");
+		if (of_device_is_compatible(child, "fsl,elo-dma-channel"))
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
+				"fsl,elo-dma-channel");
+	}
 
 	dma_async_device_register(&fdev->common);
 	return 0;
@@ -977,6 +974,30 @@ err_no_reg:
 	return err;
 }
 
+static int of_fsl_dma_remove(struct of_device *of_dev)
+{
+	struct fsl_dma_device *fdev;
+	unsigned int i;
+
+	fdev = dev_get_drvdata(&of_dev->dev);
+
+	dma_async_device_unregister(&fdev->common);
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++)
+		if (fdev->chan[i])
+			fsl_dma_chan_remove(fdev->chan[i]);
+
+	if (fdev->irq != NO_IRQ)
+		free_irq(fdev->irq, fdev);
+
+	iounmap(fdev->reg_base);
+
+	kfree(fdev);
+	dev_set_drvdata(&of_dev->dev, NULL);
+
+	return 0;
+}
+
 static struct of_device_id of_fsl_dma_ids[] = {
 	{ .compatible = "fsl,eloplus-dma", },
 	{ .compatible = "fsl,elo-dma", },
@@ -984,15 +1005,32 @@ static struct of_device_id of_fsl_dma_ids[] = {
 };
 
 static struct of_platform_driver of_fsl_dma_driver = {
-	.name = "of-fsl-dma",
+	.name = "fsl-elo-dma",
 	.match_table = of_fsl_dma_ids,
 	.probe = of_fsl_dma_probe,
+	.remove = of_fsl_dma_remove,
 };
 
 static __init int of_fsl_dma_init(void)
 {
-	return of_register_platform_driver(&of_fsl_dma_driver);
+	int ret;
+
+	pr_info("Freescale Elo / Elo Plus DMA driver\n");
+
+	ret = of_register_platform_driver(&of_fsl_dma_driver);
+	if (ret)
+		pr_err("fsldma: failed to register platform driver\n");
+
+	return ret;
+}
+
+static void __exit of_fsl_dma_exit(void)
+{
+	of_unregister_platform_driver(&of_fsl_dma_driver);
 }
 
-subsys_initcall(of_fsl_dma_chan_init);
 subsys_initcall(of_fsl_dma_init);
+module_exit(of_fsl_dma_exit);
+
+MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index 6faf07ba0d0e..4f21a512d848 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -114,6 +114,7 @@ struct fsl_dma_device {
 	struct dma_device common;
 	struct fsl_dma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
 	u32 feature;		/* The same as DMA channels */
+	int irq;		/* Channel IRQ */
 };
 
 /* Define macros for fsl_dma_chan->feature property */
-- 
GitLab


From 3afe7eb37f4d47f31d30a81c1b42ca02eab01e44 Mon Sep 17 00:00:00 2001
From: Alexander Belyakov <abelyako@mail.ru>
Date: Thu, 25 Sep 2008 17:53:24 +0400
Subject: [PATCH 161/895] [MTD] [NOR] fix cfi_cmdset_0001 FL_SYNCING race (take
 2)

The patch fixes CFI issue with multipartitional devices leading to the
set of errors or even deadlock. The problem is CFI FL_SYNCING state race
with flash operations (e.g. erase suspend). It is reproduced by running
intensive writes on one JFFS2 partition and simultaneously performing
mount/unmount cycle on another partition of the same chip.

Signed-off-by: Alexander Belyakov <abelyako@googlemail.com>
Acked-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0001.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 5157e3cb4b9e..c93a8be5d5f1 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -725,6 +725,10 @@ static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long
 	struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
 	unsigned long timeo = jiffies + HZ;
 
+	/* Prevent setting state FL_SYNCING for chip in suspended state. */
+	if (mode == FL_SYNCING && chip->oldstate != FL_READY)
+		goto sleep;
+
 	switch (chip->state) {
 
 	case FL_STATUS:
@@ -830,8 +834,9 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 	DECLARE_WAITQUEUE(wait, current);
 
  retry:
-	if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING
-			   || mode == FL_OTP_WRITE || mode == FL_SHUTDOWN)) {
+	if (chip->priv &&
+	    (mode == FL_WRITING || mode == FL_ERASING || mode == FL_OTP_WRITE
+	    || mode == FL_SHUTDOWN) && chip->state != FL_SYNCING) {
 		/*
 		 * OK. We have possibility for contention on the write/erase
 		 * operations which are global to the real chip and not per
@@ -881,6 +886,14 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 				return ret;
 			}
 			spin_lock(&shared->lock);
+
+			/* We should not own chip if it is already
+			 * in FL_SYNCING state. Put contender and retry. */
+			if (chip->state == FL_SYNCING) {
+				put_chip(map, contender, contender->start);
+				spin_unlock(contender->mutex);
+				goto retry;
+			}
 			spin_unlock(contender->mutex);
 		}
 
-- 
GitLab


From e416de5e61e1a9b7f987804cbb67230b5f5293c6 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Tue, 23 Sep 2008 17:25:10 +0100
Subject: [PATCH 162/895] Export the ROM enable/disable helpers

.... so that they can be used by MTD map drivers. Lets us close #9420

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/rom.c   | 6 ++++--
 include/linux/pci.h | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index bd5c0e031398..1f5f6143f35c 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c
@@ -21,7 +21,7 @@
  * between the ROM and other resources, so enabling it may disable access
  * to MMIO registers or other card memory.
  */
-static int pci_enable_rom(struct pci_dev *pdev)
+int pci_enable_rom(struct pci_dev *pdev)
 {
 	struct resource *res = pdev->resource + PCI_ROM_RESOURCE;
 	struct pci_bus_region region;
@@ -45,7 +45,7 @@ static int pci_enable_rom(struct pci_dev *pdev)
  * Disable ROM decoding on a PCI device by turning off the last bit in the
  * ROM BAR.
  */
-static void pci_disable_rom(struct pci_dev *pdev)
+void pci_disable_rom(struct pci_dev *pdev)
 {
 	u32 rom_addr;
 	pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr);
@@ -260,3 +260,5 @@ void pci_cleanup_rom(struct pci_dev *pdev)
 
 EXPORT_SYMBOL(pci_map_rom);
 EXPORT_SYMBOL(pci_unmap_rom);
+EXPORT_SYMBOL_GPL(pci_enable_rom);
+EXPORT_SYMBOL_GPL(pci_disable_rom);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c0e14008a3c2..7a4cee00c1d6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -631,6 +631,8 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int pci_select_bars(struct pci_dev *dev, unsigned long flags);
 
 /* ROM control related routines */
+int pci_enable_rom(struct pci_dev *pdev);
+void pci_disable_rom(struct pci_dev *pdev);
 void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size);
 void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom);
 size_t pci_get_rom_size(void __iomem *rom, size_t size);
-- 
GitLab


From 4ab13943612673ef0822e1a041a9e629ba13a87c Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Tue, 23 Sep 2008 17:25:10 +0100
Subject: [PATCH 163/895] [MTD] [NOR] intel_dc21285 switch to ROM API

Now that the needed helpers are exported, it becomes a nice simple
switch over. Closes #9420

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/pci.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/mtd/maps/pci.c b/drivers/mtd/maps/pci.c
index 5c6a25c90380..d978c2e28b1d 100644
--- a/drivers/mtd/maps/pci.c
+++ b/drivers/mtd/maps/pci.c
@@ -203,15 +203,8 @@ intel_dc21285_init(struct pci_dev *dev, struct map_pci_info *map)
 		 * not enabled, should we be allocating a new resource for it
 		 * or simply enabling it?
 		 */
-		if (!(pci_resource_flags(dev, PCI_ROM_RESOURCE) &
-				    IORESOURCE_ROM_ENABLE)) {
-		     	u32 val;
-			pci_resource_flags(dev, PCI_ROM_RESOURCE) |= IORESOURCE_ROM_ENABLE;
-			pci_read_config_dword(dev, PCI_ROM_ADDRESS, &val);
-			val |= PCI_ROM_ADDRESS_ENABLE;
-			pci_write_config_dword(dev, PCI_ROM_ADDRESS, val);
-			printk("%s: enabling expansion ROM\n", pci_name(dev));
-		}
+		pci_enable_rom(dev);
+		printk("%s: enabling expansion ROM\n", pci_name(dev));
 	}
 
 	if (!len || !base)
@@ -240,10 +233,7 @@ intel_dc21285_exit(struct pci_dev *dev, struct map_pci_info *map)
 	/*
 	 * We need to undo the PCI BAR2/PCI ROM BAR address alteration.
 	 */
-	pci_resource_flags(dev, PCI_ROM_RESOURCE) &= ~IORESOURCE_ROM_ENABLE;
-	pci_read_config_dword(dev, PCI_ROM_ADDRESS, &val);
-	val &= ~PCI_ROM_ADDRESS_ENABLE;
-	pci_write_config_dword(dev, PCI_ROM_ADDRESS, val);
+	pci_disable_rom(dev);
 }
 
 static unsigned long
-- 
GitLab


From f324277cf70ad284dd99acf5ac5101e32bc8c55b Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier.adi@gmail.com>
Date: Mon, 22 Sep 2008 14:49:52 -0700
Subject: [PATCH 164/895] [MTD] [MAPS] Maps: make uclinux mapping driver depend
 on MTD_RAM

...since it only probes that

Signed-off-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 3ae76ecc07d7..5ea169362164 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -491,7 +491,7 @@ config MTD_BFIN_ASYNC
 
 config MTD_UCLINUX
 	tristate "Generic uClinux RAM/ROM filesystem support"
-	depends on MTD_PARTITIONS && !MMU
+	depends on MTD_PARTITIONS && MTD_RAM && !MMU
 	help
 	  Map driver to support image based filesystems for uClinux.
 
-- 
GitLab


From 7086efe1c1536f6bc160e7d60a9bfd645b91f279 Mon Sep 17 00:00:00 2001
From: Frank Mayhar <fmayhar@google.com>
Date: Fri, 12 Sep 2008 09:54:39 -0700
Subject: [PATCH 165/895] timers: fix itimer/many thread hang, v3

- fix UP lockup
- another set of UP/SMP cleanups and simplifications

Signed-off-by: Frank Mayhar <fmayhar@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |   1 -
 kernel/sched.c        |   1 -
 kernel/sched_stats.h  | 126 +++++++++++++-----------------------------
 3 files changed, 38 insertions(+), 90 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b982fb48c8f0..23d9d5464544 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2134,7 +2134,6 @@ static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
 	return thread_group_cputime_alloc(curr);
 }
 
-
 static inline void thread_group_cputime_free(struct signal_struct *sig)
 {
 	free_percpu(sig->cputime.totals);
diff --git a/kernel/sched.c b/kernel/sched.c
index 260c22cc530a..29a3152c45db 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4046,7 +4046,6 @@ unsigned long long task_delta_exec(struct task_struct *p)
 	unsigned long flags;
 	u64 ns = 0;
 
-	rq = task_rq_lock(p, &flags);
 	if (task_current(rq, p)) {
 		u64 delta_exec;
 
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index d6903bd0c7a8..b8c156979cf2 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -276,133 +276,83 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
  * on CONFIG_SCHEDSTATS.
  */
 
-#ifdef CONFIG_SMP
-
 /**
- * thread_group_cputime_account_user - Maintain utime for a thread group.
+ * account_group_user_time - Maintain utime for a thread group.
  *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @cputime:	Time value by which to increment the utime field of that
- *		structure.
+ * @tsk:	Pointer to task structure.
+ * @cputime:	Time value by which to increment the utime field of the
+ *		thread_group_cputime structure.
  *
  * If thread group time is being maintained, get the structure for the
  * running CPU and update the utime field there.
  */
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
+static inline void account_group_user_time(struct task_struct *tsk,
+					   cputime_t cputime)
 {
-	if (tgtimes->totals) {
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (unlikely(!sig))
+		return;
+	if (sig->cputime.totals) {
 		struct task_cputime *times;
 
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
 		times->utime = cputime_add(times->utime, cputime);
 		put_cpu_no_resched();
 	}
 }
 
 /**
- * thread_group_cputime_account_system - Maintain stime for a thread group.
+ * account_group_system_time - Maintain stime for a thread group.
  *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @cputime:	Time value by which to increment the stime field of that
- *		structure.
+ * @tsk:	Pointer to task structure.
+ * @cputime:	Time value by which to increment the stime field of the
+ *		thread_group_cputime structure.
  *
  * If thread group time is being maintained, get the structure for the
  * running CPU and update the stime field there.
  */
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
+static inline void account_group_system_time(struct task_struct *tsk,
+					     cputime_t cputime)
 {
-	if (tgtimes->totals) {
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (unlikely(!sig))
+		return;
+	if (sig->cputime.totals) {
 		struct task_cputime *times;
 
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
 		times->stime = cputime_add(times->stime, cputime);
 		put_cpu_no_resched();
 	}
 }
 
 /**
- * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
- *						thread group.
+ * account_group_exec_runtime - Maintain exec runtime for a thread group.
  *
- * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @tsk:	Pointer to task structure.
  * @ns:		Time value by which to increment the sum_exec_runtime field
- *		of that structure.
+ *		of the thread_group_cputime structure.
  *
  * If thread group time is being maintained, get the structure for the
  * running CPU and update the sum_exec_runtime field there.
  */
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+					      unsigned long long ns)
 {
-	if (tgtimes->totals) {
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (unlikely(!sig))
+		return;
+	if (sig->cputime.totals) {
 		struct task_cputime *times;
 
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
 		times->sum_exec_runtime += ns;
 		put_cpu_no_resched();
 	}
 }
-
-#else /* CONFIG_SMP */
-
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
-}
-
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
-}
-
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
-{
-	tgtimes->totals->sum_exec_runtime += ns;
-}
-
-#endif /* CONFIG_SMP */
-
-/*
- * These are the generic time-accounting routines that use the above
- * functions.  They are the functions actually called by the scheduler.
- */
-static inline void account_group_user_time(struct task_struct *tsk,
-					    cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_user(&sig->cputime, cputime);
-}
-
-static inline void account_group_system_time(struct task_struct *tsk,
-					      cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_system(&sig->cputime, cputime);
-}
-
-static inline void account_group_exec_runtime(struct task_struct *tsk,
-					       unsigned long long ns)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
-}
-- 
GitLab


From 88856d67cf6b787447889915bafd541be20b8630 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 29 Sep 2008 19:43:44 +0900
Subject: [PATCH 166/895] sh: Fix up uaccess_64 put/get_user() cast warnings.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/uaccess_64.h | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h
index 5580fd471003..0042c9054b20 100644
--- a/arch/sh/include/asm/uaccess_64.h
+++ b/arch/sh/include/asm/uaccess_64.h
@@ -26,16 +26,20 @@ do {								\
 	retval = 0;						\
 	switch (size) {						\
 	case 1:							\
-		retval = __get_user_asm_b(x, ptr);		\
+		retval = __get_user_asm_b((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 2:							\
-		retval = __get_user_asm_w(x, ptr);		\
+		retval = __get_user_asm_w((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 4:							\
-		retval = __get_user_asm_l(x, ptr);		\
+		retval = __get_user_asm_l((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 8:							\
-		retval = __get_user_asm_q(x, ptr);		\
+		retval = __get_user_asm_q((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	default:						\
 		__get_user_unknown();				\
@@ -54,16 +58,20 @@ do {								\
 	retval = 0;						\
 	switch (size) {						\
 	case 1:							\
-		retval = __put_user_asm_b(x, ptr);		\
+		retval = __put_user_asm_b((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 2:							\
-		retval = __put_user_asm_w(x, ptr);		\
+		retval = __put_user_asm_w((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 4:							\
-		retval = __put_user_asm_l(x, ptr);		\
+		retval = __put_user_asm_l((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 8:							\
-		retval = __put_user_asm_q(x, ptr);		\
+		retval = __put_user_asm_q((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	default:						\
 		__put_user_unknown();				\
-- 
GitLab


From 091db04559a62a00769553c9120623c2f6bc8b4d Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 29 Sep 2008 19:44:40 +0900
Subject: [PATCH 167/895] sh: Fix up signal_64 cast warnings.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/signal_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 0582ae4739ba..ce3e851dffcb 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -563,7 +563,7 @@ static void setup_frame(int sig, struct k_sigaction *ka,
 			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
 
 		if (__copy_to_user(frame->retcode,
-			(unsigned long long)sa_default_restorer & (~1), 16) != 0)
+			(void *)((unsigned long)sa_default_restorer & (~1)), 16) != 0)
 			goto give_sigsegv;
 
 		/* Cohere the trampoline with the I-cache. */
@@ -681,7 +681,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
 
 		if (__copy_to_user(frame->retcode,
-			(unsigned long long)sa_default_rt_restorer & (~1), 16) != 0)
+			(void *)((unsigned long)sa_default_rt_restorer & (~1)), 16) != 0)
 			goto give_sigsegv;
 
 		flush_icache_range(DEREF_REG_PR-1, DEREF_REG_PR-1+15);
-- 
GitLab


From 50b72e600b62bcdf40971e55f609cf4771346cc1 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 29 Sep 2008 19:45:16 +0900
Subject: [PATCH 168/895] sh: sh_ksyms_64 needs __strncpy_from_user()
 definition.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/uaccess_64.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h
index 0042c9054b20..56fd20b8cdcc 100644
--- a/arch/sh/include/asm/uaccess_64.h
+++ b/arch/sh/include/asm/uaccess_64.h
@@ -85,5 +85,7 @@ extern long __put_user_asm_q(void *, long);
 extern void __put_user_unknown(void);
 
 extern long __strnlen_user(const char *__s, long __n);
+extern int __strncpy_from_user(unsigned long __dest,
+	       unsigned long __user __src, int __count);
 
 #endif /* __ASM_SH_UACCESS_64_H */
-- 
GitLab


From 4d01cdafbafc0fdeb730838ca38a48e5ca2894cd Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 29 Sep 2008 20:09:17 +0900
Subject: [PATCH 169/895] sh: SH-5 clk fwk support.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/sh5/Makefile    |   5 +
 arch/sh/kernel/cpu/sh5/clock-sh5.c |  79 ++++++++++++++
 arch/sh/kernel/time_64.c           | 168 ++---------------------------
 3 files changed, 91 insertions(+), 161 deletions(-)
 create mode 100644 arch/sh/kernel/cpu/sh5/clock-sh5.c

diff --git a/arch/sh/kernel/cpu/sh5/Makefile b/arch/sh/kernel/cpu/sh5/Makefile
index 8646363e9ded..ce4602ea23a8 100644
--- a/arch/sh/kernel/cpu/sh5/Makefile
+++ b/arch/sh/kernel/cpu/sh5/Makefile
@@ -5,3 +5,8 @@ obj-y := entry.o probe.o switchto.o
 
 obj-$(CONFIG_SH_FPU)		+= fpu.o
 obj-$(CONFIG_KALLSYMS)		+= unwind.o
+
+# Primary on-chip clocks (common)
+clock-$(CONFIG_CPU_SH5)		:= clock-sh5.o
+
+obj-y			+= $(clock-y)
diff --git a/arch/sh/kernel/cpu/sh5/clock-sh5.c b/arch/sh/kernel/cpu/sh5/clock-sh5.c
new file mode 100644
index 000000000000..52c49248833a
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh5/clock-sh5.c
@@ -0,0 +1,79 @@
+/*
+ * arch/sh/kernel/cpu/sh5/clock-sh5.c
+ *
+ * SH-5 support for the clock framework
+ *
+ *  Copyright (C) 2008  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/clock.h>
+#include <asm/io.h>
+
+static int ifc_table[] = { 2, 4, 6, 8, 10, 12, 16, 24 };
+
+/* Clock, Power and Reset Controller */
+#define	CPRC_BLOCK_OFF	0x01010000
+#define CPRC_BASE	(PHYS_PERIPHERAL_BLOCK + CPRC_BLOCK_OFF)
+
+static unsigned long cprc_base;
+
+static void master_clk_init(struct clk *clk)
+{
+	int idx = (ctrl_inl(cprc_base + 0x00) >> 6) & 0x0007;
+	clk->rate *= ifc_table[idx];
+}
+
+static struct clk_ops sh5_master_clk_ops = {
+	.init		= master_clk_init,
+};
+
+static void module_clk_recalc(struct clk *clk)
+{
+	int idx = (ctrl_inw(cprc_base) >> 12) & 0x0007;
+	clk->rate = clk->parent->rate / ifc_table[idx];
+}
+
+static struct clk_ops sh5_module_clk_ops = {
+	.recalc		= module_clk_recalc,
+};
+
+static void bus_clk_recalc(struct clk *clk)
+{
+	int idx = (ctrl_inw(cprc_base) >> 3) & 0x0007;
+	clk->rate = clk->parent->rate / ifc_table[idx];
+}
+
+static struct clk_ops sh5_bus_clk_ops = {
+	.recalc		= bus_clk_recalc,
+};
+
+static void cpu_clk_recalc(struct clk *clk)
+{
+	int idx = (ctrl_inw(cprc_base) & 0x0007);
+	clk->rate = clk->parent->rate / ifc_table[idx];
+}
+
+static struct clk_ops sh5_cpu_clk_ops = {
+	.recalc		= cpu_clk_recalc,
+};
+
+static struct clk_ops *sh5_clk_ops[] = {
+	&sh5_master_clk_ops,
+	&sh5_module_clk_ops,
+	&sh5_bus_clk_ops,
+	&sh5_cpu_clk_ops,
+};
+
+void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
+{
+	cprc_base = onchip_remap(CPRC_BASE, 1024, "CPRC");
+	BUG_ON(!cprc_base);
+
+	if (idx < ARRAY_SIZE(sh5_clk_ops))
+		*ops = sh5_clk_ops[idx];
+}
diff --git a/arch/sh/kernel/time_64.c b/arch/sh/kernel/time_64.c
index 791edabf7d83..bbb2af1004d9 100644
--- a/arch/sh/kernel/time_64.c
+++ b/arch/sh/kernel/time_64.c
@@ -39,6 +39,7 @@
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/delay.h>
+#include <asm/clock.h>
 
 #define TMU_TOCR_INIT	0x00
 #define TMU0_TCR_INIT	0x0020
@@ -51,14 +52,6 @@
 #define RTC_RCR1_CIE	0x10	/* Carry Interrupt Enable */
 #define RTC_RCR1	(rtc_base + 0x38)
 
-/* Clock, Power and Reset Controller */
-#define	CPRC_BLOCK_OFF	0x01010000
-#define CPRC_BASE	PHYS_PERIPHERAL_BLOCK + CPRC_BLOCK_OFF
-
-#define FRQCR		(cprc_base+0x0)
-#define WTCSR		(cprc_base+0x0018)
-#define STBCR		(cprc_base+0x0030)
-
 /* Time Management Unit */
 #define	TMU_BLOCK_OFF	0x01020000
 #define TMU_BASE	PHYS_PERIPHERAL_BLOCK + TMU_BLOCK_OFF
@@ -293,103 +286,17 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-
-static __init unsigned int get_cpu_hz(void)
-{
-	unsigned int count;
-	unsigned long __dummy;
-	unsigned long ctc_val_init, ctc_val;
-
-	/*
-	** Regardless the toolchain, force the compiler to use the
-	** arbitrary register r3 as a clock tick counter.
-	** NOTE: r3 must be in accordance with sh64_rtc_interrupt()
-	*/
-	register unsigned long long  __rtc_irq_flag __asm__ ("r3");
-
-	local_irq_enable();
-	do {} while (ctrl_inb(rtc_base) != 0);
-	ctrl_outb(RTC_RCR1_CIE, RTC_RCR1); /* Enable carry interrupt */
-
-	/*
-	 * r3 is arbitrary. CDC does not support "=z".
-	 */
-	ctc_val_init = 0xffffffff;
-	ctc_val = ctc_val_init;
-
-	asm volatile("gettr	tr0, %1\n\t"
-		     "putcon	%0, " __CTC "\n\t"
-		     "and	%2, r63, %2\n\t"
-		     "pta	$+4, tr0\n\t"
-		     "beq/l	%2, r63, tr0\n\t"
-		     "ptabs	%1, tr0\n\t"
-		     "getcon	" __CTC ", %0\n\t"
-		: "=r"(ctc_val), "=r" (__dummy), "=r" (__rtc_irq_flag)
-		: "0" (0));
-	local_irq_disable();
-	/*
-	 * SH-3:
-	 * CPU clock = 4 stages * loop
-	 * tst    rm,rm      if id ex
-	 * bt/s   1b            if id ex
-	 * add    #1,rd            if id ex
-         *                            (if) pipe line stole
-	 * tst    rm,rm                  if id ex
-         * ....
-	 *
-	 *
-	 * SH-4:
-	 * CPU clock = 6 stages * loop
-	 * I don't know why.
-         * ....
-	 *
-	 * SH-5:
-	 * Use CTC register to count.  This approach returns the right value
-	 * even if the I-cache is disabled (e.g. whilst debugging.)
-	 *
-	 */
-
-	count = ctc_val_init - ctc_val; /* CTC counts down */
-
-	/*
-	 * This really is count by the number of clock cycles
-         * by the ratio between a complete R64CNT
-         * wrap-around (128) and CUI interrupt being raised (64).
-	 */
-	return count*2;
-}
-
-static irqreturn_t sh64_rtc_interrupt(int irq, void *dev_id)
-{
-	struct pt_regs *regs = get_irq_regs();
-
-	ctrl_outb(0, RTC_RCR1);	/* Disable Carry Interrupts */
-	regs->regs[3] = 1;	/* Using r3 */
-
-	return IRQ_HANDLED;
-}
-
 static struct irqaction irq0  = {
 	.handler = timer_interrupt,
 	.flags = IRQF_DISABLED,
 	.mask = CPU_MASK_NONE,
 	.name = "timer",
 };
-static struct irqaction irq1  = {
-	.handler = sh64_rtc_interrupt,
-	.flags = IRQF_DISABLED,
-	.mask = CPU_MASK_NONE,
-	.name = "rtc",
-};
 
 void __init time_init(void)
 {
-	unsigned int cpu_clock, master_clock, bus_clock, module_clock;
 	unsigned long interval;
-	unsigned long frqcr, ifc, pfc;
-	static int ifc_table[] = { 2, 4, 6, 8, 10, 12, 16, 24 };
-#define bfc_table ifc_table	/* Same */
-#define pfc_table ifc_table	/* Same */
+	struct clk *clk;
 
 	tmu_base = onchip_remap(TMU_BASE, 1024, "TMU");
 	if (!tmu_base) {
@@ -401,50 +308,19 @@ void __init time_init(void)
 		panic("Unable to remap RTC\n");
 	}
 
-	cprc_base = onchip_remap(CPRC_BASE, 1024, "CPRC");
-	if (!cprc_base) {
-		panic("Unable to remap CPRC\n");
-	}
+	clk = clk_get(NULL, "cpu_clk");
+	scaled_recip_ctc_ticks_per_jiffy = ((1ULL << CTC_JIFFY_SCALE_SHIFT) /
+			(unsigned long long)(clk_get_rate(clk) / HZ));
 
 	rtc_sh_get_time(&xtime);
 
 	setup_irq(TIMER_IRQ, &irq0);
-	setup_irq(RTC_IRQ, &irq1);
-
-	/* Check how fast it is.. */
-	cpu_clock = get_cpu_hz();
-
-	/* Note careful order of operations to maintain reasonable precision and avoid overflow. */
-	scaled_recip_ctc_ticks_per_jiffy = ((1ULL << CTC_JIFFY_SCALE_SHIFT) / (unsigned long long)(cpu_clock / HZ));
-
-	free_irq(RTC_IRQ, NULL);
-
-	printk("CPU clock: %d.%02dMHz\n",
-	       (cpu_clock / 1000000), (cpu_clock % 1000000)/10000);
-	{
-		unsigned short bfc;
-		frqcr = ctrl_inl(FRQCR);
-		ifc  = ifc_table[(frqcr>> 6) & 0x0007];
-		bfc  = bfc_table[(frqcr>> 3) & 0x0007];
-		pfc  = pfc_table[(frqcr>> 12) & 0x0007];
-		master_clock = cpu_clock * ifc;
-		bus_clock = master_clock/bfc;
-	}
 
-	printk("Bus clock: %d.%02dMHz\n",
-	       (bus_clock/1000000), (bus_clock % 1000000)/10000);
-	module_clock = master_clock/pfc;
-	printk("Module clock: %d.%02dMHz\n",
-	       (module_clock/1000000), (module_clock % 1000000)/10000);
-	interval = (module_clock/(HZ*4));
+	clk = clk_get(NULL, "module_clk");
+	interval = (clk_get_rate(clk)/(HZ*4));
 
 	printk("Interval = %ld\n", interval);
 
-	current_cpu_data.cpu_clock    = cpu_clock;
-	current_cpu_data.master_clock = master_clock;
-	current_cpu_data.bus_clock    = bus_clock;
-	current_cpu_data.module_clock = module_clock;
-
 	/* Start TMU0 */
 	ctrl_outb(TMU_TSTR_OFF, TMU_TSTR);
 	ctrl_outb(TMU_TOCR_INIT, TMU_TOCR);
@@ -454,36 +330,6 @@ void __init time_init(void)
 	ctrl_outb(TMU_TSTR_INIT, TMU_TSTR);
 }
 
-void enter_deep_standby(void)
-{
-	/* Disable watchdog timer */
-	ctrl_outl(0xa5000000, WTCSR);
-	/* Configure deep standby on sleep */
-	ctrl_outl(0x03, STBCR);
-
-#ifdef CONFIG_SH_ALPHANUMERIC
-	{
-		extern void mach_alphanum(int position, unsigned char value);
-		extern void mach_alphanum_brightness(int setting);
-		char halted[] = "Halted. ";
-		int i;
-		mach_alphanum_brightness(6); /* dimmest setting above off */
-		for (i=0; i<8; i++) {
-			mach_alphanum(i, halted[i]);
-		}
-		asm __volatile__ ("synco");
-	}
-#endif
-
-	asm __volatile__ ("sleep");
-	asm __volatile__ ("synci");
-	asm __volatile__ ("nop");
-	asm __volatile__ ("nop");
-	asm __volatile__ ("nop");
-	asm __volatile__ ("nop");
-	panic("Unexpected wakeup!\n");
-}
-
 static struct resource rtc_resources[] = {
 	[0] = {
 		/* RTC base, filled in by rtc_init */
-- 
GitLab


From bdeb3be7cc6911477b7169dad62a427d7a263d02 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 29 Sep 2008 20:14:44 +0900
Subject: [PATCH 170/895] sh: Use clk fwk for preset lpj on sh64, too.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 71be7ff5c771..dca54ccdf291 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -28,7 +28,6 @@ config SUPERH32
 
 config SUPERH64
 	def_bool y if CPU_SH5
-	select GENERIC_CALIBRATE_DELAY
 
 config ARCH_DEFCONFIG
 	string
-- 
GitLab


From 1508487e7f16d992ad23cabd3712563ff912f413 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 30 Sep 2008 08:28:17 +0200
Subject: [PATCH 171/895] timers: fix itimer/many thread hang, fix

fix bogus rq dereference: v3 removed the locking but also removed the rq
initialization.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 29a3152c45db..ebb03def564b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4042,10 +4042,12 @@ EXPORT_PER_CPU_SYMBOL(kstat);
  */
 unsigned long long task_delta_exec(struct task_struct *p)
 {
-	struct rq *rq;
 	unsigned long flags;
+	struct rq *rq;
 	u64 ns = 0;
 
+	rq = task_rq_lock(p, &flags);
+
 	if (task_current(rq, p)) {
 		u64 delta_exec;
 
@@ -4055,6 +4057,8 @@ unsigned long long task_delta_exec(struct task_struct *p)
 			ns = delta_exec;
 	}
 
+	task_rq_unlock(rq, &flags);
+
 	return ns;
 }
 
-- 
GitLab


From 948cfb219bbbc3c8e1b10a671ca88219fa42a052 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 20 Aug 2008 11:56:33 +0300
Subject: [PATCH 172/895] UBIFS: add a print, fix comments and more minor stuff

This commit adds a reserved pool size print and tweaks the
prints to make them look nicer.

It also fixes and cleans-up some comments.

Additionally, it deletes some blank lines to make the code look
a little nicer.

In other words, nothing essential.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/budget.c | 26 ++++++++++++++------------
 fs/ubifs/lprops.c |  6 +-----
 fs/ubifs/super.c  | 16 +++++++++-------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 73db464cd08b..1a4973e10664 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -414,19 +414,21 @@ static int do_budget_space(struct ubifs_info *c)
 	 *    @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
 	 *    @c->lst.taken_empty_lebs
 	 *
-	 * @empty_lebs are available because they are empty. @freeable_cnt are
-	 * available because they contain only free and dirty space and the
-	 * index allocation always occurs after wbufs are synch'ed.
-	 * @idx_gc_cnt are available because they are index LEBs that have been
-	 * garbage collected (including trivial GC) and are awaiting the commit
-	 * before they can be unmapped - note that the in-the-gaps method will
-	 * grab these if it needs them. @taken_empty_lebs are empty_lebs that
-	 * have already been allocated for some purpose (also includes those
-	 * LEBs on the @idx_gc list).
+	 * @c->lst.empty_lebs are available because they are empty.
+	 * @c->freeable_cnt are available because they contain only free and
+	 * dirty space, @c->idx_gc_cnt are available because they are index
+	 * LEBs that have been garbage collected and are awaiting the commit
+	 * before they can be used. And the in-the-gaps method will grab these
+	 * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have
+	 * already been allocated for some purpose.
 	 *
-	 * Note, @taken_empty_lebs may temporarily be higher by one because of
-	 * the way we serialize LEB allocations and budgeting. See a comment in
-	 * 'ubifs_find_free_space()'.
+	 * Note, @c->idx_gc_cnt is included to both @c->lst.empty_lebs (because
+	 * these LEBs are empty) and to @c->lst.taken_empty_lebs (because they
+	 * are taken until after the commit).
+	 *
+	 * Note, @c->lst.taken_empty_lebs may temporarily be higher by one
+	 * because of the way we serialize LEB allocations and budgeting. See a
+	 * comment in 'ubifs_find_free_space()'.
 	 */
 	lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
 	       c->lst.taken_empty_lebs;
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 2ba93da71b65..3659b887743b 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -125,6 +125,7 @@ static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
 			}
 		}
 	}
+
 	/* Not greater than parent, so compare to children */
 	while (1) {
 		/* Compare to left child */
@@ -576,7 +577,6 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
 	ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7));
 
 	spin_lock(&c->space_lock);
-
 	if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
 		c->lst.taken_empty_lebs -= 1;
 
@@ -637,11 +637,8 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
 		c->lst.taken_empty_lebs += 1;
 
 	change_category(c, lprops);
-
 	c->idx_gc_cnt += idx_gc_cnt;
-
 	spin_unlock(&c->space_lock);
-
 	return lprops;
 }
 
@@ -1262,7 +1259,6 @@ static int scan_check_cb(struct ubifs_info *c,
 	}
 
 	ubifs_scan_destroy(sleb);
-
 	return LPT_SCAN_CONTINUE;
 
 out_print:
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3f4902060c7a..667c72d8a5cc 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1144,19 +1144,21 @@ static int mount_ubifs(struct ubifs_info *c)
 	if (mounted_read_only)
 		ubifs_msg("mounted read-only");
 	x = (long long)c->main_lebs * c->leb_size;
-	ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
-		  x, x >> 10, x >> 20, c->main_lebs);
+	ubifs_msg("file system size:   %lld bytes (%lld KiB, %lld MiB, %d "
+		  "LEBs)", x, x >> 10, x >> 20, c->main_lebs);
 	x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
-	ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
-		  x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
-	ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
-	ubifs_msg("media format %d, latest format %d",
+	ubifs_msg("journal size:       %lld bytes (%lld KiB, %lld MiB, %d "
+		  "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
+	ubifs_msg("media format:       %d (latest is %d)",
 		  c->fmt_version, UBIFS_FORMAT_VERSION);
+	ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
+	ubifs_msg("reserved pool size: %llu bytes (%llu KiB)",
+		c->report_rp_size, c->report_rp_size >> 10);
 
 	dbg_msg("compiled on:         " __DATE__ " at " __TIME__);
 	dbg_msg("min. I/O unit size:  %d bytes", c->min_io_size);
 	dbg_msg("LEB size:            %d bytes (%d KiB)",
-		c->leb_size, c->leb_size / 1024);
+		c->leb_size, c->leb_size >> 10);
 	dbg_msg("data journal heads:  %d",
 		c->jhead_cnt - NONDATA_JHEADS_CNT);
 	dbg_msg("UUID:                %02X%02X%02X%02X-%02X%02X"
-- 
GitLab


From 8d47aef43ba166bdd11d522307c61ab23aab61c3 Mon Sep 17 00:00:00 2001
From: Hirofumi Nakagawa <hnakagawa@miraclelinux.com>
Date: Thu, 21 Aug 2008 17:16:40 +0300
Subject: [PATCH 173/895] UBIFS: remove unneeded unlikely()

IS_ERR() macro already has unlikely(), so do not use constructions
like 'if (unlikely(IS_ERR())'.

Signed-off-by: Hirofumi Nakagawa <hnakagawa@miraclelinux.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/find.c  | 4 ++--
 fs/ubifs/gc.c    | 8 ++++----
 fs/ubifs/tnc.c   | 4 ++--
 fs/ubifs/xattr.c | 2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 47814cde2407..717d79c97c5e 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -901,11 +901,11 @@ static int get_idx_gc_leb(struct ubifs_info *c)
 	 * it is needed now for this commit.
 	 */
 	lp = ubifs_lpt_lookup_dirty(c, lnum);
-	if (unlikely(IS_ERR(lp)))
+	if (IS_ERR(lp))
 		return PTR_ERR(lp);
 	lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
 			     lp->flags | LPROPS_INDEX, -1);
-	if (unlikely(IS_ERR(lp)))
+	if (IS_ERR(lp))
 		return PTR_ERR(lp);
 	dbg_find("LEB %d, dirty %d and free %d flags %#x",
 		 lp->lnum, lp->dirty, lp->free, lp->flags);
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 02aba36fe3d4..a6b633a5629f 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -653,7 +653,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
 	 */
 	while (1) {
 		lp = ubifs_fast_find_freeable(c);
-		if (unlikely(IS_ERR(lp))) {
+		if (IS_ERR(lp)) {
 			err = PTR_ERR(lp);
 			goto out;
 		}
@@ -665,7 +665,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
 		if (err)
 			goto out;
 		lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
-		if (unlikely(IS_ERR(lp))) {
+		if (IS_ERR(lp)) {
 			err = PTR_ERR(lp);
 			goto out;
 		}
@@ -680,7 +680,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
 	/* Record index freeable LEBs for unmapping after commit */
 	while (1) {
 		lp = ubifs_fast_find_frdi_idx(c);
-		if (unlikely(IS_ERR(lp))) {
+		if (IS_ERR(lp)) {
 			err = PTR_ERR(lp);
 			goto out;
 		}
@@ -696,7 +696,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
 		/* Don't release the LEB until after the next commit */
 		flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
 		lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
-		if (unlikely(IS_ERR(lp))) {
+		if (IS_ERR(lp)) {
 			err = PTR_ERR(lp);
 			kfree(idx_gc);
 			goto out;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 7634c5970887..ba13c92fdf6a 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -284,7 +284,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
 	}
 
 	zn = copy_znode(c, znode);
-	if (unlikely(IS_ERR(zn)))
+	if (IS_ERR(zn))
 		return zn;
 
 	if (zbr->len) {
@@ -1128,7 +1128,7 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
 			ubifs_assert(znode == c->zroot.znode);
 			znode = dirty_cow_znode(c, &c->zroot);
 		}
-		if (unlikely(IS_ERR(znode)) || !p)
+		if (IS_ERR(znode) || !p)
 			break;
 		ubifs_assert(path[p - 1] >= 0);
 		ubifs_assert(path[p - 1] < znode->child_cnt);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 649bec78b645..cfd31e229c89 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -446,7 +446,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 		int type;
 
 		xent = ubifs_tnc_next_ent(c, &key, &nm);
-		if (unlikely(IS_ERR(xent))) {
+		if (IS_ERR(xent)) {
 			err = PTR_ERR(xent);
 			break;
 		}
-- 
GitLab


From 746103aca2ae2b044e32a6ab06a6536652124c99 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 27 Aug 2008 12:50:57 +0300
Subject: [PATCH 174/895] UBIFS: inline one-line functions

'ubifs_get_lprops()' and 'ubifs_release_lprops()' basically wrap
mutex lock and unlock. We have them because we want lprops subsystem
be separate and as independent as possible. And we planned better
locking rules for lprops.

Anyway, because they are short, it is better to inline them.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/lprops.c | 28 ----------------------------
 fs/ubifs/misc.h   | 27 +++++++++++++++++++++++++++
 fs/ubifs/ubifs.h  |  2 --
 3 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 3659b887743b..f27176e9b70d 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -460,18 +460,6 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
 	}
 }
 
-/**
- * ubifs_get_lprops - get reference to LEB properties.
- * @c: the UBIFS file-system description object
- *
- * This function locks lprops. Lprops have to be unlocked by
- * 'ubifs_release_lprops()'.
- */
-void ubifs_get_lprops(struct ubifs_info *c)
-{
-	mutex_lock(&c->lp_mutex);
-}
-
 /**
  * calc_dark - calculate LEB dark space size.
  * @c: the UBIFS file-system description object
@@ -642,22 +630,6 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
 	return lprops;
 }
 
-/**
- * ubifs_release_lprops - release lprops lock.
- * @c: the UBIFS file-system description object
- *
- * This function has to be called after each 'ubifs_get_lprops()' call to
- * unlock lprops.
- */
-void ubifs_release_lprops(struct ubifs_info *c)
-{
-	ubifs_assert(mutex_is_locked(&c->lp_mutex));
-	ubifs_assert(c->lst.empty_lebs >= 0 &&
-		     c->lst.empty_lebs <= c->main_lebs);
-
-	mutex_unlock(&c->lp_mutex);
-}
-
 /**
  * ubifs_get_lp_stats - get lprops statistics.
  * @c: UBIFS file-system description object
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 4c12a9215d7f..4fa81d867e41 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -310,4 +310,31 @@ static inline int ubifs_tnc_lookup(struct ubifs_info *c,
 	return ubifs_tnc_locate(c, key, node, NULL, NULL);
 }
 
+/**
+ * ubifs_get_lprops - get reference to LEB properties.
+ * @c: the UBIFS file-system description object
+ *
+ * This function locks lprops. Lprops have to be unlocked by
+ * 'ubifs_release_lprops()'.
+ */
+static inline void ubifs_get_lprops(struct ubifs_info *c)
+{
+	mutex_lock(&c->lp_mutex);
+}
+
+/**
+ * ubifs_release_lprops - release lprops lock.
+ * @c: the UBIFS file-system description object
+ *
+ * This function has to be called after each 'ubifs_get_lprops()' call to
+ * unlock lprops.
+ */
+static inline void ubifs_release_lprops(struct ubifs_info *c)
+{
+	ubifs_assert(mutex_is_locked(&c->lp_mutex));
+	ubifs_assert(c->lst.empty_lebs >= 0 &&
+		     c->lst.empty_lebs <= c->main_lebs);
+	mutex_unlock(&c->lp_mutex);
+}
+
 #endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 17c620b93eec..ce8654928aad 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1586,12 +1586,10 @@ int ubifs_lpt_post_commit(struct ubifs_info *c);
 void ubifs_lpt_free(struct ubifs_info *c, int wr_only);
 
 /* lprops.c */
-void ubifs_get_lprops(struct ubifs_info *c);
 const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
 					   const struct ubifs_lprops *lp,
 					   int free, int dirty, int flags,
 					   int idx_gc_cnt);
-void ubifs_release_lprops(struct ubifs_info *c);
 void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats);
 void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
 		      int cat);
-- 
GitLab


From a70948b564e9f6cb81115c606d46f5b74a77b7c2 Mon Sep 17 00:00:00 2001
From: Julien Brunel <brunel@diku.dk>
Date: Fri, 29 Aug 2008 11:08:32 +0200
Subject: [PATCH 175/895] UBIFS: use an IS_ERR test rather than a NULL test

In case of error, the function kthread_create returns an ERR pointer,
but never returns a NULL pointer. So a NULL test that comes before an
IS_ERR test should be deleted.

The semantic match that finds this problem is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@match_bad_null_test@
expression x, E;
statement S1,S2;
@@
x = kthread_create(...)
... when != x = E
* if (x == NULL)
S1 else S2
// </smpl>

Signed-off-by: Julien Brunel <brunel@diku.dk>
Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/super.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 667c72d8a5cc..d87b0cf5f661 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1032,8 +1032,6 @@ static int mount_ubifs(struct ubifs_info *c)
 
 		/* Create background thread */
 		c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
-		if (!c->bgt)
-			c->bgt = ERR_PTR(-EINVAL);
 		if (IS_ERR(c->bgt)) {
 			err = PTR_ERR(c->bgt);
 			c->bgt = NULL;
@@ -1347,8 +1345,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 
 	/* Create background thread */
 	c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
-	if (!c->bgt)
-		c->bgt = ERR_PTR(-EINVAL);
 	if (IS_ERR(c->bgt)) {
 		err = PTR_ERR(c->bgt);
 		c->bgt = NULL;
-- 
GitLab


From 4793e7c5e1c88382ead18db5ca072bac54467318 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Tue, 2 Sep 2008 16:29:46 +0300
Subject: [PATCH 176/895] UBIFS: add bulk-read facility

Some flash media are capable of reading sequentially at faster rates.
UBIFS bulk-read facility is designed to take advantage of that, by
reading in one go consecutive data nodes that are also located
consecutively in the same LEB.

Read speed on Arm platform with OneNAND goes from 17 MiB/s to
19 MiB/s.

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 Documentation/filesystems/ubifs.txt |   3 +
 fs/ubifs/file.c                     | 248 ++++++++++++++++++++++++
 fs/ubifs/key.h                      |  22 ++-
 fs/ubifs/super.c                    |  31 +++
 fs/ubifs/tnc.c                      | 283 ++++++++++++++++++++++++++++
 fs/ubifs/ubifs.h                    |  45 ++++-
 6 files changed, 629 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt
index 6a0d70a22f05..340512c32506 100644
--- a/Documentation/filesystems/ubifs.txt
+++ b/Documentation/filesystems/ubifs.txt
@@ -86,6 +86,9 @@ norm_unmount (*)	commit on unmount; the journal is committed
 fast_unmount		do not commit on unmount; this option makes
 			unmount faster, but the next mount slower
 			because of the need to replay the journal.
+bulk_read		read more in one go to take advantage of flash
+			media that read faster sequentially
+no_bulk_read (*)	do not bulk-read
 
 
 Quick usage instructions
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 3d698e2022b1..cdcfe95cbfb4 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -577,8 +577,256 @@ out:
 	return copied;
 }
 
+/**
+ * populate_page - copy data nodes into a page for bulk-read.
+ * @c: UBIFS file-system description object
+ * @page: page
+ * @bu: bulk-read information
+ * @n: next zbranch slot
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int populate_page(struct ubifs_info *c, struct page *page,
+			 struct bu_info *bu, int *n)
+{
+	int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 1, read = 0;
+	struct inode *inode = page->mapping->host;
+	loff_t i_size = i_size_read(inode);
+	unsigned int page_block;
+	void *addr, *zaddr;
+	pgoff_t end_index;
+
+	dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
+		inode->i_ino, page->index, i_size, page->flags);
+
+	addr = zaddr = kmap(page);
+
+	end_index = (i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	if (!i_size || page->index > end_index) {
+		memset(addr, 0, PAGE_CACHE_SIZE);
+		goto out_hole;
+	}
+
+	page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+	while (1) {
+		int err, len, out_len, dlen;
+
+		if (nn >= bu->cnt ||
+		    key_block(c, &bu->zbranch[nn].key) != page_block)
+			memset(addr, 0, UBIFS_BLOCK_SIZE);
+		else {
+			struct ubifs_data_node *dn;
+
+			dn = bu->buf + (bu->zbranch[nn].offs - offs);
+
+			ubifs_assert(dn->ch.sqnum >
+				     ubifs_inode(inode)->creat_sqnum);
+
+			len = le32_to_cpu(dn->size);
+			if (len <= 0 || len > UBIFS_BLOCK_SIZE)
+				goto out_err;
+
+			dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
+			out_len = UBIFS_BLOCK_SIZE;
+			err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
+					       le16_to_cpu(dn->compr_type));
+			if (err || len != out_len)
+				goto out_err;
+
+			if (len < UBIFS_BLOCK_SIZE)
+				memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
+
+			nn += 1;
+			hole = 0;
+			read = (i << UBIFS_BLOCK_SHIFT) + len;
+		}
+		if (++i >= UBIFS_BLOCKS_PER_PAGE)
+			break;
+		addr += UBIFS_BLOCK_SIZE;
+		page_block += 1;
+	}
+
+	if (end_index == page->index) {
+		int len = i_size & (PAGE_CACHE_SIZE - 1);
+
+		if (len < read)
+			memset(zaddr + len, 0, read - len);
+	}
+
+out_hole:
+	if (hole) {
+		SetPageChecked(page);
+		dbg_gen("hole");
+	}
+
+	SetPageUptodate(page);
+	ClearPageError(page);
+	flush_dcache_page(page);
+	kunmap(page);
+	*n = nn;
+	return 0;
+
+out_err:
+	ClearPageUptodate(page);
+	SetPageError(page);
+	flush_dcache_page(page);
+	kunmap(page);
+	ubifs_err("bad data node (block %u, inode %lu)",
+		  page_block, inode->i_ino);
+	return -EINVAL;
+}
+
+/**
+ * ubifs_do_bulk_read - do bulk-read.
+ * @c: UBIFS file-system description object
+ * @page1: first page
+ *
+ * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
+ */
+static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
+{
+	pgoff_t offset = page1->index, end_index;
+	struct address_space *mapping = page1->mapping;
+	struct inode *inode = mapping->host;
+	struct ubifs_inode *ui = ubifs_inode(inode);
+	struct bu_info *bu;
+	int err, page_idx, page_cnt, ret = 0, n = 0;
+	loff_t isize;
+
+	bu = kmalloc(sizeof(struct bu_info), GFP_NOFS);
+	if (!bu)
+		return 0;
+
+	bu->buf_len = c->bulk_read_buf_size;
+	bu->buf = kmalloc(bu->buf_len, GFP_NOFS);
+	if (!bu->buf)
+		goto out_free;
+
+	data_key_init(c, &bu->key, inode->i_ino,
+		      offset << UBIFS_BLOCKS_PER_PAGE_SHIFT);
+
+	err = ubifs_tnc_get_bu_keys(c, bu);
+	if (err)
+		goto out_warn;
+
+	if (bu->eof) {
+		/* Turn off bulk-read at the end of the file */
+		ui->read_in_a_row = 1;
+		ui->bulk_read = 0;
+	}
+
+	page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT;
+	if (!page_cnt) {
+		/*
+		 * This happens when there are multiple blocks per page and the
+		 * blocks for the first page we are looking for, are not
+		 * together. If all the pages were like this, bulk-read would
+		 * reduce performance, so we turn it off for a while.
+		 */
+		ui->read_in_a_row = 0;
+		ui->bulk_read = 0;
+		goto out_free;
+	}
+
+	if (bu->cnt) {
+		err = ubifs_tnc_bulk_read(c, bu);
+		if (err)
+			goto out_warn;
+	}
+
+	err = populate_page(c, page1, bu, &n);
+	if (err)
+		goto out_warn;
+
+	unlock_page(page1);
+	ret = 1;
+
+	isize = i_size_read(inode);
+	if (isize == 0)
+		goto out_free;
+	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
+
+	for (page_idx = 1; page_idx < page_cnt; page_idx++) {
+		pgoff_t page_offset = offset + page_idx;
+		struct page *page;
+
+		if (page_offset > end_index)
+			break;
+		page = find_or_create_page(mapping, page_offset,
+					   GFP_NOFS | __GFP_COLD);
+		if (!page)
+			break;
+		if (!PageUptodate(page))
+			err = populate_page(c, page, bu, &n);
+		unlock_page(page);
+		page_cache_release(page);
+		if (err)
+			break;
+	}
+
+	ui->last_page_read = offset + page_idx - 1;
+
+out_free:
+	kfree(bu->buf);
+	kfree(bu);
+	return ret;
+
+out_warn:
+	ubifs_warn("ignoring error %d and skipping bulk-read", err);
+	goto out_free;
+}
+
+/**
+ * ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
+ * @page: page from which to start bulk-read.
+ *
+ * Some flash media are capable of reading sequentially at faster rates. UBIFS
+ * bulk-read facility is designed to take advantage of that, by reading in one
+ * go consecutive data nodes that are also located consecutively in the same
+ * LEB. This function returns %1 if a bulk-read is done and %0 otherwise.
+ */
+static int ubifs_bulk_read(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct ubifs_info *c = inode->i_sb->s_fs_info;
+	struct ubifs_inode *ui = ubifs_inode(inode);
+	pgoff_t index = page->index, last_page_read = ui->last_page_read;
+	int ret = 0;
+
+	ui->last_page_read = index;
+
+	if (!c->bulk_read)
+		return 0;
+	/*
+	 * Bulk-read is protected by ui_mutex, but it is an optimization, so
+	 * don't bother if we cannot lock the mutex.
+	 */
+	if (!mutex_trylock(&ui->ui_mutex))
+		return 0;
+	if (index != last_page_read + 1) {
+		/* Turn off bulk-read if we stop reading sequentially */
+		ui->read_in_a_row = 1;
+		if (ui->bulk_read)
+			ui->bulk_read = 0;
+		goto out_unlock;
+	}
+	if (!ui->bulk_read) {
+		ui->read_in_a_row += 1;
+		if (ui->read_in_a_row < 3)
+			goto out_unlock;
+		/* Three reads in a row, so switch on bulk-read */
+		ui->bulk_read = 1;
+	}
+	ret = ubifs_do_bulk_read(c, page);
+out_unlock:
+	mutex_unlock(&ui->ui_mutex);
+	return ret;
+}
+
 static int ubifs_readpage(struct file *file, struct page *page)
 {
+	if (ubifs_bulk_read(page))
+		return 0;
 	do_readpage(page);
 	unlock_page(page);
 	return 0;
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 8f7476007549..9ee65086f627 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -484,7 +484,7 @@ static inline void key_copy(const struct ubifs_info *c,
  * @key2: the second key to compare
  *
  * This function compares 2 keys and returns %-1 if @key1 is less than
- * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2.
+ * @key2, %0 if the keys are equivalent and %1 if @key1 is greater than @key2.
  */
 static inline int keys_cmp(const struct ubifs_info *c,
 			   const union ubifs_key *key1,
@@ -502,6 +502,26 @@ static inline int keys_cmp(const struct ubifs_info *c,
 	return 0;
 }
 
+/**
+ * keys_eq - determine if keys are equivalent.
+ * @c: UBIFS file-system description object
+ * @key1: the first key to compare
+ * @key2: the second key to compare
+ *
+ * This function compares 2 keys and returns %1 if @key1 is equal to @key2 and
+ * %0 if not.
+ */
+static inline int keys_eq(const struct ubifs_info *c,
+			  const union ubifs_key *key1,
+			  const union ubifs_key *key2)
+{
+	if (key1->u32[0] != key2->u32[0])
+		return 0;
+	if (key1->u32[1] != key2->u32[1])
+		return 0;
+	return 1;
+}
+
 /**
  * is_hash_key - is a key vulnerable to hash collisions.
  * @c: UBIFS file-system description object
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index d87b0cf5f661..b1c57e8ee855 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -401,6 +401,11 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
 	else if (c->mount_opts.unmount_mode == 1)
 		seq_printf(s, ",norm_unmount");
 
+	if (c->mount_opts.bulk_read == 2)
+		seq_printf(s, ",bulk_read");
+	else if (c->mount_opts.bulk_read == 1)
+		seq_printf(s, ",no_bulk_read");
+
 	return 0;
 }
 
@@ -538,6 +543,18 @@ static int init_constants_early(struct ubifs_info *c)
 	 * calculations when reporting free space.
 	 */
 	c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
+	/* Buffer size for bulk-reads */
+	c->bulk_read_buf_size = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ;
+	if (c->bulk_read_buf_size > c->leb_size)
+		c->bulk_read_buf_size = c->leb_size;
+	if (c->bulk_read_buf_size > 128 * 1024) {
+		/* Check if we can kmalloc more than 128KiB */
+		void *try = kmalloc(c->bulk_read_buf_size, GFP_KERNEL);
+
+		kfree(try);
+		if (!try)
+			c->bulk_read_buf_size = 128 * 1024;
+	}
 	return 0;
 }
 
@@ -840,17 +857,23 @@ static int check_volume_empty(struct ubifs_info *c)
  *
  * Opt_fast_unmount: do not run a journal commit before un-mounting
  * Opt_norm_unmount: run a journal commit before un-mounting
+ * Opt_bulk_read: enable bulk-reads
+ * Opt_no_bulk_read: disable bulk-reads
  * Opt_err: just end of array marker
  */
 enum {
 	Opt_fast_unmount,
 	Opt_norm_unmount,
+	Opt_bulk_read,
+	Opt_no_bulk_read,
 	Opt_err,
 };
 
 static match_table_t tokens = {
 	{Opt_fast_unmount, "fast_unmount"},
 	{Opt_norm_unmount, "norm_unmount"},
+	{Opt_bulk_read, "bulk_read"},
+	{Opt_no_bulk_read, "no_bulk_read"},
 	{Opt_err, NULL},
 };
 
@@ -888,6 +911,14 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
 			c->mount_opts.unmount_mode = 1;
 			c->fast_unmount = 0;
 			break;
+		case Opt_bulk_read:
+			c->mount_opts.bulk_read = 2;
+			c->bulk_read = 1;
+			break;
+		case Opt_no_bulk_read:
+			c->mount_opts.bulk_read = 1;
+			c->bulk_read = 0;
+			break;
 		default:
 			ubifs_err("unrecognized mount option \"%s\" "
 				  "or missing value", p);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index ba13c92fdf6a..d279012d8dd5 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -1491,6 +1491,289 @@ out:
 	return err;
 }
 
+/**
+ * ubifs_tnc_get_bu_keys - lookup keys for bulk-read.
+ * @c: UBIFS file-system description object
+ * @bu: bulk-read parameters and results
+ *
+ * Lookup consecutive data node keys for the same inode that reside
+ * consecutively in the same LEB.
+ */
+int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
+{
+	int n, err = 0, lnum = -1, uninitialized_var(offs);
+	int uninitialized_var(len);
+	unsigned int block = key_block(c, &bu->key);
+	struct ubifs_znode *znode;
+
+	bu->cnt = 0;
+	bu->blk_cnt = 0;
+	bu->eof = 0;
+
+	mutex_lock(&c->tnc_mutex);
+	/* Find first key */
+	err = ubifs_lookup_level0(c, &bu->key, &znode, &n);
+	if (err < 0)
+		goto out;
+	if (err) {
+		/* Key found */
+		len = znode->zbranch[n].len;
+		/* The buffer must be big enough for at least 1 node */
+		if (len > bu->buf_len) {
+			err = -EINVAL;
+			goto out;
+		}
+		/* Add this key */
+		bu->zbranch[bu->cnt++] = znode->zbranch[n];
+		bu->blk_cnt += 1;
+		lnum = znode->zbranch[n].lnum;
+		offs = ALIGN(znode->zbranch[n].offs + len, 8);
+	}
+	while (1) {
+		struct ubifs_zbranch *zbr;
+		union ubifs_key *key;
+		unsigned int next_block;
+
+		/* Find next key */
+		err = tnc_next(c, &znode, &n);
+		if (err)
+			goto out;
+		zbr = &znode->zbranch[n];
+		key = &zbr->key;
+		/* See if there is another data key for this file */
+		if (key_inum(c, key) != key_inum(c, &bu->key) ||
+		    key_type(c, key) != UBIFS_DATA_KEY) {
+			err = -ENOENT;
+			goto out;
+		}
+		if (lnum < 0) {
+			/* First key found */
+			lnum = zbr->lnum;
+			offs = ALIGN(zbr->offs + zbr->len, 8);
+			len = zbr->len;
+			if (len > bu->buf_len) {
+				err = -EINVAL;
+				goto out;
+			}
+		} else {
+			/*
+			 * The data nodes must be in consecutive positions in
+			 * the same LEB.
+			 */
+			if (zbr->lnum != lnum || zbr->offs != offs)
+				goto out;
+			offs += ALIGN(zbr->len, 8);
+			len = ALIGN(len, 8) + zbr->len;
+			/* Must not exceed buffer length */
+			if (len > bu->buf_len)
+				goto out;
+		}
+		/* Allow for holes */
+		next_block = key_block(c, key);
+		bu->blk_cnt += (next_block - block - 1);
+		if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
+			goto out;
+		block = next_block;
+		/* Add this key */
+		bu->zbranch[bu->cnt++] = *zbr;
+		bu->blk_cnt += 1;
+		/* See if we have room for more */
+		if (bu->cnt >= UBIFS_MAX_BULK_READ)
+			goto out;
+		if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
+			goto out;
+	}
+out:
+	if (err == -ENOENT) {
+		bu->eof = 1;
+		err = 0;
+	}
+	bu->gc_seq = c->gc_seq;
+	mutex_unlock(&c->tnc_mutex);
+	if (err)
+		return err;
+	/*
+	 * An enormous hole could cause bulk-read to encompass too many
+	 * page cache pages, so limit the number here.
+	 */
+	if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
+		bu->blk_cnt = UBIFS_MAX_BULK_READ;
+	/*
+	 * Ensure that bulk-read covers a whole number of page cache
+	 * pages.
+	 */
+	if (UBIFS_BLOCKS_PER_PAGE == 1 ||
+	    !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1)))
+		return 0;
+	if (bu->eof) {
+		/* At the end of file we can round up */
+		bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1;
+		return 0;
+	}
+	/* Exclude data nodes that do not make up a whole page cache page */
+	block = key_block(c, &bu->key) + bu->blk_cnt;
+	block &= ~(UBIFS_BLOCKS_PER_PAGE - 1);
+	while (bu->cnt) {
+		if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block)
+			break;
+		bu->cnt -= 1;
+	}
+	return 0;
+}
+
+/**
+ * read_wbuf - bulk-read from a LEB with a wbuf.
+ * @wbuf: wbuf that may overlap the read
+ * @buf: buffer into which to read
+ * @len: read length
+ * @lnum: LEB number from which to read
+ * @offs: offset from which to read
+ *
+ * This functions returns %0 on success or a negative error code on failure.
+ */
+static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
+		     int offs)
+{
+	const struct ubifs_info *c = wbuf->c;
+	int rlen, overlap;
+
+	dbg_io("LEB %d:%d, length %d", lnum, offs, len);
+	ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+	ubifs_assert(!(offs & 7) && offs < c->leb_size);
+	ubifs_assert(offs + len <= c->leb_size);
+
+	spin_lock(&wbuf->lock);
+	overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
+	if (!overlap) {
+		/* We may safely unlock the write-buffer and read the data */
+		spin_unlock(&wbuf->lock);
+		return ubi_read(c->ubi, lnum, buf, offs, len);
+	}
+
+	/* Don't read under wbuf */
+	rlen = wbuf->offs - offs;
+	if (rlen < 0)
+		rlen = 0;
+
+	/* Copy the rest from the write-buffer */
+	memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
+	spin_unlock(&wbuf->lock);
+
+	if (rlen > 0)
+		/* Read everything that goes before write-buffer */
+		return ubi_read(c->ubi, lnum, buf, offs, rlen);
+
+	return 0;
+}
+
+/**
+ * validate_data_node - validate data nodes for bulk-read.
+ * @c: UBIFS file-system description object
+ * @buf: buffer containing data node to validate
+ * @zbr: zbranch of data node to validate
+ *
+ * This functions returns %0 on success or a negative error code on failure.
+ */
+static int validate_data_node(struct ubifs_info *c, void *buf,
+			      struct ubifs_zbranch *zbr)
+{
+	union ubifs_key key1;
+	struct ubifs_ch *ch = buf;
+	int err, len;
+
+	if (ch->node_type != UBIFS_DATA_NODE) {
+		ubifs_err("bad node type (%d but expected %d)",
+			  ch->node_type, UBIFS_DATA_NODE);
+		goto out_err;
+	}
+
+	err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0);
+	if (err) {
+		ubifs_err("expected node type %d", UBIFS_DATA_NODE);
+		goto out;
+	}
+
+	len = le32_to_cpu(ch->len);
+	if (len != zbr->len) {
+		ubifs_err("bad node length %d, expected %d", len, zbr->len);
+		goto out_err;
+	}
+
+	/* Make sure the key of the read node is correct */
+	key_read(c, buf + UBIFS_KEY_OFFSET, &key1);
+	if (!keys_eq(c, &zbr->key, &key1)) {
+		ubifs_err("bad key in node at LEB %d:%d",
+			  zbr->lnum, zbr->offs);
+		dbg_tnc("looked for key %s found node's key %s",
+			DBGKEY(&zbr->key), DBGKEY1(&key1));
+		goto out_err;
+	}
+
+	return 0;
+
+out_err:
+	err = -EINVAL;
+out:
+	ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs);
+	dbg_dump_node(c, buf);
+	dbg_dump_stack();
+	return err;
+}
+
+/**
+ * ubifs_tnc_bulk_read - read a number of data nodes in one go.
+ * @c: UBIFS file-system description object
+ * @bu: bulk-read parameters and results
+ *
+ * This functions reads and validates the data nodes that were identified by the
+ * 'ubifs_tnc_get_bu_keys()' function. This functions returns %0 on success,
+ * -EAGAIN to indicate a race with GC, or another negative error code on
+ * failure.
+ */
+int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
+{
+	int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i;
+	struct ubifs_wbuf *wbuf;
+	void *buf;
+
+	len = bu->zbranch[bu->cnt - 1].offs;
+	len += bu->zbranch[bu->cnt - 1].len - offs;
+	if (len > bu->buf_len) {
+		ubifs_err("buffer too small %d vs %d", bu->buf_len, len);
+		return -EINVAL;
+	}
+
+	/* Do the read */
+	wbuf = ubifs_get_wbuf(c, lnum);
+	if (wbuf)
+		err = read_wbuf(wbuf, bu->buf, len, lnum, offs);
+	else
+		err = ubi_read(c->ubi, lnum, bu->buf, offs, len);
+
+	/* Check for a race with GC */
+	if (maybe_leb_gced(c, lnum, bu->gc_seq))
+		return -EAGAIN;
+
+	if (err && err != -EBADMSG) {
+		ubifs_err("failed to read from LEB %d:%d, error %d",
+			  lnum, offs, err);
+		dbg_dump_stack();
+		dbg_tnc("key %s", DBGKEY(&bu->key));
+		return err;
+	}
+
+	/* Validate the nodes read */
+	buf = bu->buf;
+	for (i = 0; i < bu->cnt; i++) {
+		err = validate_data_node(c, buf, &bu->zbranch[i]);
+		if (err)
+			return err;
+		buf = buf + ALIGN(bu->zbranch[i].len, 8);
+	}
+
+	return 0;
+}
+
 /**
  * do_lookup_nm- look up a "hashed" node.
  * @c: UBIFS file-system description object
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index ce8654928aad..8513239ea8a0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -142,6 +142,9 @@
 /* Maximum expected tree height for use by bottom_up_buf */
 #define BOTTOM_UP_HEIGHT 64
 
+/* Maximum number of data nodes to bulk-read */
+#define UBIFS_MAX_BULK_READ 32
+
 /*
  * Lockdep classes for UBIFS inode @ui_mutex.
  */
@@ -329,8 +332,8 @@ struct ubifs_gced_idx_leb {
  * @dirty: non-zero if the inode is dirty
  * @xattr: non-zero if this is an extended attribute inode
  * @ui_mutex: serializes inode write-back with the rest of VFS operations,
- *            serializes "clean <-> dirty" state changes, protects @dirty,
- *            @ui_size, and @xattr_size
+ *            serializes "clean <-> dirty" state changes, serializes bulk-read,
+ *            protects @dirty, @ui_size, and @xattr_size
  * @ui_lock: protects @synced_i_size
  * @synced_i_size: synchronized size of inode, i.e. the value of inode size
  *                 currently stored on the flash; used only for regular file
@@ -338,6 +341,9 @@ struct ubifs_gced_idx_leb {
  * @ui_size: inode size used by UBIFS when writing to flash
  * @flags: inode flags (@UBIFS_COMPR_FL, etc)
  * @compr_type: default compression type used for this inode
+ * @last_page_read: page number of last page read (for bulk read)
+ * @read_in_a_row: number of consecutive pages read in a row (for bulk read)
+ * @bulk_read: indicates whether bulk-read should be used
  * @data_len: length of the data attached to the inode
  * @data: inode's data
  *
@@ -385,6 +391,9 @@ struct ubifs_inode {
 	loff_t ui_size;
 	int flags;
 	int compr_type;
+	pgoff_t last_page_read;
+	pgoff_t read_in_a_row;
+	int bulk_read;
 	int data_len;
 	void *data;
 };
@@ -743,6 +752,28 @@ struct ubifs_znode {
 	struct ubifs_zbranch zbranch[];
 };
 
+/**
+ * struct bu_info - bulk-read information
+ * @key: first data node key
+ * @zbranch: zbranches of data nodes to bulk read
+ * @buf: buffer to read into
+ * @buf_len: buffer length
+ * @gc_seq: GC sequence number to detect races with GC
+ * @cnt: number of data nodes for bulk read
+ * @blk_cnt: number of data blocks including holes
+ * @oef: end of file reached
+ */
+struct bu_info {
+	union ubifs_key key;
+	struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ];
+	void *buf;
+	int buf_len;
+	int gc_seq;
+	int cnt;
+	int blk_cnt;
+	int eof;
+};
+
 /**
  * struct ubifs_node_range - node length range description data structure.
  * @len: fixed node length
@@ -862,9 +893,11 @@ struct ubifs_orphan {
 /**
  * struct ubifs_mount_opts - UBIFS-specific mount options information.
  * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
+ * @bulk_read: enable bulk-reads
  */
 struct ubifs_mount_opts {
 	unsigned int unmount_mode:2;
+	unsigned int bulk_read:2;
 };
 
 /**
@@ -965,6 +998,9 @@ struct ubifs_mount_opts {
  * @old_leb_cnt: count of logical eraseblocks before re-size
  * @ro_media: the underlying UBI volume is read-only
  *
+ * @bulk_read: enable bulk-reads
+ * @bulk_read_buf_size: buffer size for bulk-reads
+ *
  * @dirty_pg_cnt: number of dirty pages (not used)
  * @dirty_zn_cnt: number of dirty znodes
  * @clean_zn_cnt: number of clean znodes
@@ -1205,6 +1241,9 @@ struct ubifs_info {
 	int old_leb_cnt;
 	int ro_media;
 
+	int bulk_read;
+	int bulk_read_buf_size;
+
 	atomic_long_t dirty_pg_cnt;
 	atomic_long_t dirty_zn_cnt;
 	atomic_long_t clean_zn_cnt;
@@ -1490,6 +1529,8 @@ void destroy_old_idx(struct ubifs_info *c);
 int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
 		       int lnum, int offs);
 int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode);
+int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu);
+int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu);
 
 /* tnc_misc.c */
 struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
-- 
GitLab


From 2953e73f1ce4b3284b409aefb9d46bbde6515c37 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Thu, 4 Sep 2008 16:26:00 +0300
Subject: [PATCH 177/895] UBIFS: add no_chk_data_crc mount option

UBIFS read performance can be improved by skipping the CRC
check when data nodes are read.  This option can be used if
the underlying media is considered to be highly reliable.
Note that CRCs are always checked for metadata.

Read speed on Arm platform with OneNAND goes from 19 MiB/s
to 27 MiB/s with data CRC checking disabled.

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 Documentation/filesystems/ubifs.txt |  6 +++++
 fs/ubifs/io.c                       | 11 +++++++---
 fs/ubifs/scan.c                     |  2 +-
 fs/ubifs/super.c                    | 34 ++++++++++++++++++++++++++---
 fs/ubifs/tnc.c                      |  6 ++++-
 fs/ubifs/ubifs.h                    | 11 +++++++++-
 6 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt
index 340512c32506..dd84ea3c10da 100644
--- a/Documentation/filesystems/ubifs.txt
+++ b/Documentation/filesystems/ubifs.txt
@@ -89,6 +89,12 @@ fast_unmount		do not commit on unmount; this option makes
 bulk_read		read more in one go to take advantage of flash
 			media that read faster sequentially
 no_bulk_read (*)	do not bulk-read
+no_chk_data_crc		skip checking of CRCs on data nodes in order to
+			improve read performance. Use this option only
+			if the flash media is highly reliable. The effect
+			of this option is that corruption of the contents
+			of a file can go unnoticed.
+chk_data_crc (*)	do not skip checking CRCs on data nodes
 
 
 Quick usage instructions
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 054363f2b207..40e2790b62ce 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -74,6 +74,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
  * @lnum: logical eraseblock number
  * @offs: offset within the logical eraseblock
  * @quiet: print no messages
+ * @chk_crc: indicates whether to always check the CRC
  *
  * This function checks node magic number and CRC checksum. This function also
  * validates node length to prevent UBIFS from becoming crazy when an attacker
@@ -85,7 +86,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
  * or magic.
  */
 int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
-		     int offs, int quiet)
+		     int offs, int quiet, int chk_crc)
 {
 	int err = -EINVAL, type, node_len;
 	uint32_t crc, node_crc, magic;
@@ -121,6 +122,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
 		   node_len > c->ranges[type].max_len)
 		goto out_len;
 
+	if (!chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc)
+		if (c->no_chk_data_crc)
+			return 0;
+
 	crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
 	node_crc = le32_to_cpu(ch->crc);
 	if (crc != node_crc) {
@@ -722,7 +727,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 		goto out;
 	}
 
-	err = ubifs_check_node(c, buf, lnum, offs, 0);
+	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
 	if (err) {
 		ubifs_err("expected node type %d", type);
 		return err;
@@ -781,7 +786,7 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
 		goto out;
 	}
 
-	err = ubifs_check_node(c, buf, lnum, offs, 0);
+	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
 	if (err) {
 		ubifs_err("expected node type %d", type);
 		return err;
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index acf5c5fffc60..0ed82479b44b 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -87,7 +87,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
 
 	dbg_scan("scanning %s", dbg_ntype(ch->node_type));
 
-	if (ubifs_check_node(c, buf, lnum, offs, quiet))
+	if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
 		return SCANNED_A_CORRUPT_NODE;
 
 	if (ch->node_type == UBIFS_PAD_NODE) {
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index b1c57e8ee855..cf078b5cc88c 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -406,6 +406,11 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
 	else if (c->mount_opts.bulk_read == 1)
 		seq_printf(s, ",no_bulk_read");
 
+	if (c->mount_opts.chk_data_crc == 2)
+		seq_printf(s, ",chk_data_crc");
+	else if (c->mount_opts.chk_data_crc == 1)
+		seq_printf(s, ",no_chk_data_crc");
+
 	return 0;
 }
 
@@ -859,6 +864,8 @@ static int check_volume_empty(struct ubifs_info *c)
  * Opt_norm_unmount: run a journal commit before un-mounting
  * Opt_bulk_read: enable bulk-reads
  * Opt_no_bulk_read: disable bulk-reads
+ * Opt_chk_data_crc: check CRCs when reading data nodes
+ * Opt_no_chk_data_crc: do not check CRCs when reading data nodes
  * Opt_err: just end of array marker
  */
 enum {
@@ -866,6 +873,8 @@ enum {
 	Opt_norm_unmount,
 	Opt_bulk_read,
 	Opt_no_bulk_read,
+	Opt_chk_data_crc,
+	Opt_no_chk_data_crc,
 	Opt_err,
 };
 
@@ -874,6 +883,8 @@ static match_table_t tokens = {
 	{Opt_norm_unmount, "norm_unmount"},
 	{Opt_bulk_read, "bulk_read"},
 	{Opt_no_bulk_read, "no_bulk_read"},
+	{Opt_chk_data_crc, "chk_data_crc"},
+	{Opt_no_chk_data_crc, "no_chk_data_crc"},
 	{Opt_err, NULL},
 };
 
@@ -919,6 +930,14 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
 			c->mount_opts.bulk_read = 1;
 			c->bulk_read = 0;
 			break;
+		case Opt_chk_data_crc:
+			c->mount_opts.chk_data_crc = 2;
+			c->no_chk_data_crc = 0;
+			break;
+		case Opt_no_chk_data_crc:
+			c->mount_opts.chk_data_crc = 1;
+			c->no_chk_data_crc = 1;
+			break;
 		default:
 			ubifs_err("unrecognized mount option \"%s\" "
 				  "or missing value", p);
@@ -1027,6 +1046,8 @@ static int mount_ubifs(struct ubifs_info *c)
 			goto out_free;
 	}
 
+	c->always_chk_crc = 1;
+
 	err = ubifs_read_superblock(c);
 	if (err)
 		goto out_free;
@@ -1168,6 +1189,8 @@ static int mount_ubifs(struct ubifs_info *c)
 	if (err)
 		goto out_infos;
 
+	c->always_chk_crc = 0;
+
 	ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
 		  c->vi.ubi_num, c->vi.vol_id, c->vi.name);
 	if (mounted_read_only)
@@ -1313,6 +1336,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 
 	mutex_lock(&c->umount_mutex);
 	c->remounting_rw = 1;
+	c->always_chk_crc = 1;
 
 	/* Check for enough free space */
 	if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
@@ -1381,13 +1405,15 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 		c->bgt = NULL;
 		ubifs_err("cannot spawn \"%s\", error %d",
 			  c->bgt_name, err);
-		return err;
+		goto out;
 	}
 	wake_up_process(c->bgt);
 
 	c->orph_buf = vmalloc(c->leb_size);
-	if (!c->orph_buf)
-		return -ENOMEM;
+	if (!c->orph_buf) {
+		err = -ENOMEM;
+		goto out;
+	}
 
 	/* Check for enough log space */
 	lnum = c->lhead_lnum + 1;
@@ -1414,6 +1440,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 	dbg_gen("re-mounted read-write");
 	c->vfs_sb->s_flags &= ~MS_RDONLY;
 	c->remounting_rw = 0;
+	c->always_chk_crc = 0;
 	mutex_unlock(&c->umount_mutex);
 	return 0;
 
@@ -1429,6 +1456,7 @@ out:
 	c->ileb_buf = NULL;
 	ubifs_lpt_free(c, 1);
 	c->remounting_rw = 0;
+	c->always_chk_crc = 0;
 	mutex_unlock(&c->umount_mutex);
 	return err;
 }
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index d279012d8dd5..66dc57100bdf 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -470,6 +470,10 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
 	if (node_len != len)
 		return 0;
 
+	if (type == UBIFS_DATA_NODE && !c->always_chk_crc)
+		if (c->no_chk_data_crc)
+			return 0;
+
 	crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
 	node_crc = le32_to_cpu(ch->crc);
 	if (crc != node_crc)
@@ -1687,7 +1691,7 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
 		goto out_err;
 	}
 
-	err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0);
+	err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
 	if (err) {
 		ubifs_err("expected node type %d", UBIFS_DATA_NODE);
 		goto out;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 8513239ea8a0..d6ae3f7b2b05 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -894,10 +894,12 @@ struct ubifs_orphan {
  * struct ubifs_mount_opts - UBIFS-specific mount options information.
  * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
  * @bulk_read: enable bulk-reads
+ * @chk_data_crc: check CRCs when reading data nodes
  */
 struct ubifs_mount_opts {
 	unsigned int unmount_mode:2;
 	unsigned int bulk_read:2;
+	unsigned int chk_data_crc:2;
 };
 
 /**
@@ -1001,6 +1003,9 @@ struct ubifs_mount_opts {
  * @bulk_read: enable bulk-reads
  * @bulk_read_buf_size: buffer size for bulk-reads
  *
+ * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
+ *                   recovery)
+ *
  * @dirty_pg_cnt: number of dirty pages (not used)
  * @dirty_zn_cnt: number of dirty znodes
  * @clean_zn_cnt: number of clean znodes
@@ -1138,6 +1143,7 @@ struct ubifs_mount_opts {
  * @rcvrd_mst_node: recovered master node to write when mounting ro to rw
  * @size_tree: inode size information for recovery
  * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
+ * @always_chk_crc: always check CRCs (while mounting and remounting rw)
  * @mount_opts: UBIFS-specific mount options
  *
  * @dbg_buf: a buffer of LEB size used for debugging purposes
@@ -1244,6 +1250,8 @@ struct ubifs_info {
 	int bulk_read;
 	int bulk_read_buf_size;
 
+	int no_chk_data_crc;
+
 	atomic_long_t dirty_pg_cnt;
 	atomic_long_t dirty_zn_cnt;
 	atomic_long_t clean_zn_cnt;
@@ -1374,6 +1382,7 @@ struct ubifs_info {
 	struct ubifs_mst_node *rcvrd_mst_node;
 	struct rb_root size_tree;
 	int remounting_rw;
+	int always_chk_crc;
 	struct ubifs_mount_opts mount_opts;
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
@@ -1416,7 +1425,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
 		     int offs, int dtype);
 int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
-		     int offs, int quiet);
+		     int offs, int quiet, int chk_crc);
 void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
 void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
 int ubifs_io_init(struct ubifs_info *c);
-- 
GitLab


From 2242c689ecc390fb4719f595751351d1ecc5c409 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Fri, 5 Sep 2008 11:56:05 +0300
Subject: [PATCH 178/895] UBIFS: improve znode splitting rules

When inserting into a full znode it is split into two
znodes.  Because data node keys are usually consecutive,
it is better to try to keep them together.  This patch
does a better job of that.

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 fs/ubifs/tnc.c | 54 ++++++++++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 66dc57100bdf..e0878a431b9c 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -1962,7 +1962,7 @@ static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode,
 {
 	struct ubifs_znode *zn, *zi, *zp;
 	int i, keep, move, appending = 0;
-	union ubifs_key *key = &zbr->key;
+	union ubifs_key *key = &zbr->key, *key1;
 
 	ubifs_assert(n >= 0 && n <= c->fanout);
 
@@ -2003,20 +2003,33 @@ again:
 	zn->level = znode->level;
 
 	/* Decide where to split */
-	if (znode->level == 0 && n == c->fanout &&
-	    key_type(c, key) == UBIFS_DATA_KEY) {
-		union ubifs_key *key1;
-
-		/*
-		 * If this is an inode which is being appended - do not split
-		 * it because no other zbranches can be inserted between
-		 * zbranches of consecutive data nodes anyway.
-		 */
-		key1 = &znode->zbranch[n - 1].key;
-		if (key_inum(c, key1) == key_inum(c, key) &&
-		    key_type(c, key1) == UBIFS_DATA_KEY &&
-		    key_block(c, key1) == key_block(c, key) - 1)
-			appending = 1;
+	if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) {
+		/* Try not to split consecutive data keys */
+		if (n == c->fanout) {
+			key1 = &znode->zbranch[n - 1].key;
+			if (key_inum(c, key1) == key_inum(c, key) &&
+			    key_type(c, key1) == UBIFS_DATA_KEY)
+				appending = 1;
+		} else
+			goto check_split;
+	} else if (appending && n != c->fanout) {
+		/* Try not to split consecutive data keys */
+		appending = 0;
+check_split:
+		if (n >= (c->fanout + 1) / 2) {
+			key1 = &znode->zbranch[0].key;
+			if (key_inum(c, key1) == key_inum(c, key) &&
+			    key_type(c, key1) == UBIFS_DATA_KEY) {
+				key1 = &znode->zbranch[n].key;
+				if (key_inum(c, key1) != key_inum(c, key) ||
+				    key_type(c, key1) != UBIFS_DATA_KEY) {
+					keep = n;
+					move = c->fanout - keep;
+					zi = znode;
+					goto do_split;
+				}
+			}
+		}
 	}
 
 	if (appending) {
@@ -2046,6 +2059,8 @@ again:
 			zbr->znode->parent = zn;
 	}
 
+do_split:
+
 	__set_bit(DIRTY_ZNODE, &zn->flags);
 	atomic_long_inc(&c->dirty_zn_cnt);
 
@@ -2072,14 +2087,11 @@ again:
 
 	/* Insert new znode (produced by spitting) into the parent */
 	if (zp) {
-		i = n;
+		if (n == 0 && zi == znode && znode->iip == 0)
+			correct_parent_keys(c, znode);
+
 		/* Locate insertion point */
 		n = znode->iip + 1;
-		if (appending && n != c->fanout)
-			appending = 0;
-
-		if (i == 0 && zi == znode && znode->iip == 0)
-			correct_parent_keys(c, znode);
 
 		/* Tail recursion */
 		zbr->key = zn->zbranch[0].key;
-- 
GitLab


From ccb3eba72453a3b5aa37dda02e3a690449e3d229 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 8 Sep 2008 16:07:01 +0300
Subject: [PATCH 179/895] UBIFS: check data CRC when in error state

When UBIFS switches to R/O mode because of an error,
it is reasonable to enable data CRC checking.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/io.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 40e2790b62ce..01682713af69 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -62,6 +62,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
 {
 	if (!c->ro_media) {
 		c->ro_media = 1;
+		c->no_chk_data_crc = 0;
 		ubifs_warn("switched to read-only mode, error %d", err);
 		dbg_dump_stack();
 	}
-- 
GitLab


From 625bf371c1522764fc1cf2981b041c5f9a19e894 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 8 Sep 2008 16:13:38 +0300
Subject: [PATCH 180/895] UBIFS: use bit-fields when possible

The "bulk_read" and "no_chk_data_crc" have only 2 values -
0 and 1. We already have bit-fields in corresponding data
structers, so make "bulk_read" and "no_chk_data_crc"
bit-fields as well.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/ubifs.h | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index d6ae3f7b2b05..542cbafe76e1 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -331,6 +331,7 @@ struct ubifs_gced_idx_leb {
  *               this inode
  * @dirty: non-zero if the inode is dirty
  * @xattr: non-zero if this is an extended attribute inode
+ * @bulk_read: non-zero if bulk-read should be used
  * @ui_mutex: serializes inode write-back with the rest of VFS operations,
  *            serializes "clean <-> dirty" state changes, serializes bulk-read,
  *            protects @dirty, @ui_size, and @xattr_size
@@ -343,7 +344,6 @@ struct ubifs_gced_idx_leb {
  * @compr_type: default compression type used for this inode
  * @last_page_read: page number of last page read (for bulk read)
  * @read_in_a_row: number of consecutive pages read in a row (for bulk read)
- * @bulk_read: indicates whether bulk-read should be used
  * @data_len: length of the data attached to the inode
  * @data: inode's data
  *
@@ -385,6 +385,7 @@ struct ubifs_inode {
 	unsigned int xattr_names;
 	unsigned int dirty:1;
 	unsigned int xattr:1;
+	unsigned int bulk_read:1;
 	struct mutex ui_mutex;
 	spinlock_t ui_lock;
 	loff_t synced_i_size;
@@ -393,7 +394,6 @@ struct ubifs_inode {
 	int compr_type;
 	pgoff_t last_page_read;
 	pgoff_t read_in_a_row;
-	int bulk_read;
 	int data_len;
 	void *data;
 };
@@ -940,6 +940,7 @@ struct ubifs_mount_opts {
  * @cmt_state: commit state
  * @cs_lock: commit state lock
  * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
+ *
  * @fast_unmount: do not run journal commit before un-mounting
  * @big_lpt: flag that LPT is too big to write whole during commit
  * @check_lpt_free: flag that indicates LPT GC may be needed
@@ -947,6 +948,9 @@ struct ubifs_mount_opts {
  *           optimization)
  * @nospace_rp: the same as @nospace, but additionally means that even reserved
  *              pool is full
+ * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
+ *                   recovery)
+ * @bulk_read: enable bulk-reads
  *
  * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
  *             @calc_idx_sz
@@ -970,6 +974,7 @@ struct ubifs_mount_opts {
  * @mst_node: master node
  * @mst_offs: offset of valid master node
  * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
+ * @bulk_read_buf_size: buffer size for bulk-reads
  *
  * @log_lebs: number of logical eraseblocks in the log
  * @log_bytes: log size in bytes
@@ -1000,12 +1005,6 @@ struct ubifs_mount_opts {
  * @old_leb_cnt: count of logical eraseblocks before re-size
  * @ro_media: the underlying UBI volume is read-only
  *
- * @bulk_read: enable bulk-reads
- * @bulk_read_buf_size: buffer size for bulk-reads
- *
- * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
- *                   recovery)
- *
  * @dirty_pg_cnt: number of dirty pages (not used)
  * @dirty_zn_cnt: number of dirty znodes
  * @clean_zn_cnt: number of clean znodes
@@ -1188,11 +1187,14 @@ struct ubifs_info {
 	int cmt_state;
 	spinlock_t cs_lock;
 	wait_queue_head_t cmt_wq;
+
 	unsigned int fast_unmount:1;
 	unsigned int big_lpt:1;
 	unsigned int check_lpt_free:1;
 	unsigned int nospace:1;
 	unsigned int nospace_rp:1;
+	unsigned int no_chk_data_crc:1;
+	unsigned int bulk_read:1;
 
 	struct mutex tnc_mutex;
 	struct ubifs_zbranch zroot;
@@ -1217,6 +1219,7 @@ struct ubifs_info {
 	struct ubifs_mst_node *mst_node;
 	int mst_offs;
 	struct mutex mst_mutex;
+	int bulk_read_buf_size;
 
 	int log_lebs;
 	long long log_bytes;
@@ -1247,11 +1250,6 @@ struct ubifs_info {
 	int old_leb_cnt;
 	int ro_media;
 
-	int bulk_read;
-	int bulk_read_buf_size;
-
-	int no_chk_data_crc;
-
 	atomic_long_t dirty_pg_cnt;
 	atomic_long_t dirty_zn_cnt;
 	atomic_long_t clean_zn_cnt;
-- 
GitLab


From 2094c334fdebbcceddf21f97cb16b144707af56e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Fri, 5 Sep 2008 15:20:04 +0300
Subject: [PATCH 181/895] UBIFS: correct key comparison

The comparison was working, but more by accident than design.

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 fs/ubifs/tnc_misc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index a25c1cc1f8d9..b48db999903e 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -480,8 +480,8 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 	}
 
 	/* Make sure the key of the read node is correct */
-	key_read(c, key, &key1);
-	if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) {
+	key_read(c, node + UBIFS_KEY_OFFSET, &key1);
+	if (!keys_eq(c, key, &key1)) {
 		ubifs_err("bad key in node at LEB %d:%d",
 			  zbr->lnum, zbr->offs);
 		dbg_tnc("looked for key %s found node's key %s",
-- 
GitLab


From ed382d5898ccfc3d7ba775be2f1596f6a1547935 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Fri, 5 Sep 2008 16:17:42 +0300
Subject: [PATCH 182/895] UBIFS: ensure data read beyond i_size is zeroed out
 correctly

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 fs/ubifs/file.c        | 10 ++++++++--
 fs/ubifs/ubifs-media.h |  1 -
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index cdcfe95cbfb4..2f20a49ba34e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -147,6 +147,12 @@ static int do_readpage(struct page *page)
 				err = ret;
 				if (err != -ENOENT)
 					break;
+			} else if (block + 1 == beyond) {
+				int dlen = le32_to_cpu(dn->size);
+				int ilen = i_size & (UBIFS_BLOCK_SIZE - 1);
+
+				if (ilen && ilen < dlen)
+					memset(addr + ilen, 0, dlen - ilen);
 			}
 		}
 		if (++i >= UBIFS_BLOCKS_PER_PAGE)
@@ -601,7 +607,7 @@ static int populate_page(struct ubifs_info *c, struct page *page,
 
 	addr = zaddr = kmap(page);
 
-	end_index = (i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
 	if (!i_size || page->index > end_index) {
 		memset(addr, 0, PAGE_CACHE_SIZE);
 		goto out_hole;
@@ -649,7 +655,7 @@ static int populate_page(struct ubifs_info *c, struct page *page,
 	if (end_index == page->index) {
 		int len = i_size & (PAGE_CACHE_SIZE - 1);
 
-		if (len < read)
+		if (len && len < read)
 			memset(zaddr + len, 0, read - len);
 	}
 
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index a9ecbd9af20d..0b378042a3a2 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -75,7 +75,6 @@
  */
 #define UBIFS_BLOCK_SIZE  4096
 #define UBIFS_BLOCK_SHIFT 12
-#define UBIFS_BLOCK_MASK  0x00000FFF
 
 /* UBIFS padding byte pattern (must not be first or last byte of node magic) */
 #define UBIFS_PADDING_BYTE 0xCE
-- 
GitLab


From ba60ecabf067c8ecbde47af4d99b74ee57234d8e Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 8 Sep 2008 16:38:01 +0300
Subject: [PATCH 183/895] UBIFS: fix races in bit-fields

We cannot store bit-fields together if the processes which
change them may race, unless we serialize them.

Thus, move the nospc and nospc_rp bit-fields eway from
the mount option/constant bit-fields, to avoid races.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/ubifs.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 542cbafe76e1..c3ac5a8221ff 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -334,7 +334,7 @@ struct ubifs_gced_idx_leb {
  * @bulk_read: non-zero if bulk-read should be used
  * @ui_mutex: serializes inode write-back with the rest of VFS operations,
  *            serializes "clean <-> dirty" state changes, serializes bulk-read,
- *            protects @dirty, @ui_size, and @xattr_size
+ *            protects @dirty, @bulk_read, @ui_size, and @xattr_size
  * @ui_lock: protects @synced_i_size
  * @synced_i_size: synchronized size of inode, i.e. the value of inode size
  *                 currently stored on the flash; used only for regular file
@@ -944,10 +944,6 @@ struct ubifs_mount_opts {
  * @fast_unmount: do not run journal commit before un-mounting
  * @big_lpt: flag that LPT is too big to write whole during commit
  * @check_lpt_free: flag that indicates LPT GC may be needed
- * @nospace: non-zero if the file-system does not have flash space (used as
- *           optimization)
- * @nospace_rp: the same as @nospace, but additionally means that even reserved
- *              pool is full
  * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
  *                   recovery)
  * @bulk_read: enable bulk-reads
@@ -1017,12 +1013,17 @@ struct ubifs_mount_opts {
  *                        but which still have to be taken into account because
  *                        the index has not been committed so far
  * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
- *              @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, and @lst;
+ *              @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
+ *              @nospace, and @nospace_rp;
  * @min_idx_lebs: minimum number of LEBs required for the index
  * @old_idx_sz: size of index on flash
  * @calc_idx_sz: temporary variable which is used to calculate new index size
  *               (contains accurate new index size at end of TNC commit start)
  * @lst: lprops statistics
+ * @nospace: non-zero if the file-system does not have flash space (used as
+ *           optimization)
+ * @nospace_rp: the same as @nospace, but additionally means that even reserved
+ *              pool is full
  *
  * @page_budget: budget for a page
  * @inode_budget: budget for an inode
@@ -1191,8 +1192,6 @@ struct ubifs_info {
 	unsigned int fast_unmount:1;
 	unsigned int big_lpt:1;
 	unsigned int check_lpt_free:1;
-	unsigned int nospace:1;
-	unsigned int nospace_rp:1;
 	unsigned int no_chk_data_crc:1;
 	unsigned int bulk_read:1;
 
@@ -1263,6 +1262,8 @@ struct ubifs_info {
 	unsigned long long old_idx_sz;
 	unsigned long long calc_idx_sz;
 	struct ubifs_lp_stats lst;
+	unsigned int nospace:1;
+	unsigned int nospace_rp:1;
 
 	int page_budget;
 	int inode_budget;
-- 
GitLab


From be61678b1d97c9b47bb839beb3c3f48da36af072 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 8 Sep 2008 18:08:39 +0300
Subject: [PATCH 184/895] UBIFS: fix commentary

Znode may refer both data nodes and indexing nodes

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/ubifs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index c3ac5a8221ff..49b06c9f675a 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -707,8 +707,8 @@ struct ubifs_jhead {
  * struct ubifs_zbranch - key/coordinate/length branch stored in znodes.
  * @key: key
  * @znode: znode address in memory
- * @lnum: LEB number of the indexing node
- * @offs: offset of the indexing node within @lnum
+ * @lnum: LEB number of the target node (indexing node or data node)
+ * @offs: target node offset within @lnum
  * @len: target node length
  */
 struct ubifs_zbranch {
-- 
GitLab


From b5e426e9a4a8d4ccc65a6849420d47f87c080d5d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 9 Sep 2008 11:20:35 +0300
Subject: [PATCH 185/895] UBIFS: update dbg_dump_inode

'dbg_dump_inode()' is quite outdated and does not print all
the fileds.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/debug.c | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index d7f7645779f2..32071ec87cd7 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -222,30 +222,38 @@ void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
 {
 	const struct ubifs_inode *ui = ubifs_inode(inode);
 
-	printk(KERN_DEBUG "inode      %lu\n", inode->i_ino);
-	printk(KERN_DEBUG "size       %llu\n",
+	printk(KERN_DEBUG "Dump in-memory inode:");
+	printk(KERN_DEBUG "\tinode          %lu\n", inode->i_ino);
+	printk(KERN_DEBUG "\tsize           %llu\n",
 	       (unsigned long long)i_size_read(inode));
-	printk(KERN_DEBUG "nlink      %u\n", inode->i_nlink);
-	printk(KERN_DEBUG "uid        %u\n", (unsigned int)inode->i_uid);
-	printk(KERN_DEBUG "gid        %u\n", (unsigned int)inode->i_gid);
-	printk(KERN_DEBUG "atime      %u.%u\n",
+	printk(KERN_DEBUG "\tnlink          %u\n", inode->i_nlink);
+	printk(KERN_DEBUG "\tuid            %u\n", (unsigned int)inode->i_uid);
+	printk(KERN_DEBUG "\tgid            %u\n", (unsigned int)inode->i_gid);
+	printk(KERN_DEBUG "\tatime          %u.%u\n",
 	       (unsigned int)inode->i_atime.tv_sec,
 	       (unsigned int)inode->i_atime.tv_nsec);
-	printk(KERN_DEBUG "mtime      %u.%u\n",
+	printk(KERN_DEBUG "\tmtime          %u.%u\n",
 	       (unsigned int)inode->i_mtime.tv_sec,
 	       (unsigned int)inode->i_mtime.tv_nsec);
-	printk(KERN_DEBUG "ctime       %u.%u\n",
+	printk(KERN_DEBUG "\tctime          %u.%u\n",
 	       (unsigned int)inode->i_ctime.tv_sec,
 	       (unsigned int)inode->i_ctime.tv_nsec);
-	printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum);
-	printk(KERN_DEBUG "xattr_size  %u\n", ui->xattr_size);
-	printk(KERN_DEBUG "xattr_cnt   %u\n", ui->xattr_cnt);
-	printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names);
-	printk(KERN_DEBUG "dirty       %u\n", ui->dirty);
-	printk(KERN_DEBUG "xattr       %u\n", ui->xattr);
-	printk(KERN_DEBUG "flags       %d\n", ui->flags);
-	printk(KERN_DEBUG "compr_type  %d\n", ui->compr_type);
-	printk(KERN_DEBUG "data_len    %d\n", ui->data_len);
+	printk(KERN_DEBUG "\tcreat_sqnum    %llu\n", ui->creat_sqnum);
+	printk(KERN_DEBUG "\txattr_size     %u\n", ui->xattr_size);
+	printk(KERN_DEBUG "\txattr_cnt      %u\n", ui->xattr_cnt);
+	printk(KERN_DEBUG "\txattr_names    %u\n", ui->xattr_names);
+	printk(KERN_DEBUG "\tdirty          %u\n", ui->dirty);
+	printk(KERN_DEBUG "\txattr          %u\n", ui->xattr);
+	printk(KERN_DEBUG "\tbulk_read      %u\n", ui->xattr);
+	printk(KERN_DEBUG "\tsynced_i_size  %llu\n",
+	       (unsigned long long)ui->synced_i_size);
+	printk(KERN_DEBUG "\tui_size        %llu\n",
+	       (unsigned long long)ui->ui_size);
+	printk(KERN_DEBUG "\tflags          %d\n", ui->flags);
+	printk(KERN_DEBUG "\tcompr_type     %d\n", ui->compr_type);
+	printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read);
+	printk(KERN_DEBUG "\tread_in_a_row  %lu\n", ui->read_in_a_row);
+	printk(KERN_DEBUG "\tdata_len       %d\n", ui->data_len);
 }
 
 void dbg_dump_node(const struct ubifs_info *c, const void *node)
-- 
GitLab


From af2eb5637b88f7b8edf295ad3880706c5c30c324 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 9 Sep 2008 12:23:50 +0300
Subject: [PATCH 186/895] UBIFS: correct comment for commit_on_unmount

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/super.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index cf078b5cc88c..dae1c62156c6 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1465,12 +1465,9 @@ out:
  * commit_on_unmount - commit the journal when un-mounting.
  * @c: UBIFS file-system description object
  *
- * This function is called during un-mounting and it commits the journal unless
- * the "fast unmount" mode is enabled. It also avoids committing the journal if
- * it contains too few data.
- *
- * Sometimes recovery requires the journal to be committed at least once, and
- * this function takes care about this.
+ * This function is called during un-mounting and re-mounting, and it commits
+ * the journal unless the "fast unmount" mode is enabled. It also avoids
+ * committing the journal if it contains too few data.
  */
 static void commit_on_unmount(struct ubifs_info *c)
 {
-- 
GitLab


From 403e12ab30ab160e1015bd998f0abc1865c574e0 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 9 Sep 2008 12:31:37 +0300
Subject: [PATCH 187/895] UBIFS: commit on sync_fs

Commit the journal when the FS is sync'ed. This will make
statfs provide better free space report. And we anyway
advice our users to sync the FS if they want better statfs
report.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/super.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index dae1c62156c6..7e1f3efdf632 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -418,6 +418,7 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
 {
 	struct ubifs_info *c = sb->s_fs_info;
 	int i, ret = 0, err;
+	long long bud_bytes;
 
 	if (c->jheads)
 		for (i = 0; i < c->jhead_cnt; i++) {
@@ -425,6 +426,17 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
 			if (err && !ret)
 				ret = err;
 		}
+
+	/* Commit the journal unless it has too few data */
+	spin_lock(&c->buds_lock);
+	bud_bytes = c->bud_bytes;
+	spin_unlock(&c->buds_lock);
+	if (bud_bytes > c->leb_size) {
+		err = ubifs_run_commit(c);
+		if (err)
+			return err;
+	}
+
 	/*
 	 * We ought to call sync for c->ubi but it does not have one. If it had
 	 * it would in turn call mtd->sync, however mtd operations are
-- 
GitLab


From bed79935de9a658678f44b88a097367d3b26429f Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Thu, 11 Sep 2008 14:25:44 +0300
Subject: [PATCH 188/895] UBIFS: allow for sync_fs when read-only

sync_fs can be called even if the file system is mounted
read-only.  Ensure the commit is not run in that case.

Reported-by: Zoltan Sogor <weth@inf.u-szeged.hu>
Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 fs/ubifs/super.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 7e1f3efdf632..7fd759dde796 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -420,21 +420,22 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
 	int i, ret = 0, err;
 	long long bud_bytes;
 
-	if (c->jheads)
+	if (c->jheads) {
 		for (i = 0; i < c->jhead_cnt; i++) {
 			err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
 			if (err && !ret)
 				ret = err;
 		}
 
-	/* Commit the journal unless it has too few data */
-	spin_lock(&c->buds_lock);
-	bud_bytes = c->bud_bytes;
-	spin_unlock(&c->buds_lock);
-	if (bud_bytes > c->leb_size) {
-		err = ubifs_run_commit(c);
-		if (err)
-			return err;
+		/* Commit the journal unless it has too little data */
+		spin_lock(&c->buds_lock);
+		bud_bytes = c->bud_bytes;
+		spin_unlock(&c->buds_lock);
+		if (bud_bytes > c->leb_size) {
+			err = ubifs_run_commit(c);
+			if (err)
+				return err;
+		}
 	}
 
 	/*
-- 
GitLab


From 46773be497a05010a2873e9ad96d739fb352c1e4 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Thu, 11 Sep 2008 12:57:49 +0300
Subject: [PATCH 189/895] UBIFS: improve garbage collection

Make garbage collection try to keep data nodes from the same
inode together and in ascending order.  This improves
performance when reading those nodes especially when bulk-read
is used.

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 fs/ubifs/gc.c | 82 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 72 insertions(+), 10 deletions(-)

diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index a6b633a5629f..0bef6501d58a 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -95,6 +95,48 @@ static int switch_gc_head(struct ubifs_info *c)
 	return err;
 }
 
+/**
+ * joinup - bring data nodes for an inode together.
+ * @c: UBIFS file-system description object
+ * @sleb: describes scanned LEB
+ * @inum: inode number
+ * @blk: block number
+ * @data: list to which to add data nodes
+ *
+ * This function looks at the first few nodes in the scanned LEB @sleb and adds
+ * them to @data if they are data nodes from @inum and have a larger block
+ * number than @blk. This function returns %0 on success and a negative error
+ * code on failure.
+ */
+static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum,
+		  unsigned int blk, struct list_head *data)
+{
+	int err, cnt = 6, lnum = sleb->lnum, offs;
+	struct ubifs_scan_node *snod, *tmp;
+	union ubifs_key *key;
+
+	list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
+		key = &snod->key;
+		if (key_inum(c, key) == inum &&
+		    key_type(c, key) == UBIFS_DATA_KEY &&
+		    key_block(c, key) > blk) {
+			offs = snod->offs;
+			err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0);
+			if (err < 0)
+				return err;
+			list_del(&snod->list);
+			if (err) {
+				list_add_tail(&snod->list, data);
+				blk = key_block(c, key);
+			} else
+				kfree(snod);
+			cnt = 6;
+		} else if (--cnt == 0)
+			break;
+	}
+	return 0;
+}
+
 /**
  * move_nodes - move nodes.
  * @c: UBIFS file-system description object
@@ -116,16 +158,21 @@ static int switch_gc_head(struct ubifs_info *c)
 static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 {
 	struct ubifs_scan_node *snod, *tmp;
-	struct list_head large, medium, small;
+	struct list_head data, large, medium, small;
 	struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
 	int avail, err, min = INT_MAX;
+	unsigned int blk = 0;
+	ino_t inum = 0;
 
+	INIT_LIST_HEAD(&data);
 	INIT_LIST_HEAD(&large);
 	INIT_LIST_HEAD(&medium);
 	INIT_LIST_HEAD(&small);
 
-	list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
-		struct list_head *lst;
+	while (!list_empty(&sleb->nodes)) {
+		struct list_head *lst = sleb->nodes.next;
+
+		snod = list_entry(lst, struct ubifs_scan_node, list);
 
 		ubifs_assert(snod->type != UBIFS_IDX_NODE);
 		ubifs_assert(snod->type != UBIFS_REF_NODE);
@@ -136,7 +183,6 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 		if (err < 0)
 			goto out;
 
-		lst = &snod->list;
 		list_del(lst);
 		if (!err) {
 			/* The node is obsolete, remove it from the list */
@@ -145,15 +191,30 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 		}
 
 		/*
-		 * Sort the list of nodes so that large nodes go first, and
-		 * small nodes go last.
+		 * Sort the list of nodes so that data nodes go first, large
+		 * nodes go second, and small nodes go last.
 		 */
-		if (snod->len > MEDIUM_NODE_WM)
-			list_add(lst, &large);
+		if (key_type(c, &snod->key) == UBIFS_DATA_KEY) {
+			if (inum != key_inum(c, &snod->key)) {
+				if (inum) {
+					/*
+					 * Try to move data nodes from the same
+					 * inode together.
+					 */
+					err = joinup(c, sleb, inum, blk, &data);
+					if (err)
+						goto out;
+				}
+				inum = key_inum(c, &snod->key);
+				blk = key_block(c, &snod->key);
+			}
+			list_add_tail(lst, &data);
+		} else if (snod->len > MEDIUM_NODE_WM)
+			list_add_tail(lst, &large);
 		else if (snod->len > SMALL_NODE_WM)
-			list_add(lst, &medium);
+			list_add_tail(lst, &medium);
 		else
-			list_add(lst, &small);
+			list_add_tail(lst, &small);
 
 		/* And find the smallest node */
 		if (snod->len < min)
@@ -164,6 +225,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 	 * Join the tree lists so that we'd have one roughly sorted list
 	 * ('large' will be the head of the joined list).
 	 */
+	list_splice(&data, &large);
 	list_splice(&medium, large.prev);
 	list_splice(&small, large.prev);
 
-- 
GitLab


From 5c0013c16bd2ee08ffef1a1365622556a57218f5 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Fri, 12 Sep 2008 10:34:51 +0300
Subject: [PATCH 190/895] UBIFS: fix bulk-read handling uptodate pages

Bulk-read skips uptodate pages but this was putting its
array index out and causing it to treat subsequent pages
as holes.

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 fs/ubifs/file.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 2f20a49ba34e..51cf511d44d9 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -595,7 +595,7 @@ out:
 static int populate_page(struct ubifs_info *c, struct page *page,
 			 struct bu_info *bu, int *n)
 {
-	int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 1, read = 0;
+	int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0;
 	struct inode *inode = page->mapping->host;
 	loff_t i_size = i_size_read(inode);
 	unsigned int page_block;
@@ -609,6 +609,7 @@ static int populate_page(struct ubifs_info *c, struct page *page,
 
 	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
 	if (!i_size || page->index > end_index) {
+		hole = 1;
 		memset(addr, 0, PAGE_CACHE_SIZE);
 		goto out_hole;
 	}
@@ -617,10 +618,10 @@ static int populate_page(struct ubifs_info *c, struct page *page,
 	while (1) {
 		int err, len, out_len, dlen;
 
-		if (nn >= bu->cnt ||
-		    key_block(c, &bu->zbranch[nn].key) != page_block)
+		if (nn >= bu->cnt) {
+			hole = 1;
 			memset(addr, 0, UBIFS_BLOCK_SIZE);
-		else {
+		} else if (key_block(c, &bu->zbranch[nn].key) == page_block) {
 			struct ubifs_data_node *dn;
 
 			dn = bu->buf + (bu->zbranch[nn].offs - offs);
@@ -643,8 +644,13 @@ static int populate_page(struct ubifs_info *c, struct page *page,
 				memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
 
 			nn += 1;
-			hole = 0;
 			read = (i << UBIFS_BLOCK_SHIFT) + len;
+		} else if (key_block(c, &bu->zbranch[nn].key) < page_block) {
+			nn += 1;
+			continue;
+		} else {
+			hole = 1;
+			memset(addr, 0, UBIFS_BLOCK_SIZE);
 		}
 		if (++i >= UBIFS_BLOCKS_PER_PAGE)
 			break;
-- 
GitLab


From 73944a6de048c2c49422e9063e57198256efd23e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Fri, 12 Sep 2008 18:13:31 +0300
Subject: [PATCH 191/895] UBIFS: add more debugging messages for LPT

Also add debugging checks for LPT size and separate
out c->check_lpt_free from unrelated bitfields.

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 fs/ubifs/debug.c      |  37 +++++++++
 fs/ubifs/debug.h      |   6 ++
 fs/ubifs/lpt.c        |   3 +-
 fs/ubifs/lpt_commit.c | 185 +++++++++++++++++++++++++++++++++++++++---
 fs/ubifs/ubifs.h      |  10 ++-
 5 files changed, 228 insertions(+), 13 deletions(-)

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 32071ec87cd7..7186400750e7 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -655,6 +655,43 @@ void dbg_dump_lprops(struct ubifs_info *c)
 	}
 }
 
+void dbg_dump_lpt_info(struct ubifs_info *c)
+{
+	int i;
+
+	spin_lock(&dbg_lock);
+	printk(KERN_DEBUG "\tlpt_sz:        %lld\n", c->lpt_sz);
+	printk(KERN_DEBUG "\tpnode_sz:      %d\n", c->pnode_sz);
+	printk(KERN_DEBUG "\tnnode_sz:      %d\n", c->nnode_sz);
+	printk(KERN_DEBUG "\tltab_sz:       %d\n", c->ltab_sz);
+	printk(KERN_DEBUG "\tlsave_sz:      %d\n", c->lsave_sz);
+	printk(KERN_DEBUG "\tbig_lpt:       %d\n", c->big_lpt);
+	printk(KERN_DEBUG "\tlpt_hght:      %d\n", c->lpt_hght);
+	printk(KERN_DEBUG "\tpnode_cnt:     %d\n", c->pnode_cnt);
+	printk(KERN_DEBUG "\tnnode_cnt:     %d\n", c->nnode_cnt);
+	printk(KERN_DEBUG "\tdirty_pn_cnt:  %d\n", c->dirty_pn_cnt);
+	printk(KERN_DEBUG "\tdirty_nn_cnt:  %d\n", c->dirty_nn_cnt);
+	printk(KERN_DEBUG "\tlsave_cnt:     %d\n", c->lsave_cnt);
+	printk(KERN_DEBUG "\tspace_bits:    %d\n", c->space_bits);
+	printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
+	printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
+	printk(KERN_DEBUG "\tlpt_spc_bits:  %d\n", c->lpt_spc_bits);
+	printk(KERN_DEBUG "\tpcnt_bits:     %d\n", c->pcnt_bits);
+	printk(KERN_DEBUG "\tlnum_bits:     %d\n", c->lnum_bits);
+	printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
+	printk(KERN_DEBUG "\tLPT head is at %d:%d\n",
+	       c->nhead_lnum, c->nhead_offs);
+	printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs);
+	if (c->big_lpt)
+		printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n",
+		       c->lsave_lnum, c->lsave_offs);
+	for (i = 0; i < c->lpt_lebs; i++)
+		printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d "
+		       "cmt %d\n", i + c->lpt_first, c->ltab[i].free,
+		       c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt);
+	spin_unlock(&dbg_lock);
+}
+
 void dbg_dump_leb(const struct ubifs_info *c, int lnum)
 {
 	struct ubifs_scan_leb *sleb;
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 50315fc57185..33d6b95071e4 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -224,6 +224,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
 void dbg_dump_budg(struct ubifs_info *c);
 void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
 void dbg_dump_lprops(struct ubifs_info *c);
+void dbg_dump_lpt_info(struct ubifs_info *c);
 void dbg_dump_leb(const struct ubifs_info *c, int lnum);
 void dbg_dump_znode(const struct ubifs_info *c,
 		    const struct ubifs_znode *znode);
@@ -249,6 +250,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
 int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
 int dbg_check_cats(struct ubifs_info *c);
 int dbg_check_ltab(struct ubifs_info *c);
+int dbg_chk_lpt_free_spc(struct ubifs_info *c);
+int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len);
 int dbg_check_synced_i_size(struct inode *inode);
 int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
 int dbg_check_tnc(struct ubifs_info *c, int extra);
@@ -367,6 +370,7 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
 #define dbg_dump_budg(c)                      ({})
 #define dbg_dump_lprop(c, lp)                 ({})
 #define dbg_dump_lprops(c)                    ({})
+#define dbg_dump_lpt_info(c)                  ({})
 #define dbg_dump_leb(c, lnum)                 ({})
 #define dbg_dump_znode(c, znode)              ({})
 #define dbg_dump_heap(c, heap, cat)           ({})
@@ -379,6 +383,8 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
 #define dbg_check_old_index(c, zroot)              0
 #define dbg_check_cats(c)                          0
 #define dbg_check_ltab(c)                          0
+#define dbg_chk_lpt_free_spc(c)                    0
+#define dbg_chk_lpt_sz(c, action, len)             0
 #define dbg_check_synced_i_size(inode)             0
 #define dbg_check_dir_size(c, dir)                 0
 #define dbg_check_tnc(c, x)                        0
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 9ff2463177e5..cd11b23a1875 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -109,7 +109,8 @@ static void do_calc_lpt_geom(struct ubifs_info *c)
 	c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
 	c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
 	c->lpt_sz += c->ltab_sz;
-	c->lpt_sz += c->lsave_sz;
+	if (c->big_lpt)
+		c->lpt_sz += c->lsave_sz;
 
 	/* Add wastage */
 	sz = c->lpt_sz;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 5f0b83e20af6..8546865a9104 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -177,8 +177,6 @@ static int alloc_lpt_leb(struct ubifs_info *c, int *lnum)
 			return 0;
 		}
 	}
-	dbg_err("last LEB %d", *lnum);
-	dump_stack();
 	return -ENOSPC;
 }
 
@@ -193,6 +191,9 @@ static int layout_cnodes(struct ubifs_info *c)
 	int lnum, offs, len, alen, done_lsave, done_ltab, err;
 	struct ubifs_cnode *cnode;
 
+	err = dbg_chk_lpt_sz(c, 0, 0);
+	if (err)
+		return err;
 	cnode = c->lpt_cnext;
 	if (!cnode)
 		return 0;
@@ -206,6 +207,7 @@ static int layout_cnodes(struct ubifs_info *c)
 		c->lsave_lnum = lnum;
 		c->lsave_offs = offs;
 		offs += c->lsave_sz;
+		dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 	}
 
 	if (offs + c->ltab_sz <= c->leb_size) {
@@ -213,6 +215,7 @@ static int layout_cnodes(struct ubifs_info *c)
 		c->ltab_lnum = lnum;
 		c->ltab_offs = offs;
 		offs += c->ltab_sz;
+		dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 	}
 
 	do {
@@ -226,9 +229,10 @@ static int layout_cnodes(struct ubifs_info *c)
 		while (offs + len > c->leb_size) {
 			alen = ALIGN(offs, c->min_io_size);
 			upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+			dbg_chk_lpt_sz(c, 2, alen - offs);
 			err = alloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
 				     lnum <= c->lpt_last);
@@ -238,6 +242,7 @@ static int layout_cnodes(struct ubifs_info *c)
 				c->lsave_lnum = lnum;
 				c->lsave_offs = offs;
 				offs += c->lsave_sz;
+				dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 				continue;
 			}
 			if (!done_ltab) {
@@ -245,6 +250,7 @@ static int layout_cnodes(struct ubifs_info *c)
 				c->ltab_lnum = lnum;
 				c->ltab_offs = offs;
 				offs += c->ltab_sz;
+				dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 				continue;
 			}
 			break;
@@ -257,6 +263,7 @@ static int layout_cnodes(struct ubifs_info *c)
 			c->lpt_offs = offs;
 		}
 		offs += len;
+		dbg_chk_lpt_sz(c, 1, len);
 		cnode = cnode->cnext;
 	} while (cnode && cnode != c->lpt_cnext);
 
@@ -265,9 +272,10 @@ static int layout_cnodes(struct ubifs_info *c)
 		if (offs + c->lsave_sz > c->leb_size) {
 			alen = ALIGN(offs, c->min_io_size);
 			upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+			dbg_chk_lpt_sz(c, 2, alen - offs);
 			err = alloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
 				     lnum <= c->lpt_last);
@@ -276,6 +284,7 @@ static int layout_cnodes(struct ubifs_info *c)
 		c->lsave_lnum = lnum;
 		c->lsave_offs = offs;
 		offs += c->lsave_sz;
+		dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 	}
 
 	/* Make sure to place LPT's own lprops table */
@@ -283,9 +292,10 @@ static int layout_cnodes(struct ubifs_info *c)
 		if (offs + c->ltab_sz > c->leb_size) {
 			alen = ALIGN(offs, c->min_io_size);
 			upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+			dbg_chk_lpt_sz(c, 2, alen - offs);
 			err = alloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
 				     lnum <= c->lpt_last);
@@ -294,11 +304,23 @@ static int layout_cnodes(struct ubifs_info *c)
 		c->ltab_lnum = lnum;
 		c->ltab_offs = offs;
 		offs += c->ltab_sz;
+		dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 	}
 
 	alen = ALIGN(offs, c->min_io_size);
 	upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+	dbg_chk_lpt_sz(c, 4, alen - offs);
+	err = dbg_chk_lpt_sz(c, 3, alen);
+	if (err)
+		return err;
 	return 0;
+
+no_space:
+	ubifs_err("LPT out of space");
+	dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, "
+		"done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
+	dbg_dump_lpt_info(c);
+	return err;
 }
 
 /**
@@ -333,8 +355,6 @@ static int realloc_lpt_leb(struct ubifs_info *c, int *lnum)
 			*lnum = i + c->lpt_first;
 			return 0;
 		}
-	dbg_err("last LEB %d", *lnum);
-	dump_stack();
 	return -ENOSPC;
 }
 
@@ -369,12 +389,14 @@ static int write_cnodes(struct ubifs_info *c)
 		done_lsave = 1;
 		ubifs_pack_lsave(c, buf + offs, c->lsave);
 		offs += c->lsave_sz;
+		dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 	}
 
 	if (offs + c->ltab_sz <= c->leb_size) {
 		done_ltab = 1;
 		ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
 		offs += c->ltab_sz;
+		dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 	}
 
 	/* Loop for each cnode */
@@ -392,10 +414,12 @@ static int write_cnodes(struct ubifs_info *c)
 						       alen, UBI_SHORTTERM);
 				if (err)
 					return err;
+				dbg_chk_lpt_sz(c, 4, alen - wlen);
 			}
+			dbg_chk_lpt_sz(c, 2, 0);
 			err = realloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			from = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
@@ -408,12 +432,14 @@ static int write_cnodes(struct ubifs_info *c)
 				done_lsave = 1;
 				ubifs_pack_lsave(c, buf + offs, c->lsave);
 				offs += c->lsave_sz;
+				dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 				continue;
 			}
 			if (!done_ltab) {
 				done_ltab = 1;
 				ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
 				offs += c->ltab_sz;
+				dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 				continue;
 			}
 			break;
@@ -435,6 +461,7 @@ static int write_cnodes(struct ubifs_info *c)
 		clear_bit(COW_ZNODE, &cnode->flags);
 		smp_mb__after_clear_bit();
 		offs += len;
+		dbg_chk_lpt_sz(c, 1, len);
 		cnode = cnode->cnext;
 	} while (cnode && cnode != c->lpt_cnext);
 
@@ -448,9 +475,10 @@ static int write_cnodes(struct ubifs_info *c)
 					      UBI_SHORTTERM);
 			if (err)
 				return err;
+			dbg_chk_lpt_sz(c, 2, alen - wlen);
 			err = realloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
 				     lnum <= c->lpt_last);
@@ -461,6 +489,7 @@ static int write_cnodes(struct ubifs_info *c)
 		done_lsave = 1;
 		ubifs_pack_lsave(c, buf + offs, c->lsave);
 		offs += c->lsave_sz;
+		dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 	}
 
 	/* Make sure to place LPT's own lprops table */
@@ -473,9 +502,10 @@ static int write_cnodes(struct ubifs_info *c)
 					      UBI_SHORTTERM);
 			if (err)
 				return err;
+			dbg_chk_lpt_sz(c, 2, alen - wlen);
 			err = realloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
 				     lnum <= c->lpt_last);
@@ -486,6 +516,7 @@ static int write_cnodes(struct ubifs_info *c)
 		done_ltab = 1;
 		ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
 		offs += c->ltab_sz;
+		dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 	}
 
 	/* Write remaining data in buffer */
@@ -495,6 +526,12 @@ static int write_cnodes(struct ubifs_info *c)
 	err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM);
 	if (err)
 		return err;
+
+	dbg_chk_lpt_sz(c, 4, alen - wlen);
+	err = dbg_chk_lpt_sz(c, 3, ALIGN(offs, c->min_io_size));
+	if (err)
+		return err;
+
 	c->nhead_lnum = lnum;
 	c->nhead_offs = ALIGN(offs, c->min_io_size);
 
@@ -503,7 +540,15 @@ static int write_cnodes(struct ubifs_info *c)
 	dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
 	if (c->big_lpt)
 		dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
+
 	return 0;
+
+no_space:
+	ubifs_err("LPT out of space mismatch");
+	dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab "
+	        "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
+	dbg_dump_lpt_info(c);
+	return err;
 }
 
 /**
@@ -1156,6 +1201,9 @@ int ubifs_lpt_start_commit(struct ubifs_info *c)
 	dbg_lp("");
 
 	mutex_lock(&c->lp_mutex);
+	err = dbg_chk_lpt_free_spc(c);
+	if (err)
+		goto out;
 	err = dbg_check_ltab(c);
 	if (err)
 		goto out;
@@ -1645,4 +1693,121 @@ int dbg_check_ltab(struct ubifs_info *c)
 	return 0;
 }
 
+/**
+ * dbg_chk_lpt_free_spc - check LPT free space is enough to write entire LPT.
+ * @c: the UBIFS file-system description object
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int dbg_chk_lpt_free_spc(struct ubifs_info *c)
+{
+	long long free = 0;
+	int i;
+
+	for (i = 0; i < c->lpt_lebs; i++) {
+		if (c->ltab[i].tgc || c->ltab[i].cmt)
+			continue;
+		if (i + c->lpt_first == c->nhead_lnum)
+			free += c->leb_size - c->nhead_offs;
+		else if (c->ltab[i].free == c->leb_size)
+			free += c->leb_size;
+	}
+	if (free < c->lpt_sz) {
+		dbg_err("LPT space error: free %lld lpt_sz %lld",
+			free, c->lpt_sz);
+		dbg_dump_lpt_info(c);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * dbg_chk_lpt_sz - check LPT does not write more than LPT size.
+ * @c: the UBIFS file-system description object
+ * @action: action
+ * @len: length written
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
+{
+	long long chk_lpt_sz, lpt_sz;
+	int err = 0;
+
+	switch (action) {
+	case 0:
+		c->chk_lpt_sz = 0;
+		c->chk_lpt_sz2 = 0;
+		c->chk_lpt_lebs = 0;
+		c->chk_lpt_wastage = 0;
+		if (c->dirty_pn_cnt > c->pnode_cnt) {
+			dbg_err("dirty pnodes %d exceed max %d",
+				c->dirty_pn_cnt, c->pnode_cnt);
+			err = -EINVAL;
+		}
+		if (c->dirty_nn_cnt > c->nnode_cnt) {
+			dbg_err("dirty nnodes %d exceed max %d",
+				c->dirty_nn_cnt, c->nnode_cnt);
+			err = -EINVAL;
+		}
+		return err;
+	case 1:
+		c->chk_lpt_sz += len;
+		return 0;
+	case 2:
+		c->chk_lpt_sz += len;
+		c->chk_lpt_wastage += len;
+		c->chk_lpt_lebs += 1;
+		return 0;
+	case 3:
+		chk_lpt_sz = c->leb_size;
+		chk_lpt_sz *= c->chk_lpt_lebs;
+		chk_lpt_sz += len - c->nhead_offs;
+		if (c->chk_lpt_sz != chk_lpt_sz) {
+			dbg_err("LPT wrote %lld but space used was %lld",
+				c->chk_lpt_sz, chk_lpt_sz);
+			err = -EINVAL;
+		}
+		if (c->chk_lpt_sz > c->lpt_sz) {
+			dbg_err("LPT wrote %lld but lpt_sz is %lld",
+				c->chk_lpt_sz, c->lpt_sz);
+			err = -EINVAL;
+		}
+		if (c->chk_lpt_sz2 && c->chk_lpt_sz != c->chk_lpt_sz2) {
+			dbg_err("LPT layout size %lld but wrote %lld",
+				c->chk_lpt_sz, c->chk_lpt_sz2);
+			err = -EINVAL;
+		}
+		if (c->chk_lpt_sz2 && c->new_nhead_offs != len) {
+			dbg_err("LPT new nhead offs: expected %d was %d",
+				c->new_nhead_offs, len);
+			err = -EINVAL;
+		}
+		lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
+		lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
+		lpt_sz += c->ltab_sz;
+		if (c->big_lpt)
+			lpt_sz += c->lsave_sz;
+		if (c->chk_lpt_sz - c->chk_lpt_wastage > lpt_sz) {
+			dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
+				c->chk_lpt_sz, c->chk_lpt_wastage, lpt_sz);
+			err = -EINVAL;
+		}
+		if (err)
+			dbg_dump_lpt_info(c);
+		c->chk_lpt_sz2 = c->chk_lpt_sz;
+		c->chk_lpt_sz = 0;
+		c->chk_lpt_wastage = 0;
+		c->chk_lpt_lebs = 0;
+		c->new_nhead_offs = len;
+		return err;
+	case 4:
+		c->chk_lpt_sz += len;
+		c->chk_lpt_wastage += len;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 #endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 49b06c9f675a..a7bd32fa15b9 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -943,7 +943,6 @@ struct ubifs_mount_opts {
  *
  * @fast_unmount: do not run journal commit before un-mounting
  * @big_lpt: flag that LPT is too big to write whole during commit
- * @check_lpt_free: flag that indicates LPT GC may be needed
  * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
  *                   recovery)
  * @bulk_read: enable bulk-reads
@@ -1102,6 +1101,7 @@ struct ubifs_mount_opts {
  * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab
  * @dirty_nn_cnt: number of dirty nnodes
  * @dirty_pn_cnt: number of dirty pnodes
+ * @check_lpt_free: flag that indicates LPT GC may be needed
  * @lpt_sz: LPT size
  * @lpt_nod_buf: buffer for an on-flash nnode or pnode
  * @lpt_buf: buffer of LEB size used by LPT
@@ -1191,7 +1191,6 @@ struct ubifs_info {
 
 	unsigned int fast_unmount:1;
 	unsigned int big_lpt:1;
-	unsigned int check_lpt_free:1;
 	unsigned int no_chk_data_crc:1;
 	unsigned int bulk_read:1;
 
@@ -1340,6 +1339,7 @@ struct ubifs_info {
 	int lpt_drty_flgs;
 	int dirty_nn_cnt;
 	int dirty_pn_cnt;
+	int check_lpt_free;
 	long long lpt_sz;
 	void *lpt_nod_buf;
 	void *lpt_buf;
@@ -1394,6 +1394,12 @@ struct ubifs_info {
 	unsigned long fail_timeout;
 	unsigned int fail_cnt;
 	unsigned int fail_cnt_max;
+	long long chk_lpt_sz;
+	long long chk_lpt_sz2;
+	long long chk_lpt_wastage;
+	int chk_lpt_lebs;
+	int new_nhead_lnum;
+	int new_nhead_offs;
 #endif
 };
 
-- 
GitLab


From 63c300b68fd93a9fadc5e317d4d001b7a6985486 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Wed, 17 Sep 2008 12:11:13 +0300
Subject: [PATCH 192/895] UBIFS: correct condition to eliminate unecessary
 assignment

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 fs/ubifs/tnc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index e0878a431b9c..d27fd918b9c9 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -1600,7 +1600,7 @@ out:
 	 * An enormous hole could cause bulk-read to encompass too many
 	 * page cache pages, so limit the number here.
 	 */
-	if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
+	if (bu->blk_cnt > UBIFS_MAX_BULK_READ)
 		bu->blk_cnt = UBIFS_MAX_BULK_READ;
 	/*
 	 * Ensure that bulk-read covers a whole number of page cache
-- 
GitLab


From be2f6bd62d0d4246a9227dacbe2469e1f0eccf26 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Fri, 26 Sep 2008 12:52:21 +0300
Subject: [PATCH 193/895] UBIFS: check buffer length when scanning for LPT
 nodes

'is_a_node()' function was reading from a buffer before
checking the buffer length, resulting in an OOPS as
follows:

BUG: unable to handle kernel paging request at f8f74002
IP: [<f8f9783f>] :ubifs:ubifs_unpack_bits+0xca/0x233
*pde = 19e95067 *pte = 00000000
Oops: 0000 [#1] PREEMPT SMP
Modules linked in: ubifs ubi mtdchar bio2mtd mtd brd video output
[last unloaded: mtd]

Pid: 6414, comm: integck Not tainted (2.6.27-rc6ubifs34 #23)
EIP: 0060:[<f8f9783f>] EFLAGS: 00010246 CPU: 0
EIP is at ubifs_unpack_bits+0xca/0x233 [ubifs]
EAX: 00000000 EBX: f6090630 ECX: d9badcfc EDX: 00000000
ESI: 00000004 EDI: f8f74002 EBP: d9badcec ESP: d9badcc0
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process integck (pid: 6414, ti=d9bac000 task=f727dae0 task.ti=d9bac000)
Stack: 00000006 f7306240 00000002 00000000 d9badcfc d9badd00 0000001c 00000000
       f6090630 f6090630 f8f74000 d9badd10 f8fa1cc9 00000000 f8f74002 00000000
       f8f74002 f60fe128 f6090630 f8f74000 d9badd68 f8fa1e46 00000000 0001e000
Call Trace:
 [<f8fa1cc9>] ? is_a_node+0x30/0x90 [ubifs]
 [<f8fa1e46>] ? dbg_check_ltab+0x11d/0x5bd [ubifs]
 [<f8fa388f>] ? ubifs_lpt_start_commit+0x42/0xed3 [ubifs]
 [<c038e76a>] ? mutex_unlock+0x8/0xa
 [<f8f9625d>] ? ubifs_tnc_start_commit+0x1c8/0xedb [ubifs]
 [<f8f8d90b>] ? do_commit+0x187/0x523 [ubifs]
 [<c038e76a>] ? mutex_unlock+0x8/0xa
 [<f8f7ca17>] ? bud_wbuf_callback+0x22/0x28 [ubifs]
 [<f8f8dd1d>] ? ubifs_run_commit+0x76/0xc0 [ubifs]
 [<f8f8032c>] ? ubifs_sync_fs+0xd2/0xe6 [ubifs]
 [<c01a2e97>] ? vfs_quota_sync+0x0/0x17e
 [<c01a5ba6>] ? quota_sync_sb+0x26/0xbb
 [<c01a2e97>] ? vfs_quota_sync+0x0/0x17e
 [<c01a5c5d>] ? sync_dquots+0x22/0x12c
 [<c0173d1b>] ? __fsync_super+0x19/0x68
 [<c0173d75>] ? fsync_super+0xb/0x19
 [<c0174065>] ? generic_shutdown_super+0x22/0xe7
 [<c01a31fc>] ? vfs_quota_off+0x0/0x5fd
 [<f8f7cf4d>] ? ubifs_kill_sb+0x31/0x35 [ubifs]
 [<c01741f9>] ? deactivate_super+0x5e/0x71
 [<c0187610>] ? mntput_no_expire+0x82/0xe4
 [<c0187905>] ? sys_umount+0x4c/0x2f6
 [<c0187bc8>] ? sys_oldumount+0x19/0x1b
 [<c0103b71>] ? sysenter_do_call+0x12/0x25
 =======================
Code: c1 f8 03 8d 04 07 8b 4d e8 89 01 8b 45 e4 89 10 89 d8 89 f1 d3 e8 85 c0
      74 07 29 d6 83 fe 20 75 2a 89 d8 83 c4 20 5b 5e 5f 5d
EIP: [<f8f9783f>] ubifs_unpack_bits+0xca/0x233 [ubifs] SS:ESP 0068:d9badcc0
---[ end trace 1f02572436518c13 ]---

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 fs/ubifs/lpt_commit.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 8546865a9104..eed5a0025d63 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1089,6 +1089,8 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
 	int pos = 0, node_type, node_len;
 	uint16_t crc, calc_crc;
 
+	if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8)
+		return 0;
 	node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
 	if (node_type == UBIFS_LPT_NOT_A_NODE)
 		return 0;
-- 
GitLab


From 64c9627c2628bc3bd3291710b8ee6f8335883f8b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 1 Oct 2008 15:12:27 +0900
Subject: [PATCH 194/895] sh: Fix up the __raw_read/writeX() definitions.

These were doing largely bogus things and using the wrong typing for
the address. Bring these in line with the ARM definitions.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/io.h | 77 +++++++++++++++++++---------------------
 arch/sh/lib/io.c         |  8 ++---
 2 files changed, 40 insertions(+), 45 deletions(-)

diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 1857666136f2..d9e794eff830 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -101,44 +101,33 @@
 #define outsw		__outsw
 #define outsl		__outsl
 
-#define __raw_readb(a)		__readb((void __iomem *)(a))
-#define __raw_readw(a)		__readw((void __iomem *)(a))
-#define __raw_readl(a)		__readl((void __iomem *)(a))
-#define __raw_writeb(v, a)	__writeb(v, (void __iomem *)(a))
-#define __raw_writew(v, a)	__writew(v, (void __iomem *)(a))
-#define __raw_writel(v, a)	__writel(v, (void __iomem *)(a))
+#define __raw_writeb(v,a)	(__chk_io_ptr(a), *(volatile unsigned char __force  *)(a) = (v))
+#define __raw_writew(v,a)	(__chk_io_ptr(a), *(volatile unsigned short __force *)(a) = (v))
+#define __raw_writel(v,a)	(__chk_io_ptr(a), *(volatile unsigned int __force   *)(a) = (v))
 
-void __raw_writesl(unsigned long addr, const void *data, int longlen);
-void __raw_readsl(unsigned long addr, void *data, int longlen);
+#define __raw_readb(a)		(__chk_io_ptr(a), *(volatile unsigned char __force  *)(a))
+#define __raw_readw(a)		(__chk_io_ptr(a), *(volatile unsigned short __force *)(a))
+#define __raw_readl(a)		(__chk_io_ptr(a), *(volatile unsigned int __force   *)(a))
+
+void __raw_writesl(void __iomem *addr, const void *data, int longlen);
+void __raw_readsl(const void __iomem *addr, void *data, int longlen);
 
 /*
  * The platform header files may define some of these macros to use
  * the inlined versions where appropriate.  These macros may also be
  * redefined by userlevel programs.
  */
-#ifdef __readb
-# define readb(a)	({ unsigned int r_ = __raw_readb(a); mb(); r_; })
-#endif
-#ifdef __raw_readw
-# define readw(a)	({ unsigned int r_ = __raw_readw(a); mb(); r_; })
-#endif
-#ifdef __raw_readl
-# define readl(a)	({ unsigned int r_ = __raw_readl(a); mb(); r_; })
-#endif
+#define readb(a)	({ unsigned int r_ = __readb(a); mb(); r_; })
+#define readw(a)	({ unsigned int r_ = __readw(a); mb(); r_; })
+#define readl(a)	({ unsigned int r_ = __readl(a); mb(); r_; })
 
-#ifdef __raw_writeb
-# define writeb(v,a)	({ __raw_writeb((v),(a)); mb(); })
-#endif
-#ifdef __raw_writew
-# define writew(v,a)	({ __raw_writew((v),(a)); mb(); })
-#endif
-#ifdef __raw_writel
-# define writel(v,a)	({ __raw_writel((v),(a)); mb(); })
-#endif
+#define writeb(v,a)	({ __writeb((v),(a)); mb(); })
+#define writew(v,a)	({ __writew((v),(a)); mb(); })
+#define writel(v,a)	({ __writel((v),(a)); mb(); })
 
 #define __BUILD_MEMORY_STRING(bwlq, type)				\
 									\
-static inline void writes##bwlq(volatile void __iomem *mem,		\
+static inline void __raw_writes##bwlq(volatile void __iomem *mem,	\
 				const void *addr, unsigned int count)	\
 {									\
 	const volatile type *__addr = addr;				\
@@ -149,8 +138,8 @@ static inline void writes##bwlq(volatile void __iomem *mem,		\
 	}								\
 }									\
 									\
-static inline void reads##bwlq(volatile void __iomem *mem, void *addr,	\
-			       unsigned int count)			\
+static inline void __raw_reads##bwlq(volatile void __iomem *mem,	\
+			       void *addr, unsigned int count)		\
 {									\
 	volatile type *__addr = addr;					\
 									\
@@ -162,7 +151,13 @@ static inline void reads##bwlq(volatile void __iomem *mem, void *addr,	\
 
 __BUILD_MEMORY_STRING(b, u8)
 __BUILD_MEMORY_STRING(w, u16)
+
+#define writesb	__raw_writesb
+#define writesw	__raw_writesw
 #define writesl __raw_writesl
+
+#define readsb	__raw_readsb
+#define readsw	__raw_readsw
 #define readsl  __raw_readsl
 
 #define readb_relaxed(a) readb(a)
@@ -170,25 +165,25 @@ __BUILD_MEMORY_STRING(w, u16)
 #define readl_relaxed(a) readl(a)
 
 /* Simple MMIO */
-#define ioread8(a)		readb(a)
-#define ioread16(a)		readw(a)
+#define ioread8(a)		__raw_readb(a)
+#define ioread16(a)		__raw_readw(a)
 #define ioread16be(a)		be16_to_cpu(__raw_readw((a)))
-#define ioread32(a)		readl(a)
+#define ioread32(a)		__raw_readl(a)
 #define ioread32be(a)		be32_to_cpu(__raw_readl((a)))
 
-#define iowrite8(v,a)		writeb((v),(a))
-#define iowrite16(v,a)		writew((v),(a))
+#define iowrite8(v,a)		__raw_writeb((v),(a))
+#define iowrite16(v,a)		__raw_writew((v),(a))
 #define iowrite16be(v,a)	__raw_writew(cpu_to_be16((v)),(a))
-#define iowrite32(v,a)		writel((v),(a))
+#define iowrite32(v,a)		__raw_writel((v),(a))
 #define iowrite32be(v,a)	__raw_writel(cpu_to_be32((v)),(a))
 
-#define ioread8_rep(a, d, c)	readsb((a), (d), (c))
-#define ioread16_rep(a, d, c)	readsw((a), (d), (c))
-#define ioread32_rep(a, d, c)	readsl((a), (d), (c))
+#define ioread8_rep(a, d, c)	__raw_readsb((a), (d), (c))
+#define ioread16_rep(a, d, c)	__raw_readsw((a), (d), (c))
+#define ioread32_rep(a, d, c)	__raw_readsl((a), (d), (c))
 
-#define iowrite8_rep(a, s, c)	writesb((a), (s), (c))
-#define iowrite16_rep(a, s, c)	writesw((a), (s), (c))
-#define iowrite32_rep(a, s, c)	writesl((a), (s), (c))
+#define iowrite8_rep(a, s, c)	__raw_writesb((a), (s), (c))
+#define iowrite16_rep(a, s, c)	__raw_writesw((a), (s), (c))
+#define iowrite32_rep(a, s, c)	__raw_writesl((a), (s), (c))
 
 #define mmiowb()	wmb()	/* synco on SH-4A, otherwise a nop */
 
diff --git a/arch/sh/lib/io.c b/arch/sh/lib/io.c
index 4f54ec43516f..88dfe6e396bc 100644
--- a/arch/sh/lib/io.c
+++ b/arch/sh/lib/io.c
@@ -14,12 +14,12 @@
 #include <linux/module.h>
 #include <linux/io.h>
 
-void __raw_readsl(unsigned long addr, void *datap, int len)
+void __raw_readsl(const void __iomem *addr, void *datap, int len)
 {
 	u32 *data;
 
 	for (data = datap; (len != 0) && (((u32)data & 0x1f) != 0); len--)
-		*data++ = ctrl_inl(addr);
+		*data++ = __raw_readl(addr);
 
 	if (likely(len >= (0x20 >> 2))) {
 		int tmp2, tmp3, tmp4, tmp5, tmp6;
@@ -59,11 +59,11 @@ void __raw_readsl(unsigned long addr, void *datap, int len)
 	}
 
 	for (; len != 0; len--)
-		*data++ = ctrl_inl(addr);
+		*data++ = __raw_readl(addr);
 }
 EXPORT_SYMBOL(__raw_readsl);
 
-void __raw_writesl(unsigned long addr, const void *data, int len)
+void __raw_writesl(void __iomem *addr, const void *data, int len)
 {
 	if (likely(len != 0)) {
 		int tmp1;
-- 
GitLab


From 62429e03644833693e6f94afe537f252e2d3b475 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 1 Oct 2008 15:19:10 +0900
Subject: [PATCH 195/895] sh: Use __raw_xxx() I/O accessors for INTC and IPR.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/irq/intc.c | 24 ++++++++++++------------
 arch/sh/kernel/cpu/irq/ipr.c  |  4 ++--
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/sh/kernel/cpu/irq/intc.c b/arch/sh/kernel/cpu/irq/intc.c
index 94536d358fe1..138efa4e95db 100644
--- a/arch/sh/kernel/cpu/irq/intc.c
+++ b/arch/sh/kernel/cpu/irq/intc.c
@@ -86,24 +86,24 @@ static inline unsigned int set_field(unsigned int value,
 
 static void write_8(unsigned long addr, unsigned long h, unsigned long data)
 {
-	ctrl_outb(set_field(0, data, h), addr);
+	__raw_writeb(set_field(0, data, h), addr);
 }
 
 static void write_16(unsigned long addr, unsigned long h, unsigned long data)
 {
-	ctrl_outw(set_field(0, data, h), addr);
+	__raw_writew(set_field(0, data, h), addr);
 }
 
 static void write_32(unsigned long addr, unsigned long h, unsigned long data)
 {
-	ctrl_outl(set_field(0, data, h), addr);
+	__raw_writel(set_field(0, data, h), addr);
 }
 
 static void modify_8(unsigned long addr, unsigned long h, unsigned long data)
 {
 	unsigned long flags;
 	local_irq_save(flags);
-	ctrl_outb(set_field(ctrl_inb(addr), data, h), addr);
+	__raw_writeb(set_field(__raw_readb(addr), data, h), addr);
 	local_irq_restore(flags);
 }
 
@@ -111,7 +111,7 @@ static void modify_16(unsigned long addr, unsigned long h, unsigned long data)
 {
 	unsigned long flags;
 	local_irq_save(flags);
-	ctrl_outw(set_field(ctrl_inw(addr), data, h), addr);
+	__raw_writew(set_field(__raw_readw(addr), data, h), addr);
 	local_irq_restore(flags);
 }
 
@@ -119,7 +119,7 @@ static void modify_32(unsigned long addr, unsigned long h, unsigned long data)
 {
 	unsigned long flags;
 	local_irq_save(flags);
-	ctrl_outl(set_field(ctrl_inl(addr), data, h), addr);
+	__raw_writel(set_field(__raw_readl(addr), data, h), addr);
 	local_irq_restore(flags);
 }
 
@@ -246,16 +246,16 @@ static void intc_mask_ack(unsigned int irq)
 		addr = INTC_REG(d, _INTC_ADDR_D(handle), 0);
 		switch (_INTC_FN(handle)) {
 		case REG_FN_MODIFY_BASE + 0:	/* 8bit */
-			ctrl_inb(addr);
-			ctrl_outb(0xff ^ set_field(0, 1, handle), addr);
+			__raw_readb(addr);
+			__raw_writeb(0xff ^ set_field(0, 1, handle), addr);
 			break;
 		case REG_FN_MODIFY_BASE + 1:	/* 16bit */
-			ctrl_inw(addr);
-			ctrl_outw(0xffff ^ set_field(0, 1, handle), addr);
+			__raw_readw(addr);
+			__raw_writew(0xffff ^ set_field(0, 1, handle), addr);
 			break;
 		case REG_FN_MODIFY_BASE + 3:	/* 32bit */
-			ctrl_inl(addr);
-			ctrl_outl(0xffffffff ^ set_field(0, 1, handle), addr);
+			__raw_readl(addr);
+			__raw_writel(0xffffffff ^ set_field(0, 1, handle), addr);
 			break;
 		default:
 			BUG();
diff --git a/arch/sh/kernel/cpu/irq/ipr.c b/arch/sh/kernel/cpu/irq/ipr.c
index 56ea7b269b59..3eb17ee5540e 100644
--- a/arch/sh/kernel/cpu/irq/ipr.c
+++ b/arch/sh/kernel/cpu/irq/ipr.c
@@ -33,7 +33,7 @@ static void disable_ipr_irq(unsigned int irq)
 	struct ipr_data *p = get_irq_chip_data(irq);
 	unsigned long addr = get_ipr_desc(irq)->ipr_offsets[p->ipr_idx];
 	/* Set the priority in IPR to 0 */
-	ctrl_outw(ctrl_inw(addr) & (0xffff ^ (0xf << p->shift)), addr);
+	__raw_writew(__raw_readw(addr) & (0xffff ^ (0xf << p->shift)), addr);
 }
 
 static void enable_ipr_irq(unsigned int irq)
@@ -41,7 +41,7 @@ static void enable_ipr_irq(unsigned int irq)
 	struct ipr_data *p = get_irq_chip_data(irq);
 	unsigned long addr = get_ipr_desc(irq)->ipr_offsets[p->ipr_idx];
 	/* Set priority in IPR back to original value */
-	ctrl_outw(ctrl_inw(addr) | (p->priority << p->shift), addr);
+	__raw_writew(__raw_readw(addr) | (p->priority << p->shift), addr);
 }
 
 /*
-- 
GitLab


From 7ff731aeba1cdac473c818a9884eb94ddad18e7f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 1 Oct 2008 15:46:58 +0900
Subject: [PATCH 196/895] serial: sh-sci: Handle the general UPF_IOREMAP case.

Presently we don't do much with UPF_IOREMAP other than special case it
for SH-5's onchip_remap() on the early console. Tie this in generically
for platforms that need the remap.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/serial/sh-sci.c | 45 ++++++++++++++++++++++++++++++-----------
 drivers/serial/sh-sci.h | 10 ++++-----
 2 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index f5aebc9f27e7..ac658a7a27be 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -3,7 +3,7 @@
  *
  * SuperH on-chip serial module support.  (SCI with no FIFO / with FIFO)
  *
- *  Copyright (C) 2002 - 2006  Paul Mundt
+ *  Copyright (C) 2002 - 2008  Paul Mundt
  *  Modified to support SH7720 SCIF. Markus Brunner, Mark Jonas (Jul 2007).
  *
  * based off of the old drivers/char/sh-sci.c by:
@@ -46,6 +46,7 @@
 #include <linux/cpufreq.h>
 #include <linux/clk.h>
 #include <linux/ctype.h>
+#include <linux/err.h>
 
 #ifdef CONFIG_SUPERH
 #include <asm/clock.h>
@@ -1145,12 +1146,16 @@ static void sci_config_port(struct uart_port *port, int flags)
 		break;
 	}
 
-#if defined(CONFIG_CPU_SUBTYPE_SH5_101) || defined(CONFIG_CPU_SUBTYPE_SH5_103)
-	if (port->mapbase == 0)
+	if (port->flags & UPF_IOREMAP && !port->membase) {
+#if defined(CONFIG_SUPERH64)
 		port->mapbase = onchip_remap(SCIF_ADDR_SH5, 1024, "SCIF");
-
-	port->membase = (void __iomem *)port->mapbase;
+		port->membase = (void __iomem *)port->mapbase;
+#else
+		port->membase = ioremap_nocache(port->mapbase, 0x40);
 #endif
+
+		printk(KERN_ERR "sci: can't remap port#%d\n", port->line);
+	}
 }
 
 static int sci_verify_port(struct uart_port *port, struct serial_struct *ser)
@@ -1436,7 +1441,7 @@ static struct uart_driver sci_uart_driver = {
 static int __devinit sci_probe(struct platform_device *dev)
 {
 	struct plat_sci_port *p = dev->dev.platform_data;
-	int i;
+	int i, ret = -EINVAL;
 
 	for (i = 0; p && p->flags != 0; p++, i++) {
 		struct sci_port *sciport = &sci_ports[i];
@@ -1453,12 +1458,22 @@ static int __devinit sci_probe(struct platform_device *dev)
 
 		sciport->port.mapbase	= p->mapbase;
 
-		/*
-		 * For the simple (and majority of) cases where we don't need
-		 * to do any remapping, just cast the cookie directly.
-		 */
-		if (p->mapbase && !p->membase && !(p->flags & UPF_IOREMAP))
-			p->membase = (void __iomem *)p->mapbase;
+		if (p->mapbase && !p->membase) {
+			if (p->flags & UPF_IOREMAP) {
+				p->membase = ioremap_nocache(p->mapbase, 0x40);
+				if (IS_ERR(p->membase)) {
+					ret = PTR_ERR(p->membase);
+					goto err_unreg;
+				}
+			} else {
+				/*
+				 * For the simple (and majority of) cases
+				 * where we don't need to do any remapping,
+				 * just cast the cookie directly.
+				 */
+				p->membase = (void __iomem *)p->mapbase;
+			}
+		}
 
 		sciport->port.membase	= p->membase;
 
@@ -1489,6 +1504,12 @@ static int __devinit sci_probe(struct platform_device *dev)
 #endif
 
 	return 0;
+
+err_unreg:
+	for (i = i - 1; i >= 0; i--)
+		uart_remove_one_port(&sci_uart_driver, &sci_ports[i].port);
+
+	return ret;
 }
 
 static int __devexit sci_remove(struct platform_device *dev)
diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index 8a0749e34ca3..2b4c1dff1e85 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -320,18 +320,16 @@
 #define SCI_EVENT_WRITE_WAKEUP	0
 
 #define SCI_IN(size, offset)					\
-  unsigned int addr = port->mapbase + (offset);			\
   if ((size) == 8) {						\
-    return ctrl_inb(addr);					\
+    return ioread8(port->membase + (offset));			\
   } else {							\
-    return ctrl_inw(addr);					\
+    return ioread16(port->membase + (offset));			\
   }
 #define SCI_OUT(size, offset, value)				\
-  unsigned int addr = port->mapbase + (offset);			\
   if ((size) == 8) {						\
-    ctrl_outb(value, addr);					\
+    iowrite8(value, port->membase + (offset));			\
   } else if ((size) == 16) {					\
-    ctrl_outw(value, addr);					\
+    iowrite16(value, port->membase + (offset));			\
   }
 
 #define CPU_SCIx_FNS(name, sci_offset, sci_size, scif_offset, scif_size)\
-- 
GitLab


From bbfbd8b151fe35c9a1180a7f5254c5d6b8387cc0 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 1 Oct 2008 16:13:54 +0900
Subject: [PATCH 197/895] sh: Move the shared INTC code out to drivers/sh/

The INTC code will be re-used across different architectures, so move
this out to drivers/sh/ and include/linux/sh_intc.h respectively.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/hw_irq.h                  | 92 +------------------
 arch/sh/kernel/cpu/irq/Makefile               |  2 -
 drivers/sh/Makefile                           |  2 +-
 {arch/sh/kernel/cpu/irq => drivers/sh}/intc.c |  1 +
 include/linux/sh_intc.h                       | 91 ++++++++++++++++++
 5 files changed, 95 insertions(+), 93 deletions(-)
 rename {arch/sh/kernel/cpu/irq => drivers/sh}/intc.c (99%)
 create mode 100644 include/linux/sh_intc.h

diff --git a/arch/sh/include/asm/hw_irq.h b/arch/sh/include/asm/hw_irq.h
index d557b00111bf..603cdde813d1 100644
--- a/arch/sh/include/asm/hw_irq.h
+++ b/arch/sh/include/asm/hw_irq.h
@@ -2,6 +2,7 @@
 #define __ASM_SH_HW_IRQ_H
 
 #include <linux/init.h>
+#include <linux/sh_intc.h>
 #include <asm/atomic.h>
 
 extern atomic_t irq_err_count;
@@ -23,101 +24,12 @@ struct ipr_desc {
 
 void register_ipr_controller(struct ipr_desc *);
 
-typedef unsigned char intc_enum;
-
-struct intc_vect {
-	intc_enum enum_id;
-	unsigned short vect;
-};
-
-#define INTC_VECT(enum_id, vect) { enum_id, vect }
-#define INTC_IRQ(enum_id, irq) INTC_VECT(enum_id, irq2evt(irq))
-
-struct intc_group {
-	intc_enum enum_id;
-	intc_enum enum_ids[32];
-};
-
-#define INTC_GROUP(enum_id, ids...) { enum_id, { ids } }
-
-struct intc_mask_reg {
-	unsigned long set_reg, clr_reg, reg_width;
-	intc_enum enum_ids[32];
-#ifdef CONFIG_SMP
-	unsigned long smp;
-#endif
-};
-
-struct intc_prio_reg {
-	unsigned long set_reg, clr_reg, reg_width, field_width;
-	intc_enum enum_ids[16];
-#ifdef CONFIG_SMP
-	unsigned long smp;
-#endif
-};
-
-struct intc_sense_reg {
-	unsigned long reg, reg_width, field_width;
-	intc_enum enum_ids[16];
-};
-
-#ifdef CONFIG_SMP
-#define INTC_SMP(stride, nr) .smp = (stride) | ((nr) << 8)
-#else
-#define INTC_SMP(stride, nr)
-#endif
-
-struct intc_desc {
-	struct intc_vect *vectors;
-	unsigned int nr_vectors;
-	struct intc_group *groups;
-	unsigned int nr_groups;
-	struct intc_mask_reg *mask_regs;
-	unsigned int nr_mask_regs;
-	struct intc_prio_reg *prio_regs;
-	unsigned int nr_prio_regs;
-	struct intc_sense_reg *sense_regs;
-	unsigned int nr_sense_regs;
-	char *name;
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
-	struct intc_mask_reg *ack_regs;
-	unsigned int nr_ack_regs;
-#endif
-};
-
-#define _INTC_ARRAY(a) a, sizeof(a)/sizeof(*a)
-#define DECLARE_INTC_DESC(symbol, chipname, vectors, groups,		\
-	mask_regs, prio_regs, sense_regs)				\
-struct intc_desc symbol __initdata = {					\
-	_INTC_ARRAY(vectors), _INTC_ARRAY(groups),			\
-	_INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs),			\
-	_INTC_ARRAY(sense_regs),					\
-	chipname,							\
-}
-
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
-#define DECLARE_INTC_DESC_ACK(symbol, chipname, vectors, groups,	\
-	mask_regs, prio_regs, sense_regs, ack_regs)			\
-struct intc_desc symbol __initdata = {					\
-	_INTC_ARRAY(vectors), _INTC_ARRAY(groups),			\
-	_INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs),			\
-	_INTC_ARRAY(sense_regs),					\
-	chipname,							\
-	_INTC_ARRAY(ack_regs),						\
-}
-#endif
-
-void __init register_intc_controller(struct intc_desc *desc);
-int intc_set_priority(unsigned int irq, unsigned int prio);
-
 void __init plat_irq_setup(void);
-#ifdef CONFIG_CPU_SH3
 void __init plat_irq_setup_sh3(void);
-#endif
+void __init plat_irq_setup_pins(int mode);
 
 enum { IRQ_MODE_IRQ, IRQ_MODE_IRQ7654, IRQ_MODE_IRQ3210,
        IRQ_MODE_IRL7654_MASK, IRQ_MODE_IRL3210_MASK,
        IRQ_MODE_IRL7654, IRQ_MODE_IRL3210 };
-void __init plat_irq_setup_pins(int mode);
 
 #endif /* __ASM_SH_HW_IRQ_H */
diff --git a/arch/sh/kernel/cpu/irq/Makefile b/arch/sh/kernel/cpu/irq/Makefile
index 462a8f6dfee2..f0c7025a67d1 100644
--- a/arch/sh/kernel/cpu/irq/Makefile
+++ b/arch/sh/kernel/cpu/irq/Makefile
@@ -1,8 +1,6 @@
 #
 # Makefile for the Linux/SuperH CPU-specifc IRQ handlers.
 #
-obj-y	+= intc.o
-
 obj-$(CONFIG_SUPERH32)			+= imask.o
 obj-$(CONFIG_CPU_SH5)			+= intc-sh5.o
 obj-$(CONFIG_CPU_HAS_IPR_IRQ)		+= ipr.o
diff --git a/drivers/sh/Makefile b/drivers/sh/Makefile
index a96f4a8cfeb8..6a025cefe6dc 100644
--- a/drivers/sh/Makefile
+++ b/drivers/sh/Makefile
@@ -1,6 +1,6 @@
 #
 # Makefile for the SuperH specific drivers.
 #
-
 obj-$(CONFIG_SUPERHYWAY)	+= superhyway/
 obj-$(CONFIG_MAPLE)		+= maple/
+obj-y				+= intc.o
diff --git a/arch/sh/kernel/cpu/irq/intc.c b/drivers/sh/intc.c
similarity index 99%
rename from arch/sh/kernel/cpu/irq/intc.c
rename to drivers/sh/intc.c
index 138efa4e95db..58d24c5a76ce 100644
--- a/arch/sh/kernel/cpu/irq/intc.c
+++ b/drivers/sh/intc.c
@@ -21,6 +21,7 @@
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/bootmem.h>
+#include <linux/sh_intc.h>
 
 #define _INTC_MK(fn, mode, addr_e, addr_d, width, shift) \
 	((shift) | ((width) << 5) | ((fn) << 9) | ((mode) << 13) | \
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
new file mode 100644
index 000000000000..68e212ff9dde
--- /dev/null
+++ b/include/linux/sh_intc.h
@@ -0,0 +1,91 @@
+#ifndef __SH_INTC_H
+#define __SH_INTC_H
+
+typedef unsigned char intc_enum;
+
+struct intc_vect {
+	intc_enum enum_id;
+	unsigned short vect;
+};
+
+#define INTC_VECT(enum_id, vect) { enum_id, vect }
+#define INTC_IRQ(enum_id, irq) INTC_VECT(enum_id, irq2evt(irq))
+
+struct intc_group {
+	intc_enum enum_id;
+	intc_enum enum_ids[32];
+};
+
+#define INTC_GROUP(enum_id, ids...) { enum_id, { ids } }
+
+struct intc_mask_reg {
+	unsigned long set_reg, clr_reg, reg_width;
+	intc_enum enum_ids[32];
+#ifdef CONFIG_SMP
+	unsigned long smp;
+#endif
+};
+
+struct intc_prio_reg {
+	unsigned long set_reg, clr_reg, reg_width, field_width;
+	intc_enum enum_ids[16];
+#ifdef CONFIG_SMP
+	unsigned long smp;
+#endif
+};
+
+struct intc_sense_reg {
+	unsigned long reg, reg_width, field_width;
+	intc_enum enum_ids[16];
+};
+
+#ifdef CONFIG_SMP
+#define INTC_SMP(stride, nr) .smp = (stride) | ((nr) << 8)
+#else
+#define INTC_SMP(stride, nr)
+#endif
+
+struct intc_desc {
+	struct intc_vect *vectors;
+	unsigned int nr_vectors;
+	struct intc_group *groups;
+	unsigned int nr_groups;
+	struct intc_mask_reg *mask_regs;
+	unsigned int nr_mask_regs;
+	struct intc_prio_reg *prio_regs;
+	unsigned int nr_prio_regs;
+	struct intc_sense_reg *sense_regs;
+	unsigned int nr_sense_regs;
+	char *name;
+#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
+	struct intc_mask_reg *ack_regs;
+	unsigned int nr_ack_regs;
+#endif
+};
+
+#define _INTC_ARRAY(a) a, sizeof(a)/sizeof(*a)
+#define DECLARE_INTC_DESC(symbol, chipname, vectors, groups,		\
+	mask_regs, prio_regs, sense_regs)				\
+struct intc_desc symbol __initdata = {					\
+	_INTC_ARRAY(vectors), _INTC_ARRAY(groups),			\
+	_INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs),			\
+	_INTC_ARRAY(sense_regs),					\
+	chipname,							\
+}
+
+#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
+#define DECLARE_INTC_DESC_ACK(symbol, chipname, vectors, groups,	\
+	mask_regs, prio_regs, sense_regs, ack_regs)			\
+struct intc_desc symbol __initdata = {					\
+	_INTC_ARRAY(vectors), _INTC_ARRAY(groups),			\
+	_INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs),			\
+	_INTC_ARRAY(sense_regs),					\
+	chipname,							\
+	_INTC_ARRAY(ack_regs),						\
+}
+#endif
+
+void __init register_intc_controller(struct intc_desc *desc);
+int intc_set_priority(unsigned int irq, unsigned int prio);
+
+#endif /* __SH_INTC_H */
-- 
GitLab


From 225c9a8d1da274bf23efec43ec28b1c9e45e12f8 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 1 Oct 2008 16:24:32 +0900
Subject: [PATCH 198/895] video: sh_mobile_lcdcfb: Support HAVE_CLK=n
 configurations.

This provides a workaround for users of sh_mobile_lcdcfb that don't
define HAVE_CLK and have otherwise sane clock initialization.

At the same time, move the sh_mobile_lcdc.h header to include/video/.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/board-ap325rxa.c                         | 2 +-
 arch/sh/boards/mach-migor/lcd_qvga.c                    | 2 +-
 arch/sh/boards/mach-migor/setup.c                       | 2 +-
 arch/sh/include/asm/migor.h                             | 2 +-
 drivers/video/sh_mobile_lcdcfb.c                        | 8 +++++++-
 {arch/sh/include/asm => include/video}/sh_mobile_lcdc.h | 0
 6 files changed, 11 insertions(+), 5 deletions(-)
 rename {arch/sh/include/asm => include/video}/sh_mobile_lcdc.h (100%)

diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index 00e632fc0688..7ae8dcddfeb4 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -20,7 +20,7 @@
 #include <linux/smc911x.h>
 #include <media/soc_camera_platform.h>
 #include <media/sh_mobile_ceu.h>
-#include <asm/sh_mobile_lcdc.h>
+#include <video/sh_mobile_lcdc.h>
 #include <asm/io.h>
 #include <asm/clock.h>
 
diff --git a/arch/sh/boards/mach-migor/lcd_qvga.c b/arch/sh/boards/mach-migor/lcd_qvga.c
index 6e9609596448..735326c04497 100644
--- a/arch/sh/boards/mach-migor/lcd_qvga.c
+++ b/arch/sh/boards/mach-migor/lcd_qvga.c
@@ -17,7 +17,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <asm/sh_mobile_lcdc.h>
+#include <video/sh_mobile_lcdc.h>
 #include <asm/migor.h>
 
 /* LCD Module is a PH240320T according to board schematics. This module
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 714dce91cc9b..f27475d2fa0c 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -19,11 +19,11 @@
 #include <linux/clk.h>
 #include <media/soc_camera_platform.h>
 #include <media/sh_mobile_ceu.h>
+#include <video/sh_mobile_lcdc.h>
 #include <asm/clock.h>
 #include <asm/machvec.h>
 #include <asm/io.h>
 #include <asm/sh_keysc.h>
-#include <asm/sh_mobile_lcdc.h>
 #include <asm/migor.h>
 
 /* Address     IRQ  Size  Bus  Description
diff --git a/arch/sh/include/asm/migor.h b/arch/sh/include/asm/migor.h
index c12b632c540b..70596d38fd67 100644
--- a/arch/sh/include/asm/migor.h
+++ b/arch/sh/include/asm/migor.h
@@ -54,7 +54,7 @@
 
 #define BSC_CS6ABCR 0xfec1001c
 
-#include <asm/sh_mobile_lcdc.h>
+#include <video/sh_mobile_lcdc.h>
 
 int migor_lcd_qvga_setup(void *board_data, void *sys_ops_handle,
 			 struct sh_mobile_lcdc_sys_bus_ops *sys_ops);
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index 4c32c06579a0..b7468bacce80 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -16,7 +16,7 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
-#include <asm/sh_mobile_lcdc.h>
+#include <video/sh_mobile_lcdc.h>
 
 #define PALETTE_NR 16
 
@@ -34,7 +34,9 @@ struct sh_mobile_lcdc_chan {
 
 struct sh_mobile_lcdc_priv {
 	void __iomem *base;
+#ifdef CONFIG_HAVE_CLK
 	struct clk *clk;
+#endif
 	unsigned long lddckr;
 	struct sh_mobile_lcdc_chan ch[2];
 };
@@ -422,6 +424,7 @@ static int sh_mobile_lcdc_setup_clocks(struct device *dev, int clock_source,
 
 	priv->lddckr = icksel << 16;
 
+#ifdef CONFIG_HAVE_CLK
 	if (str) {
 		priv->clk = clk_get(dev, str);
 		if (IS_ERR(priv->clk)) {
@@ -431,6 +434,7 @@ static int sh_mobile_lcdc_setup_clocks(struct device *dev, int clock_source,
 
 		clk_enable(priv->clk);
 	}
+#endif
 
 	return 0;
 }
@@ -688,10 +692,12 @@ static int sh_mobile_lcdc_remove(struct platform_device *pdev)
 		fb_dealloc_cmap(&info->cmap);
 	}
 
+#ifdef CONFIG_HAVE_CLK
 	if (priv->clk) {
 		clk_disable(priv->clk);
 		clk_put(priv->clk);
 	}
+#endif
 
 	if (priv->base)
 		iounmap(priv->base);
diff --git a/arch/sh/include/asm/sh_mobile_lcdc.h b/include/video/sh_mobile_lcdc.h
similarity index 100%
rename from arch/sh/include/asm/sh_mobile_lcdc.h
rename to include/video/sh_mobile_lcdc.h
-- 
GitLab


From 0c5d1eb77a8be917b638344a22afe1398236482b Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 1 Oct 2008 14:46:18 -0700
Subject: [PATCH 199/895] genirq: record trigger type

Genirq hasn't previously recorded the trigger type used by any given IRQ,
although some irq_chip support has done so.  That data can be useful when
troubleshooting.  This patch records it in the relevant irq_desc.status
bits, and improves consistency between the two driver-visible calls
affected:

 - Make set_irq_type() usage match request_irq() usage:
    * IRQ_TYPE_NONE should be a NOP; succeed, so irq_chip methods
      won't have to handle that case any more (many do it wrong).
    * IRQ_TYPE_PROBE is ignored; any buggy out-of-tree callers
      might need to switch over to the real IRQ probing code.
    * emit the same diagnostics (from shared utility code)

 - Their kerneldoc now reflects usage:
    * request_irq() flags include IRQF_TRIGGER_* to specify
      active edge(s)/level ... docs previously omitted that
    * set_irq_type() is declared in <linux/irq.h> so callers
      should use the (bit-equivalent) IRQ_TYPE_* symbols there

Also: adds a warning about shared IRQs that don't end up using the
requested trigger mode; and fix an unrelated "sparse" warning.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/chip.c      | 15 ++++++++-------
 kernel/irq/internals.h |  3 +++
 kernel/irq/manage.c    | 21 ++++++++++++++++++---
 3 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d663338cb4a8..5203a599d211 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -111,9 +111,9 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 EXPORT_SYMBOL(set_irq_chip);
 
 /**
- *	set_irq_type - set the irq type for an irq
+ *	set_irq_type - set the irq trigger type for an irq
  *	@irq:	irq number
- *	@type:	interrupt type - see include/linux/interrupt.h
+ *	@type:	IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
  */
 int set_irq_type(unsigned int irq, unsigned int type)
 {
@@ -127,11 +127,12 @@ int set_irq_type(unsigned int irq, unsigned int type)
 	}
 
 	desc = irq_desc + irq;
-	if (desc->chip->set_type) {
-		spin_lock_irqsave(&desc->lock, flags);
-		ret = desc->chip->set_type(irq, type);
-		spin_unlock_irqrestore(&desc->lock, flags);
-	}
+	if (type == IRQ_TYPE_NONE)
+		return 0;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	ret = __irq_set_trigger(desc, irq, flags);
+	spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(set_irq_type);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 08a849a22447..422dd00c8bd3 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -10,6 +10,9 @@ extern void irq_chip_set_defaults(struct irq_chip *chip);
 /* Set default handler: */
 extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
 
+extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
+		unsigned long flags);
+
 #ifdef CONFIG_PROC_FS
 extern void register_irq_proc(unsigned int irq);
 extern void register_handler_proc(unsigned int irq, struct irqaction *action);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d62f69ba7453..e59157b591f8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -216,7 +216,7 @@ void enable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(enable_irq);
 
-int set_irq_wake_real(unsigned int irq, unsigned int on)
+static int set_irq_wake_real(unsigned int irq, unsigned int on)
 {
 	struct irq_desc *desc = irq_desc + irq;
 	int ret = -ENXIO;
@@ -305,10 +305,11 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc)
 		desc->handle_irq = NULL;
 }
 
-static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
+int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags)
 {
 	int ret;
+	struct irq_chip *chip = desc->chip;
 
 	if (!chip || !chip->set_type) {
 		/*
@@ -326,6 +327,11 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
 		pr_err("setting trigger mode %d for irq %u failed (%pF)\n",
 				(int)(flags & IRQF_TRIGGER_MASK),
 				irq, chip->set_type);
+	else {
+		/* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
+		desc->status &= ~IRQ_TYPE_SENSE_MASK;
+		desc->status |= flags & IRQ_TYPE_SENSE_MASK;
+	}
 
 	return ret;
 }
@@ -404,7 +410,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 
 		/* Setup the type (level, edge polarity) if configured: */
 		if (new->flags & IRQF_TRIGGER_MASK) {
-			ret = __irq_set_trigger(desc->chip, irq, new->flags);
+			ret = __irq_set_trigger(desc, irq, new->flags);
 
 			if (ret) {
 				spin_unlock_irqrestore(&desc->lock, flags);
@@ -430,6 +436,14 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 
 		/* Set default affinity mask once everything is setup */
 		irq_select_affinity(irq);
+
+	} else if ((new->flags & IRQF_TRIGGER_MASK)
+			&& (new->flags & IRQF_TRIGGER_MASK)
+				!= (desc->status & IRQ_TYPE_SENSE_MASK)) {
+		/* hope the handler works with the actual trigger mode... */
+		pr_warning("IRQ %d uses trigger mode %d; requested %d\n",
+				irq, (int)(desc->status & IRQ_TYPE_SENSE_MASK),
+				(int)(new->flags & IRQF_TRIGGER_MASK));
 	}
 
 	*p = new;
@@ -586,6 +600,7 @@ EXPORT_SYMBOL(free_irq);
  *	IRQF_SHARED		Interrupt is shared
  *	IRQF_DISABLED	Disable local interrupts while processing
  *	IRQF_SAMPLE_RANDOM	The interrupt can be used for entropy
+ *	IRQF_TRIGGER_*		Specify active edge(s) or level
  *
  */
 int request_irq(unsigned int irq, irq_handler_t handler,
-- 
GitLab


From a2159b52219870553fd67e6456f41cd5225c46c6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 2 Oct 2008 19:09:13 +0900
Subject: [PATCH 200/895] serial: sh-sci: Dynamic clock management depends on
 HAVE_CLK.

Presently this is conditionalized on sh, and disabled for sh64.
Now that SH-5 ties in to the clock framework, the sh64 exception
can be dropped. Additionally, ARM will want to use the same hooks
once SH-Mobile G3 grows clock framework support, so switch these
paths over to HAVE_CLK now.

Once the H8 and ARM sh-sci users hook up HAVE_CLK, the driver can
be switched over to having an outright dependency on it and the
ifdefs can go away.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/serial/sh-sci.c | 43 ++++++++++++++---------------------------
 drivers/serial/sh-sci.h |  4 +---
 2 files changed, 16 insertions(+), 31 deletions(-)

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index f5aebc9f27e7..5bb571548558 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -78,7 +78,7 @@ struct sci_port {
 	struct timer_list	break_timer;
 	int			break_flag;
 
-#ifdef CONFIG_SUPERH
+#ifdef CONFIG_HAVE_CLK
 	/* Port clock */
 	struct clk		*clk;
 #endif
@@ -831,7 +831,7 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 	return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_CPU_FREQ
+#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_HAVE_CLK)
 /*
  * Here we define a transistion notifier so that we can update all of our
  * ports' baud rate when the peripheral clock changes.
@@ -860,7 +860,7 @@ static int sci_notifier(struct notifier_block *self,
 			 * Clean this up later..
 			 */
 			clk = clk_get(NULL, "module_clk");
-			port->uartclk = clk_get_rate(clk) * 16;
+			port->uartclk = clk_get_rate(clk);
 			clk_put(clk);
 		}
 
@@ -873,7 +873,7 @@ static int sci_notifier(struct notifier_block *self,
 }
 
 static struct notifier_block sci_nb = { &sci_notifier, NULL, 0 };
-#endif /* CONFIG_CPU_FREQ */
+#endif /* CONFIG_CPU_FREQ && CONFIG_HAVE_CLK */
 
 static int sci_request_irq(struct sci_port *port)
 {
@@ -1008,7 +1008,7 @@ static int sci_startup(struct uart_port *port)
 	if (s->enable)
 		s->enable(port);
 
-#if defined(CONFIG_SUPERH) && !defined(CONFIG_SUPERH64)
+#ifdef CONFIG_HAVE_CLK
 	s->clk = clk_get(NULL, "module_clk");
 #endif
 
@@ -1030,7 +1030,7 @@ static void sci_shutdown(struct uart_port *port)
 	if (s->disable)
 		s->disable(port);
 
-#if defined(CONFIG_SUPERH) && !defined(CONFIG_SUPERH64)
+#ifdef CONFIG_HAVE_CLK
 	clk_put(s->clk);
 	s->clk = NULL;
 #endif
@@ -1041,24 +1041,11 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
 {
 	struct sci_port *s = &sci_ports[port->line];
 	unsigned int status, baud, smr_val;
-	int t;
+	int t = -1;
 
 	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16);
-
-	switch (baud) {
-		case 0:
-			t = -1;
-			break;
-		default:
-		{
-#if defined(CONFIG_SUPERH) && !defined(CONFIG_SUPERH64)
-			t = SCBRR_VALUE(baud, clk_get_rate(s->clk));
-#else
-			t = SCBRR_VALUE(baud);
-#endif
-			break;
-		}
-	}
+	if (likely(baud))
+		t = SCBRR_VALUE(baud, port->uartclk);
 
 	do {
 		status = sci_in(port, SCxSR);
@@ -1207,17 +1194,17 @@ static void __init sci_init_ports(void)
 		sci_ports[i].disable	= h8300_sci_disable;
 #endif
 		sci_ports[i].port.uartclk = CONFIG_CPU_CLOCK;
-#elif defined(CONFIG_SUPERH64)
-		sci_ports[i].port.uartclk = current_cpu_data.module_clock * 16;
-#else
+#elif defined(CONFIG_HAVE_CLK)
 		/*
 		 * XXX: We should use a proper SCI/SCIF clock
 		 */
 		{
 			struct clk *clk = clk_get(NULL, "module_clk");
-			sci_ports[i].port.uartclk = clk_get_rate(clk) * 16;
+			sci_ports[i].port.uartclk = clk_get_rate(clk);
 			clk_put(clk);
 		}
+#else
+#error "Need a valid uartclk"
 #endif
 
 		sci_ports[i].break_timer.data = (unsigned long)&sci_ports[i];
@@ -1285,7 +1272,7 @@ static int __init serial_console_setup(struct console *co, char *options)
 
 	port->type = serial_console_port->type;
 
-#if defined(CONFIG_SUPERH) && !defined(CONFIG_SUPERH64)
+#ifdef CONFIG_HAVE_CLK
 	if (!serial_console_port->clk)
 		serial_console_port->clk = clk_get(NULL, "module_clk");
 #endif
@@ -1479,7 +1466,7 @@ static int __devinit sci_probe(struct platform_device *dev)
 	kgdb_putchar	= kgdb_sci_putchar;
 #endif
 
-#ifdef CONFIG_CPU_FREQ
+#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_HAVE_CLK)
 	cpufreq_register_notifier(&sci_nb, CPUFREQ_TRANSITION_NOTIFIER);
 	dev_info(&dev->dev, "CPU frequency notifier registered\n");
 #endif
diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index 8a0749e34ca3..b3d906bfbd87 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -793,9 +793,7 @@ static inline int sci_rxd_in(struct uart_port *port)
 #elif defined(CONFIG_CPU_SUBTYPE_SH7723)
 #define SCBRR_VALUE(bps, clk) (((clk*2)+16*bps)/(16*bps)-1)
 #elif defined(__H8300H__) || defined(__H8300S__)
-#define SCBRR_VALUE(bps) (((CONFIG_CPU_CLOCK*1000/32)/bps)-1)
-#elif defined(CONFIG_SUPERH64)
-#define SCBRR_VALUE(bps) ((current_cpu_data.module_clock+16*bps)/(32*bps)-1)
+#define SCBRR_VALUE(bps, clk) (((clk*1000/32)/bps)-1)
 #else /* Generic SH */
 #define SCBRR_VALUE(bps, clk) ((clk+16*bps)/(32*bps)-1)
 #endif
-- 
GitLab


From 14866543ad22014a0b12e10657a917eb6b487248 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 4 Oct 2008 05:25:52 +0900
Subject: [PATCH 201/895] sh: More I/O routine overhauling.

This tidies up a lot of the PIO/MMIO split. No in-tree platforms were
making use of the MMIO overloading through the machvec (nor have any of
them been in some time), so we just kill all of that off. The ISA I/O
routine wrapping remains unaffected, which remains the only special
casing outside of the iomap API that boards need to think about.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/io.h         | 257 ++++++++++++-------------------
 arch/sh/include/asm/io_generic.h |   7 -
 arch/sh/include/asm/machvec.h    |   7 -
 arch/sh/kernel/io.c              |  12 +-
 arch/sh/kernel/io_generic.c      |  61 ++------
 arch/sh/kernel/machvec.c         |   3 -
 6 files changed, 115 insertions(+), 232 deletions(-)

diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index d9e794eff830..436c28539577 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -1,27 +1,26 @@
 #ifndef __ASM_SH_IO_H
 #define __ASM_SH_IO_H
-
 /*
  * Convention:
- *    read{b,w,l}/write{b,w,l} are for PCI,
+ *    read{b,w,l,q}/write{b,w,l,q} are for PCI,
  *    while in{b,w,l}/out{b,w,l} are for ISA
- * These may (will) be platform specific function.
+ *
  * In addition we have 'pausing' versions: in{b,w,l}_p/out{b,w,l}_p
  * and 'string' versions: ins{b,w,l}/outs{b,w,l}
- * For read{b,w,l} and write{b,w,l} there are also __raw versions, which
- * do not have a memory barrier after them.
  *
- * In addition, we have
- *   ctrl_in{b,w,l}/ctrl_out{b,w,l} for SuperH specific I/O.
- *   which are processor specific.
- */
-
-/*
- * We follow the Alpha convention here:
- *  __inb expands to an inline function call (which calls via the mv)
- *  _inb  is a real function call (note ___raw fns are _ version of __raw)
- *  inb   by default expands to _inb, but the machine specific code may
- *        define it to __inb if it chooses.
+ * While read{b,w,l,q} and write{b,w,l,q} contain memory barriers
+ * automatically, there are also __raw versions, which do not.
+ *
+ * Historically, we have also had ctrl_in{b,w,l,q}/ctrl_out{b,w,l,q} for
+ * SuperH specific I/O (raw I/O to on-chip CPU peripherals). In practice
+ * these have the same semantics as the __raw variants, and as such, all
+ * new code should be using the __raw versions.
+ *
+ * All ISA I/O routines are wrapped through the machine vector. If a
+ * board does not provide overrides, a generic set that are copied in
+ * from the default machine vector are used instead. These are largely
+ * for old compat code for I/O offseting to SuperIOs, all of which are
+ * better handled through the machvec ioport mapping routines these days.
  */
 #include <asm/cache.h>
 #include <asm/system.h>
@@ -31,7 +30,6 @@
 #include <asm-generic/iomap.h>
 
 #ifdef __KERNEL__
-
 /*
  * Depending on which platform we are running on, we need different
  * I/O functions.
@@ -40,90 +38,64 @@
 #include <asm/io_generic.h>
 #include <asm/io_trapped.h>
 
-#define maybebadio(port) \
-  printk(KERN_ERR "bad PC-like io %s:%u for port 0x%lx at 0x%08x\n", \
-	 __FUNCTION__, __LINE__, (port), (u32)__builtin_return_address(0))
+#define inb(p)			sh_mv.mv_inb((p))
+#define inw(p)			sh_mv.mv_inw((p))
+#define inl(p)			sh_mv.mv_inl((p))
+#define outb(x,p)		sh_mv.mv_outb((x),(p))
+#define outw(x,p)		sh_mv.mv_outw((x),(p))
+#define outl(x,p)		sh_mv.mv_outl((x),(p))
+
+#define inb_p(p)		sh_mv.mv_inb_p((p))
+#define inw_p(p)		sh_mv.mv_inw_p((p))
+#define inl_p(p)		sh_mv.mv_inl_p((p))
+#define outb_p(x,p)		sh_mv.mv_outb_p((x),(p))
+#define outw_p(x,p)		sh_mv.mv_outw_p((x),(p))
+#define outl_p(x,p)		sh_mv.mv_outl_p((x),(p))
+
+#define insb(p,b,c)		sh_mv.mv_insb((p), (b), (c))
+#define insw(p,b,c)		sh_mv.mv_insw((p), (b), (c))
+#define insl(p,b,c)		sh_mv.mv_insl((p), (b), (c))
+#define outsb(p,b,c)		sh_mv.mv_outsb((p), (b), (c))
+#define outsw(p,b,c)		sh_mv.mv_outsw((p), (b), (c))
+#define outsl(p,b,c)		sh_mv.mv_outsl((p), (b), (c))
+
+#define __raw_writeb(v,a)	(__chk_io_ptr(a), *(volatile u8  __force *)(a) = (v))
+#define __raw_writew(v,a)	(__chk_io_ptr(a), *(volatile u16 __force *)(a) = (v))
+#define __raw_writel(v,a)	(__chk_io_ptr(a), *(volatile u32 __force *)(a) = (v))
+#define __raw_writeq(v,a)	(__chk_io_ptr(a), *(volatile u64 __force *)(a) = (v))
+
+#define __raw_readb(a)		(__chk_io_ptr(a), *(volatile u8  __force *)(a))
+#define __raw_readw(a)		(__chk_io_ptr(a), *(volatile u16 __force *)(a))
+#define __raw_readl(a)		(__chk_io_ptr(a), *(volatile u32 __force *)(a))
+#define __raw_readq(a)		(__chk_io_ptr(a), *(volatile u64 __force *)(a))
+
+#define readb(a)		({ u8  r_ = __raw_readb(a); mb(); r_; })
+#define readw(a)		({ u16 r_ = __raw_readw(a); mb(); r_; })
+#define readl(a)		({ u32 r_ = __raw_readl(a); mb(); r_; })
+#define readq(a)		({ u64 r_ = __raw_readq(a); mb(); r_; })
+
+#define writeb(v,a)		({ __raw_writeb((v),(a)); mb(); })
+#define writew(v,a)		({ __raw_writew((v),(a)); mb(); })
+#define writel(v,a)		({ __raw_writel((v),(a)); mb(); })
+#define writeq(v,a)		({ __raw_writeq((v),(a)); mb(); })
 
-/*
- * Since boards are able to define their own set of I/O routines through
- * their respective machine vector, we always wrap through the mv.
- *
- * Also, in the event that a board hasn't provided its own definition for
- * a given routine, it will be wrapped to generic code at run-time.
- */
-
-#define __inb(p)	sh_mv.mv_inb((p))
-#define __inw(p)	sh_mv.mv_inw((p))
-#define __inl(p)	sh_mv.mv_inl((p))
-#define __outb(x,p)	sh_mv.mv_outb((x),(p))
-#define __outw(x,p)	sh_mv.mv_outw((x),(p))
-#define __outl(x,p)	sh_mv.mv_outl((x),(p))
-
-#define __inb_p(p)	sh_mv.mv_inb_p((p))
-#define __inw_p(p)	sh_mv.mv_inw_p((p))
-#define __inl_p(p)	sh_mv.mv_inl_p((p))
-#define __outb_p(x,p)	sh_mv.mv_outb_p((x),(p))
-#define __outw_p(x,p)	sh_mv.mv_outw_p((x),(p))
-#define __outl_p(x,p)	sh_mv.mv_outl_p((x),(p))
-
-#define __insb(p,b,c)	sh_mv.mv_insb((p), (b), (c))
-#define __insw(p,b,c)	sh_mv.mv_insw((p), (b), (c))
-#define __insl(p,b,c)	sh_mv.mv_insl((p), (b), (c))
-#define __outsb(p,b,c)	sh_mv.mv_outsb((p), (b), (c))
-#define __outsw(p,b,c)	sh_mv.mv_outsw((p), (b), (c))
-#define __outsl(p,b,c)	sh_mv.mv_outsl((p), (b), (c))
-
-#define __readb(a)	sh_mv.mv_readb((a))
-#define __readw(a)	sh_mv.mv_readw((a))
-#define __readl(a)	sh_mv.mv_readl((a))
-#define __writeb(v,a)	sh_mv.mv_writeb((v),(a))
-#define __writew(v,a)	sh_mv.mv_writew((v),(a))
-#define __writel(v,a)	sh_mv.mv_writel((v),(a))
-
-#define inb		__inb
-#define inw		__inw
-#define inl		__inl
-#define outb		__outb
-#define outw		__outw
-#define outl		__outl
-
-#define inb_p		__inb_p
-#define inw_p		__inw_p
-#define inl_p		__inl_p
-#define outb_p		__outb_p
-#define outw_p		__outw_p
-#define outl_p		__outl_p
-
-#define insb		__insb
-#define insw		__insw
-#define insl		__insl
-#define outsb		__outsb
-#define outsw		__outsw
-#define outsl		__outsl
-
-#define __raw_writeb(v,a)	(__chk_io_ptr(a), *(volatile unsigned char __force  *)(a) = (v))
-#define __raw_writew(v,a)	(__chk_io_ptr(a), *(volatile unsigned short __force *)(a) = (v))
-#define __raw_writel(v,a)	(__chk_io_ptr(a), *(volatile unsigned int __force   *)(a) = (v))
-
-#define __raw_readb(a)		(__chk_io_ptr(a), *(volatile unsigned char __force  *)(a))
-#define __raw_readw(a)		(__chk_io_ptr(a), *(volatile unsigned short __force *)(a))
-#define __raw_readl(a)		(__chk_io_ptr(a), *(volatile unsigned int __force   *)(a))
-
-void __raw_writesl(void __iomem *addr, const void *data, int longlen);
-void __raw_readsl(const void __iomem *addr, void *data, int longlen);
+/* SuperH on-chip I/O functions */
+#define ctrl_inb		__raw_readb
+#define ctrl_inw		__raw_readw
+#define ctrl_inl		__raw_readl
+#define ctrl_inq		__raw_readq
 
-/*
- * The platform header files may define some of these macros to use
- * the inlined versions where appropriate.  These macros may also be
- * redefined by userlevel programs.
- */
-#define readb(a)	({ unsigned int r_ = __readb(a); mb(); r_; })
-#define readw(a)	({ unsigned int r_ = __readw(a); mb(); r_; })
-#define readl(a)	({ unsigned int r_ = __readl(a); mb(); r_; })
+#define ctrl_outb		__raw_writeb
+#define ctrl_outw		__raw_writew
+#define ctrl_outl		__raw_writel
+#define ctrl_outq		__raw_writeq
 
-#define writeb(v,a)	({ __writeb((v),(a)); mb(); })
-#define writew(v,a)	({ __writew((v),(a)); mb(); })
-#define writel(v,a)	({ __writel((v),(a)); mb(); })
+static inline void ctrl_delay(void)
+{
+#ifdef P2SEG
+	__raw_readw(P2SEG);
+#endif
+}
 
 #define __BUILD_MEMORY_STRING(bwlq, type)				\
 									\
@@ -151,18 +123,23 @@ static inline void __raw_reads##bwlq(volatile void __iomem *mem,	\
 
 __BUILD_MEMORY_STRING(b, u8)
 __BUILD_MEMORY_STRING(w, u16)
+__BUILD_MEMORY_STRING(q, u64)
+
+void __raw_writesl(void __iomem *addr, const void *data, int longlen);
+void __raw_readsl(const void __iomem *addr, void *data, int longlen);
 
-#define writesb	__raw_writesb
-#define writesw	__raw_writesw
-#define writesl __raw_writesl
+#define writesb			__raw_writesb
+#define writesw			__raw_writesw
+#define writesl			__raw_writesl
 
-#define readsb	__raw_readsb
-#define readsw	__raw_readsw
-#define readsl  __raw_readsl
+#define readsb			__raw_readsb
+#define readsw			__raw_readsw
+#define readsl			__raw_readsl
 
-#define readb_relaxed(a) readb(a)
-#define readw_relaxed(a) readw(a)
-#define readl_relaxed(a) readl(a)
+#define readb_relaxed(a)	readb(a)
+#define readw_relaxed(a)	readw(a)
+#define readl_relaxed(a)	readl(a)
+#define readq_relaxed(a)	readq(a)
 
 /* Simple MMIO */
 #define ioread8(a)		__raw_readb(a)
@@ -185,15 +162,17 @@ __BUILD_MEMORY_STRING(w, u16)
 #define iowrite16_rep(a, s, c)	__raw_writesw((a), (s), (c))
 #define iowrite32_rep(a, s, c)	__raw_writesl((a), (s), (c))
 
-#define mmiowb()	wmb()	/* synco on SH-4A, otherwise a nop */
+/* synco on SH-4A, otherwise a nop */
+#define mmiowb()		wmb()
 
 #define IO_SPACE_LIMIT 0xffffffff
 
 extern unsigned long generic_io_base;
 
 /*
- * This function provides a method for the generic case where a board-specific
- * ioport_map simply needs to return the port + some arbitrary port base.
+ * This function provides a method for the generic case where a
+ * board-specific ioport_map simply needs to return the port + some
+ * arbitrary port base.
  *
  * We use this at board setup time to implicitly set the port base, and
  * as a result, we can use the generic ioport_map.
@@ -206,57 +185,9 @@ static inline void __set_io_port_base(unsigned long pbase)
 #define __ioport_map(p, n) sh_mv.mv_ioport_map((p), (n))
 
 /* We really want to try and get these to memcpy etc */
-extern void memcpy_fromio(void *, volatile void __iomem *, unsigned long);
-extern void memcpy_toio(volatile void __iomem *, const void *, unsigned long);
-extern void memset_io(volatile void __iomem *, int, unsigned long);
-
-/* SuperH on-chip I/O functions */
-static inline unsigned char ctrl_inb(unsigned long addr)
-{
-	return *(volatile unsigned char*)addr;
-}
-
-static inline unsigned short ctrl_inw(unsigned long addr)
-{
-	return *(volatile unsigned short*)addr;
-}
-
-static inline unsigned int ctrl_inl(unsigned long addr)
-{
-	return *(volatile unsigned long*)addr;
-}
-
-static inline unsigned long long ctrl_inq(unsigned long addr)
-{
-	return *(volatile unsigned long long*)addr;
-}
-
-static inline void ctrl_outb(unsigned char b, unsigned long addr)
-{
-	*(volatile unsigned char*)addr = b;
-}
-
-static inline void ctrl_outw(unsigned short b, unsigned long addr)
-{
-	*(volatile unsigned short*)addr = b;
-}
-
-static inline void ctrl_outl(unsigned int b, unsigned long addr)
-{
-        *(volatile unsigned long*)addr = b;
-}
-
-static inline void ctrl_outq(unsigned long long b, unsigned long addr)
-{
-	*(volatile unsigned long long*)addr = b;
-}
-
-static inline void ctrl_delay(void)
-{
-#ifdef P2SEG
-	ctrl_inw(P2SEG);
-#endif
-}
+void memcpy_fromio(void *, const volatile void __iomem *, unsigned long);
+void memcpy_toio(volatile void __iomem *, const void *, unsigned long);
+void memset_io(volatile void __iomem *, int, unsigned long);
 
 /* Quad-word real-mode I/O, don't ask.. */
 unsigned long long peek_real_address_q(unsigned long long addr);
@@ -347,6 +278,10 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
 #define iounmap(addr)					\
 	__iounmap((addr))
 
+#define maybebadio(port) \
+	printk(KERN_ERR "bad PC-like io %s:%u for port 0x%lx at 0x%08x\n", \
+	       __func__, __LINE__, (port), (u32)__builtin_return_address(0))
+
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
  * access
diff --git a/arch/sh/include/asm/io_generic.h b/arch/sh/include/asm/io_generic.h
index 92fc6070d7b3..1e5d375f55dc 100644
--- a/arch/sh/include/asm/io_generic.h
+++ b/arch/sh/include/asm/io_generic.h
@@ -33,13 +33,6 @@ void IO_CONCAT(__IO_PREFIX,outsb)(unsigned long, const void *src, unsigned long
 void IO_CONCAT(__IO_PREFIX,outsw)(unsigned long, const void *src, unsigned long count);
 void IO_CONCAT(__IO_PREFIX,outsl)(unsigned long, const void *src, unsigned long count);
 
-u8 IO_CONCAT(__IO_PREFIX,readb)(void __iomem *);
-u16 IO_CONCAT(__IO_PREFIX,readw)(void __iomem *);
-u32 IO_CONCAT(__IO_PREFIX,readl)(void __iomem *);
-void IO_CONCAT(__IO_PREFIX,writeb)(u8, void __iomem *);
-void IO_CONCAT(__IO_PREFIX,writew)(u16, void __iomem *);
-void IO_CONCAT(__IO_PREFIX,writel)(u32, void __iomem *);
-
 void *IO_CONCAT(__IO_PREFIX,ioremap)(unsigned long offset, unsigned long size);
 void IO_CONCAT(__IO_PREFIX,iounmap)(void *addr);
 
diff --git a/arch/sh/include/asm/machvec.h b/arch/sh/include/asm/machvec.h
index b2e4124070ae..f1bae02ef7b6 100644
--- a/arch/sh/include/asm/machvec.h
+++ b/arch/sh/include/asm/machvec.h
@@ -42,13 +42,6 @@ struct sh_machine_vector {
 	void (*mv_outsw)(unsigned long, const void *src, unsigned long count);
 	void (*mv_outsl)(unsigned long, const void *src, unsigned long count);
 
-	u8 (*mv_readb)(void __iomem *);
-	u16 (*mv_readw)(void __iomem *);
-	u32 (*mv_readl)(void __iomem *);
-	void (*mv_writeb)(u8, void __iomem *);
-	void (*mv_writew)(u16, void __iomem *);
-	void (*mv_writel)(u32, void __iomem *);
-
 	int (*mv_irq_demux)(int irq);
 
 	void (*mv_init_irq)(void);
diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c
index 2b8991229900..29cf4588fc05 100644
--- a/arch/sh/kernel/io.c
+++ b/arch/sh/kernel/io.c
@@ -19,12 +19,12 @@
  * Copy data from IO memory space to "real" memory space.
  * This needs to be optimized.
  */
-void memcpy_fromio(void *to, volatile void __iomem *from, unsigned long count)
+void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned long count)
 {
-	char *p = to;
+	unsigned char *p = to;
         while (count) {
                 count--;
-                *p = readb((void __iomem *)from);
+                *p = readb(from);
                 p++;
                 from++;
         }
@@ -37,10 +37,10 @@ EXPORT_SYMBOL(memcpy_fromio);
  */
 void memcpy_toio(volatile void __iomem *to, const void *from, unsigned long count)
 {
-	const char *p = from;
+	const unsigned char *p = from;
         while (count) {
                 count--;
-                writeb(*p, (void __iomem *)to);
+                writeb(*p, to);
                 p++;
                 to++;
         }
@@ -55,7 +55,7 @@ void memset_io(volatile void __iomem *dst, int c, unsigned long count)
 {
         while (count) {
                 count--;
-                writeb(c, (void __iomem *)dst);
+                writeb(c, dst);
                 dst++;
         }
 }
diff --git a/arch/sh/kernel/io_generic.c b/arch/sh/kernel/io_generic.c
index f1b214d3bce3..5a7f554d9ca1 100644
--- a/arch/sh/kernel/io_generic.c
+++ b/arch/sh/kernel/io_generic.c
@@ -19,38 +19,33 @@
 /* SH3 has a PCMCIA bug that needs a dummy read from area 6 for a
  * workaround. */
 /* I'm not sure SH7709 has this kind of bug */
-#define dummy_read()	ctrl_inb(0xba000000)
+#define dummy_read()	__raw_readb(0xba000000)
 #else
 #define dummy_read()
 #endif
 
 unsigned long generic_io_base;
 
-static inline void delay(void)
-{
-	ctrl_inw(0xa0000000);
-}
-
 u8 generic_inb(unsigned long port)
 {
-	return ctrl_inb((unsigned long __force)__ioport_map(port, 1));
+	return __raw_readb(__ioport_map(port, 1));
 }
 
 u16 generic_inw(unsigned long port)
 {
-	return ctrl_inw((unsigned long __force)__ioport_map(port, 2));
+	return __raw_readw(__ioport_map(port, 2));
 }
 
 u32 generic_inl(unsigned long port)
 {
-	return ctrl_inl((unsigned long __force)__ioport_map(port, 4));
+	return __raw_readl(__ioport_map(port, 4));
 }
 
 u8 generic_inb_p(unsigned long port)
 {
 	unsigned long v = generic_inb(port);
 
-	delay();
+	ctrl_delay();
 	return v;
 }
 
@@ -58,7 +53,7 @@ u16 generic_inw_p(unsigned long port)
 {
 	unsigned long v = generic_inw(port);
 
-	delay();
+	ctrl_delay();
 	return v;
 }
 
@@ -66,7 +61,7 @@ u32 generic_inl_p(unsigned long port)
 {
 	unsigned long v = generic_inl(port);
 
-	delay();
+	ctrl_delay();
 	return v;
 }
 
@@ -112,35 +107,35 @@ void generic_insl(unsigned long port, void *dst, unsigned long count)
 
 void generic_outb(u8 b, unsigned long port)
 {
-	ctrl_outb(b, (unsigned long __force)__ioport_map(port, 1));
+	__raw_writeb(b, __ioport_map(port, 1));
 }
 
 void generic_outw(u16 b, unsigned long port)
 {
-	ctrl_outw(b, (unsigned long __force)__ioport_map(port, 2));
+	__raw_writew(b, __ioport_map(port, 2));
 }
 
 void generic_outl(u32 b, unsigned long port)
 {
-	ctrl_outl(b, (unsigned long __force)__ioport_map(port, 4));
+	__raw_writel(b, __ioport_map(port, 4));
 }
 
 void generic_outb_p(u8 b, unsigned long port)
 {
 	generic_outb(b, port);
-	delay();
+	ctrl_delay();
 }
 
 void generic_outw_p(u16 b, unsigned long port)
 {
 	generic_outw(b, port);
-	delay();
+	ctrl_delay();
 }
 
 void generic_outl_p(u32 b, unsigned long port)
 {
 	generic_outl(b, port);
-	delay();
+	ctrl_delay();
 }
 
 /*
@@ -184,36 +179,6 @@ void generic_outsl(unsigned long port, const void *src, unsigned long count)
 	dummy_read();
 }
 
-u8 generic_readb(void __iomem *addr)
-{
-	return ctrl_inb((unsigned long __force)addr);
-}
-
-u16 generic_readw(void __iomem *addr)
-{
-	return ctrl_inw((unsigned long __force)addr);
-}
-
-u32 generic_readl(void __iomem *addr)
-{
-	return ctrl_inl((unsigned long __force)addr);
-}
-
-void generic_writeb(u8 b, void __iomem *addr)
-{
-	ctrl_outb(b, (unsigned long __force)addr);
-}
-
-void generic_writew(u16 b, void __iomem *addr)
-{
-	ctrl_outw(b, (unsigned long __force)addr);
-}
-
-void generic_writel(u32 b, void __iomem *addr)
-{
-	ctrl_outl(b, (unsigned long __force)addr);
-}
-
 void __iomem *generic_ioport_map(unsigned long addr, unsigned int size)
 {
 	return (void __iomem *)(addr + generic_io_base);
diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c
index 8bfdd275e940..c1ea41e5812a 100644
--- a/arch/sh/kernel/machvec.c
+++ b/arch/sh/kernel/machvec.c
@@ -126,9 +126,6 @@ void __init sh_mv_setup(void)
 	mv_set(insb);	mv_set(insw);	mv_set(insl);
 	mv_set(outsb);	mv_set(outsw);	mv_set(outsl);
 
-	mv_set(readb);	mv_set(readw);	mv_set(readl);
-	mv_set(writeb);	mv_set(writew);	mv_set(writel);
-
 	mv_set(ioport_map);
 	mv_set(ioport_unmap);
 	mv_set(irq_demux);
-- 
GitLab


From 63fd7f30f328f99956d3c774d17219c3c8d54131 Mon Sep 17 00:00:00 2001
From: Daniel Rosenthal <danielrosenthal@acm.org>
Date: Sun, 5 Oct 2008 17:43:10 -0400
Subject: [PATCH 202/895] [MTD] [INFTL] Fix infinite loop in INFTL_foldchain

When iterating over a chain in reverse (oldest block first), this
patch correctly marks the PUtable[] entry of the second to last erase
block of a chain as BLOCK_NIL, regardless of whether or not it can
format the last block successfully. Before, the second to last block
was only marked as pointing to BLOCK_NIL if INFTL_formatblock()
succeeded on the last block of the chain, which could potentially
result in an infinite loop if the block was worn out and refused to
format.

Signed-off-by: Daniel Rosenthal <danielrosenthal@acm.org>
Acked-by: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/inftlcore.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index c4f9d3378b24..50ce13887f63 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -388,6 +388,10 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
 		if (thisEUN == targetEUN)
 			break;
 
+		/* Unlink the last block from the chain. */
+		inftl->PUtable[prevEUN] = BLOCK_NIL;
+
+		/* Now try to erase it. */
 		if (INFTL_formatblock(inftl, thisEUN) < 0) {
 			/*
 			 * Could not erase : mark block as reserved.
@@ -396,7 +400,6 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
 		} else {
 			/* Correctly erased : mark it as free */
 			inftl->PUtable[thisEUN] = BLOCK_FREE;
-			inftl->PUtable[prevEUN] = BLOCK_NIL;
 			inftl->numfreeEUNs++;
 		}
 	}
-- 
GitLab


From 762a9f291bfdf9e4d5c2b80d730d79055c8d8c99 Mon Sep 17 00:00:00 2001
From: Deepak Saxena <dsaxena@plexity.net>
Date: Wed, 8 Oct 2008 12:56:24 -0700
Subject: [PATCH 203/895] UBI: print reserved_peb when it is too large

This patch makes debugging a missconfigured UBI a bit easier
by providing the needed information in the boot log.

Signed-off-by: Deepak Saxena <dsaxena@laptop.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/vtbl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 217d0e111b2a..333c8941552f 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -244,8 +244,8 @@ static int vtbl_check(const struct ubi_device *ubi,
 		}
 
 		if (reserved_pebs > ubi->good_peb_count) {
-			dbg_err("too large reserved_pebs, good PEBs %d",
-				ubi->good_peb_count);
+			dbg_err("too large reserved_pebs %d, good PEBs %d",
+				reserved_pebs, ubi->good_peb_count);
 			err = 9;
 			goto bad;
 		}
-- 
GitLab


From 95ebffd749c8e6c8cbb746bc0833a5738cc23321 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Thu, 18 Sep 2008 20:50:26 +0400
Subject: [PATCH 204/895] [MTD] [NAND] fsl_upm: update driver for the new OF
 bindings

- Get rid of fsl,wait-pattern and fsl,wait-write. I think this isn't
  chip-specific, and we should always do waits. I saw one board that
  didn't need fsl,wait-pattern, but I assume this was the exception
  that proves the rule;
- Get rid of chip-delay. Today there are no users for this, and if
  anyone really need this they should push the OF bindings beforehand;
- Now flash chips should be child nodes of the FSL UPM NAND controller;
- Implement OF partition parsing.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/fsl_upm.c | 57 ++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/drivers/mtd/nand/fsl_upm.c b/drivers/mtd/nand/fsl_upm.c
index 1ebfd87f00b4..3af5ef399a0f 100644
--- a/drivers/mtd/nand/fsl_upm.c
+++ b/drivers/mtd/nand/fsl_upm.c
@@ -36,9 +36,6 @@ struct fsl_upm_nand {
 	uint8_t upm_cmd_offset;
 	void __iomem *io_base;
 	int rnb_gpio;
-	const uint32_t *wait_pattern;
-	const uint32_t *wait_write;
-	int chip_delay;
 };
 
 #define to_fsl_upm_nand(mtd) container_of(mtd, struct fsl_upm_nand, mtd)
@@ -89,8 +86,7 @@ static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 
 	fsl_upm_run_pattern(&fun->upm, fun->io_base, cmd);
 
-	if (fun->wait_pattern)
-		fun_wait_rnb(fun);
+	fun_wait_rnb(fun);
 }
 
 static uint8_t fun_read_byte(struct mtd_info *mtd)
@@ -116,14 +112,16 @@ static void fun_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 
 	for (i = 0; i < len; i++) {
 		out_8(fun->chip.IO_ADDR_W, buf[i]);
-		if (fun->wait_write)
-			fun_wait_rnb(fun);
+		fun_wait_rnb(fun);
 	}
 }
 
-static int __devinit fun_chip_init(struct fsl_upm_nand *fun)
+static int __devinit fun_chip_init(struct fsl_upm_nand *fun,
+				   const struct device_node *upm_np,
+				   const struct resource *io_res)
 {
 	int ret;
+	struct device_node *flash_np;
 #ifdef CONFIG_MTD_PARTITIONS
 	static const char *part_types[] = { "cmdlinepart", NULL, };
 #endif
@@ -131,7 +129,7 @@ static int __devinit fun_chip_init(struct fsl_upm_nand *fun)
 	fun->chip.IO_ADDR_R = fun->io_base;
 	fun->chip.IO_ADDR_W = fun->io_base;
 	fun->chip.cmd_ctrl = fun_cmd_ctrl;
-	fun->chip.chip_delay = fun->chip_delay;
+	fun->chip.chip_delay = 50;
 	fun->chip.read_byte = fun_read_byte;
 	fun->chip.read_buf = fun_read_buf;
 	fun->chip.write_buf = fun_write_buf;
@@ -143,18 +141,37 @@ static int __devinit fun_chip_init(struct fsl_upm_nand *fun)
 	fun->mtd.priv = &fun->chip;
 	fun->mtd.owner = THIS_MODULE;
 
+	flash_np = of_get_next_child(upm_np, NULL);
+	if (!flash_np)
+		return -ENODEV;
+
+	fun->mtd.name = kasprintf(GFP_KERNEL, "%x.%s", io_res->start,
+				  flash_np->name);
+	if (!fun->mtd.name) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
 	ret = nand_scan(&fun->mtd, 1);
 	if (ret)
-		return ret;
-
-	fun->mtd.name = fun->dev->bus_id;
+		goto err;
 
 #ifdef CONFIG_MTD_PARTITIONS
 	ret = parse_mtd_partitions(&fun->mtd, part_types, &fun->parts, 0);
+
+#ifdef CONFIG_MTD_OF_PARTS
+	if (ret == 0)
+		ret = of_mtd_parse_partitions(fun->dev, &fun->mtd,
+					      flash_np, &fun->parts);
+#endif
 	if (ret > 0)
-		return add_mtd_partitions(&fun->mtd, fun->parts, ret);
+		ret = add_mtd_partitions(&fun->mtd, fun->parts, ret);
+	else
 #endif
-	return add_mtd_device(&fun->mtd);
+		ret = add_mtd_device(&fun->mtd);
+err:
+	of_node_put(flash_np);
+	return ret;
 }
 
 static int __devinit fun_probe(struct of_device *ofdev,
@@ -220,17 +237,8 @@ static int __devinit fun_probe(struct of_device *ofdev,
 
 	fun->dev = &ofdev->dev;
 	fun->last_ctrl = NAND_CLE;
-	fun->wait_pattern = of_get_property(ofdev->node, "fsl,wait-pattern",
-					    NULL);
-	fun->wait_write = of_get_property(ofdev->node, "fsl,wait-write", NULL);
-
-	prop = of_get_property(ofdev->node, "chip-delay", NULL);
-	if (prop)
-		fun->chip_delay = *prop;
-	else
-		fun->chip_delay = 50;
 
-	ret = fun_chip_init(fun);
+	ret = fun_chip_init(fun, ofdev->node, &io_res);
 	if (ret)
 		goto err2;
 
@@ -251,6 +259,7 @@ static int __devexit fun_remove(struct of_device *ofdev)
 	struct fsl_upm_nand *fun = dev_get_drvdata(&ofdev->dev);
 
 	nand_release(&fun->mtd);
+	kfree(fun->mtd.name);
 
 	if (fun->rnb_gpio >= 0)
 		gpio_free(fun->rnb_gpio);
-- 
GitLab


From 13f5369704d3c27a07936999f063be3a8a905398 Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Mon, 9 Jun 2008 10:19:08 +0200
Subject: [PATCH 205/895] [MTD] [NAND] driver extension to support NAND on
 TQM85xx modules

This patch extends the FSL UPM NAND driver from Anton Vorontsov to
support hardware which does not have the R/B pin of the NAND chip
connected, like the TQM8548 module:

- The OF_GPIO dependency has been removed from the Kconfig option
  because GPIO is not needed. The relevant gpio_* function are then
  stubbed out in <linux/gpio.h>.

- It re-introduces the chip-delay property to define an appropriate
  maximum delay time (tR) required for read operations. The binding
  will be documented in a separate patch.

Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/Kconfig   |  2 +-
 drivers/mtd/nand/fsl_upm.c | 17 +++++++++++++----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 7153854eb83a..18b9ffeabc98 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -401,7 +401,7 @@ config MTD_NAND_FSL_ELBC
 
 config MTD_NAND_FSL_UPM
 	tristate "Support for NAND on Freescale UPM"
-	depends on MTD_NAND && OF_GPIO && (PPC_83xx || PPC_85xx)
+	depends on MTD_NAND && (PPC_83xx || PPC_85xx)
 	select FSL_LBC
 	help
 	  Enables support for NAND Flash chips wired onto Freescale PowerPC
diff --git a/drivers/mtd/nand/fsl_upm.c b/drivers/mtd/nand/fsl_upm.c
index 3af5ef399a0f..024e3fffd4bb 100644
--- a/drivers/mtd/nand/fsl_upm.c
+++ b/drivers/mtd/nand/fsl_upm.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/nand_ecc.h>
 #include <linux/mtd/partitions.h>
@@ -36,6 +37,7 @@ struct fsl_upm_nand {
 	uint8_t upm_cmd_offset;
 	void __iomem *io_base;
 	int rnb_gpio;
+	int chip_delay;
 };
 
 #define to_fsl_upm_nand(mtd) container_of(mtd, struct fsl_upm_nand, mtd)
@@ -58,10 +60,11 @@ static void fun_wait_rnb(struct fsl_upm_nand *fun)
 	if (fun->rnb_gpio >= 0) {
 		while (--cnt && !fun_chip_ready(&fun->mtd))
 			cpu_relax();
+		if (!cnt)
+			dev_err(fun->dev, "tired waiting for RNB\n");
+	} else {
+		ndelay(100);
 	}
-
-	if (!cnt)
-		dev_err(fun->dev, "tired waiting for RNB\n");
 }
 
 static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
@@ -129,7 +132,7 @@ static int __devinit fun_chip_init(struct fsl_upm_nand *fun,
 	fun->chip.IO_ADDR_R = fun->io_base;
 	fun->chip.IO_ADDR_W = fun->io_base;
 	fun->chip.cmd_ctrl = fun_cmd_ctrl;
-	fun->chip.chip_delay = 50;
+	fun->chip.chip_delay = fun->chip_delay;
 	fun->chip.read_byte = fun_read_byte;
 	fun->chip.read_buf = fun_read_buf;
 	fun->chip.write_buf = fun_write_buf;
@@ -228,6 +231,12 @@ static int __devinit fun_probe(struct of_device *ofdev,
 		goto err2;
 	}
 
+	prop = of_get_property(ofdev->node, "chip-delay", NULL);
+	if (prop)
+		fun->chip_delay = *prop;
+	else
+		fun->chip_delay = 50;
+
 	fun->io_base = devm_ioremap_nocache(&ofdev->dev, io_res.start,
 					  io_res.end - io_res.start + 1);
 	if (!fun->io_base) {
-- 
GitLab


From 52551beb0519c014fb77e1c78a6888d20e62209c Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Thu, 9 Oct 2008 22:50:06 -0500
Subject: [PATCH 206/895] [MTD] [NAND] remove dead Kconfig associated with
 !CONFIG_PPC_MERGE

Removed the Kconfig associated with 'NDFC NanD Flash Controller'.
We can't enable !CONFIG_PPC_MERGE so there is no way to enable
this.  Additionally the code needs to get updated for arch/powerpc.

For the time being lets just remove the Kconfig option so we can
actually remove CONFIG_PPC_MERGE.

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/Kconfig | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 18b9ffeabc98..82815dd64bf6 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -157,13 +157,6 @@ config MTD_NAND_S3C2410_HWECC
 	  incorrect ECC generation, and if using these, the default of
 	  software ECC is preferable.
 
-config MTD_NAND_NDFC
-	tristate "NDFC NanD Flash Controller"
-	depends on 4xx && !PPC_MERGE
-	select MTD_NAND_ECC_SMC
-	help
-	 NDFC Nand Flash Controllers are integrated in IBM/AMCC's 4xx SoCs
-
 config MTD_NAND_S3C2410_CLKSTOP
 	bool "S3C2410 NAND IDLE clock stop"
 	depends on MTD_NAND_S3C2410
-- 
GitLab


From 98f172b257725de516a792b810b08639d6293d4d Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@mcmartin.ca>
Date: Mon, 28 Jul 2008 16:21:19 -0400
Subject: [PATCH 207/895] parisc: parisc-agp - fix <asm-parisc/*> -> <asm/*>

---
 drivers/char/agp/parisc-agp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c
index 8c42dcc5958c..f4bbcb273208 100644
--- a/drivers/char/agp/parisc-agp.c
+++ b/drivers/char/agp/parisc-agp.c
@@ -20,8 +20,8 @@
 #include <linux/agp_backend.h>
 #include <linux/log2.h>
 
-#include <asm-parisc/parisc-device.h>
-#include <asm-parisc/ropes.h>
+#include <asm/parisc-device.h>
+#include <asm/ropes.h>
 
 #include "agp.h"
 
-- 
GitLab


From 40b10fc58a62a9698f4fa1916e61b0953139928a Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@mcmartin.ca>
Date: Mon, 28 Jul 2008 16:22:34 -0400
Subject: [PATCH 208/895] parisc: ropes.h - fix <asm-parisc/*> -> <asm/*>

---
 include/asm-parisc/ropes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-parisc/ropes.h b/include/asm-parisc/ropes.h
index 007a880615eb..09f51d5ab57c 100644
--- a/include/asm-parisc/ropes.h
+++ b/include/asm-parisc/ropes.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_PARISC_ROPES_H_
 #define _ASM_PARISC_ROPES_H_
 
-#include <asm-parisc/parisc-device.h>
+#include <asm/parisc-device.h>
 
 #ifdef CONFIG_64BIT
 /* "low end" PA8800 machines use ZX1 chipset: PAT PDC and only run 64-bit */
-- 
GitLab


From dae2cdf3e25bf1c63f8012ae19c133e3b3b187ca Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@mcmartin.ca>
Date: Mon, 28 Jul 2008 21:14:50 -0400
Subject: [PATCH 209/895] parisc: add arch/parisc/kernel/.gitignore

---
 arch/parisc/kernel/.gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 arch/parisc/kernel/.gitignore

diff --git a/arch/parisc/kernel/.gitignore b/arch/parisc/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/parisc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
-- 
GitLab


From 1e22166c40a99fb25fa6ff4f711a3217d848dd85 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@mcmartin.ca>
Date: Mon, 28 Jul 2008 21:17:23 -0400
Subject: [PATCH 210/895] parisc: unify CCIO_COLLECT_STATS implementation

Make it behave in the same manner as SBA_COLLECT_STATS, further
clean ups pending.
---
 drivers/parisc/ccio-dma.c | 43 ++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 26 deletions(-)

diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index b30e38f3a50d..dcc1e9958d2f 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -66,15 +66,8 @@
 #undef DEBUG_CCIO_RUN_SG
 
 #ifdef CONFIG_PROC_FS
-/*
- * CCIO_SEARCH_TIME can help measure how fast the bitmap search is.
- * impacts performance though - ditch it if you don't use it.
- */
-#define CCIO_SEARCH_TIME
-#undef CCIO_MAP_STATS
-#else
-#undef CCIO_SEARCH_TIME
-#undef CCIO_MAP_STATS
+/* depends on proc fs support. But costs CPU performance. */
+#undef CCIO_COLLECT_STATS
 #endif
 
 #include <linux/proc_fs.h>
@@ -239,12 +232,10 @@ struct ioc {
 	u32 res_size;		    	/* size of resource map in bytes */
 	spinlock_t res_lock;
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 #define CCIO_SEARCH_SAMPLE 0x100
 	unsigned long avg_search[CCIO_SEARCH_SAMPLE];
 	unsigned long avg_idx;		  /* current index into avg_search */
-#endif
-#ifdef CCIO_MAP_STATS
 	unsigned long used_pages;
 	unsigned long msingle_calls;
 	unsigned long msingle_pages;
@@ -351,7 +342,7 @@ ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
 	unsigned int pages_needed = size >> IOVP_SHIFT;
 	unsigned int res_idx;
 	unsigned long boundary_size;
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 	unsigned long cr_start = mfctl(16);
 #endif
 	
@@ -406,7 +397,7 @@ resource_found:
 	DBG_RES("%s() res_idx %d res_hint: %d\n",
 		__func__, res_idx, ioc->res_hint);
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 	{
 		unsigned long cr_end = mfctl(16);
 		unsigned long tmp = cr_end - cr_start;
@@ -416,7 +407,7 @@ resource_found:
 	ioc->avg_search[ioc->avg_idx++] = cr_start;
 	ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1;
 #endif
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->used_pages += pages_needed;
 #endif
 	/* 
@@ -452,7 +443,7 @@ ccio_free_range(struct ioc *ioc, dma_addr_t iova, unsigned long pages_mapped)
 	DBG_RES("%s():  res_idx: %d pages_mapped %d\n", 
 		__func__, res_idx, pages_mapped);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->used_pages -= pages_mapped;
 #endif
 
@@ -764,7 +755,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size,
 	size = ALIGN(size + offset, IOVP_SIZE);
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->msingle_calls++;
 	ioc->msingle_pages += size >> IOVP_SHIFT;
 #endif
@@ -828,7 +819,7 @@ ccio_unmap_single(struct device *dev, dma_addr_t iova, size_t size,
 
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->usingle_calls++;
 	ioc->usingle_pages += size >> IOVP_SHIFT;
 #endif
@@ -894,7 +885,7 @@ ccio_free_consistent(struct device *dev, size_t size, void *cpu_addr,
 */
 #define PIDE_FLAG 0x80000000UL
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 #define IOMMU_MAP_STATS
 #endif
 #include "iommu-helpers.h"
@@ -938,7 +929,7 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
 	
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->msg_calls++;
 #endif
 
@@ -997,13 +988,13 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
 	DBG_RUN_SG("%s() START %d entries,  %08lx,%x\n",
 		__func__, nents, sg_virt_addr(sglist), sglist->length);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->usg_calls++;
 #endif
 
 	while(sg_dma_len(sglist) && nents--) {
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 		ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT;
 #endif
 		ccio_unmap_single(dev, sg_dma_address(sglist),
@@ -1048,7 +1039,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "IO PDIR size    : %d bytes (%d entries)\n",
 			       total_pages * 8, total_pages);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 		len += seq_printf(m, "IO PDIR entries : %ld free  %ld used (%d%%)\n",
 				  total_pages - ioc->used_pages, ioc->used_pages,
 				  (int)(ioc->used_pages * 100 / total_pages));
@@ -1057,7 +1048,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "Resource bitmap : %d bytes (%d pages)\n", 
 				  ioc->res_size, total_pages);
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 		min = max = ioc->avg_search[0];
 		for(j = 0; j < CCIO_SEARCH_SAMPLE; ++j) {
 			avg += ioc->avg_search[j];
@@ -1070,7 +1061,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "  Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
 				  min, avg, max);
 #endif
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 		len += seq_printf(m, "pci_map_single(): %8ld calls  %8ld pages (avg %d/1000)\n",
 				  ioc->msingle_calls, ioc->msingle_pages,
 				  (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
@@ -1088,7 +1079,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "pci_unmap_sg()  : %8ld calls  %8ld pages (avg %d/1000)\n\n\n",
 				  ioc->usg_calls, ioc->usg_pages,
 				  (int)((ioc->usg_pages * 1000)/ioc->usg_calls));
-#endif	/* CCIO_MAP_STATS */
+#endif	/* CCIO_COLLECT_STATS */
 
 		ioc = ioc->next;
 	}
-- 
GitLab


From 6c86cb8237bf08443806089130dc108051569a93 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@mcmartin.ca>
Date: Mon, 28 Jul 2008 22:52:18 -0400
Subject: [PATCH 211/895] parisc: move pdc_result to real2.S

---
 arch/parisc/kernel/asm-offsets.c |  3 +++
 arch/parisc/kernel/firmware.c    |  4 ++--
 arch/parisc/kernel/real2.S       | 12 ++++++++++++
 include/asm-parisc/pdc.h         |  3 +++
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 3efc0b73e4ff..699cf8ef2118 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -290,5 +290,8 @@ int main(void)
 	DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip));
 	DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space));
 	DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr));
+	BLANK();
+	DEFINE(ASM_PDC_RESULT_SIZE, NUM_PDC_RESULT * sizeof(unsigned long));
+	BLANK();
 	return 0;
 }
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 7177a6cd1b7f..99a9e505edf9 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -71,8 +71,8 @@
 #include <asm/processor.h>	/* for boot_cpu_data */
 
 static DEFINE_SPINLOCK(pdc_lock);
-static unsigned long pdc_result[32] __attribute__ ((aligned (8)));
-static unsigned long pdc_result2[32] __attribute__ ((aligned (8)));
+extern unsigned long pdc_result[NUM_PDC_RESULT];
+extern unsigned long pdc_result2[NUM_PDC_RESULT];
 
 #ifdef CONFIG_64BIT
 #define WIDE_FIRMWARE 0x1
diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S
index 7a92695d95a6..5f3d3a1f9037 100644
--- a/arch/parisc/kernel/real2.S
+++ b/arch/parisc/kernel/real2.S
@@ -8,12 +8,24 @@
  *
  */
 
+#include <asm/pdc.h>
 #include <asm/psw.h>
 #include <asm/assembly.h>
+#include <asm/asm-offsets.h>
 
 #include <linux/linkage.h>
 
+
 	.section	.bss
+
+	.export pdc_result
+	.export pdc_result2
+	.align 8
+pdc_result:
+	.block	ASM_PDC_RESULT_SIZE
+pdc_result2:
+	.block	ASM_PDC_RESULT_SIZE
+
 	.export real_stack
 	.export real32_stack
 	.export real64_stack
diff --git a/include/asm-parisc/pdc.h b/include/asm-parisc/pdc.h
index 9eaa794c3e4a..46b75f9cce51 100644
--- a/include/asm-parisc/pdc.h
+++ b/include/asm-parisc/pdc.h
@@ -332,6 +332,9 @@
 #define BOOT_CONSOLE_SPA_OFFSET  0x3c4
 #define BOOT_CONSOLE_PATH_OFFSET 0x3a8
 
+/* size of the pdc_result buffer for firmware.c */
+#define NUM_PDC_RESULT	32
+
 #if !defined(__ASSEMBLY__)
 #ifdef __KERNEL__
 
-- 
GitLab


From deae26bf6a10e47983606f5df080b91e97650ead Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@mcmartin.ca>
Date: Mon, 28 Jul 2008 23:02:13 -0400
Subject: [PATCH 212/895] parisc: move include/asm-parisc to
 arch/parisc/include/asm

---
 {include/asm-parisc => arch/parisc/include/asm}/Kbuild            | 0
 {include/asm-parisc => arch/parisc/include/asm}/a.out.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/agp.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/asmregs.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/assembly.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/atomic.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/auxvec.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/bitops.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/bug.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/bugs.h            | 0
 {include/asm-parisc => arch/parisc/include/asm}/byteorder.h       | 0
 {include/asm-parisc => arch/parisc/include/asm}/cache.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/cacheflush.h      | 0
 {include/asm-parisc => arch/parisc/include/asm}/checksum.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/compat.h          | 0
 .../asm-parisc => arch/parisc/include/asm}/compat_rt_sigframe.h   | 0
 {include/asm-parisc => arch/parisc/include/asm}/compat_signal.h   | 0
 {include/asm-parisc => arch/parisc/include/asm}/compat_ucontext.h | 0
 {include/asm-parisc => arch/parisc/include/asm}/cputime.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/current.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/delay.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/device.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/div64.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/dma-mapping.h     | 0
 {include/asm-parisc => arch/parisc/include/asm}/dma.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/eisa_bus.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/eisa_eeprom.h     | 0
 {include/asm-parisc => arch/parisc/include/asm}/elf.h             | 0
 .../asm-parisc => arch/parisc/include/asm}/emergency-restart.h    | 0
 {include/asm-parisc => arch/parisc/include/asm}/errno.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/fb.h              | 0
 {include/asm-parisc => arch/parisc/include/asm}/fcntl.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/fixmap.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/floppy.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/futex.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/grfioctl.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/hardirq.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/hardware.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/hw_irq.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/ide.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/io.h              | 0
 {include/asm-parisc => arch/parisc/include/asm}/ioctl.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/ioctls.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/ipcbuf.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/irq.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/irq_regs.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/kdebug.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/kmap_types.h      | 0
 {include/asm-parisc => arch/parisc/include/asm}/led.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/linkage.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/local.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/machdep.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/mc146818rtc.h     | 0
 {include/asm-parisc => arch/parisc/include/asm}/mckinley.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/mman.h            | 0
 {include/asm-parisc => arch/parisc/include/asm}/mmu.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/mmu_context.h     | 0
 {include/asm-parisc => arch/parisc/include/asm}/mmzone.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/module.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/msgbuf.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/mutex.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/page.h            | 0
 {include/asm-parisc => arch/parisc/include/asm}/param.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/parisc-device.h   | 0
 {include/asm-parisc => arch/parisc/include/asm}/parport.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/pci.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/pdc.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/pdc_chassis.h     | 0
 {include/asm-parisc => arch/parisc/include/asm}/pdcpat.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/percpu.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/perf.h            | 0
 {include/asm-parisc => arch/parisc/include/asm}/pgalloc.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/pgtable.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/poll.h            | 0
 {include/asm-parisc => arch/parisc/include/asm}/posix_types.h     | 0
 {include/asm-parisc => arch/parisc/include/asm}/prefetch.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/processor.h       | 0
 {include/asm-parisc => arch/parisc/include/asm}/psw.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/ptrace.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/real.h            | 0
 {include/asm-parisc => arch/parisc/include/asm}/resource.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/ropes.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/rt_sigframe.h     | 0
 {include/asm-parisc => arch/parisc/include/asm}/rtc.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/runway.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/scatterlist.h     | 0
 {include/asm-parisc => arch/parisc/include/asm}/sections.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/segment.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/sembuf.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/serial.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/setup.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/shmbuf.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/shmparam.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/sigcontext.h      | 0
 {include/asm-parisc => arch/parisc/include/asm}/siginfo.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/signal.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/smp.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/socket.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/sockios.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/spinlock.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/spinlock_types.h  | 0
 {include/asm-parisc => arch/parisc/include/asm}/stat.h            | 0
 {include/asm-parisc => arch/parisc/include/asm}/statfs.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/string.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/superio.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/system.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/termbits.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/termios.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/thread_info.h     | 0
 {include/asm-parisc => arch/parisc/include/asm}/timex.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/tlb.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/tlbflush.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/topology.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/traps.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/types.h           | 0
 {include/asm-parisc => arch/parisc/include/asm}/uaccess.h         | 0
 {include/asm-parisc => arch/parisc/include/asm}/ucontext.h        | 0
 {include/asm-parisc => arch/parisc/include/asm}/unaligned.h       | 0
 {include/asm-parisc => arch/parisc/include/asm}/unistd.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/unwind.h          | 0
 {include/asm-parisc => arch/parisc/include/asm}/user.h            | 0
 {include/asm-parisc => arch/parisc/include/asm}/vga.h             | 0
 {include/asm-parisc => arch/parisc/include/asm}/xor.h             | 0
 123 files changed, 0 insertions(+), 0 deletions(-)
 rename {include/asm-parisc => arch/parisc/include/asm}/Kbuild (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/a.out.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/agp.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/asmregs.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/assembly.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/atomic.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/auxvec.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/bitops.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/bug.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/bugs.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/byteorder.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/cache.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/cacheflush.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/checksum.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/compat.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/compat_rt_sigframe.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/compat_signal.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/compat_ucontext.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/cputime.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/current.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/delay.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/device.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/div64.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/dma-mapping.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/dma.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/eisa_bus.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/eisa_eeprom.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/elf.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/emergency-restart.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/errno.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/fb.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/fcntl.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/fixmap.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/floppy.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/futex.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/grfioctl.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/hardirq.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/hardware.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/hw_irq.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/ide.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/io.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/ioctl.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/ioctls.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/ipcbuf.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/irq.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/irq_regs.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/kdebug.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/kmap_types.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/led.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/linkage.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/local.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/machdep.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/mc146818rtc.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/mckinley.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/mman.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/mmu.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/mmu_context.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/mmzone.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/module.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/msgbuf.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/mutex.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/page.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/param.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/parisc-device.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/parport.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/pci.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/pdc.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/pdc_chassis.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/pdcpat.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/percpu.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/perf.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/pgalloc.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/pgtable.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/poll.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/posix_types.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/prefetch.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/processor.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/psw.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/ptrace.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/real.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/resource.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/ropes.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/rt_sigframe.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/rtc.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/runway.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/scatterlist.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/sections.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/segment.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/sembuf.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/serial.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/setup.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/shmbuf.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/shmparam.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/sigcontext.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/siginfo.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/signal.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/smp.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/socket.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/sockios.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/spinlock.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/spinlock_types.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/stat.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/statfs.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/string.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/superio.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/system.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/termbits.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/termios.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/thread_info.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/timex.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/tlb.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/tlbflush.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/topology.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/traps.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/types.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/uaccess.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/ucontext.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/unaligned.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/unistd.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/unwind.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/user.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/vga.h (100%)
 rename {include/asm-parisc => arch/parisc/include/asm}/xor.h (100%)

diff --git a/include/asm-parisc/Kbuild b/arch/parisc/include/asm/Kbuild
similarity index 100%
rename from include/asm-parisc/Kbuild
rename to arch/parisc/include/asm/Kbuild
diff --git a/include/asm-parisc/a.out.h b/arch/parisc/include/asm/a.out.h
similarity index 100%
rename from include/asm-parisc/a.out.h
rename to arch/parisc/include/asm/a.out.h
diff --git a/include/asm-parisc/agp.h b/arch/parisc/include/asm/agp.h
similarity index 100%
rename from include/asm-parisc/agp.h
rename to arch/parisc/include/asm/agp.h
diff --git a/include/asm-parisc/asmregs.h b/arch/parisc/include/asm/asmregs.h
similarity index 100%
rename from include/asm-parisc/asmregs.h
rename to arch/parisc/include/asm/asmregs.h
diff --git a/include/asm-parisc/assembly.h b/arch/parisc/include/asm/assembly.h
similarity index 100%
rename from include/asm-parisc/assembly.h
rename to arch/parisc/include/asm/assembly.h
diff --git a/include/asm-parisc/atomic.h b/arch/parisc/include/asm/atomic.h
similarity index 100%
rename from include/asm-parisc/atomic.h
rename to arch/parisc/include/asm/atomic.h
diff --git a/include/asm-parisc/auxvec.h b/arch/parisc/include/asm/auxvec.h
similarity index 100%
rename from include/asm-parisc/auxvec.h
rename to arch/parisc/include/asm/auxvec.h
diff --git a/include/asm-parisc/bitops.h b/arch/parisc/include/asm/bitops.h
similarity index 100%
rename from include/asm-parisc/bitops.h
rename to arch/parisc/include/asm/bitops.h
diff --git a/include/asm-parisc/bug.h b/arch/parisc/include/asm/bug.h
similarity index 100%
rename from include/asm-parisc/bug.h
rename to arch/parisc/include/asm/bug.h
diff --git a/include/asm-parisc/bugs.h b/arch/parisc/include/asm/bugs.h
similarity index 100%
rename from include/asm-parisc/bugs.h
rename to arch/parisc/include/asm/bugs.h
diff --git a/include/asm-parisc/byteorder.h b/arch/parisc/include/asm/byteorder.h
similarity index 100%
rename from include/asm-parisc/byteorder.h
rename to arch/parisc/include/asm/byteorder.h
diff --git a/include/asm-parisc/cache.h b/arch/parisc/include/asm/cache.h
similarity index 100%
rename from include/asm-parisc/cache.h
rename to arch/parisc/include/asm/cache.h
diff --git a/include/asm-parisc/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
similarity index 100%
rename from include/asm-parisc/cacheflush.h
rename to arch/parisc/include/asm/cacheflush.h
diff --git a/include/asm-parisc/checksum.h b/arch/parisc/include/asm/checksum.h
similarity index 100%
rename from include/asm-parisc/checksum.h
rename to arch/parisc/include/asm/checksum.h
diff --git a/include/asm-parisc/compat.h b/arch/parisc/include/asm/compat.h
similarity index 100%
rename from include/asm-parisc/compat.h
rename to arch/parisc/include/asm/compat.h
diff --git a/include/asm-parisc/compat_rt_sigframe.h b/arch/parisc/include/asm/compat_rt_sigframe.h
similarity index 100%
rename from include/asm-parisc/compat_rt_sigframe.h
rename to arch/parisc/include/asm/compat_rt_sigframe.h
diff --git a/include/asm-parisc/compat_signal.h b/arch/parisc/include/asm/compat_signal.h
similarity index 100%
rename from include/asm-parisc/compat_signal.h
rename to arch/parisc/include/asm/compat_signal.h
diff --git a/include/asm-parisc/compat_ucontext.h b/arch/parisc/include/asm/compat_ucontext.h
similarity index 100%
rename from include/asm-parisc/compat_ucontext.h
rename to arch/parisc/include/asm/compat_ucontext.h
diff --git a/include/asm-parisc/cputime.h b/arch/parisc/include/asm/cputime.h
similarity index 100%
rename from include/asm-parisc/cputime.h
rename to arch/parisc/include/asm/cputime.h
diff --git a/include/asm-parisc/current.h b/arch/parisc/include/asm/current.h
similarity index 100%
rename from include/asm-parisc/current.h
rename to arch/parisc/include/asm/current.h
diff --git a/include/asm-parisc/delay.h b/arch/parisc/include/asm/delay.h
similarity index 100%
rename from include/asm-parisc/delay.h
rename to arch/parisc/include/asm/delay.h
diff --git a/include/asm-parisc/device.h b/arch/parisc/include/asm/device.h
similarity index 100%
rename from include/asm-parisc/device.h
rename to arch/parisc/include/asm/device.h
diff --git a/include/asm-parisc/div64.h b/arch/parisc/include/asm/div64.h
similarity index 100%
rename from include/asm-parisc/div64.h
rename to arch/parisc/include/asm/div64.h
diff --git a/include/asm-parisc/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h
similarity index 100%
rename from include/asm-parisc/dma-mapping.h
rename to arch/parisc/include/asm/dma-mapping.h
diff --git a/include/asm-parisc/dma.h b/arch/parisc/include/asm/dma.h
similarity index 100%
rename from include/asm-parisc/dma.h
rename to arch/parisc/include/asm/dma.h
diff --git a/include/asm-parisc/eisa_bus.h b/arch/parisc/include/asm/eisa_bus.h
similarity index 100%
rename from include/asm-parisc/eisa_bus.h
rename to arch/parisc/include/asm/eisa_bus.h
diff --git a/include/asm-parisc/eisa_eeprom.h b/arch/parisc/include/asm/eisa_eeprom.h
similarity index 100%
rename from include/asm-parisc/eisa_eeprom.h
rename to arch/parisc/include/asm/eisa_eeprom.h
diff --git a/include/asm-parisc/elf.h b/arch/parisc/include/asm/elf.h
similarity index 100%
rename from include/asm-parisc/elf.h
rename to arch/parisc/include/asm/elf.h
diff --git a/include/asm-parisc/emergency-restart.h b/arch/parisc/include/asm/emergency-restart.h
similarity index 100%
rename from include/asm-parisc/emergency-restart.h
rename to arch/parisc/include/asm/emergency-restart.h
diff --git a/include/asm-parisc/errno.h b/arch/parisc/include/asm/errno.h
similarity index 100%
rename from include/asm-parisc/errno.h
rename to arch/parisc/include/asm/errno.h
diff --git a/include/asm-parisc/fb.h b/arch/parisc/include/asm/fb.h
similarity index 100%
rename from include/asm-parisc/fb.h
rename to arch/parisc/include/asm/fb.h
diff --git a/include/asm-parisc/fcntl.h b/arch/parisc/include/asm/fcntl.h
similarity index 100%
rename from include/asm-parisc/fcntl.h
rename to arch/parisc/include/asm/fcntl.h
diff --git a/include/asm-parisc/fixmap.h b/arch/parisc/include/asm/fixmap.h
similarity index 100%
rename from include/asm-parisc/fixmap.h
rename to arch/parisc/include/asm/fixmap.h
diff --git a/include/asm-parisc/floppy.h b/arch/parisc/include/asm/floppy.h
similarity index 100%
rename from include/asm-parisc/floppy.h
rename to arch/parisc/include/asm/floppy.h
diff --git a/include/asm-parisc/futex.h b/arch/parisc/include/asm/futex.h
similarity index 100%
rename from include/asm-parisc/futex.h
rename to arch/parisc/include/asm/futex.h
diff --git a/include/asm-parisc/grfioctl.h b/arch/parisc/include/asm/grfioctl.h
similarity index 100%
rename from include/asm-parisc/grfioctl.h
rename to arch/parisc/include/asm/grfioctl.h
diff --git a/include/asm-parisc/hardirq.h b/arch/parisc/include/asm/hardirq.h
similarity index 100%
rename from include/asm-parisc/hardirq.h
rename to arch/parisc/include/asm/hardirq.h
diff --git a/include/asm-parisc/hardware.h b/arch/parisc/include/asm/hardware.h
similarity index 100%
rename from include/asm-parisc/hardware.h
rename to arch/parisc/include/asm/hardware.h
diff --git a/include/asm-parisc/hw_irq.h b/arch/parisc/include/asm/hw_irq.h
similarity index 100%
rename from include/asm-parisc/hw_irq.h
rename to arch/parisc/include/asm/hw_irq.h
diff --git a/include/asm-parisc/ide.h b/arch/parisc/include/asm/ide.h
similarity index 100%
rename from include/asm-parisc/ide.h
rename to arch/parisc/include/asm/ide.h
diff --git a/include/asm-parisc/io.h b/arch/parisc/include/asm/io.h
similarity index 100%
rename from include/asm-parisc/io.h
rename to arch/parisc/include/asm/io.h
diff --git a/include/asm-parisc/ioctl.h b/arch/parisc/include/asm/ioctl.h
similarity index 100%
rename from include/asm-parisc/ioctl.h
rename to arch/parisc/include/asm/ioctl.h
diff --git a/include/asm-parisc/ioctls.h b/arch/parisc/include/asm/ioctls.h
similarity index 100%
rename from include/asm-parisc/ioctls.h
rename to arch/parisc/include/asm/ioctls.h
diff --git a/include/asm-parisc/ipcbuf.h b/arch/parisc/include/asm/ipcbuf.h
similarity index 100%
rename from include/asm-parisc/ipcbuf.h
rename to arch/parisc/include/asm/ipcbuf.h
diff --git a/include/asm-parisc/irq.h b/arch/parisc/include/asm/irq.h
similarity index 100%
rename from include/asm-parisc/irq.h
rename to arch/parisc/include/asm/irq.h
diff --git a/include/asm-parisc/irq_regs.h b/arch/parisc/include/asm/irq_regs.h
similarity index 100%
rename from include/asm-parisc/irq_regs.h
rename to arch/parisc/include/asm/irq_regs.h
diff --git a/include/asm-parisc/kdebug.h b/arch/parisc/include/asm/kdebug.h
similarity index 100%
rename from include/asm-parisc/kdebug.h
rename to arch/parisc/include/asm/kdebug.h
diff --git a/include/asm-parisc/kmap_types.h b/arch/parisc/include/asm/kmap_types.h
similarity index 100%
rename from include/asm-parisc/kmap_types.h
rename to arch/parisc/include/asm/kmap_types.h
diff --git a/include/asm-parisc/led.h b/arch/parisc/include/asm/led.h
similarity index 100%
rename from include/asm-parisc/led.h
rename to arch/parisc/include/asm/led.h
diff --git a/include/asm-parisc/linkage.h b/arch/parisc/include/asm/linkage.h
similarity index 100%
rename from include/asm-parisc/linkage.h
rename to arch/parisc/include/asm/linkage.h
diff --git a/include/asm-parisc/local.h b/arch/parisc/include/asm/local.h
similarity index 100%
rename from include/asm-parisc/local.h
rename to arch/parisc/include/asm/local.h
diff --git a/include/asm-parisc/machdep.h b/arch/parisc/include/asm/machdep.h
similarity index 100%
rename from include/asm-parisc/machdep.h
rename to arch/parisc/include/asm/machdep.h
diff --git a/include/asm-parisc/mc146818rtc.h b/arch/parisc/include/asm/mc146818rtc.h
similarity index 100%
rename from include/asm-parisc/mc146818rtc.h
rename to arch/parisc/include/asm/mc146818rtc.h
diff --git a/include/asm-parisc/mckinley.h b/arch/parisc/include/asm/mckinley.h
similarity index 100%
rename from include/asm-parisc/mckinley.h
rename to arch/parisc/include/asm/mckinley.h
diff --git a/include/asm-parisc/mman.h b/arch/parisc/include/asm/mman.h
similarity index 100%
rename from include/asm-parisc/mman.h
rename to arch/parisc/include/asm/mman.h
diff --git a/include/asm-parisc/mmu.h b/arch/parisc/include/asm/mmu.h
similarity index 100%
rename from include/asm-parisc/mmu.h
rename to arch/parisc/include/asm/mmu.h
diff --git a/include/asm-parisc/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
similarity index 100%
rename from include/asm-parisc/mmu_context.h
rename to arch/parisc/include/asm/mmu_context.h
diff --git a/include/asm-parisc/mmzone.h b/arch/parisc/include/asm/mmzone.h
similarity index 100%
rename from include/asm-parisc/mmzone.h
rename to arch/parisc/include/asm/mmzone.h
diff --git a/include/asm-parisc/module.h b/arch/parisc/include/asm/module.h
similarity index 100%
rename from include/asm-parisc/module.h
rename to arch/parisc/include/asm/module.h
diff --git a/include/asm-parisc/msgbuf.h b/arch/parisc/include/asm/msgbuf.h
similarity index 100%
rename from include/asm-parisc/msgbuf.h
rename to arch/parisc/include/asm/msgbuf.h
diff --git a/include/asm-parisc/mutex.h b/arch/parisc/include/asm/mutex.h
similarity index 100%
rename from include/asm-parisc/mutex.h
rename to arch/parisc/include/asm/mutex.h
diff --git a/include/asm-parisc/page.h b/arch/parisc/include/asm/page.h
similarity index 100%
rename from include/asm-parisc/page.h
rename to arch/parisc/include/asm/page.h
diff --git a/include/asm-parisc/param.h b/arch/parisc/include/asm/param.h
similarity index 100%
rename from include/asm-parisc/param.h
rename to arch/parisc/include/asm/param.h
diff --git a/include/asm-parisc/parisc-device.h b/arch/parisc/include/asm/parisc-device.h
similarity index 100%
rename from include/asm-parisc/parisc-device.h
rename to arch/parisc/include/asm/parisc-device.h
diff --git a/include/asm-parisc/parport.h b/arch/parisc/include/asm/parport.h
similarity index 100%
rename from include/asm-parisc/parport.h
rename to arch/parisc/include/asm/parport.h
diff --git a/include/asm-parisc/pci.h b/arch/parisc/include/asm/pci.h
similarity index 100%
rename from include/asm-parisc/pci.h
rename to arch/parisc/include/asm/pci.h
diff --git a/include/asm-parisc/pdc.h b/arch/parisc/include/asm/pdc.h
similarity index 100%
rename from include/asm-parisc/pdc.h
rename to arch/parisc/include/asm/pdc.h
diff --git a/include/asm-parisc/pdc_chassis.h b/arch/parisc/include/asm/pdc_chassis.h
similarity index 100%
rename from include/asm-parisc/pdc_chassis.h
rename to arch/parisc/include/asm/pdc_chassis.h
diff --git a/include/asm-parisc/pdcpat.h b/arch/parisc/include/asm/pdcpat.h
similarity index 100%
rename from include/asm-parisc/pdcpat.h
rename to arch/parisc/include/asm/pdcpat.h
diff --git a/include/asm-parisc/percpu.h b/arch/parisc/include/asm/percpu.h
similarity index 100%
rename from include/asm-parisc/percpu.h
rename to arch/parisc/include/asm/percpu.h
diff --git a/include/asm-parisc/perf.h b/arch/parisc/include/asm/perf.h
similarity index 100%
rename from include/asm-parisc/perf.h
rename to arch/parisc/include/asm/perf.h
diff --git a/include/asm-parisc/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
similarity index 100%
rename from include/asm-parisc/pgalloc.h
rename to arch/parisc/include/asm/pgalloc.h
diff --git a/include/asm-parisc/pgtable.h b/arch/parisc/include/asm/pgtable.h
similarity index 100%
rename from include/asm-parisc/pgtable.h
rename to arch/parisc/include/asm/pgtable.h
diff --git a/include/asm-parisc/poll.h b/arch/parisc/include/asm/poll.h
similarity index 100%
rename from include/asm-parisc/poll.h
rename to arch/parisc/include/asm/poll.h
diff --git a/include/asm-parisc/posix_types.h b/arch/parisc/include/asm/posix_types.h
similarity index 100%
rename from include/asm-parisc/posix_types.h
rename to arch/parisc/include/asm/posix_types.h
diff --git a/include/asm-parisc/prefetch.h b/arch/parisc/include/asm/prefetch.h
similarity index 100%
rename from include/asm-parisc/prefetch.h
rename to arch/parisc/include/asm/prefetch.h
diff --git a/include/asm-parisc/processor.h b/arch/parisc/include/asm/processor.h
similarity index 100%
rename from include/asm-parisc/processor.h
rename to arch/parisc/include/asm/processor.h
diff --git a/include/asm-parisc/psw.h b/arch/parisc/include/asm/psw.h
similarity index 100%
rename from include/asm-parisc/psw.h
rename to arch/parisc/include/asm/psw.h
diff --git a/include/asm-parisc/ptrace.h b/arch/parisc/include/asm/ptrace.h
similarity index 100%
rename from include/asm-parisc/ptrace.h
rename to arch/parisc/include/asm/ptrace.h
diff --git a/include/asm-parisc/real.h b/arch/parisc/include/asm/real.h
similarity index 100%
rename from include/asm-parisc/real.h
rename to arch/parisc/include/asm/real.h
diff --git a/include/asm-parisc/resource.h b/arch/parisc/include/asm/resource.h
similarity index 100%
rename from include/asm-parisc/resource.h
rename to arch/parisc/include/asm/resource.h
diff --git a/include/asm-parisc/ropes.h b/arch/parisc/include/asm/ropes.h
similarity index 100%
rename from include/asm-parisc/ropes.h
rename to arch/parisc/include/asm/ropes.h
diff --git a/include/asm-parisc/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
similarity index 100%
rename from include/asm-parisc/rt_sigframe.h
rename to arch/parisc/include/asm/rt_sigframe.h
diff --git a/include/asm-parisc/rtc.h b/arch/parisc/include/asm/rtc.h
similarity index 100%
rename from include/asm-parisc/rtc.h
rename to arch/parisc/include/asm/rtc.h
diff --git a/include/asm-parisc/runway.h b/arch/parisc/include/asm/runway.h
similarity index 100%
rename from include/asm-parisc/runway.h
rename to arch/parisc/include/asm/runway.h
diff --git a/include/asm-parisc/scatterlist.h b/arch/parisc/include/asm/scatterlist.h
similarity index 100%
rename from include/asm-parisc/scatterlist.h
rename to arch/parisc/include/asm/scatterlist.h
diff --git a/include/asm-parisc/sections.h b/arch/parisc/include/asm/sections.h
similarity index 100%
rename from include/asm-parisc/sections.h
rename to arch/parisc/include/asm/sections.h
diff --git a/include/asm-parisc/segment.h b/arch/parisc/include/asm/segment.h
similarity index 100%
rename from include/asm-parisc/segment.h
rename to arch/parisc/include/asm/segment.h
diff --git a/include/asm-parisc/sembuf.h b/arch/parisc/include/asm/sembuf.h
similarity index 100%
rename from include/asm-parisc/sembuf.h
rename to arch/parisc/include/asm/sembuf.h
diff --git a/include/asm-parisc/serial.h b/arch/parisc/include/asm/serial.h
similarity index 100%
rename from include/asm-parisc/serial.h
rename to arch/parisc/include/asm/serial.h
diff --git a/include/asm-parisc/setup.h b/arch/parisc/include/asm/setup.h
similarity index 100%
rename from include/asm-parisc/setup.h
rename to arch/parisc/include/asm/setup.h
diff --git a/include/asm-parisc/shmbuf.h b/arch/parisc/include/asm/shmbuf.h
similarity index 100%
rename from include/asm-parisc/shmbuf.h
rename to arch/parisc/include/asm/shmbuf.h
diff --git a/include/asm-parisc/shmparam.h b/arch/parisc/include/asm/shmparam.h
similarity index 100%
rename from include/asm-parisc/shmparam.h
rename to arch/parisc/include/asm/shmparam.h
diff --git a/include/asm-parisc/sigcontext.h b/arch/parisc/include/asm/sigcontext.h
similarity index 100%
rename from include/asm-parisc/sigcontext.h
rename to arch/parisc/include/asm/sigcontext.h
diff --git a/include/asm-parisc/siginfo.h b/arch/parisc/include/asm/siginfo.h
similarity index 100%
rename from include/asm-parisc/siginfo.h
rename to arch/parisc/include/asm/siginfo.h
diff --git a/include/asm-parisc/signal.h b/arch/parisc/include/asm/signal.h
similarity index 100%
rename from include/asm-parisc/signal.h
rename to arch/parisc/include/asm/signal.h
diff --git a/include/asm-parisc/smp.h b/arch/parisc/include/asm/smp.h
similarity index 100%
rename from include/asm-parisc/smp.h
rename to arch/parisc/include/asm/smp.h
diff --git a/include/asm-parisc/socket.h b/arch/parisc/include/asm/socket.h
similarity index 100%
rename from include/asm-parisc/socket.h
rename to arch/parisc/include/asm/socket.h
diff --git a/include/asm-parisc/sockios.h b/arch/parisc/include/asm/sockios.h
similarity index 100%
rename from include/asm-parisc/sockios.h
rename to arch/parisc/include/asm/sockios.h
diff --git a/include/asm-parisc/spinlock.h b/arch/parisc/include/asm/spinlock.h
similarity index 100%
rename from include/asm-parisc/spinlock.h
rename to arch/parisc/include/asm/spinlock.h
diff --git a/include/asm-parisc/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
similarity index 100%
rename from include/asm-parisc/spinlock_types.h
rename to arch/parisc/include/asm/spinlock_types.h
diff --git a/include/asm-parisc/stat.h b/arch/parisc/include/asm/stat.h
similarity index 100%
rename from include/asm-parisc/stat.h
rename to arch/parisc/include/asm/stat.h
diff --git a/include/asm-parisc/statfs.h b/arch/parisc/include/asm/statfs.h
similarity index 100%
rename from include/asm-parisc/statfs.h
rename to arch/parisc/include/asm/statfs.h
diff --git a/include/asm-parisc/string.h b/arch/parisc/include/asm/string.h
similarity index 100%
rename from include/asm-parisc/string.h
rename to arch/parisc/include/asm/string.h
diff --git a/include/asm-parisc/superio.h b/arch/parisc/include/asm/superio.h
similarity index 100%
rename from include/asm-parisc/superio.h
rename to arch/parisc/include/asm/superio.h
diff --git a/include/asm-parisc/system.h b/arch/parisc/include/asm/system.h
similarity index 100%
rename from include/asm-parisc/system.h
rename to arch/parisc/include/asm/system.h
diff --git a/include/asm-parisc/termbits.h b/arch/parisc/include/asm/termbits.h
similarity index 100%
rename from include/asm-parisc/termbits.h
rename to arch/parisc/include/asm/termbits.h
diff --git a/include/asm-parisc/termios.h b/arch/parisc/include/asm/termios.h
similarity index 100%
rename from include/asm-parisc/termios.h
rename to arch/parisc/include/asm/termios.h
diff --git a/include/asm-parisc/thread_info.h b/arch/parisc/include/asm/thread_info.h
similarity index 100%
rename from include/asm-parisc/thread_info.h
rename to arch/parisc/include/asm/thread_info.h
diff --git a/include/asm-parisc/timex.h b/arch/parisc/include/asm/timex.h
similarity index 100%
rename from include/asm-parisc/timex.h
rename to arch/parisc/include/asm/timex.h
diff --git a/include/asm-parisc/tlb.h b/arch/parisc/include/asm/tlb.h
similarity index 100%
rename from include/asm-parisc/tlb.h
rename to arch/parisc/include/asm/tlb.h
diff --git a/include/asm-parisc/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
similarity index 100%
rename from include/asm-parisc/tlbflush.h
rename to arch/parisc/include/asm/tlbflush.h
diff --git a/include/asm-parisc/topology.h b/arch/parisc/include/asm/topology.h
similarity index 100%
rename from include/asm-parisc/topology.h
rename to arch/parisc/include/asm/topology.h
diff --git a/include/asm-parisc/traps.h b/arch/parisc/include/asm/traps.h
similarity index 100%
rename from include/asm-parisc/traps.h
rename to arch/parisc/include/asm/traps.h
diff --git a/include/asm-parisc/types.h b/arch/parisc/include/asm/types.h
similarity index 100%
rename from include/asm-parisc/types.h
rename to arch/parisc/include/asm/types.h
diff --git a/include/asm-parisc/uaccess.h b/arch/parisc/include/asm/uaccess.h
similarity index 100%
rename from include/asm-parisc/uaccess.h
rename to arch/parisc/include/asm/uaccess.h
diff --git a/include/asm-parisc/ucontext.h b/arch/parisc/include/asm/ucontext.h
similarity index 100%
rename from include/asm-parisc/ucontext.h
rename to arch/parisc/include/asm/ucontext.h
diff --git a/include/asm-parisc/unaligned.h b/arch/parisc/include/asm/unaligned.h
similarity index 100%
rename from include/asm-parisc/unaligned.h
rename to arch/parisc/include/asm/unaligned.h
diff --git a/include/asm-parisc/unistd.h b/arch/parisc/include/asm/unistd.h
similarity index 100%
rename from include/asm-parisc/unistd.h
rename to arch/parisc/include/asm/unistd.h
diff --git a/include/asm-parisc/unwind.h b/arch/parisc/include/asm/unwind.h
similarity index 100%
rename from include/asm-parisc/unwind.h
rename to arch/parisc/include/asm/unwind.h
diff --git a/include/asm-parisc/user.h b/arch/parisc/include/asm/user.h
similarity index 100%
rename from include/asm-parisc/user.h
rename to arch/parisc/include/asm/user.h
diff --git a/include/asm-parisc/vga.h b/arch/parisc/include/asm/vga.h
similarity index 100%
rename from include/asm-parisc/vga.h
rename to arch/parisc/include/asm/vga.h
diff --git a/include/asm-parisc/xor.h b/arch/parisc/include/asm/xor.h
similarity index 100%
rename from include/asm-parisc/xor.h
rename to arch/parisc/include/asm/xor.h
-- 
GitLab


From 24b574d052a1996bac42fbd56715ab602092c291 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@mcmartin.ca>
Date: Tue, 29 Jul 2008 00:09:22 -0400
Subject: [PATCH 213/895] parisc: add pdc_coproc_cfg_unlocked and
 set_firmware_width_unlocked

These functions are called only when bringing up the monarch cpu,
so it is safe to call them without taking the pdc spinlock. In the
future, this may become relevant for lockdep, since these functions were
taking spinlocks before start_kernel called the lockdep initializers.
---
 arch/parisc/include/asm/pdc.h |  2 ++
 arch/parisc/kernel/firmware.c | 65 ++++++++++++++++++++++++-----------
 2 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index 46b75f9cce51..c584b00c6074 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -603,6 +603,7 @@ int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsi
 int pdc_chassis_disp(unsigned long disp);
 int pdc_chassis_warn(unsigned long *warn);
 int pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info);
+int pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info);
 int pdc_iodc_read(unsigned long *actcnt, unsigned long hpa, unsigned int index,
 		  void *iodc_data, unsigned int iodc_data_size);
 int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info,
@@ -641,6 +642,7 @@ int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr,
 #endif
 
 void set_firmware_width(void);
+void set_firmware_width_unlocked(void);
 int pdc_do_firm_test_reset(unsigned long ftc_bitmap);
 int pdc_do_reset(void);
 int pdc_soft_power_info(unsigned long *power_reg);
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 99a9e505edf9..03f26bd75bd8 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -150,26 +150,40 @@ static void convert_to_wide(unsigned long *addr)
 #endif
 }
 
+#ifdef CONFIG_64BIT
+void __init set_firmware_width_unlocked(void)
+{
+	int ret;
+
+	ret = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES,
+		__pa(pdc_result), 0);
+	convert_to_wide(pdc_result);
+	if (pdc_result[0] != NARROW_FIRMWARE)
+		parisc_narrow_firmware = 0;
+}
+	
 /**
  * set_firmware_width - Determine if the firmware is wide or narrow.
  * 
- * This function must be called before any pdc_* function that uses the convert_to_wide
- * function.
+ * This function must be called before any pdc_* function that uses the
+ * convert_to_wide function.
  */
 void __init set_firmware_width(void)
 {
-#ifdef CONFIG_64BIT
-	int retval;
 	unsigned long flags;
+	spin_lock_irqsave(&pdc_lock, flags);
+	set_firmware_width_unlocked();
+	spin_unlock_irqrestore(&pdc_lock, flags);
+}
+#else
+void __init set_firmware_width_unlocked(void) {
+	return;
+}
 
-        spin_lock_irqsave(&pdc_lock, flags);
-	retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0);
-	convert_to_wide(pdc_result);
-	if(pdc_result[0] != NARROW_FIRMWARE)
-		parisc_narrow_firmware = 0;
-        spin_unlock_irqrestore(&pdc_lock, flags);
-#endif
+void __init set_firmware_width(void) {
+	return;
 }
+#endif /*CONFIG_64BIT*/
 
 /**
  * pdc_emergency_unlock - Unlock the linux pdc lock
@@ -288,6 +302,20 @@ int pdc_chassis_warn(unsigned long *warn)
 	return retval;
 }
 
+int __init pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info)
+{
+	int ret;
+
+	ret = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
+	convert_to_wide(pdc_result);
+	pdc_coproc_info->ccr_functional = pdc_result[0];
+	pdc_coproc_info->ccr_present = pdc_result[1];
+	pdc_coproc_info->revision = pdc_result[17];
+	pdc_coproc_info->model = pdc_result[18];
+
+	return ret;
+}
+
 /**
  * pdc_coproc_cfg - To identify coprocessors attached to the processor.
  * @pdc_coproc_info: Return buffer address.
@@ -297,19 +325,14 @@ int pdc_chassis_warn(unsigned long *warn)
  */
 int __init pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info)
 {
-        int retval;
+	int ret;
 	unsigned long flags;
 
-        spin_lock_irqsave(&pdc_lock, flags);
-        retval = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
-        convert_to_wide(pdc_result);
-        pdc_coproc_info->ccr_functional = pdc_result[0];
-        pdc_coproc_info->ccr_present = pdc_result[1];
-        pdc_coproc_info->revision = pdc_result[17];
-        pdc_coproc_info->model = pdc_result[18];
-        spin_unlock_irqrestore(&pdc_lock, flags);
+	spin_lock_irqsave(&pdc_lock, flags);
+	ret = pdc_coproc_cfg_unlocked(pdc_coproc_info);
+	spin_unlock_irqrestore(&pdc_lock, flags);
 
-        return retval;
+	return ret;
 }
 
 /**
-- 
GitLab


From 089d55289db5d58d938d73b47a415b2b82ee19ac Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@mcmartin.ca>
Date: Tue, 29 Jul 2008 00:11:13 -0400
Subject: [PATCH 214/895] parisc: hijack jump to start_kernel

Bang in our own start_parisc call, which initializes the PDC
width, and turns on the FPU.

Previously, if CONFIG_PRINTK_TIME was on, we'd attempt to use
the FPU before we had enabled it, resulting in a difficult
to diagnose panic.

This patch causes init_per_cpu to redundantly set these for
cpu0, but this is harmless.
---
 arch/parisc/kernel/head.S  |  2 +-
 arch/parisc/kernel/setup.c | 27 ++++++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index a84e31e82876..0e3d9f9b9e33 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -121,7 +121,7 @@ $pgt_fill_loop:
 	copy		%r0,%r2
 
 	/* And the RFI Target address too */
-	load32		start_kernel,%r11
+	load32		start_parisc,%r11
 
 	/* And the initial task pointer */
 	load32		init_thread_union,%r6
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 39e7c5a5946a..a59b71efdbe5 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -368,6 +368,31 @@ static int __init parisc_init(void)
 
 	return 0;
 }
-
 arch_initcall(parisc_init);
 
+void start_parisc(void)
+{
+	extern void start_kernel(void);
+
+	int ret, cpunum;
+	struct pdc_coproc_cfg coproc_cfg;
+
+	cpunum = smp_processor_id();
+
+	set_firmware_width_unlocked();
+
+	ret = pdc_coproc_cfg_unlocked(&coproc_cfg);
+	if (ret >= 0 && coproc_cfg.ccr_functional) {
+		mtctl(coproc_cfg.ccr_functional, 10);
+
+		cpu_data[cpunum].fp_rev = coproc_cfg.revision;
+		cpu_data[cpunum].fp_model = coproc_cfg.model;
+
+		asm volatile ("fstd	%fr0,8(%sp)");
+	} else {
+		panic("must have an fpu to boot linux");
+	}
+
+	start_kernel();
+	// not reached
+}
-- 
GitLab


From 0be7d1fe4361bb9f2ebbd6fa394687cbe4bea950 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@mcmartin.ca>
Date: Sat, 9 Aug 2008 14:38:18 -0400
Subject: [PATCH 215/895] parisc: add new syscalls

Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
---
 arch/parisc/include/asm/unistd.h   | 10 ++++++++--
 arch/parisc/kernel/syscall_table.S |  6 ++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index a7d857f0e4f4..ef26b009dc5d 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -801,8 +801,14 @@
 #define __NR_timerfd_create	(__NR_Linux + 306)
 #define __NR_timerfd_settime	(__NR_Linux + 307)
 #define __NR_timerfd_gettime	(__NR_Linux + 308)
-
-#define __NR_Linux_syscalls	(__NR_timerfd_gettime + 1)
+#define __NR_signalfd4		(__NR_Linux + 309)
+#define __NR_eventfd2		(__NR_Linux + 310)
+#define __NR_epoll_create1	(__NR_Linux + 311)
+#define __NR_dup3		(__NR_Linux + 312)
+#define __NR_pipe2		(__NR_Linux + 313)
+#define __NR_inotify_init1	(__NR_Linux + 314)
+
+#define __NR_Linux_syscalls	(__NR_inotify_init1 + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 6b5ac38f5a99..6084667eacf9 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -407,6 +407,12 @@
 	ENTRY_SAME(timerfd_create)
 	ENTRY_COMP(timerfd_settime)
 	ENTRY_COMP(timerfd_gettime)
+	ENTRY_COMP(signalfd4)
+	ENTRY_SAME(eventfd2)		/* 310 */
+	ENTRY_SAME(epoll_create1)
+	ENTRY_SAME(dup3)
+	ENTRY_SAME(pipe2)
+	ENTRY_SAME(inotify_init1)
 
 	/* Nothing yet */
 
-- 
GitLab


From f0514ae323f19ba1ad4bea4174ea274c812f7eee Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Thu, 11 Sep 2008 10:17:23 -0400
Subject: [PATCH 216/895] parisc: initialize unwinder much earlier

The unwinder was being initialized way too late to be any use
debugging early boot crashes. Instead of relying on module_init
initcalls to initialize it, let's do it explicitly as early as
we can.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
---
 arch/parisc/include/asm/unwind.h | 2 ++
 arch/parisc/kernel/setup.c       | 2 ++
 arch/parisc/kernel/unwind.c      | 4 +---
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/parisc/include/asm/unwind.h b/arch/parisc/include/asm/unwind.h
index 2f7e6e50a158..52482e4fc20d 100644
--- a/arch/parisc/include/asm/unwind.h
+++ b/arch/parisc/include/asm/unwind.h
@@ -74,4 +74,6 @@ void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *r
 int unwind_once(struct unwind_frame_info *info);
 int unwind_to_user(struct unwind_frame_info *info);
 
+int unwind_init(void);
+
 #endif
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index a59b71efdbe5..7d27853ff8c8 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -44,6 +44,7 @@
 #include <asm/pdc_chassis.h>
 #include <asm/io.h>
 #include <asm/setup.h>
+#include <asm/unwind.h>
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
@@ -123,6 +124,7 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_64BIT
 	extern int parisc_narrow_firmware;
 #endif
+	unwind_init();
 
 	init_per_cpu(smp_processor_id());	/* Set Modes & Enable FP */
 
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 701b2d2d8882..6773c582e457 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -170,7 +170,7 @@ void unwind_table_remove(struct unwind_table *table)
 }
 
 /* Called from setup_arch to import the kernel unwind info */
-static int unwind_init(void)
+int unwind_init(void)
 {
 	long start, stop;
 	register unsigned long gp __asm__ ("r27");
@@ -417,5 +417,3 @@ int unwind_to_user(struct unwind_frame_info *info)
 
 	return ret;
 }
-
-module_init(unwind_init);
-- 
GitLab


From 9eb1686423756f4dfb0ad8bfb02bb8bf1b89e50a Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@mcmartin.ca>
Date: Wed, 10 Sep 2008 14:24:07 +0000
Subject: [PATCH 217/895] parisc: add rtc platform driver

Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
---
 arch/parisc/Kconfig       |   2 +
 arch/parisc/kernel/time.c |  20 ++++++-
 drivers/rtc/Kconfig       |   8 +++
 drivers/rtc/Makefile      |   1 +
 drivers/rtc/rtc-parisc.c  | 111 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 141 insertions(+), 1 deletion(-)
 create mode 100644 drivers/rtc/rtc-parisc.c

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index a7d4fd353c2b..0a8ac703dbec 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -9,6 +9,8 @@ config PARISC
 	def_bool y
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select RTC_CLASS
+	select RTC_DRV_PARISC
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
 	  in many of their workstations & servers (HP9000 700 and 800 series,
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 24be86bba94d..4d09203bc693 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -23,6 +23,7 @@
 #include <linux/smp.h>
 #include <linux/profile.h>
 #include <linux/clocksource.h>
+#include <linux/platform_device.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -215,6 +216,24 @@ void __init start_cpu_itimer(void)
 	cpu_data[cpu].it_value = next_tick;
 }
 
+struct platform_device rtc_parisc_dev = {
+	.name = "rtc-parisc",
+	.id = -1,
+};
+
+static int __init rtc_init(void)
+{
+	int ret;
+
+	ret = platform_device_register(&rtc_parisc_dev);
+	if (ret < 0)
+		printk(KERN_ERR "unable to register rtc device...\n");
+
+	/* not necessarily an error */
+	return 0;
+}
+module_init(rtc_init);
+
 void __init time_init(void)
 {
 	static struct pdc_tod tod_data;
@@ -245,4 +264,3 @@ void __init time_init(void)
 		xtime.tv_nsec = 0;
 	}
 }
-
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 9a9755c92fad..30d40fe194a8 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -575,6 +575,14 @@ config RTC_DRV_RS5C313
 	help
 	  If you say yes here you get support for the Ricoh RS5C313 RTC chips.
 
+config RTC_DRV_PARISC
+	tristate "PA-RISC firmware RTC support"
+	depends on PARISC
+	help
+	  Say Y or M here to enable RTC support on PA-RISC systems using
+	  firmware calls. If you do not know what you are doing, you should
+	  just say Y.
+
 config RTC_DRV_PPC
        tristate "PowerPC machine dependent RTC support"
        depends on PPC_MERGE
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 18622ef84cab..180ddacde730 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_PCF8583)	+= rtc-pcf8583.o
 obj-$(CONFIG_RTC_DRV_PL030)	+= rtc-pl030.o
 obj-$(CONFIG_RTC_DRV_PL031)	+= rtc-pl031.o
+obj-$(CONFIG_RTC_DRV_PARISC)	+= rtc-parisc.o
 obj-$(CONFIG_RTC_DRV_PPC)	+= rtc-ppc.o
 obj-$(CONFIG_RTC_DRV_R9701)	+= rtc-r9701.o
 obj-$(CONFIG_RTC_DRV_RS5C313)	+= rtc-rs5c313.o
diff --git a/drivers/rtc/rtc-parisc.c b/drivers/rtc/rtc-parisc.c
new file mode 100644
index 000000000000..346d633655e7
--- /dev/null
+++ b/drivers/rtc/rtc-parisc.c
@@ -0,0 +1,111 @@
+/* rtc-parisc: RTC for HP PA-RISC firmware
+ *
+ * Copyright (C) 2008 Kyle McMartin <kyle@mcmartin.ca>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/platform_device.h>
+
+#include <asm/rtc.h>
+
+/* as simple as can be, and no simpler. */
+struct parisc_rtc {
+	struct rtc_device *rtc;
+	spinlock_t lock;
+};
+
+static int parisc_get_time(struct device *dev, struct rtc_time *tm)
+{
+	struct parisc_rtc *p = dev_get_drvdata(dev);
+	unsigned long flags, ret;
+
+	spin_lock_irqsave(&p->lock, flags);
+	ret = get_rtc_time(tm);
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	if (ret & RTC_BATT_BAD)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int parisc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct parisc_rtc *p = dev_get_drvdata(dev);
+	unsigned long flags, ret;
+
+	spin_lock_irqsave(&p->lock, flags);
+	ret = set_rtc_time(tm);
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	if (ret < 0)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static const struct rtc_class_ops parisc_rtc_ops = {
+	.read_time = parisc_get_time,
+	.set_time = parisc_set_time,
+};
+
+static int __devinit parisc_rtc_probe(struct platform_device *dev)
+{
+	struct parisc_rtc *p;
+
+	p = kzalloc(sizeof (*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	spin_lock_init(&p->lock);
+
+	p->rtc = rtc_device_register("rtc-parisc", &dev->dev, &parisc_rtc_ops,
+					THIS_MODULE);
+	if (IS_ERR(p->rtc)) {
+		int err = PTR_ERR(p->rtc);
+		kfree(p);
+		return err;
+	}
+
+	platform_set_drvdata(dev, p);
+
+	return 0;
+}
+
+static int __devexit parisc_rtc_remove(struct platform_device *dev)
+{
+	struct parisc_rtc *p = platform_get_drvdata(dev);
+
+	rtc_device_unregister(p->rtc);
+	kfree(p);
+
+	return 0;
+}
+
+static struct platform_driver parisc_rtc_driver = {
+	.driver = {
+		.name = "rtc-parisc",
+		.owner = THIS_MODULE,
+	},
+	.probe = parisc_rtc_probe,
+	.remove = __devexit_p(parisc_rtc_remove),
+};
+
+static int __init parisc_rtc_init(void)
+{
+	return platform_driver_register(&parisc_rtc_driver);
+}
+
+static void __exit parisc_rtc_fini(void)
+{
+	platform_driver_unregister(&parisc_rtc_driver);
+}
+
+module_init(parisc_rtc_init);
+module_exit(parisc_rtc_fini);
+
+MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("HP PA-RISC RTC driver");
-- 
GitLab


From 69fd3a8d098faf41a04930afa83757c0555ee360 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 12 Oct 2008 16:18:36 +0200
Subject: [PATCH 218/895] [MTD] remove unused mtd parameter in
 of_mtd_parse_partitions()

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/physmap_of.c    | 3 +--
 drivers/mtd/nand/fsl_elbc_nand.c | 3 +--
 drivers/mtd/ofpart.c             | 1 -
 include/linux/mtd/partitions.h   | 1 -
 4 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 49acd4171893..5fcfec034a94 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -230,8 +230,7 @@ static int __devinit of_flash_probe(struct of_device *dev,
 
 #ifdef CONFIG_MTD_OF_PARTS
 	if (err == 0) {
-		err = of_mtd_parse_partitions(&dev->dev, info->mtd,
-		                              dp, &info->parts);
+		err = of_mtd_parse_partitions(&dev->dev, dp, &info->parts);
 		if (err < 0)
 			return err;
 	}
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 98ad3cefcaf4..4aa5bd6158da 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -918,8 +918,7 @@ static int __devinit fsl_elbc_chip_probe(struct fsl_elbc_ctrl *ctrl,
 
 #ifdef CONFIG_MTD_OF_PARTS
 	if (ret == 0) {
-		ret = of_mtd_parse_partitions(priv->dev, &priv->mtd,
-		                              node, &parts);
+		ret = of_mtd_parse_partitions(priv->dev, node, &parts);
 		if (ret < 0)
 			goto err;
 	}
diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
index 4f80c2fd89af..9e45b3f39c0e 100644
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -20,7 +20,6 @@
 #include <linux/mtd/partitions.h>
 
 int __devinit of_mtd_parse_partitions(struct device *dev,
-                                      struct mtd_info *mtd,
                                       struct device_node *node,
                                       struct mtd_partition **pparts)
 {
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 5014f7a9f5df..c92b4d439609 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -73,7 +73,6 @@ struct device;
 struct device_node;
 
 int __devinit of_mtd_parse_partitions(struct device *dev,
-                                      struct mtd_info *mtd,
                                       struct device_node *node,
                                       struct mtd_partition **pparts);
 
-- 
GitLab


From 97e1c18e8d17bd87e1e383b2e9d9fc740332c8e2 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 18 Jul 2008 12:16:16 -0400
Subject: [PATCH 219/895] tracing: Kernel Tracepoints

Implementation of kernel tracepoints. Inspired from the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.

Taken from the documentation patch :

"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section).  When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).

You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, which
prototypes are described in a tracepoint declaration placed in a header
file."

Addition and removal of tracepoints is synchronized by RCU using the
scheduler (and preempt_disable) as guarantees to find a quiescent state
(this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".

We make sure the previous array containing probes, which has been
scheduled for deletion by the rcu callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.

Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
  tracepoint table. This will make sure not type mismatch happens due to
  connexion of a probe with the wrong type to a tracepoint declared with
  the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.

Masami Hiramatsu <mhiramat@redhat.com> :
Tested on x86-64.

Performance impact of a tracepoint : same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).

About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added.

Quoting Hideo Aoki about Markers :

I evaluated overhead of kernel marker using linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.

While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.

I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one was enabled CONFIG_MARKERS.

I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c

I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.

    The parameter of hackbench: every 50 from 50 to 800
    The number of CPUs of the server: 2, 4, and 8

Below is the results. As you can see, major performance regression
wasn't found in any case. Even if number of processes increases,
differences between marker-enabled kernel and marker- disabled kernel
doesn't increase. Moreover, if number of CPUs increases, the differences
doesn't increase either.

Curiously, marker-enabled kernel is better than marker-disabled kernel
in more than half cases, although I guess it comes from the difference
of memory access pattern.

* 2 CPUs

Number of | without      | with         | diff     | diff    |
processes | Marker [Sec] | Marker [Sec] |   [Sec]  |   [%]   |
--------------------------------------------------------------
       50 |      4.811   |       4.872  |  +0.061  |  +1.27  |
      100 |      9.854   |      10.309  |  +0.454  |  +4.61  |
      150 |     15.602   |      15.040  |  -0.562  |  -3.6   |
      200 |     20.489   |      20.380  |  -0.109  |  -0.53  |
      250 |     25.798   |      25.652  |  -0.146  |  -0.56  |
      300 |     31.260   |      30.797  |  -0.463  |  -1.48  |
      350 |     36.121   |      35.770  |  -0.351  |  -0.97  |
      400 |     42.288   |      42.102  |  -0.186  |  -0.44  |
      450 |     47.778   |      47.253  |  -0.526  |  -1.1   |
      500 |     51.953   |      52.278  |  +0.325  |  +0.63  |
      550 |     58.401   |      57.700  |  -0.701  |  -1.2   |
      600 |     63.334   |      63.222  |  -0.112  |  -0.18  |
      650 |     68.816   |      68.511  |  -0.306  |  -0.44  |
      700 |     74.667   |      74.088  |  -0.579  |  -0.78  |
      750 |     78.612   |      79.582  |  +0.970  |  +1.23  |
      800 |     85.431   |      85.263  |  -0.168  |  -0.2   |
--------------------------------------------------------------

* 4 CPUs

Number of | without      | with         | diff     | diff    |
processes | Marker [Sec] | Marker [Sec] |   [Sec]  |   [%]   |
--------------------------------------------------------------
       50 |      2.586   |       2.584  |  -0.003  |  -0.1   |
      100 |      5.254   |       5.283  |  +0.030  |  +0.56  |
      150 |      8.012   |       8.074  |  +0.061  |  +0.76  |
      200 |     11.172   |      11.000  |  -0.172  |  -1.54  |
      250 |     13.917   |      14.036  |  +0.119  |  +0.86  |
      300 |     16.905   |      16.543  |  -0.362  |  -2.14  |
      350 |     19.901   |      20.036  |  +0.135  |  +0.68  |
      400 |     22.908   |      23.094  |  +0.186  |  +0.81  |
      450 |     26.273   |      26.101  |  -0.172  |  -0.66  |
      500 |     29.554   |      29.092  |  -0.461  |  -1.56  |
      550 |     32.377   |      32.274  |  -0.103  |  -0.32  |
      600 |     35.855   |      35.322  |  -0.533  |  -1.49  |
      650 |     39.192   |      38.388  |  -0.804  |  -2.05  |
      700 |     41.744   |      41.719  |  -0.025  |  -0.06  |
      750 |     45.016   |      44.496  |  -0.520  |  -1.16  |
      800 |     48.212   |      47.603  |  -0.609  |  -1.26  |
--------------------------------------------------------------

* 8 CPUs

Number of | without      | with         | diff     | diff    |
processes | Marker [Sec] | Marker [Sec] |   [Sec]  |   [%]   |
--------------------------------------------------------------
       50 |      2.094   |       2.072  |  -0.022  |  -1.07  |
      100 |      4.162   |       4.273  |  +0.111  |  +2.66  |
      150 |      6.485   |       6.540  |  +0.055  |  +0.84  |
      200 |      8.556   |       8.478  |  -0.078  |  -0.91  |
      250 |     10.458   |      10.258  |  -0.200  |  -1.91  |
      300 |     12.425   |      12.750  |  +0.325  |  +2.62  |
      350 |     14.807   |      14.839  |  +0.032  |  +0.22  |
      400 |     16.801   |      16.959  |  +0.158  |  +0.94  |
      450 |     19.478   |      19.009  |  -0.470  |  -2.41  |
      500 |     21.296   |      21.504  |  +0.208  |  +0.98  |
      550 |     23.842   |      23.979  |  +0.137  |  +0.57  |
      600 |     26.309   |      26.111  |  -0.198  |  -0.75  |
      650 |     28.705   |      28.446  |  -0.259  |  -0.9   |
      700 |     31.233   |      31.394  |  +0.161  |  +0.52  |
      750 |     34.064   |      33.720  |  -0.344  |  -1.01  |
      800 |     36.320   |      36.114  |  -0.206  |  -0.57  |
--------------------------------------------------------------

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/vmlinux.lds.h |   6 +-
 include/linux/module.h            |  17 ++
 include/linux/tracepoint.h        | 127 ++++++++
 init/Kconfig                      |   7 +
 kernel/Makefile                   |   1 +
 kernel/module.c                   |  66 ++++-
 kernel/tracepoint.c               | 476 ++++++++++++++++++++++++++++++
 7 files changed, 698 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/tracepoint.h
 create mode 100644 kernel/tracepoint.c

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7440a0dceddb..3d8e472a09c8 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -52,7 +52,10 @@
 	. = ALIGN(8);							\
 	VMLINUX_SYMBOL(__start___markers) = .;				\
 	*(__markers)							\
-	VMLINUX_SYMBOL(__stop___markers) = .;
+	VMLINUX_SYMBOL(__stop___markers) = .;				\
+	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
+	*(__tracepoints)						\
+	VMLINUX_SYMBOL(__stop___tracepoints) = .;
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
@@ -61,6 +64,7 @@
 		*(.rodata) *(.rodata.*)					\
 		*(__vermagic)		/* Kernel version magic */	\
 		*(__markers_strings)	/* Markers: strings */		\
+		*(__tracepoints_strings)/* Tracepoints: strings */	\
 	}								\
 									\
 	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {		\
diff --git a/include/linux/module.h b/include/linux/module.h
index 68e09557c951..8b6113503863 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/marker.h>
+#include <linux/tracepoint.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -331,6 +332,10 @@ struct module
 	struct marker *markers;
 	unsigned int num_markers;
 #endif
+#ifdef CONFIG_TRACEPOINTS
+	struct tracepoint *tracepoints;
+	unsigned int num_tracepoints;
+#endif
 
 #ifdef CONFIG_MODULE_UNLOAD
 	/* What modules depend on me? */
@@ -454,6 +459,9 @@ extern void print_modules(void);
 
 extern void module_update_markers(void);
 
+extern void module_update_tracepoints(void);
+extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
+
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
@@ -558,6 +566,15 @@ static inline void module_update_markers(void)
 {
 }
 
+static inline void module_update_tracepoints(void)
+{
+}
+
+static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MODULES */
 
 struct device_driver;
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
new file mode 100644
index 000000000000..e623a6fca5c3
--- /dev/null
+++ b/include/linux/tracepoint.h
@@ -0,0 +1,127 @@
+#ifndef _LINUX_TRACEPOINT_H
+#define _LINUX_TRACEPOINT_H
+
+/*
+ * Kernel Tracepoint API.
+ *
+ * See Documentation/tracepoint.txt.
+ *
+ * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * Heavily inspired from the Linux Kernel Markers.
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+
+struct module;
+struct tracepoint;
+
+struct tracepoint {
+	const char *name;		/* Tracepoint name */
+	int state;			/* State. */
+	void **funcs;
+} __attribute__((aligned(8)));
+
+
+#define TPPROTO(args...)	args
+#define TPARGS(args...)		args
+
+#ifdef CONFIG_TRACEPOINTS
+
+/*
+ * it_func[0] is never NULL because there is at least one element in the array
+ * when the array itself is non NULL.
+ */
+#define __DO_TRACE(tp, proto, args)					\
+	do {								\
+		void **it_func;						\
+									\
+		rcu_read_lock_sched();					\
+		it_func = rcu_dereference((tp)->funcs);			\
+		if (it_func) {						\
+			do {						\
+				((void(*)(proto))(*it_func))(args);	\
+			} while (*(++it_func));				\
+		}							\
+		rcu_read_unlock_sched();				\
+	} while (0)
+
+/*
+ * Make sure the alignment of the structure in the __tracepoints section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define DEFINE_TRACE(name, proto, args)					\
+	static inline void trace_##name(proto)				\
+	{								\
+		static const char __tpstrtab_##name[]			\
+		__attribute__((section("__tracepoints_strings")))	\
+		= #name ":" #proto;					\
+		static struct tracepoint __tracepoint_##name		\
+		__attribute__((section("__tracepoints"), aligned(8))) =	\
+		{ __tpstrtab_##name, 0, NULL };				\
+		if (unlikely(__tracepoint_##name.state))		\
+			__DO_TRACE(&__tracepoint_##name,		\
+				TPPROTO(proto), TPARGS(args));		\
+	}								\
+	static inline int register_trace_##name(void (*probe)(proto))	\
+	{								\
+		return tracepoint_probe_register(#name ":" #proto,	\
+			(void *)probe);					\
+	}								\
+	static inline void unregister_trace_##name(void (*probe)(proto))\
+	{								\
+		tracepoint_probe_unregister(#name ":" #proto,		\
+			(void *)probe);					\
+	}
+
+extern void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end);
+
+#else /* !CONFIG_TRACEPOINTS */
+#define DEFINE_TRACE(name, proto, args)			\
+	static inline void _do_trace_##name(struct tracepoint *tp, proto) \
+	{ }								\
+	static inline void trace_##name(proto)				\
+	{ }								\
+	static inline int register_trace_##name(void (*probe)(proto))	\
+	{								\
+		return -ENOSYS;						\
+	}								\
+	static inline void unregister_trace_##name(void (*probe)(proto))\
+	{ }
+
+static inline void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end)
+{ }
+#endif /* CONFIG_TRACEPOINTS */
+
+/*
+ * Connect a probe to a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_register(const char *name, void *probe);
+
+/*
+ * Disconnect a probe from a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_unregister(const char *name, void *probe);
+
+struct tracepoint_iter {
+	struct module *module;
+	struct tracepoint *tracepoint;
+};
+
+extern void tracepoint_iter_start(struct tracepoint_iter *iter);
+extern void tracepoint_iter_next(struct tracepoint_iter *iter);
+extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
+extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
+extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+	struct tracepoint *begin, struct tracepoint *end);
+
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index c11da38837e5..70082678a914 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -771,6 +771,13 @@ config PROFILING
 	  Say Y here to enable the extended profiling support mechanisms used
 	  by profilers such as OProfile.
 
+config TRACEPOINTS
+	bool "Activate tracepoints"
+	default y
+	help
+	  Place an empty function call at each tracepoint site. Can be
+	  dynamically changed for a probe function.
+
 config MARKERS
 	bool "Activate markers"
 	help
diff --git a/kernel/Makefile b/kernel/Makefile
index 4e1d7df7c3e2..8f9ce7ec21b6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_MARKERS) += marker.o
+obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FTRACE) += trace/
diff --git a/kernel/module.c b/kernel/module.c
index 9db11911e04b..661d73db786e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -46,6 +46,7 @@
 #include <asm/cacheflush.h>
 #include <linux/license.h>
 #include <asm/sections.h>
+#include <linux/tracepoint.h>
 
 #if 0
 #define DEBUGP printk
@@ -1831,6 +1832,8 @@ static noinline struct module *load_module(void __user *umod,
 #endif
 	unsigned int markersindex;
 	unsigned int markersstringsindex;
+	unsigned int tracepointsindex;
+	unsigned int tracepointsstringsindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -2117,6 +2120,9 @@ static noinline struct module *load_module(void __user *umod,
 	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
  	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
 					"__markers_strings");
+	tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
+	tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
+					"__tracepoints_strings");
 
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
@@ -2144,6 +2150,12 @@ static noinline struct module *load_module(void __user *umod,
 	mod->num_markers =
 		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
 #endif
+#ifdef CONFIG_TRACEPOINTS
+	mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
+	mod->num_tracepoints =
+		sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
+#endif
+
 
         /* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -2162,11 +2174,16 @@ static noinline struct module *load_module(void __user *umod,
 
 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
+	if (!mod->taints) {
 #ifdef CONFIG_MARKERS
-	if (!mod->taints)
 		marker_update_probe_range(mod->markers,
 			mod->markers + mod->num_markers);
 #endif
+#ifdef CONFIG_TRACEPOINTS
+		tracepoint_update_probe_range(mod->tracepoints,
+			mod->tracepoints + mod->num_tracepoints);
+#endif
+	}
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2717,3 +2734,50 @@ void module_update_markers(void)
 	mutex_unlock(&module_mutex);
 }
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+void module_update_tracepoints(void)
+{
+	struct module *mod;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(mod, &modules, list)
+		if (!mod->taints)
+			tracepoint_update_probe_range(mod->tracepoints,
+				mod->tracepoints + mod->num_tracepoints);
+	mutex_unlock(&module_mutex);
+}
+
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ */
+int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+	struct module *iter_mod;
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(iter_mod, &modules, list) {
+		if (!iter_mod->taints) {
+			/*
+			 * Sorted module list
+			 */
+			if (iter_mod < iter->module)
+				continue;
+			else if (iter_mod > iter->module)
+				iter->tracepoint = NULL;
+			found = tracepoint_get_iter_range(&iter->tracepoint,
+				iter_mod->tracepoints,
+				iter_mod->tracepoints
+					+ iter_mod->num_tracepoints);
+			if (found) {
+				iter->module = iter_mod;
+				break;
+			}
+		}
+	}
+	mutex_unlock(&module_mutex);
+	return found;
+}
+#endif
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
new file mode 100644
index 000000000000..42e86ddbd2a0
--- /dev/null
+++ b/kernel/tracepoint.c
@@ -0,0 +1,476 @@
+/*
+ * Copyright (C) 2008 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/tracepoint.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+extern struct tracepoint __start___tracepoints[];
+extern struct tracepoint __stop___tracepoints[];
+
+/* Set to 1 to enable tracepoint debug output */
+static const int tracepoint_debug;
+
+/*
+ * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
+ * builtin and module tracepoints and the hash table.
+ */
+static DEFINE_MUTEX(tracepoints_mutex);
+
+/*
+ * Tracepoint hash table, containing the active tracepoints.
+ * Protected by tracepoints_mutex.
+ */
+#define TRACEPOINT_HASH_BITS 6
+#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
+
+/*
+ * Note about RCU :
+ * It is used to to delay the free of multiple probes array until a quiescent
+ * state is reached.
+ * Tracepoint entries modifications are protected by the tracepoints_mutex.
+ */
+struct tracepoint_entry {
+	struct hlist_node hlist;
+	void **funcs;
+	int refcount;	/* Number of times armed. 0 if disarmed. */
+	struct rcu_head rcu;
+	void *oldptr;
+	unsigned char rcu_pending:1;
+	char name[0];
+};
+
+static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+
+static void free_old_closure(struct rcu_head *head)
+{
+	struct tracepoint_entry *entry = container_of(head,
+		struct tracepoint_entry, rcu);
+	kfree(entry->oldptr);
+	/* Make sure we free the data before setting the pending flag to 0 */
+	smp_wmb();
+	entry->rcu_pending = 0;
+}
+
+static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
+{
+	if (!old)
+		return;
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+#ifdef CONFIG_PREEMPT_RCU
+	synchronize_sched();	/* Until we have the call_rcu_sched() */
+#endif
+	call_rcu(&entry->rcu, free_old_closure);
+}
+
+static void debug_print_probes(struct tracepoint_entry *entry)
+{
+	int i;
+
+	if (!tracepoint_debug)
+		return;
+
+	for (i = 0; entry->funcs[i]; i++)
+		printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
+}
+
+static void *
+tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
+{
+	int nr_probes = 0;
+	void **old, **new;
+
+	WARN_ON(!probe);
+
+	debug_print_probes(entry);
+	old = entry->funcs;
+	if (old) {
+		/* (N -> N+1), (N != 0, 1) probes */
+		for (nr_probes = 0; old[nr_probes]; nr_probes++)
+			if (old[nr_probes] == probe)
+				return ERR_PTR(-EEXIST);
+	}
+	/* + 2 : one for new probe, one for NULL func */
+	new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
+	if (new == NULL)
+		return ERR_PTR(-ENOMEM);
+	if (old)
+		memcpy(new, old, nr_probes * sizeof(void *));
+	new[nr_probes] = probe;
+	entry->refcount = nr_probes + 1;
+	entry->funcs = new;
+	debug_print_probes(entry);
+	return old;
+}
+
+static void *
+tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
+{
+	int nr_probes = 0, nr_del = 0, i;
+	void **old, **new;
+
+	old = entry->funcs;
+
+	debug_print_probes(entry);
+	/* (N -> M), (N > 1, M >= 0) probes */
+	for (nr_probes = 0; old[nr_probes]; nr_probes++) {
+		if ((!probe || old[nr_probes] == probe))
+			nr_del++;
+	}
+
+	if (nr_probes - nr_del == 0) {
+		/* N -> 0, (N > 1) */
+		entry->funcs = NULL;
+		entry->refcount = 0;
+		debug_print_probes(entry);
+		return old;
+	} else {
+		int j = 0;
+		/* N -> M, (N > 1, M > 0) */
+		/* + 1 for NULL */
+		new = kzalloc((nr_probes - nr_del + 1)
+			* sizeof(void *), GFP_KERNEL);
+		if (new == NULL)
+			return ERR_PTR(-ENOMEM);
+		for (i = 0; old[i]; i++)
+			if ((probe && old[i] != probe))
+				new[j++] = old[i];
+		entry->refcount = nr_probes - nr_del;
+		entry->funcs = new;
+	}
+	debug_print_probes(entry);
+	return old;
+}
+
+/*
+ * Get tracepoint if the tracepoint is present in the tracepoint hash table.
+ * Must be called with tracepoints_mutex held.
+ * Returns NULL if not present.
+ */
+static struct tracepoint_entry *get_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	u32 hash = jhash(name, strlen(name), 0);
+
+	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name))
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Add the tracepoint to the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static struct tracepoint_entry *add_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	size_t name_len = strlen(name) + 1;
+	u32 hash = jhash(name, name_len-1, 0);
+
+	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			printk(KERN_NOTICE
+				"tracepoint %s busy\n", name);
+			return ERR_PTR(-EEXIST);	/* Already there */
+		}
+	}
+	/*
+	 * Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+	memcpy(&e->name[0], name, name_len);
+	e->funcs = NULL;
+	e->refcount = 0;
+	e->rcu_pending = 0;
+	hlist_add_head(&e->hlist, head);
+	return e;
+}
+
+/*
+ * Remove the tracepoint from the tracepoint hash table. Must be called with
+ * mutex_lock held.
+ */
+static int remove_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	int found = 0;
+	size_t len = strlen(name) + 1;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found)
+		return -ENOENT;
+	if (e->refcount)
+		return -EBUSY;
+	hlist_del(&e->hlist);
+	/* Make sure the call_rcu has been executed */
+	if (e->rcu_pending)
+		rcu_barrier();
+	kfree(e);
+	return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one tracepoint.
+ */
+static void set_tracepoint(struct tracepoint_entry **entry,
+	struct tracepoint *elem, int active)
+{
+	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+
+	/*
+	 * rcu_assign_pointer has a smp_wmb() which makes sure that the new
+	 * probe callbacks array is consistent before setting a pointer to it.
+	 * This array is referenced by __DO_TRACE from
+	 * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
+	 * is used.
+	 */
+	rcu_assign_pointer(elem->funcs, (*entry)->funcs);
+	elem->state = active;
+}
+
+/*
+ * Disable a tracepoint and its probe callback.
+ * Note: only waiting an RCU period after setting elem->call to the empty
+ * function insures that the original callback is not used anymore. This insured
+ * by preempt_disable around the call site.
+ */
+static void disable_tracepoint(struct tracepoint *elem)
+{
+	elem->state = 0;
+}
+
+/**
+ * tracepoint_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Updates the probe callback corresponding to a range of tracepoints.
+ */
+void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end)
+{
+	struct tracepoint *iter;
+	struct tracepoint_entry *mark_entry;
+
+	mutex_lock(&tracepoints_mutex);
+	for (iter = begin; iter < end; iter++) {
+		mark_entry = get_tracepoint(iter->name);
+		if (mark_entry) {
+			set_tracepoint(&mark_entry, iter,
+					!!mark_entry->refcount);
+		} else {
+			disable_tracepoint(iter);
+		}
+	}
+	mutex_unlock(&tracepoints_mutex);
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ */
+static void tracepoint_update_probes(void)
+{
+	/* Core kernel tracepoints */
+	tracepoint_update_probe_range(__start___tracepoints,
+		__stop___tracepoints);
+	/* tracepoints in modules. */
+	module_update_tracepoints();
+}
+
+/**
+ * tracepoint_probe_register -  Connect a probe to a tracepoint
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
+ */
+int tracepoint_probe_register(const char *name, void *probe)
+{
+	struct tracepoint_entry *entry;
+	int ret = 0;
+	void *old;
+
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry) {
+		entry = add_tracepoint(name);
+		if (IS_ERR(entry)) {
+			ret = PTR_ERR(entry);
+			goto end;
+		}
+	}
+	/*
+	 * If we detect that a call_rcu is pending for this tracepoint,
+	 * make sure it's executed now.
+	 */
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = tracepoint_entry_add_probe(entry, probe);
+	if (IS_ERR(old)) {
+		ret = PTR_ERR(old);
+		goto end;
+	}
+	mutex_unlock(&tracepoints_mutex);
+	tracepoint_update_probes();		/* may update entry */
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	WARN_ON(!entry);
+	tracepoint_entry_free_old(entry, old);
+end:
+	mutex_unlock(&tracepoints_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_register);
+
+/**
+ * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which insures that every preempt disabled section
+ * have finished.
+ */
+int tracepoint_probe_unregister(const char *name, void *probe)
+{
+	struct tracepoint_entry *entry;
+	void *old;
+	int ret = -ENOENT;
+
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry)
+		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = tracepoint_entry_remove_probe(entry, probe);
+	mutex_unlock(&tracepoints_mutex);
+	tracepoint_update_probes();		/* may update entry */
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry)
+		goto end;
+	tracepoint_entry_free_old(entry, old);
+	remove_tracepoint(name);	/* Ignore busy error message */
+	ret = 0;
+end:
+	mutex_unlock(&tracepoints_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+
+/**
+ * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
+ * @tracepoint: current tracepoints (in), next tracepoint (out)
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Returns whether a next tracepoint has been found (1) or not (0).
+ * Will return the first tracepoint in the range if the input tracepoint is
+ * NULL.
+ */
+int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+	struct tracepoint *begin, struct tracepoint *end)
+{
+	if (!*tracepoint && begin != end) {
+		*tracepoint = begin;
+		return 1;
+	}
+	if (*tracepoint >= begin && *tracepoint < end)
+		return 1;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
+
+static void tracepoint_get_iter(struct tracepoint_iter *iter)
+{
+	int found = 0;
+
+	/* Core kernel tracepoints */
+	if (!iter->module) {
+		found = tracepoint_get_iter_range(&iter->tracepoint,
+				__start___tracepoints, __stop___tracepoints);
+		if (found)
+			goto end;
+	}
+	/* tracepoints in modules. */
+	found = module_get_iter_tracepoints(iter);
+end:
+	if (!found)
+		tracepoint_iter_reset(iter);
+}
+
+void tracepoint_iter_start(struct tracepoint_iter *iter)
+{
+	tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_start);
+
+void tracepoint_iter_next(struct tracepoint_iter *iter)
+{
+	iter->tracepoint++;
+	/*
+	 * iter->tracepoint may be invalid because we blindly incremented it.
+	 * Make sure it is valid by marshalling on the tracepoints, getting the
+	 * tracepoints from following modules if necessary.
+	 */
+	tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_next);
+
+void tracepoint_iter_stop(struct tracepoint_iter *iter)
+{
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
+
+void tracepoint_iter_reset(struct tracepoint_iter *iter)
+{
+	iter->module = NULL;
+	iter->tracepoint = NULL;
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
-- 
GitLab


From 24b8d831d56aac7907752d22d2aba5d8127db6f6 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 18 Jul 2008 12:16:16 -0400
Subject: [PATCH 220/895] tracing: tracepoints, documentation

Documentation of tracepoint usage.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/tracepoints.txt | 101 ++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 Documentation/tracepoints.txt

diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
new file mode 100644
index 000000000000..5d354e167494
--- /dev/null
+++ b/Documentation/tracepoints.txt
@@ -0,0 +1,101 @@
+	             Using the Linux Kernel Tracepoints
+
+			    Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Tracepoints and their use. It provides
+examples of how to insert tracepoints in the kernel and connect probe functions
+to them and provides some examples of probe functions.
+
+
+* Purpose of tracepoints
+
+A tracepoint placed in code provides a hook to call a function (probe) that you
+can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
+"off" (no probe is attached). When a tracepoint is "off" it has no effect,
+except for adding a tiny time penalty (checking a condition for a branch) and
+space penalty (adding a few bytes for the function call at the end of the
+instrumented function and adds a data structure in a separate section).  When a
+tracepoint is "on", the function you provide is called each time the tracepoint
+is executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the tracepoint
+site).
+
+You can put tracepoints at important locations in the code. They are
+lightweight hooks that can pass an arbitrary number of parameters,
+which prototypes are described in a tracepoint declaration placed in a header
+file.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+Two elements are required for tracepoints :
+
+- A tracepoint definition, placed in a header file.
+- The tracepoint statement, in C code.
+
+In order to use tracepoints, you should include linux/tracepoint.h.
+
+In include/trace/subsys.h :
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_eventname,
+	TPPTOTO(int firstarg, struct task_struct *p),
+	TPARGS(firstarg, p));
+
+In subsys/file.c (where the tracing statement must be added) :
+
+#include <trace/subsys.h>
+
+void somefct(void)
+{
+	...
+	trace_subsys_eventname(arg, task);
+	...
+}
+
+Where :
+- subsys_eventname is an identifier unique to your event
+    - subsys is the name of your subsystem.
+    - eventname is the name of the event to trace.
+- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function
+  called by this tracepoint.
+- TPARGS(firstarg, p) are the parameters names, same as found in the prototype.
+
+Connecting a function (probe) to a tracepoint is done by providing a probe
+(function to call) for the specific tracepoint through
+register_trace_subsys_eventname().  Removing a probe is done through
+unregister_trace_subsys_eventname(); it will remove the probe sure there is no
+caller left using the probe when it returns. Probe removal is preempt-safe
+because preemption is disabled around the probe call. See the "Probe example"
+section below for a sample probe module.
+
+The tracepoint mechanism supports inserting multiple instances of the same
+tracepoint, but a single definition must be made of a given tracepoint name over
+all the kernel to make sure no type conflict will occur. Name mangling of the
+tracepoints is done using the prototypes to make sure typing is correct.
+Verification of probe type correctness is done at the registration site by the
+compiler. Tracepoints can be put in inline functions, inlined static functions,
+and unrolled loops as well as regular functions.
+
+The naming scheme "subsys_event" is suggested here as a convention intended
+to limit collisions. Tracepoint names are global to the kernel: they are
+considered as being the same whether they are in the core kernel image or in
+modules.
+
+
+* Probe / tracepoint example
+
+See the example provided in samples/tracepoints/src
+
+Compile them with your kernel.
+
+Run, as root :
+modprobe tracepoint-example (insmod order is not important)
+modprobe tracepoint-probe-example
+cat /proc/tracepoint-example (returns an expected error)
+rmmod tracepoint-example tracepoint-probe-example
+dmesg
-- 
GitLab


From 4a0897526bbc5c6ac0df80b16b8c60339e717ae2 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 18 Jul 2008 12:16:16 -0400
Subject: [PATCH 221/895] tracing: tracepoints, samples

Tracepoint example code under samples/.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 samples/Kconfig                               |  6 ++
 samples/Makefile                              |  2 +-
 samples/tracepoints/Makefile                  |  6 ++
 samples/tracepoints/tp-samples-trace.h        | 13 +++++
 samples/tracepoints/tracepoint-probe-sample.c | 55 +++++++++++++++++++
 .../tracepoints/tracepoint-probe-sample2.c    | 42 ++++++++++++++
 samples/tracepoints/tracepoint-sample.c       | 53 ++++++++++++++++++
 7 files changed, 176 insertions(+), 1 deletion(-)
 create mode 100644 samples/tracepoints/Makefile
 create mode 100644 samples/tracepoints/tp-samples-trace.h
 create mode 100644 samples/tracepoints/tracepoint-probe-sample.c
 create mode 100644 samples/tracepoints/tracepoint-probe-sample2.c
 create mode 100644 samples/tracepoints/tracepoint-sample.c

diff --git a/samples/Kconfig b/samples/Kconfig
index e1fb471cc501..4b02f5a0e656 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -13,6 +13,12 @@ config SAMPLE_MARKERS
 	help
 	  This build markers example modules.
 
+config SAMPLE_TRACEPOINTS
+	tristate "Build tracepoints examples -- loadable modules only"
+	depends on TRACEPOINTS && m
+	help
+	  This build tracepoints example modules.
+
 config SAMPLE_KOBJECT
 	tristate "Build kobject examples"
 	help
diff --git a/samples/Makefile b/samples/Makefile
index 2e02575f7794..10eaca89fe17 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,3 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/
+obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/ tracepoints/
diff --git a/samples/tracepoints/Makefile b/samples/tracepoints/Makefile
new file mode 100644
index 000000000000..36479ad9ae14
--- /dev/null
+++ b/samples/tracepoints/Makefile
@@ -0,0 +1,6 @@
+# builds the tracepoint example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
new file mode 100644
index 000000000000..0216b55bd640
--- /dev/null
+++ b/samples/tracepoints/tp-samples-trace.h
@@ -0,0 +1,13 @@
+#ifndef _TP_SAMPLES_TRACE_H
+#define _TP_SAMPLES_TRACE_H
+
+#include <linux/proc_fs.h>	/* for struct inode and struct file */
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_event,
+	TPPROTO(struct inode *inode, struct file *file),
+	TPARGS(inode, file));
+DEFINE_TRACE(subsys_eventb,
+	TPPROTO(void),
+	TPARGS());
+#endif
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
new file mode 100644
index 000000000000..55abfdda4bd4
--- /dev/null
+++ b/samples/tracepoints/tracepoint-probe-sample.c
@@ -0,0 +1,55 @@
+/*
+ * tracepoint-probe-sample.c
+ *
+ * sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/dcache.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+	path_get(&file->f_path);
+	dget(file->f_path.dentry);
+	printk(KERN_INFO "Event is encountered with filename %s\n",
+		file->f_path.dentry->d_name.name);
+	dput(file->f_path.dentry);
+	path_put(&file->f_path);
+}
+
+static void probe_subsys_eventb(void)
+{
+	printk(KERN_INFO "Event B is encountered\n");
+}
+
+int __init tp_sample_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_subsys_event(probe_subsys_event);
+	WARN_ON(ret);
+	ret = register_trace_subsys_eventb(probe_subsys_eventb);
+	WARN_ON(ret);
+
+	return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+	unregister_trace_subsys_eventb(probe_subsys_eventb);
+	unregister_trace_subsys_event(probe_subsys_event);
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
new file mode 100644
index 000000000000..5e9fcf4afffe
--- /dev/null
+++ b/samples/tracepoints/tracepoint-probe-sample2.c
@@ -0,0 +1,42 @@
+/*
+ * tracepoint-probe-sample2.c
+ *
+ * 2nd sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+	printk(KERN_INFO "Event is encountered with inode number %lu\n",
+		inode->i_ino);
+}
+
+int __init tp_sample_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_subsys_event(probe_subsys_event);
+	WARN_ON(ret);
+
+	return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+	unregister_trace_subsys_event(probe_subsys_event);
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
new file mode 100644
index 000000000000..4ae4b7fcc043
--- /dev/null
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -0,0 +1,53 @@
+/* tracepoint-sample.c
+ *
+ * Executes a tracepoint when /proc/tracepoint-example is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include "tp-samples-trace.h"
+
+struct proc_dir_entry *pentry_example;
+
+static int my_open(struct inode *inode, struct file *file)
+{
+	int i;
+
+	trace_subsys_event(inode, file);
+	for (i = 0; i < 10; i++)
+		trace_subsys_eventb();
+	return -EPERM;
+}
+
+static struct file_operations mark_ops = {
+	.open = my_open,
+};
+
+static int example_init(void)
+{
+	printk(KERN_ALERT "example init\n");
+	pentry_example = proc_create("tracepoint-example", 0444, NULL,
+		&mark_ops);
+	if (!pentry_example)
+		return -EPERM;
+	return 0;
+}
+
+static void example_exit(void)
+{
+	printk(KERN_ALERT "example exit\n");
+	remove_proc_entry("tracepoint-example", NULL);
+}
+
+module_init(example_init)
+module_exit(example_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint example");
-- 
GitLab


From 0a16b6075843325dc402edf80c1662838b929aff Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 18 Jul 2008 12:16:17 -0400
Subject: [PATCH 222/895] tracing, sched: LTTng instrumentation - scheduler

Instrument the scheduler activity (sched_switch, migration, wakeups,
wait for a task, signal delivery) and process/thread
creation/destruction (fork, exit, kthread stop). Actually, kthread
creation is not instrumented in this patch because it is architecture
dependent. It allows to connect tracers such as ftrace which detects
scheduling latencies, good/bad scheduler decisions. Tools like LTTng can
export this scheduler information along with instrumentation of the rest
of the kernel activity to perform post-mortem analysis on the scheduler
activity.

About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added. See the "Tracepoints" patch header for
performance result detail.

Changelog :

- Change instrumentation location and parameter to match ftrace
  instrumentation, previously done with kernel markers.

[ mingo@elte.hu: conflict resolutions ]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/sched.h | 45 +++++++++++++++++++++++++++++++++++++++++++
 kernel/exit.c         | 10 +++++++++-
 kernel/fork.c         |  3 +++
 kernel/kthread.c      |  5 +++++
 kernel/sched.c        | 17 ++++++----------
 kernel/signal.c       |  3 +++
 6 files changed, 71 insertions(+), 12 deletions(-)
 create mode 100644 include/trace/sched.h

diff --git a/include/trace/sched.h b/include/trace/sched.h
new file mode 100644
index 000000000000..506ae1323656
--- /dev/null
+++ b/include/trace/sched.h
@@ -0,0 +1,45 @@
+#ifndef _TRACE_SCHED_H
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(sched_kthread_stop,
+	TPPROTO(struct task_struct *t),
+	TPARGS(t));
+DEFINE_TRACE(sched_kthread_stop_ret,
+	TPPROTO(int ret),
+	TPARGS(ret));
+DEFINE_TRACE(sched_wait_task,
+	TPPROTO(struct rq *rq, struct task_struct *p),
+	TPARGS(rq, p));
+DEFINE_TRACE(sched_wakeup,
+	TPPROTO(struct rq *rq, struct task_struct *p),
+	TPARGS(rq, p));
+DEFINE_TRACE(sched_wakeup_new,
+	TPPROTO(struct rq *rq, struct task_struct *p),
+	TPARGS(rq, p));
+DEFINE_TRACE(sched_switch,
+	TPPROTO(struct rq *rq, struct task_struct *prev,
+		struct task_struct *next),
+	TPARGS(rq, prev, next));
+DEFINE_TRACE(sched_migrate_task,
+	TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
+	TPARGS(rq, p, dest_cpu));
+DEFINE_TRACE(sched_process_free,
+	TPPROTO(struct task_struct *p),
+	TPARGS(p));
+DEFINE_TRACE(sched_process_exit,
+	TPPROTO(struct task_struct *p),
+	TPARGS(p));
+DEFINE_TRACE(sched_process_wait,
+	TPPROTO(struct pid *pid),
+	TPARGS(pid));
+DEFINE_TRACE(sched_process_fork,
+	TPPROTO(struct task_struct *parent, struct task_struct *child),
+	TPARGS(parent, child));
+DEFINE_TRACE(sched_signal_send,
+	TPPROTO(int sig, struct task_struct *p),
+	TPARGS(sig, p));
+
+#endif
diff --git a/kernel/exit.c b/kernel/exit.c
index 85a83c831856..7b71f87f1207 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -47,6 +47,7 @@
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
+#include <trace/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -149,7 +150,10 @@ static void __exit_signal(struct task_struct *tsk)
 
 static void delayed_put_task_struct(struct rcu_head *rhp)
 {
-	put_task_struct(container_of(rhp, struct task_struct, rcu));
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+	trace_sched_process_free(tsk);
+	put_task_struct(tsk);
 }
 
 
@@ -1074,6 +1078,8 @@ NORET_TYPE void do_exit(long code)
 
 	if (group_dead)
 		acct_process();
+	trace_sched_process_exit(tsk);
+
 	exit_sem(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
@@ -1675,6 +1681,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options,
 	struct task_struct *tsk;
 	int retval;
 
+	trace_sched_process_wait(pid);
+
 	add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
 	/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 30de644a40c4..cfaff92f61ff 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -58,6 +58,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include <trace/sched.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1364,6 +1365,8 @@ long do_fork(unsigned long clone_flags,
 	if (!IS_ERR(p)) {
 		struct completion vfork;
 
+		trace_sched_process_fork(current, p);
+
 		nr = task_pid_vnr(p);
 
 		if (clone_flags & CLONE_PARENT_SETTID)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 96cff2f8710b..50598e29439a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,6 +13,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <trace/sched.h>
 
 #define KTHREAD_NICE_LEVEL (-5)
 
@@ -206,6 +207,8 @@ int kthread_stop(struct task_struct *k)
 	/* It could exit after stop_info.k set, but before wake_up_process. */
 	get_task_struct(k);
 
+	trace_sched_kthread_stop(k);
+
 	/* Must init completion *before* thread sees kthread_stop_info.k */
 	init_completion(&kthread_stop_info.done);
 	smp_wmb();
@@ -221,6 +224,8 @@ int kthread_stop(struct task_struct *k)
 	ret = kthread_stop_info.err;
 	mutex_unlock(&kthread_stop_lock);
 
+	trace_sched_kthread_stop_ret(ret);
+
 	return ret;
 }
 EXPORT_SYMBOL(kthread_stop);
diff --git a/kernel/sched.c b/kernel/sched.c
index 6f230596bd0c..3d1ad130c24e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
+#include <trace/sched.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -1936,6 +1937,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 		 * just go back and repeat.
 		 */
 		rq = task_rq_lock(p, &flags);
+		trace_sched_wait_task(rq, p);
 		running = task_running(rq, p);
 		on_rq = p->se.on_rq;
 		ncsw = 0;
@@ -2297,9 +2299,7 @@ out_activate:
 	success = 1;
 
 out_running:
-	trace_mark(kernel_sched_wakeup,
-		"pid %d state %ld ## rq %p task %p rq->curr %p",
-		p->pid, p->state, rq, p, rq->curr);
+	trace_sched_wakeup(rq, p);
 	check_preempt_curr(rq, p, sync);
 
 	p->state = TASK_RUNNING;
@@ -2432,9 +2432,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		p->sched_class->task_new(rq, p);
 		inc_nr_running(rq);
 	}
-	trace_mark(kernel_sched_wakeup_new,
-		"pid %d state %ld ## rq %p task %p rq->curr %p",
-		p->pid, p->state, rq, p, rq->curr);
+	trace_sched_wakeup_new(rq, p);
 	check_preempt_curr(rq, p, 0);
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
@@ -2607,11 +2605,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	struct mm_struct *mm, *oldmm;
 
 	prepare_task_switch(rq, prev, next);
-	trace_mark(kernel_sched_schedule,
-		"prev_pid %d next_pid %d prev_state %ld "
-		"## rq %p prev %p next %p",
-		prev->pid, next->pid, prev->state,
-		rq, prev, next);
+	trace_sched_switch(rq, prev, next);
 	mm = next->mm;
 	oldmm = prev->active_mm;
 	/*
@@ -2851,6 +2845,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 	    || unlikely(!cpu_active(dest_cpu)))
 		goto out;
 
+	trace_sched_migrate_task(rq, p, dest_cpu);
 	/* force the process onto the specified CPU */
 	if (migrate_task(p, dest_cpu, &req)) {
 		/* Need to wait for migration thread (might exit: take ref). */
diff --git a/kernel/signal.c b/kernel/signal.c
index e661b01d340f..bf40ecc87b26 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,6 +27,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
+#include <trace/sched.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -803,6 +804,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
 	struct sigpending *pending;
 	struct sigqueue *q;
 
+	trace_sched_signal_send(sig, t);
+
 	assert_spin_locked(&t->sighand->siglock);
 	if (!prepare_signal(sig, t))
 		return 0;
-- 
GitLab


From b07c3f193a8074aa4afe43cfa8ae38ec4c7ccfa9 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 18 Jul 2008 12:16:17 -0400
Subject: [PATCH 223/895] ftrace: port to tracepoints

Porting the trace_mark() used by ftrace to tracepoints. (cleanup)

Changelog :
- Change error messages : marker -> tracepoint

[ mingo@elte.hu: conflict resolutions ]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_sched_switch.c | 120 ++++++--------------------
 kernel/trace/trace_sched_wakeup.c | 135 ++++++++----------------------
 2 files changed, 58 insertions(+), 197 deletions(-)

diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index cb817a209aa0..789e927abc9c 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -9,8 +9,8 @@
 #include <linux/debugfs.h>
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
-#include <linux/marker.h>
 #include <linux/ftrace.h>
+#include <trace/sched.h>
 
 #include "trace.h"
 
@@ -19,16 +19,17 @@ static int __read_mostly	tracer_enabled;
 static atomic_t			sched_ref;
 
 static void
-sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 			struct task_struct *next)
 {
-	struct trace_array **ptr = private;
-	struct trace_array *tr = *ptr;
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	long disabled;
 	int cpu;
 
+	if (!atomic_read(&sched_ref))
+		return;
+
 	tracing_record_cmdline(prev);
 	tracing_record_cmdline(next);
 
@@ -37,95 +38,42 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
 
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
+	data = ctx_trace->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		tracing_sched_switch_trace(tr, data, prev, next, flags);
+		tracing_sched_switch_trace(ctx_trace, data, prev, next, flags);
 
 	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 }
 
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
-		      const char *format, va_list *args)
-{
-	struct task_struct *prev;
-	struct task_struct *next;
-	struct rq *__rq;
-
-	if (!atomic_read(&sched_ref))
-		return;
-
-	/* skip prev_pid %d next_pid %d prev_state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	__rq = va_arg(*args, typeof(__rq));
-	prev = va_arg(*args, typeof(prev));
-	next = va_arg(*args, typeof(next));
-
-	/*
-	 * If tracer_switch_func only points to the local
-	 * switch func, it still needs the ptr passed to it.
-	 */
-	sched_switch_func(probe_data, __rq, prev, next);
-}
-
 static void
-wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
-			task_struct *curr)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 {
-	struct trace_array **ptr = private;
-	struct trace_array *tr = *ptr;
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	long disabled;
 	int cpu;
 
-	if (!tracer_enabled)
+	if (!likely(tracer_enabled))
 		return;
 
-	tracing_record_cmdline(curr);
+	tracing_record_cmdline(current);
 
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
+	data = ctx_trace->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+		tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+			flags);
 
 	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 }
 
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
-		 const char *format, va_list *args)
-{
-	struct task_struct *curr;
-	struct task_struct *task;
-	struct rq *__rq;
-
-	if (likely(!tracer_enabled))
-		return;
-
-	/* Skip pid %d state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	/* now get the meat: "rq %p task %p rq->curr %p" */
-	__rq = va_arg(*args, typeof(__rq));
-	task = va_arg(*args, typeof(task));
-	curr = va_arg(*args, typeof(curr));
-
-	tracing_record_cmdline(task);
-	tracing_record_cmdline(curr);
-
-	wakeup_func(probe_data, __rq, task, curr);
-}
-
 static void sched_switch_reset(struct trace_array *tr)
 {
 	int cpu;
@@ -140,60 +88,40 @@ static int tracing_sched_register(void)
 {
 	int ret;
 
-	ret = marker_probe_register("kernel_sched_wakeup",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&ctx_trace);
+	ret = register_trace_sched_wakeup(probe_sched_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup\n");
 		return ret;
 	}
 
-	ret = marker_probe_register("kernel_sched_wakeup_new",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&ctx_trace);
+	ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup_new\n");
 		goto fail_deprobe;
 	}
 
-	ret = marker_probe_register("kernel_sched_schedule",
-		"prev_pid %d next_pid %d prev_state %ld "
-		"## rq %p prev %p next %p",
-		sched_switch_callback,
-		&ctx_trace);
+	ret = register_trace_sched_switch(probe_sched_switch);
 	if (ret) {
-		pr_info("sched trace: Couldn't add marker"
+		pr_info("sched trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_schedule\n");
 		goto fail_deprobe_wake_new;
 	}
 
 	return ret;
 fail_deprobe_wake_new:
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&ctx_trace);
+	unregister_trace_sched_wakeup_new(probe_sched_wakeup);
 fail_deprobe:
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&ctx_trace);
+	unregister_trace_sched_wakeup(probe_sched_wakeup);
 	return ret;
 }
 
 static void tracing_sched_unregister(void)
 {
-	marker_probe_unregister("kernel_sched_schedule",
-				sched_switch_callback,
-				&ctx_trace);
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&ctx_trace);
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&ctx_trace);
+	unregister_trace_sched_switch(probe_sched_switch);
+	unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+	unregister_trace_sched_wakeup(probe_sched_wakeup);
 }
 
 static void tracing_start_sched_switch(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e303ccb62cdf..08206b4e29c4 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <linux/marker.h>
+#include <trace/sched.h>
 
 #include "trace.h"
 
@@ -112,18 +112,18 @@ static int report_latency(cycle_t delta)
 }
 
 static void notrace
-wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	struct task_struct *next)
 {
 	unsigned long latency = 0, t0 = 0, t1 = 0;
-	struct trace_array **ptr = private;
-	struct trace_array *tr = *ptr;
 	struct trace_array_cpu *data;
 	cycle_t T0, T1, delta;
 	unsigned long flags;
 	long disabled;
 	int cpu;
 
+	tracing_record_cmdline(prev);
+
 	if (unlikely(!tracer_enabled))
 		return;
 
@@ -140,11 +140,11 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
 		return;
 
 	/* The task we are waiting for is waking up */
-	data = tr->data[wakeup_cpu];
+	data = wakeup_trace->data[wakeup_cpu];
 
 	/* disable local data, not wakeup_cpu data */
 	cpu = raw_smp_processor_id();
-	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
 	if (likely(disabled != 1))
 		goto out;
 
@@ -155,7 +155,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
 	if (unlikely(!tracer_enabled || next != wakeup_task))
 		goto out_unlock;
 
-	trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+	trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags);
 
 	/*
 	 * usecs conversion is slow so we try to delay the conversion
@@ -174,39 +174,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
 	t0 = nsecs_to_usecs(T0);
 	t1 = nsecs_to_usecs(T1);
 
-	update_max_tr(tr, wakeup_task, wakeup_cpu);
+	update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
 
 out_unlock:
-	__wakeup_reset(tr);
+	__wakeup_reset(wakeup_trace);
 	__raw_spin_unlock(&wakeup_lock);
 	local_irq_restore(flags);
 out:
-	atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
-		      const char *format, va_list *args)
-{
-	struct task_struct *prev;
-	struct task_struct *next;
-	struct rq *__rq;
-
-	/* skip prev_pid %d next_pid %d prev_state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	__rq = va_arg(*args, typeof(__rq));
-	prev = va_arg(*args, typeof(prev));
-	next = va_arg(*args, typeof(next));
-
-	tracing_record_cmdline(prev);
-
-	/*
-	 * If tracer_switch_func only points to the local
-	 * switch func, it still needs the ptr passed to it.
-	 */
-	wakeup_sched_switch(probe_data, __rq, prev, next);
+	atomic_dec(&wakeup_trace->data[cpu]->disabled);
 }
 
 static void __wakeup_reset(struct trace_array *tr)
@@ -240,19 +215,24 @@ static void wakeup_reset(struct trace_array *tr)
 }
 
 static void
-wakeup_check_start(struct trace_array *tr, struct task_struct *p,
-		   struct task_struct *curr)
+probe_wakeup(struct rq *rq, struct task_struct *p)
 {
 	int cpu = smp_processor_id();
 	unsigned long flags;
 	long disabled;
 
+	if (likely(!tracer_enabled))
+		return;
+
+	tracing_record_cmdline(p);
+	tracing_record_cmdline(current);
+
 	if (likely(!rt_task(p)) ||
 			p->prio >= wakeup_prio ||
-			p->prio >= curr->prio)
+			p->prio >= current->prio)
 		return;
 
-	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
 	if (unlikely(disabled != 1))
 		goto out;
 
@@ -264,7 +244,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
 		goto out_locked;
 
 	/* reset the trace */
-	__wakeup_reset(tr);
+	__wakeup_reset(wakeup_trace);
 
 	wakeup_cpu = task_cpu(p);
 	wakeup_prio = p->prio;
@@ -274,74 +254,37 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
 
 	local_save_flags(flags);
 
-	tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
-	trace_function(tr, tr->data[wakeup_cpu],
+	wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+	trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
 		       CALLER_ADDR1, CALLER_ADDR2, flags);
 
 out_locked:
 	__raw_spin_unlock(&wakeup_lock);
 out:
-	atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
-		 const char *format, va_list *args)
-{
-	struct trace_array **ptr = probe_data;
-	struct trace_array *tr = *ptr;
-	struct task_struct *curr;
-	struct task_struct *task;
-	struct rq *__rq;
-
-	if (likely(!tracer_enabled))
-		return;
-
-	/* Skip pid %d state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	/* now get the meat: "rq %p task %p rq->curr %p" */
-	__rq = va_arg(*args, typeof(__rq));
-	task = va_arg(*args, typeof(task));
-	curr = va_arg(*args, typeof(curr));
-
-	tracing_record_cmdline(task);
-	tracing_record_cmdline(curr);
-
-	wakeup_check_start(tr, task, curr);
+	atomic_dec(&wakeup_trace->data[cpu]->disabled);
 }
 
 static void start_wakeup_tracer(struct trace_array *tr)
 {
 	int ret;
 
-	ret = marker_probe_register("kernel_sched_wakeup",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&wakeup_trace);
+	ret = register_trace_sched_wakeup(probe_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup\n");
 		return;
 	}
 
-	ret = marker_probe_register("kernel_sched_wakeup_new",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&wakeup_trace);
+	ret = register_trace_sched_wakeup_new(probe_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup_new\n");
 		goto fail_deprobe;
 	}
 
-	ret = marker_probe_register("kernel_sched_schedule",
-		"prev_pid %d next_pid %d prev_state %ld "
-		"## rq %p prev %p next %p",
-		sched_switch_callback,
-		&wakeup_trace);
+	ret = register_trace_sched_switch(probe_wakeup_sched_switch);
 	if (ret) {
-		pr_info("sched trace: Couldn't add marker"
+		pr_info("sched trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_schedule\n");
 		goto fail_deprobe_wake_new;
 	}
@@ -363,28 +306,18 @@ static void start_wakeup_tracer(struct trace_array *tr)
 
 	return;
 fail_deprobe_wake_new:
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&wakeup_trace);
+	unregister_trace_sched_wakeup_new(probe_wakeup);
 fail_deprobe:
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&wakeup_trace);
+	unregister_trace_sched_wakeup(probe_wakeup);
 }
 
 static void stop_wakeup_tracer(struct trace_array *tr)
 {
 	tracer_enabled = 0;
 	unregister_ftrace_function(&trace_ops);
-	marker_probe_unregister("kernel_sched_schedule",
-				sched_switch_callback,
-				&wakeup_trace);
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&wakeup_trace);
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&wakeup_trace);
+	unregister_trace_sched_switch(probe_wakeup_sched_switch);
+	unregister_trace_sched_wakeup_new(probe_wakeup);
+	unregister_trace_sched_wakeup(probe_wakeup);
 }
 
 static void wakeup_tracer_init(struct trace_array *tr)
-- 
GitLab


From fa340d9c050e78fb21a142b617304214ae5e0c2d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 23 Jul 2008 13:38:00 +0200
Subject: [PATCH 224/895] tracing: disable tracepoints by default

while it's arguably low overhead, we dont enable new features by default.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index 70082678a914..d5994490b0b0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -773,7 +773,6 @@ config PROFILING
 
 config TRACEPOINTS
 	bool "Activate tracepoints"
-	default y
 	help
 	  Place an empty function call at each tracepoint site. Can be
 	  dynamically changed for a probe function.
-- 
GitLab


From cf569a932217b97e2fc2c48aa597fe29519a0cff Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 23 Jul 2008 13:48:22 +0200
Subject: [PATCH 225/895] sched: clean up tracepoints

make it a bit more structured hence more readable.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/sched.h | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/include/trace/sched.h b/include/trace/sched.h
index 506ae1323656..ad47369d01b5 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -6,40 +6,51 @@
 
 DEFINE_TRACE(sched_kthread_stop,
 	TPPROTO(struct task_struct *t),
-	TPARGS(t));
+		TPARGS(t));
+
 DEFINE_TRACE(sched_kthread_stop_ret,
 	TPPROTO(int ret),
-	TPARGS(ret));
+		TPARGS(ret));
+
 DEFINE_TRACE(sched_wait_task,
 	TPPROTO(struct rq *rq, struct task_struct *p),
-	TPARGS(rq, p));
+		TPARGS(rq, p));
+
 DEFINE_TRACE(sched_wakeup,
 	TPPROTO(struct rq *rq, struct task_struct *p),
-	TPARGS(rq, p));
+		TPARGS(rq, p));
+
 DEFINE_TRACE(sched_wakeup_new,
 	TPPROTO(struct rq *rq, struct task_struct *p),
-	TPARGS(rq, p));
+		TPARGS(rq, p));
+
 DEFINE_TRACE(sched_switch,
 	TPPROTO(struct rq *rq, struct task_struct *prev,
 		struct task_struct *next),
-	TPARGS(rq, prev, next));
+		TPARGS(rq, prev, next));
+
 DEFINE_TRACE(sched_migrate_task,
 	TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
-	TPARGS(rq, p, dest_cpu));
+		TPARGS(rq, p, dest_cpu));
+
 DEFINE_TRACE(sched_process_free,
 	TPPROTO(struct task_struct *p),
-	TPARGS(p));
+		TPARGS(p));
+
 DEFINE_TRACE(sched_process_exit,
 	TPPROTO(struct task_struct *p),
-	TPARGS(p));
+		TPARGS(p));
+
 DEFINE_TRACE(sched_process_wait,
 	TPPROTO(struct pid *pid),
-	TPARGS(pid));
+		TPARGS(pid));
+
 DEFINE_TRACE(sched_process_fork,
 	TPPROTO(struct task_struct *parent, struct task_struct *child),
-	TPARGS(parent, child));
+		TPARGS(parent, child));
+
 DEFINE_TRACE(sched_signal_send,
 	TPPROTO(int sig, struct task_struct *p),
-	TPARGS(sig, p));
+		TPARGS(sig, p));
 
 #endif
-- 
GitLab


From 5f87f1121895dc09d2d1c1db5f14af6aa4ce3e94 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 23 Jul 2008 14:15:22 +0200
Subject: [PATCH 226/895] tracing: clean up tracepoints kconfig structure

do not expose users to CONFIG_TRACEPOINTS - tracers can select it
just fine.

update ftrace to select CONFIG_TRACEPOINTS.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/Kconfig         | 9 +++++----
 kernel/trace/Kconfig | 1 +
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index d5994490b0b0..031344f954fd 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -771,11 +771,12 @@ config PROFILING
 	  Say Y here to enable the extended profiling support mechanisms used
 	  by profilers such as OProfile.
 
+#
+# Place an empty function call at each tracepoint site. Can be
+# dynamically changed for a probe function.
+#
 config TRACEPOINTS
-	bool "Activate tracepoints"
-	help
-	  Place an empty function call at each tracepoint site. Can be
-	  dynamically changed for a probe function.
+	bool
 
 config MARKERS
 	bool "Activate markers"
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 263e9e6bbd60..cae2637d5e68 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -14,6 +14,7 @@ config TRACING
 	bool
 	select DEBUG_FS
 	select STACKTRACE
+	select TRACEPOINTS
 
 config FTRACE
 	bool "Kernel Function Tracer"
-- 
GitLab


From 611b1597680dd4a57896bcca1af0484be463c55e Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Mon, 21 Jul 2008 18:49:56 +0300
Subject: [PATCH 227/895] x86: fix mmiotrace 8-bit register decoding

When SIL, DIL, BPL or SPL registers were used in MMIO, the datum
was extracted from AH, BH, CH, or DH, which are incorrect.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Cc: "Vegard Nossum" <vegard.nossum@gmail.com>
Cc: "Steven Rostedt" <srostedt@redhat.com>
Cc: proski@gnu.org
Cc: "Pekka Enberg"
	<penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pf_in.c | 121 ++++++++++++++++++++++++++++++--------------
 1 file changed, 84 insertions(+), 37 deletions(-)

diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index efa1911e20ca..df3d5c861cda 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -79,25 +79,34 @@ static unsigned int mw32[] = { 0xC7 };
 static unsigned int mw64[] = { 0x89, 0x8B };
 #endif /* not __i386__ */
 
-static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
-								int *rexr)
+struct prefix_bits {
+	unsigned shorted:1;
+	unsigned enlarged:1;
+	unsigned rexr:1;
+	unsigned rex:1;
+};
+
+static int skip_prefix(unsigned char *addr, struct prefix_bits *prf)
 {
 	int i;
 	unsigned char *p = addr;
-	*shorted = 0;
-	*enlarged = 0;
-	*rexr = 0;
+	prf->shorted = 0;
+	prf->enlarged = 0;
+	prf->rexr = 0;
+	prf->rex = 0;
 
 restart:
 	for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
 		if (*p == prefix_codes[i]) {
 			if (*p == 0x66)
-				*shorted = 1;
+				prf->shorted = 1;
 #ifdef __amd64__
 			if ((*p & 0xf8) == 0x48)
-				*enlarged = 1;
+				prf->enlarged = 1;
 			if ((*p & 0xf4) == 0x44)
-				*rexr = 1;
+				prf->rexr = 1;
+			if ((*p & 0xf0) == 0x40)
+				prf->rex = 1;
 #endif
 			p++;
 			goto restart;
@@ -135,12 +144,12 @@ enum reason_type get_ins_type(unsigned long ins_addr)
 {
 	unsigned int opcode;
 	unsigned char *p;
-	int shorted, enlarged, rexr;
+	struct prefix_bits prf;
 	int i;
 	enum reason_type rv = OTHERS;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
 	CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
@@ -156,10 +165,11 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
 {
 	unsigned int opcode;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
 	for (i = 0; i < ARRAY_SIZE(rw8); i++)
@@ -168,7 +178,7 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
 
 	for (i = 0; i < ARRAY_SIZE(rw32); i++)
 		if (rw32[i] == opcode)
-			return (shorted ? 2 : (enlarged ? 8 : 4));
+			return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
 
 	printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
 	return 0;
@@ -178,10 +188,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
 {
 	unsigned int opcode;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
 	for (i = 0; i < ARRAY_SIZE(mw8); i++)
@@ -194,11 +205,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
 
 	for (i = 0; i < ARRAY_SIZE(mw32); i++)
 		if (mw32[i] == opcode)
-			return shorted ? 2 : 4;
+			return prf.shorted ? 2 : 4;
 
 	for (i = 0; i < ARRAY_SIZE(mw64); i++)
 		if (mw64[i] == opcode)
-			return shorted ? 2 : (enlarged ? 8 : 4);
+			return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
 
 	printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
 	return 0;
@@ -238,7 +249,7 @@ enum {
 #endif
 };
 
-static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
+static unsigned char *get_reg_w8(int no, int rex, struct pt_regs *regs)
 {
 	unsigned char *rv = NULL;
 
@@ -255,18 +266,6 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
 	case arg_DL:
 		rv = (unsigned char *)&regs->dx;
 		break;
-	case arg_AH:
-		rv = 1 + (unsigned char *)&regs->ax;
-		break;
-	case arg_BH:
-		rv = 1 + (unsigned char *)&regs->bx;
-		break;
-	case arg_CH:
-		rv = 1 + (unsigned char *)&regs->cx;
-		break;
-	case arg_DH:
-		rv = 1 + (unsigned char *)&regs->dx;
-		break;
 #ifdef __amd64__
 	case arg_R8:
 		rv = (unsigned char *)&regs->r8;
@@ -294,9 +293,55 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
 		break;
 #endif
 	default:
-		printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
 		break;
 	}
+
+	if (rv)
+		return rv;
+
+	if (rex) {
+		/*
+		 * If REX prefix exists, access low bytes of SI etc.
+		 * instead of AH etc.
+		 */
+		switch (no) {
+		case arg_SI:
+			rv = (unsigned char *)&regs->si;
+			break;
+		case arg_DI:
+			rv = (unsigned char *)&regs->di;
+			break;
+		case arg_BP:
+			rv = (unsigned char *)&regs->bp;
+			break;
+		case arg_SP:
+			rv = (unsigned char *)&regs->sp;
+			break;
+		default:
+			break;
+		}
+	} else {
+		switch (no) {
+		case arg_AH:
+			rv = 1 + (unsigned char *)&regs->ax;
+			break;
+		case arg_BH:
+			rv = 1 + (unsigned char *)&regs->bx;
+			break;
+		case arg_CH:
+			rv = 1 + (unsigned char *)&regs->cx;
+			break;
+		case arg_DH:
+			rv = 1 + (unsigned char *)&regs->dx;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (!rv)
+		printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
+
 	return rv;
 }
 
@@ -368,11 +413,12 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 	unsigned char mod_rm;
 	int reg;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 	unsigned long rv;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 	for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
 		if (reg_rop[i] == opcode) {
@@ -392,10 +438,10 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 
 do_work:
 	mod_rm = *p;
-	reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
+	reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
 	switch (get_ins_reg_width(ins_addr)) {
 	case 1:
-		return *get_reg_w8(reg, regs);
+		return *get_reg_w8(reg, prf.rex, regs);
 
 	case 2:
 		return *(unsigned short *)get_reg_w32(reg, regs);
@@ -422,11 +468,12 @@ unsigned long get_ins_imm_val(unsigned long ins_addr)
 	unsigned char mod_rm;
 	unsigned char mod;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 	unsigned long rv;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 	for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
 		if (imm_wop[i] == opcode) {
-- 
GitLab


From 9795302acf2817d0842e56d23df6008e43df0970 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Thu, 24 Jul 2008 16:37:23 -0400
Subject: [PATCH 228/895] tracepoints: use TABLE_SIZE macro

Steven Rostedt suggested:

| Wouldn't it look nicer to have: (TRACEPOINT_TABLE_SIZE - 1) ?

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/tracepoint.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 42e86ddbd2a0..c7c62a4a75f5 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -177,7 +177,7 @@ static struct tracepoint_entry *get_tracepoint(const char *name)
 	struct tracepoint_entry *e;
 	u32 hash = jhash(name, strlen(name), 0);
 
-	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 	hlist_for_each_entry(e, node, head, hlist) {
 		if (!strcmp(name, e->name))
 			return e;
@@ -197,7 +197,7 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
 	size_t name_len = strlen(name) + 1;
 	u32 hash = jhash(name, name_len-1, 0);
 
-	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 	hlist_for_each_entry(e, node, head, hlist) {
 		if (!strcmp(name, e->name)) {
 			printk(KERN_NOTICE
@@ -233,7 +233,7 @@ static int remove_tracepoint(const char *name)
 	size_t len = strlen(name) + 1;
 	u32 hash = jhash(name, len-1, 0);
 
-	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 	hlist_for_each_entry(e, node, head, hlist) {
 		if (!strcmp(name, e->name)) {
 			found = 1;
-- 
GitLab


From 36dcd67ae994fece615b7c700958d215e884b9ae Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 29 Jul 2008 12:00:59 +0200
Subject: [PATCH 229/895] ftrace: ignore functions that cannot be kprobe-ed

kprobes already has an extensive list of annotations for functions
that should not be instrumented. Add notrace annotations to these
functions as well.

This is particularly useful for functions called by the NMI path.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kprobes.h | 5 +++--
 kernel/notifier.c       | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 0be7795655fa..497b1d1f7a05 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -29,6 +29,7 @@
  *		<jkenisto@us.ibm.com>  and Prasanna S Panchamukhi
  *		<prasanna@in.ibm.com> added function-return probes.
  */
+#include <linux/linkage.h>
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
@@ -47,7 +48,7 @@
 #define KPROBE_HIT_SSDONE	0x00000008
 
 /* Attach to insert probes on any functions which should be ignored*/
-#define __kprobes	__attribute__((__section__(".kprobes.text")))
+#define __kprobes	__attribute__((__section__(".kprobes.text"))) notrace
 
 struct kprobe;
 struct pt_regs;
@@ -256,7 +257,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
 
 #else /* CONFIG_KPROBES */
 
-#define __kprobes	/**/
+#define __kprobes	notrace
 struct jprobe;
 struct kretprobe;
 
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 823be11584ef..4282c0a40a57 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -550,7 +550,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
 
 static ATOMIC_NOTIFIER_HEAD(die_chain);
 
-int notify_die(enum die_val val, const char *str,
+int notrace notify_die(enum die_val val, const char *str,
 	       struct pt_regs *regs, long err, int trap, int sig)
 {
 	struct die_args args = {
-- 
GitLab


From 8b1fa1d7b22f386747c7b78b918d4c680c16066f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 29 Jul 2008 12:36:02 +0200
Subject: [PATCH 230/895] ftrace: mark lapic_wd_event() notrace

it can be called in the NMI path:

[    0.645999] calling  ftrace_dynamic_init+0x0/0xd6
[    0.647521] ------------[ cut here ]------------
[    0.647521] WARNING: at kernel/trace/ftrace.c:348 ftrace_record_ip+0x4e/0x252()
[    0.647521] Modules linked in:
[    0.647521] Pid: 15, comm: kstop1 Not tainted 2.6.27-rc1-tip #22686
[    0.647521]
[    0.647521] Call Trace:
[    0.647521]  <NMI>  [<ffffffff8024593f>] warn_on_slowpath+0x5d/0x84
[    0.647521]  [<ffffffff80220b99>] ? lapic_wd_event+0xb/0x5c
[    0.647521]  [<ffffffff80287b3b>] ftrace_record_ip+0x4e/0x252
[    0.647521]  [<ffffffff80211274>] mcount_call+0x5/0x31
[    0.647521]  [<ffffffff80220b9e>] ? lapic_wd_event+0x10/0x5c
[    0.647521]  [<ffffffff8083f3ec>] nmi_watchdog_tick+0x19d/0x1ad
[    0.647521]  [<ffffffff8083e875>] default_do_nmi+0x75/0x1e3
[    0.647521]  [<ffffffff8083f0b3>] do_nmi+0x5d/0x94
[    0.647521]  [<ffffffff8083e2d2>] nmi+0xa2/0xc2
[    0.647521]  [<ffffffff802b48c3>] ? check_bytes_and_report+0x11/0xcc
[    0.647521]  <<EOE>>  [<ffffffff80211274>] ? mcount_call+0x5/0x31
[    0.647521]  [<ffffffff802b49df>] check_object+0x61/0x1b0
[    0.647521]  [<ffffffff802b502a>] __slab_free+0x169/0x2ae
[    0.647521]  [<ffffffff80242dbf>] ? __cleanup_sighand+0x25/0x27
[    0.647521]  [<ffffffff80242dbf>] ? __cleanup_sighand+0x25/0x27
[    0.647521]  [<ffffffff802b60cd>] kmem_cache_free+0x85/0xb9
[    0.647521]  [<ffffffff80242dbf>] __cleanup_sighand+0x25/0x27
[    0.647521]  [<ffffffff80247b3d>] release_task+0x256/0x339
[    0.647521]  [<ffffffff802490b4>] do_exit+0x764/0x7ef
[    0.647521]  [<ffffffff8027624c>] __xchg+0x0/0x38
[    0.647521]  [<ffffffff8027619a>] ? stop_cpu+0x0/0xb2
[    0.647521]  [<ffffffff8027619a>] ? stop_cpu+0x0/0xb2
[    0.647521]  [<ffffffff8025922f>] kthread+0x4e/0x7b
[    0.647521]  [<ffffffff80212979>] child_rip+0xa/0x11
[    0.647521]  [<ffffffff80211c17>] ? restore_args+0x0/0x30
[    0.647521]  [<ffffffff802283a5>] ? native_load_tls+0x14/0x2e
[    0.647521]  [<ffffffff802591e1>] ? kthread+0x0/0x7b
[    0.647521]  [<ffffffff8021296f>] ? child_rip+0x0/0x11
[    0.647521]
[    0.647521] ---[ end trace 4eaa2a86a8e2da22 ]---
[    0.672032] initcall ftrace_dynamic_init+0x0/0xd6 returned 0 after 19 msecs

also mark it no-kprobes while at it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perfctr-watchdog.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 6bff382094f5..9abd48b22674 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -17,6 +17,8 @@
 #include <linux/bitops.h>
 #include <linux/smp.h>
 #include <linux/nmi.h>
+#include <linux/kprobes.h>
+
 #include <asm/apic.h>
 #include <asm/intel_arch_perfmon.h>
 
@@ -336,7 +338,8 @@ static void single_msr_unreserve(void)
 	release_perfctr_nmi(wd_ops->perfctr);
 }
 
-static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes
+single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
 	/* start the cycle over again */
 	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
@@ -401,7 +404,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
 	return 1;
 }
 
-static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
 	/*
 	 * P6 based Pentium M need to re-unmask
@@ -605,7 +608,7 @@ static void p4_unreserve(void)
 	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
 }
 
-static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
 	unsigned dummy;
 	/*
@@ -784,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz)
 	return hz;
 }
 
-int lapic_wd_event(unsigned nmi_hz)
+int __kprobes lapic_wd_event(unsigned nmi_hz)
 {
 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 	u64 ctr;
-- 
GitLab


From 8da3821ba5634497da63d58a69e24a97697c4a2b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Aug 2008 15:45:07 -0400
Subject: [PATCH 231/895] ftrace: create __mcount_loc section

This patch creates a section in the kernel called "__mcount_loc".
This will hold a list of pointers to the mcount relocation for
each call site of mcount.

For example:

objdump -dr init/main.o
[...]
Disassembly of section .text:

0000000000000000 <do_one_initcall>:
   0:   55                      push   %rbp
[...]
000000000000017b <init_post>:
 17b:   55                      push   %rbp
 17c:   48 89 e5                mov    %rsp,%rbp
 17f:   53                      push   %rbx
 180:   48 83 ec 08             sub    $0x8,%rsp
 184:   e8 00 00 00 00          callq  189 <init_post+0xe>
                        185: R_X86_64_PC32      mcount+0xfffffffffffffffc
[...]

We will add a section to point to each function call.

   .section __mcount_loc,"a",@progbits
[...]
   .quad .text + 0x185
[...]

The offset to of the mcount call site in init_post is an offset from
the start of the section, and not the start of the function init_post.
The mcount relocation is at the call site 0x185 from the start of the
.text section.

  .text + 0x185  == init_post + 0xa

We need a way to add this __mcount_loc section in a way that we do not
lose the relocations after final link.  The .text section here will
be attached to all other .text sections after final link and the
offsets will be meaningless.  We need to keep track of where these
.text sections are.

To do this, we use the start of the first function in the section.
do_one_initcall.  We can make a tmp.s file with this function as a reference
to the start of the .text section.

   .section __mcount_loc,"a",@progbits
[...]
   .quad do_one_initcall + 0x185
[...]

Then we can compile the tmp.s into a tmp.o

  gcc -c tmp.s -o tmp.o

And link it into back into main.o.

  ld -r main.o tmp.o -o tmp_main.o
  mv tmp_main.o main.o

But we have a problem.  What happens if the first function in a section
is not exported, and is a static function. The linker will not let
the tmp.o use it.  This case exists in main.o as well.

Disassembly of section .init.text:

0000000000000000 <set_reset_devices>:
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   e8 00 00 00 00          callq  9 <set_reset_devices+0x9>
                        5: R_X86_64_PC32        mcount+0xfffffffffffffffc

The first function in .init.text is a static function.

00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices

The lowercase 't' means that set_reset_devices is local and is not exported.
If we simply try to link the tmp.o with the set_reset_devices we end
up with two symbols: one local and one global.

 .section __mcount_loc,"a",@progbits
 .quad set_reset_devices + 0x10

00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices
                 U set_reset_devices

We still have an undefined reference to set_reset_devices, and if we try
to compile the kernel, we will end up with an undefined reference to
set_reset_devices, or even worst, it could be exported someplace else,
and then we will have a reference to the wrong location.

To handle this case, we make an intermediate step using objcopy.
We convert set_reset_devices into a global exported symbol before linking
it with tmp.o and set it back afterwards.

00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 T set_reset_devices

00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 T set_reset_devices

00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices

Now we have a section in main.o called __mcount_loc that we can place
somewhere in the kernel using vmlinux.ld.S and access it to convert
all these locations that call mcount into nops before starting SMP
and thus, eliminating the need to do this with kstop_machine.

Note, A well documented perl script (scripts/recordmcount.pl) is used
to do all this in one location.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/vmlinux.lds.h |   8 +
 kernel/trace/Kconfig              |   8 +
 scripts/Makefile.build            |   6 +
 scripts/recordmcount.pl           | 280 ++++++++++++++++++++++++++++++
 4 files changed, 302 insertions(+)
 create mode 100755 scripts/recordmcount.pl

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3d8e472a09c8..838d9b2a0da1 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -37,6 +37,13 @@
 #define MEM_DISCARD(sec) *(.mem##sec)
 #endif
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+#define MCOUNT_REC()	VMLINUX_SYMBOL(__start_mcount_loc) = .; \
+			*(__mcount_loc)				\
+			VMLINUX_SYMBOL(__stop_mcount_loc) = .;
+#else
+#define MCOUNT_REC()
+#endif
 
 /* .data section */
 #define DATA_DATA							\
@@ -192,6 +199,7 @@
 	/* __*init sections */						\
 	__init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) {		\
 		*(.ref.rodata)						\
+		MCOUNT_REC()						\
 		DEV_KEEP(init.rodata)					\
 		DEV_KEEP(exit.rodata)					\
 		CPU_KEEP(init.rodata)					\
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index cae2637d5e68..14d9505178ca 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -7,6 +7,9 @@ config HAVE_FTRACE
 config HAVE_DYNAMIC_FTRACE
 	bool
 
+config HAVE_FTRACE_MCOUNT_RECORD
+	bool
+
 config TRACER_MAX_TRACE
 	bool
 
@@ -122,6 +125,11 @@ config DYNAMIC_FTRACE
 	 were made. If so, it runs stop_machine (stops all CPUS)
 	 and modifies the code to jump over the call to ftrace.
 
+config FTRACE_MCOUNT_RECORD
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on HAVE_FTRACE_MCOUNT_RECORD
+
 config FTRACE_SELFTEST
 	bool
 
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 277cfe0b7100..463ddcc583ed 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,10 +198,16 @@ cmd_modversions =							\
 	fi;
 endif
 
+ifdef CONFIG_FTRACE_MCOUNT_RECORD
+cmd_record_mcount = scripts/recordmcount.pl "$(ARCH)" \
+	"$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
+endif
+
 define rule_cc_o_c
 	$(call echo-cmd,checksrc) $(cmd_checksrc)			  \
 	$(call echo-cmd,cc_o_c) $(cmd_cc_o_c);				  \
 	$(cmd_modversions)						  \
+	$(cmd_record_mcount)						  \
 	scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' >    \
 	                                              $(dot-target).tmp;  \
 	rm -f $(depfile);						  \
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
new file mode 100755
index 000000000000..44b4b23e91b2
--- /dev/null
+++ b/scripts/recordmcount.pl
@@ -0,0 +1,280 @@
+#!/usr/bin/perl -w
+# (c) 2008, Steven Rostedt <srostedt@redhat.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# recordmcount.pl - makes a section called __mcount_loc that holds
+#                   all the offsets to the calls to mcount.
+#
+#
+# What we want to end up with is a section in vmlinux called
+# __mcount_loc that contains a list of pointers to all the
+# call sites in the kernel that call mcount. Later on boot up, the kernel
+# will read this list, save the locations and turn them into nops.
+# When tracing or profiling is later enabled, these locations will then
+# be converted back to pointers to some function.
+#
+# This is no easy feat. This script is called just after the original
+# object is compiled and before it is linked.
+#
+# The references to the call sites are offsets from the section of text
+# that the call site is in. Hence, all functions in a section that
+# has a call site to mcount, will have the offset from the beginning of
+# the section and not the beginning of the function.
+#
+# The trick is to find a way to record the beginning of the section.
+# The way we do this is to look at the first function in the section
+# which will also be the location of that section after final link.
+# e.g.
+#
+#  .section ".text.sched"
+#  .globl my_func
+#  my_func:
+#        [...]
+#        call mcount  (offset: 0x5)
+#        [...]
+#        ret
+#  other_func:
+#        [...]
+#        call mcount (offset: 0x1b)
+#        [...]
+#
+# Both relocation offsets for the mcounts in the above example will be
+# offset from .text.sched. If we make another file called tmp.s with:
+#
+#  .section __mcount_loc
+#  .quad  my_func + 0x5
+#  .quad  my_func + 0x1b
+#
+# We can then compile this tmp.s into tmp.o, and link it to the original
+# object.
+#
+# But this gets hard if my_func is not globl (a static function).
+# In such a case we have:
+#
+#  .section ".text.sched"
+#  my_func:
+#        [...]
+#        call mcount  (offset: 0x5)
+#        [...]
+#        ret
+#  .globl my_func
+#  other_func:
+#        [...]
+#        call mcount (offset: 0x1b)
+#        [...]
+#
+# If we make the tmp.s the same as above, when we link together with
+# the original object, we will end up with two symbols for my_func:
+# one local, one global.  After final compile, we will end up with
+# an undefined reference to my_func.
+#
+# Since local objects can reference local variables, we need to find
+# a way to make tmp.o reference the local objects of the original object
+# file after it is linked together. To do this, we convert the my_func
+# into a global symbol before linking tmp.o. Then after we link tmp.o
+# we will only have a single symbol for my_func that is global.
+# We can convert my_func back into a local symbol and we are done.
+#
+# Here are the steps we take:
+#
+# 1) Record all the local symbols by using 'nm'
+# 2) Use objdump to find all the call site offsets and sections for
+#    mcount.
+# 3) Compile the list into its own object.
+# 4) Do we have to deal with local functions? If not, go to step 8.
+# 5) Make an object that converts these local functions to global symbols
+#    with objcopy.
+# 6) Link together this new object with the list object.
+# 7) Convert the local functions back to local symbols and rename
+#    the result as the original object.
+#    End.
+# 8) Link the object with the list object.
+# 9) Move the result back to the original object.
+#    End.
+#
+
+use strict;
+
+my $P = $0;
+$P =~ s@.*/@@g;
+
+my $V = '0.1';
+
+if ($#ARGV < 6) {
+	print "usage: $P arch objdump objcopy cc ld nm rm mv inputfile\n";
+	print "version: $V\n";
+	exit(1);
+}
+
+my ($arch, $objdump, $objcopy, $cc, $ld, $nm, $rm, $mv, $inputfile) = @ARGV;
+
+$objdump = "objdump" if ((length $objdump) == 0);
+$objcopy = "objcopy" if ((length $objcopy) == 0);
+$cc = "gcc" if ((length $cc) == 0);
+$ld = "ld" if ((length $ld) == 0);
+$nm = "nm" if ((length $nm) == 0);
+$rm = "rm" if ((length $rm) == 0);
+$mv = "mv" if ((length $mv) == 0);
+
+#print STDERR "running: $P '$arch' '$objdump' '$objcopy' '$cc' '$ld' " .
+#    "'$nm' '$rm' '$mv' '$inputfile'\n";
+
+my %locals;
+my %convert;
+
+my $type;
+my $section_regex;	# Find the start of a section
+my $function_regex;	# Find the name of a function (return func name)
+my $mcount_regex;	# Find the call site to mcount (return offset)
+
+if ($arch eq "x86_64") {
+    $section_regex = "Disassembly of section";
+    $function_regex = "<(.*?)>:";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
+    $type = ".quad";
+} elsif ($arch eq "i386") {
+    $section_regex = "Disassembly of section";
+    $function_regex = "<(.*?)>:";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
+    $type = ".long";
+} else {
+    die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
+}
+
+my $text_found = 0;
+my $read_function = 0;
+my $opened = 0;
+my $text = "";
+my $mcount_section = "__mcount_loc";
+
+my $dirname;
+my $filename;
+my $prefix;
+my $ext;
+
+if ($inputfile =~ m,^(.*)/([^/]*)$,) {
+    $dirname = $1;
+    $filename = $2;
+} else {
+    $dirname = ".";
+    $filename = $inputfile;
+}
+
+if ($filename =~ m,^(.*)(\.\S),) {
+    $prefix = $1;
+    $ext = $2;
+} else {
+    $prefix = $filename;
+    $ext = "";
+}
+
+my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
+my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
+
+#
+# Step 1: find all the local symbols (static functions).
+#
+open (IN, "$nm $inputfile|") || die "error running $nm";
+while (<IN>) {
+    if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) {
+	$locals{$1} = 1;
+    }
+}
+close(IN);
+
+#
+# Step 2: find the sections and mcount call sites
+#
+open(IN, "$objdump -dr $inputfile|") || die "error running $objdump";
+
+while (<IN>) {
+    # is it a section?
+    if (/$section_regex/) {
+	$read_function = 1;
+	$text_found = 0;
+    # section found, now is this a start of a function?
+    } elsif ($read_function && /$function_regex/) {
+	$read_function = 0;
+	$text_found = 1;
+	$text = $1;
+	# is this function static? If so, note this fact.
+	if (defined $locals{$text}) {
+	    $convert{$text} = 1;
+	}
+    # is this a call site to mcount? If so, print the offset from the section
+    } elsif ($text_found && /$mcount_regex/) {
+	if (!$opened) {
+	    open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
+	    $opened = 1;
+	    print FILE "\t.section $mcount_section,\"a\",\@progbits\n";
+	}
+	print FILE "\t$type $text + 0x$1\n";
+    }
+}
+
+# If we did not find any mcount callers, we are done (do nothing).
+if (!$opened) {
+    exit(0);
+}
+
+close(FILE);
+
+#
+# Step 3: Compile the file that holds the list of call sites to mcount.
+#
+`$cc -o $mcount_o -c $mcount_s`;
+
+my @converts = keys %convert;
+
+#
+# Step 4: Do we have sections that started with local functions?
+#
+if ($#converts >= 0) {
+    my $globallist = "";
+    my $locallist = "";
+
+    foreach my $con (@converts) {
+	$globallist .= " --globalize-symbol $con";
+	$locallist .= " --localize-symbol $con";
+    }
+
+    my $globalobj = $dirname . "/.tmp_gl_" . $filename;
+    my $globalmix = $dirname . "/.tmp_mx_" . $filename;
+
+    #
+    # Step 5: set up each local function as a global
+    #
+    `$objcopy $globallist $inputfile $globalobj`;
+
+    #
+    # Step 6: Link the global version to our list.
+    #
+    `$ld -r $globalobj $mcount_o -o $globalmix`;
+
+    #
+    # Step 7: Convert the local functions back into local symbols
+    #
+    `$objcopy $locallist $globalmix $inputfile`;
+
+    # Remove the temp files
+    `$rm $globalobj $globalmix`;
+
+} else {
+
+    my $mix = $dirname . "/.tmp_mx_" . $filename;
+
+    #
+    # Step 8: Link the object with our list of call sites object.
+    #
+    `$ld -r $inputfile $mcount_o -o $mix`;
+
+    #
+    # Step 9: Move the result back to the original object.
+    #
+    `$mv $mix $inputfile`;
+}
+
+# Clean up the temp files
+`$rm $mcount_o $mcount_s`;
+
+exit(0);
-- 
GitLab


From 68bf21aa15c85d2e9b623dcda2b1ed8893275fa1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Aug 2008 15:45:08 -0400
Subject: [PATCH 232/895] ftrace: mcount call site on boot nops core

This is the infrastructure to the converting the mcount call sites
recorded by the __mcount_loc section into nops on boot. It also allows
for using these sites to enable tracing as normal. When the __mcount_loc
section is used, the "ftraced" kernel thread is disabled.

This uses the current infrastructure to record the mcount call sites
as well as convert them to nops. The mcount function is kept as a stub
on boot up and not converted to the ftrace_record_ip function. We use the
ftrace_record_ip to only record from the table.

This patch does not handle modules. That comes with a later patch.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/ftrace.h |  10 +++
 include/linux/ftrace.h   |   6 ++
 init/main.c              |   3 +
 kernel/trace/ftrace.c    | 148 +++++++++++++++++++++++++++------------
 4 files changed, 124 insertions(+), 43 deletions(-)

diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
index be0e004ad148..1bb6f9bbe1ab 100644
--- a/include/asm-x86/ftrace.h
+++ b/include/asm-x86/ftrace.h
@@ -7,6 +7,16 @@
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/*
+	 * call mcount is "e8 <4 byte offset>"
+	 * The addr points to the 4 byte offset and the caller of this
+	 * function wants the pointer to e8. Simply subtract one.
+	 */
+	return addr - 1;
+}
 #endif
 
 #endif /* CONFIG_FTRACE */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index bb384068272e..d4d6ab453b78 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -162,4 +162,10 @@ static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 #endif
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+extern void ftrace_init(void);
+#else
+static inline void ftrace_init(void) { }
+#endif
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/init/main.c b/init/main.c
index 3820323c4c84..ded1fae965ab 100644
--- a/init/main.c
+++ b/init/main.c
@@ -60,6 +60,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/idr.h>
+#include <linux/ftrace.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -687,6 +688,8 @@ asmlinkage void __init start_kernel(void)
 
 	acpi_early_init(); /* before LAPIC and SMP init */
 
+	ftrace_init();
+
 	/* Do the rest non-__init'ed, we're now alive */
 	rest_init();
 }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f6e3af31b403..df96d5990c04 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -792,47 +792,7 @@ static int ftrace_update_code(void)
 	return 1;
 }
 
-static int ftraced(void *ignore)
-{
-	unsigned long usecs;
-
-	while (!kthread_should_stop()) {
-
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		/* check once a second */
-		schedule_timeout(HZ);
-
-		if (unlikely(ftrace_disabled))
-			continue;
-
-		mutex_lock(&ftrace_sysctl_lock);
-		mutex_lock(&ftraced_lock);
-		if (!ftraced_suspend && !ftraced_stop &&
-		    ftrace_update_code()) {
-			usecs = nsecs_to_usecs(ftrace_update_time);
-			if (ftrace_update_tot_cnt > 100000) {
-				ftrace_update_tot_cnt = 0;
-				pr_info("hm, dftrace overflow: %lu change%s"
-					" (%lu total) in %lu usec%s\n",
-					ftrace_update_cnt,
-					ftrace_update_cnt != 1 ? "s" : "",
-					ftrace_update_tot_cnt,
-					usecs, usecs != 1 ? "s" : "");
-				ftrace_disabled = 1;
-				WARN_ON_ONCE(1);
-			}
-		}
-		mutex_unlock(&ftraced_lock);
-		mutex_unlock(&ftrace_sysctl_lock);
-
-		ftrace_shutdown_replenish();
-	}
-	__set_current_state(TASK_RUNNING);
-	return 0;
-}
-
-static int __init ftrace_dyn_table_alloc(void)
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
 {
 	struct ftrace_page *pg;
 	int cnt;
@@ -859,7 +819,9 @@ static int __init ftrace_dyn_table_alloc(void)
 
 	pg = ftrace_pages = ftrace_pages_start;
 
-	cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
+	cnt = num_to_init / ENTRIES_PER_PAGE;
+	pr_info("ftrace: allocating %ld hash entries in %d pages\n",
+		num_to_init, cnt);
 
 	for (i = 0; i < cnt; i++) {
 		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -1556,6 +1518,104 @@ static __init int ftrace_init_debugfs(void)
 
 fs_initcall(ftrace_init_debugfs);
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+static int ftrace_convert_nops(unsigned long *start,
+			       unsigned long *end)
+{
+	unsigned long *p;
+	unsigned long addr;
+	unsigned long flags;
+
+	p = start;
+	while (p < end) {
+		addr = ftrace_call_adjust(*p++);
+		ftrace_record_ip(addr);
+		ftrace_shutdown_replenish();
+	}
+
+	/* p is ignored */
+	local_irq_save(flags);
+	__ftrace_update_code(p);
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+extern unsigned long __start_mcount_loc[];
+extern unsigned long __stop_mcount_loc[];
+
+void __init ftrace_init(void)
+{
+	unsigned long count, addr, flags;
+	int ret;
+
+	/* Keep the ftrace pointer to the stub */
+	addr = (unsigned long)ftrace_stub;
+
+	local_irq_save(flags);
+	ftrace_dyn_arch_init(&addr);
+	local_irq_restore(flags);
+
+	/* ftrace_dyn_arch_init places the return code in addr */
+	if (addr)
+		goto failed;
+
+	count = __stop_mcount_loc - __start_mcount_loc;
+
+	ret = ftrace_dyn_table_alloc(count);
+	if (ret)
+		goto failed;
+
+	last_ftrace_enabled = ftrace_enabled = 1;
+
+	ret = ftrace_convert_nops(__start_mcount_loc,
+				  __stop_mcount_loc);
+
+	return;
+ failed:
+	ftrace_disabled = 1;
+}
+#else /* CONFIG_FTRACE_MCOUNT_RECORD */
+static int ftraced(void *ignore)
+{
+	unsigned long usecs;
+
+	while (!kthread_should_stop()) {
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/* check once a second */
+		schedule_timeout(HZ);
+
+		if (unlikely(ftrace_disabled))
+			continue;
+
+		mutex_lock(&ftrace_sysctl_lock);
+		mutex_lock(&ftraced_lock);
+		if (!ftraced_suspend && !ftraced_stop &&
+		    ftrace_update_code()) {
+			usecs = nsecs_to_usecs(ftrace_update_time);
+			if (ftrace_update_tot_cnt > 100000) {
+				ftrace_update_tot_cnt = 0;
+				pr_info("hm, dftrace overflow: %lu change%s"
+					" (%lu total) in %lu usec%s\n",
+					ftrace_update_cnt,
+					ftrace_update_cnt != 1 ? "s" : "",
+					ftrace_update_tot_cnt,
+					usecs, usecs != 1 ? "s" : "");
+				ftrace_disabled = 1;
+				WARN_ON_ONCE(1);
+			}
+		}
+		mutex_unlock(&ftraced_lock);
+		mutex_unlock(&ftrace_sysctl_lock);
+
+		ftrace_shutdown_replenish();
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
 static int __init ftrace_dynamic_init(void)
 {
 	struct task_struct *p;
@@ -1572,7 +1632,7 @@ static int __init ftrace_dynamic_init(void)
 		goto failed;
 	}
 
-	ret = ftrace_dyn_table_alloc();
+	ret = ftrace_dyn_table_alloc(NR_TO_INIT);
 	if (ret)
 		goto failed;
 
@@ -1593,6 +1653,8 @@ static int __init ftrace_dynamic_init(void)
 }
 
 core_initcall(ftrace_dynamic_init);
+#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
+
 #else
 # define ftrace_startup()		do { } while (0)
 # define ftrace_shutdown()		do { } while (0)
-- 
GitLab


From 90d595fe5ca4b685465c068907e6e554760abea8 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Aug 2008 15:45:09 -0400
Subject: [PATCH 233/895] ftrace: enable mcount recording for modules

This patch enables the loading of the __mcount_section of modules and
changing all the callers of mcount into nops.

The modification is done before the init_module function is called, so
again, we do not need to use kstop_machine to make these changes.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |  3 +++
 kernel/module.c        | 11 +++++++++++
 kernel/trace/ftrace.c  |  5 +++++
 3 files changed, 19 insertions(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d4d6ab453b78..4936489f9ed8 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -164,8 +164,11 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
+extern void ftrace_init_module(unsigned long *start, unsigned long *end);
 #else
 static inline void ftrace_init(void) { }
+static inline void
+ftrace_init_module(unsigned long *start, unsigned long *end) { }
 #endif
 
 #endif /* _LINUX_FTRACE_H */
diff --git a/kernel/module.c b/kernel/module.c
index 661d73db786e..d753fd9d83ec 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -47,6 +47,7 @@
 #include <linux/license.h>
 #include <asm/sections.h>
 #include <linux/tracepoint.h>
+#include <linux/ftrace.h>
 
 #if 0
 #define DEBUGP printk
@@ -1834,6 +1835,7 @@ static noinline struct module *load_module(void __user *umod,
 	unsigned int markersstringsindex;
 	unsigned int tracepointsindex;
 	unsigned int tracepointsstringsindex;
+	unsigned int mcountindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -2124,6 +2126,9 @@ static noinline struct module *load_module(void __user *umod,
 	tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
 					"__tracepoints_strings");
 
+	mcountindex = find_sec(hdr, sechdrs, secstrings,
+			       "__mcount_loc");
+
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
 		const char *strtab = (char *)sechdrs[strindex].sh_addr;
@@ -2184,6 +2189,12 @@ static noinline struct module *load_module(void __user *umod,
 			mod->tracepoints + mod->num_tracepoints);
 #endif
 	}
+
+	if (mcountindex) {
+		void *mseg = (void *)sechdrs[mcountindex].sh_addr;
+		ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
+	}
+
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index df96d5990c04..ea45bb1c0fd6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1541,6 +1541,11 @@ static int ftrace_convert_nops(unsigned long *start,
 	return 0;
 }
 
+void ftrace_init_module(unsigned long *start, unsigned long *end)
+{
+	ftrace_convert_nops(start, end);
+}
+
 extern unsigned long __start_mcount_loc[];
 extern unsigned long __stop_mcount_loc[];
 
-- 
GitLab


From 29e71abf56cebc5c5a4e184a6eb4360cc58554ad Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Aug 2008 15:45:10 -0400
Subject: [PATCH 234/895] ftrace: rebuild everything on change to
 FTRACE_MCOUNT_RECORD

When enabling or disabling CONFIG_FTRACE_MCOUNT_RECORD, we want a full
kernel compile to handle the adding of the __mcount_loc sections.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 75d81f157d2e..ecce4a4ccd5f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -486,4 +486,9 @@ struct sysinfo {
 #define NUMA_BUILD 0
 #endif
 
+/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
+#endif
+
 #endif
-- 
GitLab


From e4b2b8866121bd06d5f3d9de0f79a04339ffa252 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Aug 2008 15:45:11 -0400
Subject: [PATCH 235/895] ftrace: enable using mcount recording on x86

Enable the use of the __mcount_loc infrastructure on x86_64 and i386.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fc8351f374fd..5a34c5427a07 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -25,6 +25,7 @@ config X86
 	select HAVE_KPROBES
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_KRETPROBES
+	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
-- 
GitLab


From 0a37605c2261a06d8cafc62dee11374ad676c8c4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Aug 2008 15:45:12 -0400
Subject: [PATCH 236/895] ftrace: x86 mcount stub

x86 now sets up the mcount locations through the build and no longer
needs to record the ip when the function is executed. This patch changes
the initial mcount to simply return. There's no need to do any other work.
If the ftrace start up test fails, the original mcount will be what everything
will use, so having this as fast as possible is a good thing.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S | 14 --------------
 arch/x86/kernel/entry_64.S | 26 --------------------------
 arch/x86/kernel/ftrace.c   | 14 ++------------
 3 files changed, 2 insertions(+), 52 deletions(-)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b21fbfaffe39..4e4269c73bb7 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1153,20 +1153,6 @@ ENDPROC(xen_failsafe_callback)
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 ENTRY(mcount)
-	pushl %eax
-	pushl %ecx
-	pushl %edx
-	movl 0xc(%esp), %eax
-	subl $MCOUNT_INSN_SIZE, %eax
-
-.globl mcount_call
-mcount_call:
-	call ftrace_stub
-
-	popl %edx
-	popl %ecx
-	popl %eax
-
 	ret
 END(mcount)
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1db6ce4314e1..09e7145484c5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -64,32 +64,6 @@
 #ifdef CONFIG_FTRACE
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
-
-	subq $0x38, %rsp
-	movq %rax, (%rsp)
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
-
-	movq 0x38(%rsp), %rdi
-	subq $MCOUNT_INSN_SIZE, %rdi
-
-.globl mcount_call
-mcount_call:
-	call ftrace_stub
-
-	movq 48(%rsp), %r9
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
-	movq (%rsp), %rax
-	addq $0x38, %rsp
-
 	retq
 END(mcount)
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ab115cd15fdf..96aadbfedcc6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -112,18 +112,8 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
 
 notrace int ftrace_mcount_set(unsigned long *data)
 {
-	unsigned long ip = (long)(&mcount_call);
-	unsigned long *addr = data;
-	unsigned char old[MCOUNT_INSN_SIZE], *new;
-
-	/*
-	 * Replace the mcount stub with a pointer to the
-	 * ip recorder function.
-	 */
-	memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
-	new = ftrace_call_replace(ip, *addr);
-	*addr = ftrace_modify_code(ip, old, new);
-
+	/* mcount is initialized as a nop */
+	*data = 0;
 	return 0;
 }
 
-- 
GitLab


From 732f3ca7d4ba3c1be8d051d52302ef441ee7748b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Aug 2008 18:05:05 -0400
Subject: [PATCH 237/895] ftrace: use only 5 byte nops for x86

Mathieu Desnoyers revealed a bug in the original code. The nop that is
used to relpace the mcount caller can be a two part nop. This runs the
risk where a process can be preempted after executing the first nop, but
before the second part of the nop.

The ftrace code calls kstop_machine to keep multiple CPUs from executing
code that is being modified, but it does not protect against a task preempting
in the middle of a two part nop.

If the above preemption happens and the tracer is enabled, after the
kstop_machine runs, all those nops will be calls to the trace function.
If the preempted process that was preempted between the two nops is executed
again, it will execute half of the call to the trace function, and this
might crash the system.

This patch instead uses what both the latest Intel and AMD spec suggests.
That is the P6_NOP5 sequence of "0x0f 0x1f 0x44 0x00 0x00".

Note, some older CPUs and QEMU might fault on this nop, so this nop
is executed with fault handling first. If it detects a fault, it will then
use the code "0x66 0x66 0x66 0x66 0x90". If that faults, it will then
default to a simple "jmp 1f; .byte 0x00 0x00 0x00; 1:". The jmp is
not optimal but will do if the first two can not be executed.

TODO: Examine the cpuid to determine the nop to use.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 68 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 61 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 96aadbfedcc6..4151c91254e8 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -16,8 +16,8 @@
 #include <linux/init.h>
 #include <linux/list.h>
 
-#include <asm/alternative.h>
 #include <asm/ftrace.h>
+#include <asm/nops.h>
 
 
 /* Long is fine, even if it is only 4 bytes ;-) */
@@ -119,13 +119,67 @@ notrace int ftrace_mcount_set(unsigned long *data)
 
 int __init ftrace_dyn_arch_init(void *data)
 {
-	const unsigned char *const *noptable = find_nop_table();
-
-	/* This is running in kstop_machine */
-
-	ftrace_mcount_set(data);
+	extern const unsigned char ftrace_test_p6nop[];
+	extern const unsigned char ftrace_test_nop5[];
+	extern const unsigned char ftrace_test_jmp[];
+	int faulted = 0;
 
-	ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
+	/*
+	 * There is no good nop for all x86 archs.
+	 * We will default to using the P6_NOP5, but first we
+	 * will test to make sure that the nop will actually
+	 * work on this CPU. If it faults, we will then
+	 * go to a lesser efficient 5 byte nop. If that fails
+	 * we then just use a jmp as our nop. This isn't the most
+	 * efficient nop, but we can not use a multi part nop
+	 * since we would then risk being preempted in the middle
+	 * of that nop, and if we enabled tracing then, it might
+	 * cause a system crash.
+	 *
+	 * TODO: check the cpuid to determine the best nop.
+	 */
+	asm volatile (
+		"jmp ftrace_test_jmp\n"
+		/* This code needs to stay around */
+		".section .text, \"ax\"\n"
+		"ftrace_test_jmp:"
+		"jmp ftrace_test_p6nop\n"
+		".byte 0x00,0x00,0x00\n"  /* 2 byte jmp + 3 bytes */
+		"ftrace_test_p6nop:"
+		P6_NOP5
+		"jmp 1f\n"
+		"ftrace_test_nop5:"
+		".byte 0x66,0x66,0x66,0x66,0x90\n"
+		"jmp 1f\n"
+		".previous\n"
+		"1:"
+		".section .fixup, \"ax\"\n"
+		"2:	movl $1, %0\n"
+		"	jmp ftrace_test_nop5\n"
+		"3:	movl $2, %0\n"
+		"	jmp 1b\n"
+		".previous\n"
+		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
+		_ASM_EXTABLE(ftrace_test_nop5, 3b)
+		: "=r"(faulted) : "0" (faulted));
+
+	switch (faulted) {
+	case 0:
+		pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
+		ftrace_nop = (unsigned long *)ftrace_test_p6nop;
+		break;
+	case 1:
+		pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
+		ftrace_nop = (unsigned long *)ftrace_test_nop5;
+		break;
+	case 2:
+		pr_info("ftrace: converting mcount calls to jmp 1f\n");
+		ftrace_nop = (unsigned long *)ftrace_test_jmp;
+		break;
+	}
+
+	/* The return code is retured via data */
+	*(unsigned long *)data = 0;
 
 	return 0;
 }
-- 
GitLab


From a9fdda33cd7c7519b082e37538fe790f9ff684bb Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Aug 2008 22:47:17 -0400
Subject: [PATCH 238/895] ftrace: do not show freed records in
 available_filter_functions

Seems that freed records can appear in the available_filter_functions list.
This patch fixes that.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ea45bb1c0fd6..8affb6d00ec1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -872,15 +872,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		}
 	} else {
 		rec = &iter->pg->records[iter->idx++];
-		if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
+		if ((rec->flags & FTRACE_FL_FREE) ||
+
+		    (!(iter->flags & FTRACE_ITER_FAILURES) &&
 		     (rec->flags & FTRACE_FL_FAILED)) ||
 
 		    ((iter->flags & FTRACE_ITER_FAILURES) &&
-		     (!(rec->flags & FTRACE_FL_FAILED) ||
-		      (rec->flags & FTRACE_FL_FREE))) ||
-
-		    ((iter->flags & FTRACE_ITER_FILTER) &&
-		     !(rec->flags & FTRACE_FL_FILTER)) ||
+		     !(rec->flags & FTRACE_FL_FAILED)) ||
 
 		    ((iter->flags & FTRACE_ITER_NOTRACE) &&
 		     !(rec->flags & FTRACE_FL_NOTRACE))) {
-- 
GitLab


From 28614889bcb2558a47d02d52394b7fd9795a9547 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Aug 2008 22:47:18 -0400
Subject: [PATCH 239/895] ftrace: move notrace to compiler.h

The notrace define belongs in compiler.h so that it can be used in
init.h

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/compiler.h | 2 ++
 include/linux/linkage.h  | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 8322141ee480..98115d9d04da 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -44,6 +44,8 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # error Sorry, your compiler is too old/not recognized.
 #endif
 
+#define notrace __attribute__((no_instrument_function))
+
 /* Intel compiler defines __GNUC__. So we will overwrite implementations
  * coming from above header files here
  */
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 56ba37394656..9fd1f859021b 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -4,8 +4,6 @@
 #include <linux/compiler.h>
 #include <asm/linkage.h>
 
-#define notrace __attribute__((no_instrument_function))
-
 #ifdef __cplusplus
 #define CPP_ASMLINKAGE extern "C"
 #else
-- 
GitLab


From fed1939c64d2288938fdc1c367d49082da65e195 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Aug 2008 22:47:19 -0400
Subject: [PATCH 240/895] ftrace: remove old pointers to mcount

When a mcount pointer is recorded into a table, it is used to add or
remove calls to mcount (replacing them with nops). If the code is removed
via removing a module, the pointers still exist.  At modifying the code
a check is always made to make sure the code being replaced is the code
expected. In-other-words, the code being replaced is compared to what
it is expected to be before being replaced.

There is a very small chance that the code being replaced just happens
to look like code that calls mcount (very small since the call to mcount
is relative). To remove this chance, this patch adds ftrace_release to
allow module unloading to remove the pointers to mcount within the module.

Another change for init calls is made to not trace calls marked with
__init. The tracing can not be started until after init is done anyway.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |  2 ++
 include/linux/init.h   |  2 +-
 kernel/module.c        | 12 ++++++++----
 kernel/trace/ftrace.c  | 32 ++++++++++++++++++++++++++++++--
 4 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4936489f9ed8..6b232a2460c0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -165,10 +165,12 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
 extern void ftrace_init_module(unsigned long *start, unsigned long *end);
+extern void ftrace_release(void *start, unsigned long size);
 #else
 static inline void ftrace_init(void) { }
 static inline void
 ftrace_init_module(unsigned long *start, unsigned long *end) { }
+static inline void ftrace_release(void *start, unsigned long size) { }
 #endif
 
 #endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/init.h b/include/linux/init.h
index 93538b696e3d..27f61f6b3cb9 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -40,7 +40,7 @@
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init		__section(.init.text) __cold
+#define __init		__section(.init.text) __cold notrace
 #define __initdata	__section(.init.data)
 #define __initconst	__section(.init.rodata)
 #define __exitdata	__section(.exit.data)
diff --git a/kernel/module.c b/kernel/module.c
index d753fd9d83ec..7576c2d9462f 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1431,6 +1431,9 @@ static void free_module(struct module *mod)
 	/* Module unload stuff */
 	module_unload_free(mod);
 
+	/* release any pointers to mcount in this module */
+	ftrace_release(mod->module_core, mod->core_size);
+
 	/* This may be NULL, but that's OK */
 	module_free(mod, mod->module_init);
 	kfree(mod->args);
@@ -1839,6 +1842,7 @@ static noinline struct module *load_module(void __user *umod,
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+	void *mseg;
 	struct exception_table_entry *extable;
 	mm_segment_t old_fs;
 
@@ -2190,10 +2194,9 @@ static noinline struct module *load_module(void __user *umod,
 #endif
 	}
 
-	if (mcountindex) {
-		void *mseg = (void *)sechdrs[mcountindex].sh_addr;
-		ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
-	}
+	/* sechdrs[0].sh_size is always zero */
+	mseg = (void *)sechdrs[mcountindex].sh_addr;
+	ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
 
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
@@ -2264,6 +2267,7 @@ static noinline struct module *load_module(void __user *umod,
  cleanup:
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
+	ftrace_release(mod->module_core, mod->core_size);
  free_unload:
 	module_unload_free(mod);
 	module_free(mod, mod->module_init);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8affb6d00ec1..eadd0eaea9b6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -294,13 +294,37 @@ static inline void ftrace_del_hash(struct dyn_ftrace *node)
 
 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
-	/* no locking, only called from kstop_machine */
-
 	rec->ip = (unsigned long)ftrace_free_records;
 	ftrace_free_records = rec;
 	rec->flags |= FTRACE_FL_FREE;
 }
 
+void ftrace_release(void *start, unsigned long size)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+	unsigned long s = (unsigned long)start;
+	unsigned long e = s + size;
+	int i;
+
+	if (!start)
+		return;
+
+	/* No interrupt should call this */
+	spin_lock(&ftrace_lock);
+
+	for (pg = ftrace_pages_start; pg; pg = pg->next) {
+		for (i = 0; i < pg->index; i++) {
+			rec = &pg->records[i];
+
+			if ((rec->ip >= s) && (rec->ip < e))
+				ftrace_free_rec(rec);
+		}
+	}
+	spin_unlock(&ftrace_lock);
+
+}
+
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
 	struct dyn_ftrace *rec;
@@ -1527,7 +1551,9 @@ static int ftrace_convert_nops(unsigned long *start,
 	p = start;
 	while (p < end) {
 		addr = ftrace_call_adjust(*p++);
+		spin_lock(&ftrace_lock);
 		ftrace_record_ip(addr);
+		spin_unlock(&ftrace_lock);
 		ftrace_shutdown_replenish();
 	}
 
@@ -1541,6 +1567,8 @@ static int ftrace_convert_nops(unsigned long *start,
 
 void ftrace_init_module(unsigned long *start, unsigned long *end)
 {
+	if (start == end)
+		return;
 	ftrace_convert_nops(start, end);
 }
 
-- 
GitLab


From 2e2ca155cd2213b4f398031180fb3d399d5b7db9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 1 Aug 2008 12:26:40 -0400
Subject: [PATCH 241/895] ftrace: new continue entry - separate out from
 trace_entry

Some tracers will need to work with more than one entry. In order to do this
the trace_entry structure was split into two fields. One for the start of
all entries, and one to continue an existing entry.

The trace_entry structure now has a "field" entry that consists of the previous
content of the trace_entry, and a "cont" entry that is just a string buffer
the size of the "field" entry.

Thanks to Andrew Morton for suggesting this idea.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c           | 267 +++++++++++++++++----------------
 kernel/trace/trace.h           |  17 ++-
 kernel/trace/trace_mmiotrace.c |  12 +-
 3 files changed, 160 insertions(+), 136 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8f3fb3db61c3..76dfe6d2466c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -817,10 +817,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 
 	pc = preempt_count();
 
-	entry->preempt_count	= pc & 0xff;
-	entry->pid		= (tsk) ? tsk->pid : 0;
-	entry->t		= ftrace_now(raw_smp_processor_id());
-	entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
+	entry->field.preempt_count	= pc & 0xff;
+	entry->field.pid		= (tsk) ? tsk->pid : 0;
+	entry->field.t			= ftrace_now(raw_smp_processor_id());
+	entry->field.flags =
+		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
 		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -835,11 +836,11 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 
 	raw_local_irq_save(irq_flags);
 	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
+	entry				= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_FN;
-	entry->fn.ip		= ip;
-	entry->fn.parent_ip	= parent_ip;
+	entry->type			= TRACE_FN;
+	entry->field.fn.ip		= ip;
+	entry->field.fn.parent_ip	= parent_ip;
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
 }
@@ -862,10 +863,10 @@ void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data,
 	raw_local_irq_save(irq_flags);
 	__raw_spin_lock(&data->lock);
 
-	entry			= tracing_get_trace_entry(tr, data);
+	entry				= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, 0);
-	entry->type		= TRACE_MMIO_RW;
-	entry->mmiorw		= *rw;
+	entry->type			= TRACE_MMIO_RW;
+	entry->field.mmiorw		= *rw;
 
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -882,10 +883,10 @@ void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
 	raw_local_irq_save(irq_flags);
 	__raw_spin_lock(&data->lock);
 
-	entry			= tracing_get_trace_entry(tr, data);
+	entry				= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, 0);
-	entry->type		= TRACE_MMIO_MAP;
-	entry->mmiomap		= *map;
+	entry->type			= TRACE_MMIO_MAP;
+	entry->field.mmiomap		= *map;
 
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -909,12 +910,12 @@ void __trace_stack(struct trace_array *tr,
 	tracing_generic_entry_update(entry, flags);
 	entry->type		= TRACE_STACK;
 
-	memset(&entry->stack, 0, sizeof(entry->stack));
+	memset(&entry->field.stack, 0, sizeof(entry->field.stack));
 
 	trace.nr_entries	= 0;
 	trace.max_entries	= FTRACE_STACK_ENTRIES;
 	trace.skip		= skip;
-	trace.entries		= entry->stack.caller;
+	trace.entries		= entry->field.stack.caller;
 
 	save_stack_trace(&trace);
 }
@@ -930,12 +931,12 @@ __trace_special(void *__tr, void *__data,
 
 	raw_local_irq_save(irq_flags);
 	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
+	entry				= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, 0);
-	entry->type		= TRACE_SPECIAL;
-	entry->special.arg1	= arg1;
-	entry->special.arg2	= arg2;
-	entry->special.arg3	= arg3;
+	entry->type			= TRACE_SPECIAL;
+	entry->field.special.arg1	= arg1;
+	entry->field.special.arg2	= arg2;
+	entry->field.special.arg3	= arg3;
 	__trace_stack(tr, data, irq_flags, 4);
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -955,15 +956,15 @@ tracing_sched_switch_trace(struct trace_array *tr,
 
 	raw_local_irq_save(irq_flags);
 	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
+	entry				= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_CTX;
-	entry->ctx.prev_pid	= prev->pid;
-	entry->ctx.prev_prio	= prev->prio;
-	entry->ctx.prev_state	= prev->state;
-	entry->ctx.next_pid	= next->pid;
-	entry->ctx.next_prio	= next->prio;
-	entry->ctx.next_state	= next->state;
+	entry->type			= TRACE_CTX;
+	entry->field.ctx.prev_pid	= prev->pid;
+	entry->field.ctx.prev_prio	= prev->prio;
+	entry->field.ctx.prev_state	= prev->state;
+	entry->field.ctx.next_pid	= next->pid;
+	entry->field.ctx.next_prio	= next->prio;
+	entry->field.ctx.next_state	= next->state;
 	__trace_stack(tr, data, flags, 5);
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -984,12 +985,12 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry			= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, flags);
 	entry->type		= TRACE_WAKE;
-	entry->ctx.prev_pid	= curr->pid;
-	entry->ctx.prev_prio	= curr->prio;
-	entry->ctx.prev_state	= curr->state;
-	entry->ctx.next_pid	= wakee->pid;
-	entry->ctx.next_prio	= wakee->prio;
-	entry->ctx.next_state	= wakee->state;
+	entry->field.ctx.prev_pid	= curr->pid;
+	entry->field.ctx.prev_prio	= curr->prio;
+	entry->field.ctx.prev_state	= curr->state;
+	entry->field.ctx.next_pid	= wakee->pid;
+	entry->field.ctx.next_prio	= wakee->prio;
+	entry->field.ctx.next_state	= wakee->state;
 	__trace_stack(tr, data, flags, 6);
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -1118,7 +1119,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu)
 		/*
 		 * Pick the entry with the smallest timestamp:
 		 */
-		if (ent && (!next || ent->t < next->t)) {
+		if (ent && (!next || ent->field.t < next->field.t)) {
 			next = ent;
 			next_cpu = cpu;
 		}
@@ -1422,19 +1423,20 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 static void
 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 {
+	struct trace_field *field = &entry->field;
 	int hardirq, softirq;
 	char *comm;
 
-	comm = trace_find_cmdline(entry->pid);
+	comm = trace_find_cmdline(field->pid);
 
-	trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
+	trace_seq_printf(s, "%8.8s-%-5d ", comm, field->pid);
 	trace_seq_printf(s, "%d", cpu);
 	trace_seq_printf(s, "%c%c",
-			(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
-			((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
+			(field->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
+			((field->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
 
-	hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
-	softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
+	hardirq = field->flags & TRACE_FLAG_HARDIRQ;
+	softirq = field->flags & TRACE_FLAG_SOFTIRQ;
 	if (hardirq && softirq) {
 		trace_seq_putc(s, 'H');
 	} else {
@@ -1448,8 +1450,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 		}
 	}
 
-	if (entry->preempt_count)
-		trace_seq_printf(s, "%x", entry->preempt_count);
+	if (field->preempt_count)
+		trace_seq_printf(s, "%x", field->preempt_count);
 	else
 		trace_seq_puts(s, ".");
 }
@@ -1479,6 +1481,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	struct trace_entry *next_entry = find_next_entry(iter, NULL);
 	unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
 	struct trace_entry *entry = iter->ent;
+	struct trace_field *field = &entry->field;
 	unsigned long abs_usecs;
 	unsigned long rel_usecs;
 	char *comm;
@@ -1488,17 +1491,17 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 
 	if (!next_entry)
 		next_entry = entry;
-	rel_usecs = ns2usecs(next_entry->t - entry->t);
-	abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
+	rel_usecs = ns2usecs(next_entry->field.t - entry->field.t);
+	abs_usecs = ns2usecs(entry->field.t - iter->tr->time_start);
 
 	if (verbose) {
-		comm = trace_find_cmdline(entry->pid);
+		comm = trace_find_cmdline(field->pid);
 		trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
 				 " %ld.%03ldms (+%ld.%03ldms): ",
 				 comm,
-				 entry->pid, cpu, entry->flags,
-				 entry->preempt_count, trace_idx,
-				 ns2usecs(entry->t),
+				 field->pid, cpu, field->flags,
+				 field->preempt_count, trace_idx,
+				 ns2usecs(field->t),
 				 abs_usecs/1000,
 				 abs_usecs % 1000, rel_usecs/1000,
 				 rel_usecs % 1000);
@@ -1508,41 +1511,42 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	}
 	switch (entry->type) {
 	case TRACE_FN:
-		seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+		seq_print_ip_sym(s, field->fn.ip, sym_flags);
 		trace_seq_puts(s, " (");
-		if (kretprobed(entry->fn.parent_ip))
+		if (kretprobed(field->fn.parent_ip))
 			trace_seq_puts(s, KRETPROBE_MSG);
 		else
-			seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
+			seq_print_ip_sym(s, field->fn.parent_ip, sym_flags);
 		trace_seq_puts(s, ")\n");
 		break;
 	case TRACE_CTX:
 	case TRACE_WAKE:
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
+		T = field->ctx.next_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.next_state] : 'X';
 
-		state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
+		state = field->ctx.prev_state ?
+			__ffs(field->ctx.prev_state) + 1 : 0;
 		S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
-		comm = trace_find_cmdline(entry->ctx.next_pid);
+		comm = trace_find_cmdline(field->ctx.next_pid);
 		trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
-				 entry->ctx.prev_pid,
-				 entry->ctx.prev_prio,
+				 field->ctx.prev_pid,
+				 field->ctx.prev_prio,
 				 S, entry->type == TRACE_CTX ? "==>" : "  +",
-				 entry->ctx.next_pid,
-				 entry->ctx.next_prio,
+				 field->ctx.next_pid,
+				 field->ctx.next_prio,
 				 T, comm);
 		break;
 	case TRACE_SPECIAL:
 		trace_seq_printf(s, "# %ld %ld %ld\n",
-				 entry->special.arg1,
-				 entry->special.arg2,
-				 entry->special.arg3);
+				 field->special.arg1,
+				 field->special.arg2,
+				 field->special.arg3);
 		break;
 	case TRACE_STACK:
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i)
 				trace_seq_puts(s, " <= ");
-			seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
+			seq_print_ip_sym(s, field->stack.caller[i], sym_flags);
 		}
 		trace_seq_puts(s, "\n");
 		break;
@@ -1557,6 +1561,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
 	struct trace_entry *entry;
+	struct trace_field *field;
 	unsigned long usec_rem;
 	unsigned long long t;
 	unsigned long secs;
@@ -1566,14 +1571,15 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	int i;
 
 	entry = iter->ent;
+	field = &entry->field;
 
-	comm = trace_find_cmdline(iter->ent->pid);
+	comm = trace_find_cmdline(iter->ent->field.pid);
 
-	t = ns2usecs(entry->t);
+	t = ns2usecs(field->t);
 	usec_rem = do_div(t, 1000000ULL);
 	secs = (unsigned long)t;
 
-	ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
+	ret = trace_seq_printf(s, "%16s-%-5d ", comm, field->pid);
 	if (!ret)
 		return 0;
 	ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
@@ -1585,18 +1591,19 @@ static int print_trace_fmt(struct trace_iterator *iter)
 
 	switch (entry->type) {
 	case TRACE_FN:
-		ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+		ret = seq_print_ip_sym(s, field->fn.ip, sym_flags);
 		if (!ret)
 			return 0;
 		if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
-						entry->fn.parent_ip) {
+						field->fn.parent_ip) {
 			ret = trace_seq_printf(s, " <-");
 			if (!ret)
 				return 0;
-			if (kretprobed(entry->fn.parent_ip))
+			if (kretprobed(field->fn.parent_ip))
 				ret = trace_seq_puts(s, KRETPROBE_MSG);
 			else
-				ret = seq_print_ip_sym(s, entry->fn.parent_ip,
+				ret = seq_print_ip_sym(s,
+						       field->fn.parent_ip,
 						       sym_flags);
 			if (!ret)
 				return 0;
@@ -1607,26 +1614,26 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		break;
 	case TRACE_CTX:
 	case TRACE_WAKE:
-		S = entry->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.prev_state] : 'X';
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
+		S = field->ctx.prev_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.prev_state] : 'X';
+		T = field->ctx.next_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.next_state] : 'X';
 		ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
-				       entry->ctx.prev_pid,
-				       entry->ctx.prev_prio,
+				       field->ctx.prev_pid,
+				       field->ctx.prev_prio,
 				       S,
 				       entry->type == TRACE_CTX ? "==>" : "  +",
-				       entry->ctx.next_pid,
-				       entry->ctx.next_prio,
+				       field->ctx.next_pid,
+				       field->ctx.next_prio,
 				       T);
 		if (!ret)
 			return 0;
 		break;
 	case TRACE_SPECIAL:
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 entry->special.arg1,
-				 entry->special.arg2,
-				 entry->special.arg3);
+				 field->special.arg1,
+				 field->special.arg2,
+				 field->special.arg3);
 		if (!ret)
 			return 0;
 		break;
@@ -1637,7 +1644,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 				if (!ret)
 					return 0;
 			}
-			ret = seq_print_ip_sym(s, entry->stack.caller[i],
+			ret = seq_print_ip_sym(s, field->stack.caller[i],
 					       sym_flags);
 			if (!ret)
 				return 0;
@@ -1654,37 +1661,40 @@ static int print_raw_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
+	struct trace_field *field;
 	int ret;
 	int S, T;
 
 	entry = iter->ent;
+	field = &entry->field;
 
 	ret = trace_seq_printf(s, "%d %d %llu ",
-		entry->pid, iter->cpu, entry->t);
+		field->pid, iter->cpu, field->t);
 	if (!ret)
 		return 0;
 
 	switch (entry->type) {
 	case TRACE_FN:
 		ret = trace_seq_printf(s, "%x %x\n",
-					entry->fn.ip, entry->fn.parent_ip);
+					field->fn.ip,
+					field->fn.parent_ip);
 		if (!ret)
 			return 0;
 		break;
 	case TRACE_CTX:
 	case TRACE_WAKE:
-		S = entry->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.prev_state] : 'X';
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
+		S = field->ctx.prev_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.prev_state] : 'X';
+		T = field->ctx.next_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
 		ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
-				       entry->ctx.prev_pid,
-				       entry->ctx.prev_prio,
+				       field->ctx.prev_pid,
+				       field->ctx.prev_prio,
 				       S,
-				       entry->ctx.next_pid,
-				       entry->ctx.next_prio,
+				       field->ctx.next_pid,
+				       field->ctx.next_prio,
 				       T);
 		if (!ret)
 			return 0;
@@ -1692,9 +1702,9 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	case TRACE_SPECIAL:
 	case TRACE_STACK:
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 entry->special.arg1,
-				 entry->special.arg2,
-				 entry->special.arg3);
+				 field->special.arg1,
+				 field->special.arg2,
+				 field->special.arg3);
 		if (!ret)
 			return 0;
 		break;
@@ -1719,40 +1729,41 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	struct trace_seq *s = &iter->seq;
 	unsigned char newline = '\n';
 	struct trace_entry *entry;
+	struct trace_field *field;
 	int S, T;
 
 	entry = iter->ent;
+	field = &entry->field;
 
-	SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
+	SEQ_PUT_HEX_FIELD_RET(s, field->pid);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
-	SEQ_PUT_HEX_FIELD_RET(s, entry->t);
+	SEQ_PUT_HEX_FIELD_RET(s, field->t);
 
 	switch (entry->type) {
 	case TRACE_FN:
-		SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+		SEQ_PUT_HEX_FIELD_RET(s, field->fn.ip);
+		SEQ_PUT_HEX_FIELD_RET(s, field->fn.parent_ip);
 		break;
 	case TRACE_CTX:
 	case TRACE_WAKE:
-		S = entry->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.prev_state] : 'X';
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
+		S = field->ctx.prev_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.prev_state] : 'X';
+		T = field->ctx.next_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
+		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_pid);
+		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, S);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_pid);
+		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, T);
 		break;
 	case TRACE_SPECIAL:
 	case TRACE_STACK:
-		SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
+		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg1);
+		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg2);
+		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg3);
 		break;
 	}
 	SEQ_PUT_FIELD_RET(s, newline);
@@ -1764,31 +1775,33 @@ static int print_bin_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
+	struct trace_field *field;
 
 	entry = iter->ent;
+	field = &entry->field;
 
-	SEQ_PUT_FIELD_RET(s, entry->pid);
-	SEQ_PUT_FIELD_RET(s, entry->cpu);
-	SEQ_PUT_FIELD_RET(s, entry->t);
+	SEQ_PUT_FIELD_RET(s, field->pid);
+	SEQ_PUT_FIELD_RET(s, field->cpu);
+	SEQ_PUT_FIELD_RET(s, field->t);
 
 	switch (entry->type) {
 	case TRACE_FN:
-		SEQ_PUT_FIELD_RET(s, entry->fn.ip);
-		SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
+		SEQ_PUT_FIELD_RET(s, field->fn.ip);
+		SEQ_PUT_FIELD_RET(s, field->fn.parent_ip);
 		break;
 	case TRACE_CTX:
-		SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
+		SEQ_PUT_FIELD_RET(s, field->ctx.prev_pid);
+		SEQ_PUT_FIELD_RET(s, field->ctx.prev_prio);
+		SEQ_PUT_FIELD_RET(s, field->ctx.prev_state);
+		SEQ_PUT_FIELD_RET(s, field->ctx.next_pid);
+		SEQ_PUT_FIELD_RET(s, field->ctx.next_prio);
+		SEQ_PUT_FIELD_RET(s, field->ctx.next_state);
 		break;
 	case TRACE_SPECIAL:
 	case TRACE_STACK:
-		SEQ_PUT_FIELD_RET(s, entry->special.arg1);
-		SEQ_PUT_FIELD_RET(s, entry->special.arg2);
-		SEQ_PUT_FIELD_RET(s, entry->special.arg3);
+		SEQ_PUT_FIELD_RET(s, field->special.arg1);
+		SEQ_PUT_FIELD_RET(s, field->special.arg2);
+		SEQ_PUT_FIELD_RET(s, field->special.arg3);
 		break;
 	}
 	return 1;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f69f86788c2b..6ddd6a6556cf 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -61,13 +61,12 @@ struct stack_entry {
 };
 
 /*
- * The trace entry - the most basic unit of tracing. This is what
+ * The trace field - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
  *
  *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
  */
-struct trace_entry {
-	char			type;
+struct trace_field {
 	char			cpu;
 	char			flags;
 	char			preempt_count;
@@ -83,6 +82,18 @@ struct trace_entry {
 	};
 };
 
+struct trace_field_cont {
+	char				buf[sizeof(struct trace_field)];
+};
+
+struct trace_entry {
+	char 				type;
+	union {
+		struct trace_field	field;
+		struct trace_field_cont	cont;
+	};
+};
+
 #define TRACE_ENTRY_SIZE	sizeof(struct trace_entry)
 
 /*
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b13dc19dcbb4..9b7a936f4b1f 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -174,14 +174,14 @@ print_out:
 static int mmio_print_rw(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_rw *rw	= &entry->mmiorw;
+	struct mmiotrace_rw *rw	= &entry->field.mmiorw;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->t);
+	unsigned long long t	= ns2usecs(entry->field.t);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
-	switch (entry->mmiorw.opcode) {
+	switch (entry->field.mmiorw.opcode) {
 	case MMIO_READ:
 		ret = trace_seq_printf(s,
 			"R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -216,14 +216,14 @@ static int mmio_print_rw(struct trace_iterator *iter)
 static int mmio_print_map(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_map *m	= &entry->mmiomap;
+	struct mmiotrace_map *m	= &entry->field.mmiomap;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->t);
+	unsigned long long t	= ns2usecs(entry->field.t);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
-	switch (entry->mmiorw.opcode) {
+	switch (entry->field.mmiorw.opcode) {
 	case MMIO_PROBE:
 		ret = trace_seq_printf(s,
 			"MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
-- 
GitLab


From dd0e545f061f90099a3dcc13aa77e29c6295cf23 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 1 Aug 2008 12:26:41 -0400
Subject: [PATCH 242/895] ftrace: printk formatting infrastructure

This patch adds a feature that can help kernel developers debug their
code using ftrace.

  int ftrace_printk(const char *fmt, ...);

This records into the ftrace buffer using printf formatting. The entry
size in the buffers are still a fixed length. A new type has been added
that allows for more entries to be used for a single recording.

The start of the print is still the same as the other entries.

It returns the number of characters written to the ftrace buffer.

For example:

Having a module with the following code:

static int __init ftrace_print_test(void)
{
        ftrace_printk("jiffies are %ld\n", jiffies);
        return 0;
}

Gives me:

  insmod-5441  3...1 7569us : ftrace_print_test: jiffies are 4296626666

for the latency_trace file and:

          insmod-5441  [03]  1959.370498: ftrace_print_test jiffies are 4296626666

for the trace file.

Note: Only the infrastructure should go into the kernel. It is to help
facilitate debugging for other kernel developers. Calls to ftrace_printk
is not intended to be left in the kernel, and should be frowned upon just
like scattering printks around in the code.

But having this easily at your fingertips helps the debugging go faster
and bugs be solved quicker.

Maybe later on, we can hook this with markers and have their printf format
be sucked into ftrace output.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h        |  10 ++
 kernel/trace/trace.c          | 273 ++++++++++++++++++++++++++++++----
 kernel/trace/trace.h          |  11 ++
 kernel/trace/trace_selftest.c |  11 +-
 4 files changed, 272 insertions(+), 33 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 6b232a2460c0..f53b975e32fa 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -157,9 +157,18 @@ static inline void __ftrace_enabled_restore(int enabled)
 #ifdef CONFIG_TRACING
 extern void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
+# define ftrace_printk(x...) __ftrace_printk(_THIS_IP_, x)
+extern int
+__ftrace_printk(unsigned long ip, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
 #else
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
+static inline int
+ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)))
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
@@ -173,4 +182,5 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
 static inline void ftrace_release(void *start, unsigned long size) { }
 #endif
 
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 76dfe6d2466c..a917bea82715 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -197,12 +197,14 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
  *  NEED_RESCED - reschedule is requested
  *  HARDIRQ	- inside an interrupt handler
  *  SOFTIRQ	- inside a softirq handler
+ *  CONT	- multiple entries hold the trace item
  */
 enum trace_flag_type {
 	TRACE_FLAG_IRQS_OFF		= 0x01,
 	TRACE_FLAG_NEED_RESCHED		= 0x02,
 	TRACE_FLAG_HARDIRQ		= 0x04,
 	TRACE_FLAG_SOFTIRQ		= 0x08,
+	TRACE_FLAG_CONT			= 0x10,
 };
 
 /*
@@ -1074,6 +1076,7 @@ enum trace_file_type {
 	TRACE_FILE_LAT_FMT	= 1,
 };
 
+/* Return the current entry.  */
 static struct trace_entry *
 trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
 		struct trace_iterator *iter, int cpu)
@@ -1104,8 +1107,58 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
 	return &array[iter->next_page_idx[cpu]];
 }
 
+/* Increment the index counter of an iterator by one */
+static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+{
+	iter->idx++;
+	iter->next_idx[cpu]++;
+	iter->next_page_idx[cpu]++;
+
+	if (iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE) {
+		struct trace_array_cpu *data = iter->tr->data[cpu];
+
+		iter->next_page_idx[cpu] = 0;
+		iter->next_page[cpu] =
+			trace_next_list(data, iter->next_page[cpu]);
+	}
+}
+
 static struct trace_entry *
-find_next_entry(struct trace_iterator *iter, int *ent_cpu)
+trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data,
+		 struct trace_iterator *iter, int cpu)
+{
+	struct list_head *next_page;
+	struct trace_entry *ent;
+	int idx, next_idx, next_page_idx;
+
+	ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+
+	if (likely(!ent || ent->type != TRACE_CONT))
+		return ent;
+
+	/* save the iterator details */
+	idx		= iter->idx;
+	next_idx	= iter->next_idx[cpu];
+	next_page_idx	= iter->next_page_idx[cpu];
+	next_page	= iter->next_page[cpu];
+
+	/* find a real entry */
+	do {
+		trace_iterator_increment(iter, cpu);
+		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+	} while (ent && ent->type != TRACE_CONT);
+
+	/* reset the iterator */
+	iter->idx			= idx;
+	iter->next_idx[cpu]		= next_idx;
+	iter->next_page_idx[cpu]	= next_page_idx;
+	iter->next_page[cpu]		= next_page;
+
+	return ent;
+}
+
+static struct trace_entry *
+__find_next_entry(struct trace_iterator *iter, int *ent_cpu, int inc)
 {
 	struct trace_array *tr = iter->tr;
 	struct trace_entry *ent, *next = NULL;
@@ -1115,7 +1168,23 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu)
 	for_each_tracing_cpu(cpu) {
 		if (!head_page(tr->data[cpu]))
 			continue;
+
 		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+
+		if (ent && ent->type == TRACE_CONT) {
+			struct trace_array_cpu *data = tr->data[cpu];
+
+			if (!inc)
+				ent = trace_entry_next(tr, data, iter, cpu);
+			else {
+				while (ent && ent->type == TRACE_CONT) {
+					trace_iterator_increment(iter, cpu);
+					ent = trace_entry_idx(tr, tr->data[cpu],
+							      iter, cpu);
+				}
+			}
+		}
+
 		/*
 		 * Pick the entry with the smallest timestamp:
 		 */
@@ -1131,25 +1200,39 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu)
 	return next;
 }
 
-static void trace_iterator_increment(struct trace_iterator *iter)
+/* Find the next real entry, without updating the iterator itself */
+static struct trace_entry *
+find_next_entry(struct trace_iterator *iter, int *ent_cpu)
 {
-	iter->idx++;
-	iter->next_idx[iter->cpu]++;
-	iter->next_page_idx[iter->cpu]++;
+	return __find_next_entry(iter, ent_cpu, 0);
+}
+
+/* Find the next real entry, and increment the iterator to the next entry */
+static void *find_next_entry_inc(struct trace_iterator *iter)
+{
+	struct trace_entry *next;
+	int next_cpu = -1;
 
-	if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
-		struct trace_array_cpu *data = iter->tr->data[iter->cpu];
+	next = __find_next_entry(iter, &next_cpu, 1);
 
-		iter->next_page_idx[iter->cpu] = 0;
-		iter->next_page[iter->cpu] =
-			trace_next_list(data, iter->next_page[iter->cpu]);
-	}
+	iter->prev_ent = iter->ent;
+	iter->prev_cpu = iter->cpu;
+
+	iter->ent = next;
+	iter->cpu = next_cpu;
+
+	if (next)
+		trace_iterator_increment(iter, iter->cpu);
+
+	return next ? iter : NULL;
 }
 
 static void trace_consume(struct trace_iterator *iter)
 {
 	struct trace_array_cpu *data = iter->tr->data[iter->cpu];
+	struct trace_entry *ent;
 
+ again:
 	data->trace_tail_idx++;
 	if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
 		data->trace_tail = trace_next_page(data, data->trace_tail);
@@ -1160,25 +1243,11 @@ static void trace_consume(struct trace_iterator *iter)
 	if (data->trace_head == data->trace_tail &&
 	    data->trace_head_idx == data->trace_tail_idx)
 		data->trace_idx = 0;
-}
-
-static void *find_next_entry_inc(struct trace_iterator *iter)
-{
-	struct trace_entry *next;
-	int next_cpu = -1;
-
-	next = find_next_entry(iter, &next_cpu);
-
-	iter->prev_ent = iter->ent;
-	iter->prev_cpu = iter->cpu;
-
-	iter->ent = next;
-	iter->cpu = next_cpu;
-
-	if (next)
-		trace_iterator_increment(iter);
 
-	return next ? iter : NULL;
+	ent = trace_entry_idx(iter->tr, iter->tr->data[iter->cpu],
+			      iter, iter->cpu);
+	if (ent && ent->type == TRACE_CONT)
+		goto again;
 }
 
 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1473,6 +1542,26 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
 
 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
 
+static void
+trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
+{
+	struct trace_array *tr = iter->tr;
+	struct trace_array_cpu *data = tr->data[iter->cpu];
+	struct trace_entry *ent;
+
+	ent = trace_entry_idx(tr, data, iter, iter->cpu);
+	if (!ent || ent->type != TRACE_CONT) {
+		trace_seq_putc(s, '\n');
+		return;
+	}
+
+	do {
+		trace_seq_printf(s, "%s", ent->cont.buf);
+		trace_iterator_increment(iter, iter->cpu);
+		ent = trace_entry_idx(tr, data, iter, iter->cpu);
+	} while (ent && ent->type == TRACE_CONT);
+}
+
 static int
 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 {
@@ -1491,6 +1580,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 
 	if (!next_entry)
 		next_entry = entry;
+
+	if (entry->type == TRACE_CONT)
+		return 1;
+
 	rel_usecs = ns2usecs(next_entry->field.t - entry->field.t);
 	abs_usecs = ns2usecs(entry->field.t - iter->tr->time_start);
 
@@ -1550,6 +1643,12 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		}
 		trace_seq_puts(s, "\n");
 		break;
+	case TRACE_PRINT:
+		seq_print_ip_sym(s, field->print.ip, sym_flags);
+		trace_seq_printf(s, ": %s", field->print.buf);
+		if (field->flags && TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
 	default:
 		trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	}
@@ -1571,6 +1670,10 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	int i;
 
 	entry = iter->ent;
+
+	if (entry->type == TRACE_CONT)
+		return 1;
+
 	field = &entry->field;
 
 	comm = trace_find_cmdline(iter->ent->field.pid);
@@ -1653,6 +1756,12 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		if (!ret)
 			return 0;
 		break;
+	case TRACE_PRINT:
+		seq_print_ip_sym(s, field->print.ip, sym_flags);
+		trace_seq_printf(s, ": %s", field->print.buf);
+		if (field->flags && TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
 	}
 	return 1;
 }
@@ -1666,6 +1775,10 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	int S, T;
 
 	entry = iter->ent;
+
+	if (entry->type == TRACE_CONT)
+		return 1;
+
 	field = &entry->field;
 
 	ret = trace_seq_printf(s, "%d %d %llu ",
@@ -1708,6 +1821,12 @@ static int print_raw_fmt(struct trace_iterator *iter)
 		if (!ret)
 			return 0;
 		break;
+	case TRACE_PRINT:
+		trace_seq_printf(s, "# %lx %s",
+				 field->print.ip, field->print.buf);
+		if (field->flags && TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
 	}
 	return 1;
 }
@@ -1733,6 +1852,10 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	int S, T;
 
 	entry = iter->ent;
+
+	if (entry->type == TRACE_CONT)
+		return 1;
+
 	field = &entry->field;
 
 	SEQ_PUT_HEX_FIELD_RET(s, field->pid);
@@ -1778,6 +1901,10 @@ static int print_bin_fmt(struct trace_iterator *iter)
 	struct trace_field *field;
 
 	entry = iter->ent;
+
+	if (entry->type == TRACE_CONT)
+		return 1;
+
 	field = &entry->field;
 
 	SEQ_PUT_FIELD_RET(s, field->pid);
@@ -2943,6 +3070,94 @@ static __init void tracer_init_debugfs(void)
 #endif
 }
 
+#define TRACE_BUF_SIZE 1024
+#define TRACE_PRINT_BUF_SIZE \
+	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
+#define TRACE_CONT_BUF_SIZE sizeof(struct trace_field)
+
+/**
+ * ftrace_printk - printf formatting in the ftrace buffer
+ * @fmt - the printf format for printing.
+ *
+ * Note: __ftrace_printk is an internal function for ftrace_printk and
+ *       the @ip is passed in via the ftrace_printk macro.
+ *
+ * This function allows a kernel developer to debug fast path sections
+ * that printk is not appropriate for. By scattering in various
+ * printk like tracing in the code, a developer can quickly see
+ * where problems are occurring.
+ *
+ * This is intended as a debugging tool for the developer only.
+ * Please reframe from leaving ftrace_printks scattered around in
+ * your code.
+ */
+int __ftrace_printk(unsigned long ip, const char *fmt, ...)
+{
+	struct trace_array *tr = &global_trace;
+	static DEFINE_SPINLOCK(trace_buf_lock);
+	static char trace_buf[TRACE_BUF_SIZE];
+	struct trace_array_cpu *data;
+	struct trace_entry *entry;
+	unsigned long flags;
+	long disabled;
+	va_list ap;
+	int cpu, len = 0, write, written = 0;
+
+	if (likely(!ftrace_function_enabled))
+		return 0;
+
+	local_irq_save(flags);
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+	disabled = atomic_inc_return(&data->disabled);
+
+	if (unlikely(disabled != 1 || !ftrace_function_enabled))
+		goto out;
+
+	spin_lock(&trace_buf_lock);
+	va_start(ap, fmt);
+	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, ap);
+	va_end(ap);
+
+	len = min(len, TRACE_BUF_SIZE-1);
+	trace_buf[len] = 0;
+
+	__raw_spin_lock(&data->lock);
+	entry				= tracing_get_trace_entry(tr, data);
+	tracing_generic_entry_update(entry, flags);
+	entry->type			= TRACE_PRINT;
+	entry->field.print.ip		= ip;
+
+	write = min(len, (int)(TRACE_PRINT_BUF_SIZE-1));
+
+	memcpy(&entry->field.print.buf, trace_buf, write);
+	entry->field.print.buf[write] = 0;
+	written = write;
+
+	if (written != len)
+		entry->field.flags |= TRACE_FLAG_CONT;
+
+	while (written != len) {
+		entry = tracing_get_trace_entry(tr, data);
+
+		entry->type = TRACE_CONT;
+		write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1));
+		memcpy(&entry->cont.buf, trace_buf+written, write);
+		entry->cont.buf[write] = 0;
+		written += write;
+	}
+	__raw_spin_unlock(&data->lock);
+
+	spin_unlock(&trace_buf_lock);
+
+ out:
+	atomic_dec(&data->disabled);
+	local_irq_restore(flags);
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(__ftrace_printk);
+
 static int trace_alloc_page(void)
 {
 	struct trace_array_cpu *data;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6ddd6a6556cf..50b6d7a6f01a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -13,7 +13,9 @@ enum trace_type {
 	TRACE_FN,
 	TRACE_CTX,
 	TRACE_WAKE,
+	TRACE_CONT,
 	TRACE_STACK,
+	TRACE_PRINT,
 	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
@@ -60,6 +62,14 @@ struct stack_entry {
 	unsigned long		caller[FTRACE_STACK_ENTRIES];
 };
 
+/*
+ * ftrace_printk entry:
+ */
+struct print_entry {
+	unsigned long		ip;
+	char			buf[];
+};
+
 /*
  * The trace field - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
@@ -77,6 +87,7 @@ struct trace_field {
 		struct ctx_switch_entry		ctx;
 		struct special_entry		special;
 		struct stack_entry		stack;
+		struct print_entry		print;
 		struct mmiotrace_rw		mmiorw;
 		struct mmiotrace_map		mmiomap;
 	};
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 0911b7e073bf..630715bbd572 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,7 +9,9 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 	case TRACE_FN:
 	case TRACE_CTX:
 	case TRACE_WAKE:
+	case TRACE_CONT:
 	case TRACE_STACK:
+	case TRACE_PRINT:
 	case TRACE_SPECIAL:
 		return 1;
 	}
@@ -120,11 +122,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 					   struct trace_array *tr,
 					   int (*func)(void))
 {
-	unsigned long count;
-	int ret;
 	int save_ftrace_enabled = ftrace_enabled;
 	int save_tracer_enabled = tracer_enabled;
+	unsigned long count;
 	char *func_name;
+	int ret;
 
 	/* The ftrace test PASSED */
 	printk(KERN_CONT "PASSED\n");
@@ -157,6 +159,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 	/* enable tracing */
 	tr->ctrl = 1;
 	trace->init(tr);
+
 	/* Sleep for a 1/10 of a second */
 	msleep(100);
 
@@ -212,10 +215,10 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 int
 trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
 {
-	unsigned long count;
-	int ret;
 	int save_ftrace_enabled = ftrace_enabled;
 	int save_tracer_enabled = tracer_enabled;
+	unsigned long count;
+	int ret;
 
 	/* make sure msleep has been recorded */
 	msleep(1);
-- 
GitLab


From 2f2c99dba2398ef7d9c21f7c793180a50e68b1f0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 1 Aug 2008 16:45:49 -0400
Subject: [PATCH 243/895] ftrace: ftrace_printk doc moved

Based on Randy Dunlap's suggestion, the ftrace_printk kernel-doc belongs
with the ftrace_printk macro that should be used. Not with the
__ftrace_printk internal function.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 19 ++++++++++++++++++-
 kernel/trace/trace.c   | 16 ----------------
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f53b975e32fa..018af16bce5c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -157,7 +157,24 @@ static inline void __ftrace_enabled_restore(int enabled)
 #ifdef CONFIG_TRACING
 extern void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
-# define ftrace_printk(x...) __ftrace_printk(_THIS_IP_, x)
+
+/**
+ * ftrace_printk - printf formatting in the ftrace buffer
+ * @fmt: the printf format for printing
+ *
+ * Note: __ftrace_printk is an internal function for ftrace_printk and
+ *       the @ip is passed in via the ftrace_printk macro.
+ *
+ * This function allows a kernel developer to debug fast path sections
+ * that printk is not appropriate for. By scattering in various
+ * printk like tracing in the code, a developer can quickly see
+ * where problems are occurring.
+ *
+ * This is intended as a debugging tool for the developer only.
+ * Please refrain from leaving ftrace_printks scattered around in
+ * your code.
+ */
+# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt)
 extern int
 __ftrace_printk(unsigned long ip, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a917bea82715..2597e7e49c35 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3075,22 +3075,6 @@ static __init void tracer_init_debugfs(void)
 	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
 #define TRACE_CONT_BUF_SIZE sizeof(struct trace_field)
 
-/**
- * ftrace_printk - printf formatting in the ftrace buffer
- * @fmt - the printf format for printing.
- *
- * Note: __ftrace_printk is an internal function for ftrace_printk and
- *       the @ip is passed in via the ftrace_printk macro.
- *
- * This function allows a kernel developer to debug fast path sections
- * that printk is not appropriate for. By scattering in various
- * printk like tracing in the code, a developer can quickly see
- * where problems are occurring.
- *
- * This is intended as a debugging tool for the developer only.
- * Please reframe from leaving ftrace_printks scattered around in
- * your code.
- */
 int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 {
 	struct trace_array *tr = &global_trace;
-- 
GitLab


From 3f5a54e371ca20b119b73704f6c01b71295c1714 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 30 Jul 2008 22:36:46 -0400
Subject: [PATCH 244/895] ftrace: dump out ftrace buffers to console on panic

At OLS I had a lot of interest to be able to have the ftrace buffers
dumped on panic.  Usually one would expect to uses kexec and examine
the buffers after a new kernel is loaded. But sometimes the resources
do not permit kdump and kexec, so having an option to still see the
sequence of events up to the crash is very advantageous.

This patch adds the option to have the ftrace buffers dumped to the
console in the latency_trace format on a panic. When the option is set,
the default entries per CPU buffer are lowered to 16384, since the writing
to the serial (if that is the console) may take an awful long time
otherwise.

[
 Changes since -v1:
  Got alpine to send correctly (as well as spell check working).
  Removed config option.
  Moved the static variables into ftrace_dump itself.
  Gave printk a log level.
]

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |   2 +
 kernel/trace/trace.c   | 175 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 176 insertions(+), 1 deletion(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 018af16bce5c..f7fb92045bf0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -178,6 +178,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
 extern int
 __ftrace_printk(unsigned long ip, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
+extern void ftrace_dump(void);
 #else
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
@@ -186,6 +187,7 @@ ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)))
 {
 	return 0;
 }
+static inline void ftrace_dump(void) { }
 #endif
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2597e7e49c35..97513c8ecd67 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -14,6 +14,7 @@
 #include <linux/utsrelease.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
+#include <linux/notifier.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -22,6 +23,7 @@
 #include <linux/ftrace.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
+#include <linux/kdebug.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
@@ -103,8 +105,15 @@ int				ftrace_function_enabled;
  * trace_nr_entries is the number of entries that is allocated
  * for a buffer. Note, the number of entries is always rounded
  * to ENTRIES_PER_PAGE.
+ *
+ * This number is purposely set to a low number of 16384.
+ * If the dump on oops happens, it will be much appreciated
+ * to not have to wait for all that output. Anyway this can be
+ * boot time and run time configurable.
  */
-static unsigned long		trace_nr_entries = 65536UL;
+#define TRACE_ENTRIES_DEFAULT	16384UL
+
+static unsigned long		trace_nr_entries = TRACE_ENTRIES_DEFAULT;
 
 /* trace_types holds a link list of available tracers. */
 static struct tracer		*trace_types __read_mostly;
@@ -3142,6 +3151,165 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 }
 EXPORT_SYMBOL_GPL(__ftrace_printk);
 
+static int trace_panic_handler(struct notifier_block *this,
+			       unsigned long event, void *unused)
+{
+	ftrace_dump();
+	return NOTIFY_OK;
+}
+
+static struct notifier_block trace_panic_notifier = {
+	.notifier_call  = trace_panic_handler,
+	.next           = NULL,
+	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
+};
+
+static int trace_die_handler(struct notifier_block *self,
+			     unsigned long val,
+			     void *data)
+{
+	switch (val) {
+	case DIE_OOPS:
+		ftrace_dump();
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block trace_die_notifier = {
+	.notifier_call = trace_die_handler,
+	.priority = 200
+};
+
+/*
+ * printk is set to max of 1024, we really don't need it that big.
+ * Nothing should be printing 1000 characters anyway.
+ */
+#define TRACE_MAX_PRINT		1000
+
+/*
+ * Define here KERN_TRACE so that we have one place to modify
+ * it if we decide to change what log level the ftrace dump
+ * should be at.
+ */
+#define KERN_TRACE		KERN_INFO
+
+static void
+trace_printk_seq(struct trace_seq *s)
+{
+	/* Probably should print a warning here. */
+	if (s->len >= 1000)
+		s->len = 1000;
+
+	/* should be zero ended, but we are paranoid. */
+	s->buffer[s->len] = 0;
+
+	printk(KERN_TRACE "%s", s->buffer);
+
+	trace_seq_reset(s);
+}
+
+
+void ftrace_dump(void)
+{
+	static DEFINE_SPINLOCK(ftrace_dump_lock);
+	/* use static because iter can be a bit big for the stack */
+	static struct trace_iterator iter;
+	struct trace_array_cpu *data;
+	static cpumask_t mask;
+	static int dump_ran;
+	unsigned long flags;
+	int cnt = 0;
+	int cpu;
+
+	/* only one dump */
+	spin_lock_irqsave(&ftrace_dump_lock, flags);
+	if (dump_ran)
+		goto out;
+
+	dump_ran = 1;
+
+	/* No turning back! */
+	ftrace_kill_atomic();
+
+	printk(KERN_TRACE "Dumping ftrace buffer:\n");
+
+	iter.tr = &global_trace;
+	iter.trace = current_trace;
+
+	/*
+	 * We need to stop all tracing on all CPUS to read the
+	 * the next buffer. This is a bit expensive, but is
+	 * not done often. We fill all what we can read,
+	 * and then release the locks again.
+	 */
+
+	cpus_clear(mask);
+
+	for_each_tracing_cpu(cpu) {
+		data = iter.tr->data[cpu];
+
+		if (!head_page(data) || !data->trace_idx)
+			continue;
+
+		atomic_inc(&data->disabled);
+		cpu_set(cpu, mask);
+	}
+
+	for_each_cpu_mask(cpu, mask) {
+		data = iter.tr->data[cpu];
+		__raw_spin_lock(&data->lock);
+
+		if (data->overrun > iter.last_overrun[cpu])
+			iter.overrun[cpu] +=
+				data->overrun - iter.last_overrun[cpu];
+		iter.last_overrun[cpu] = data->overrun;
+	}
+
+	while (!trace_empty(&iter)) {
+
+		if (!cnt)
+			printk(KERN_TRACE "---------------------------------\n");
+
+		cnt++;
+
+		/* reset all but tr, trace, and overruns */
+		memset(&iter.seq, 0,
+		       sizeof(struct trace_iterator) -
+		       offsetof(struct trace_iterator, seq));
+		iter.iter_flags |= TRACE_FILE_LAT_FMT;
+		iter.pos = -1;
+
+		if (find_next_entry_inc(&iter) != NULL) {
+			print_trace_line(&iter);
+			trace_consume(&iter);
+		}
+
+		trace_printk_seq(&iter.seq);
+	}
+
+	if (!cnt)
+		printk(KERN_TRACE "   (ftrace buffer empty)\n");
+	else
+		printk(KERN_TRACE "---------------------------------\n");
+
+	for_each_cpu_mask(cpu, mask) {
+		data = iter.tr->data[cpu];
+		__raw_spin_unlock(&data->lock);
+	}
+
+	for_each_cpu_mask(cpu, mask) {
+		data = iter.tr->data[cpu];
+		atomic_dec(&data->disabled);
+	}
+
+
+ out:
+	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
+}
+
 static int trace_alloc_page(void)
 {
 	struct trace_array_cpu *data;
@@ -3338,6 +3506,11 @@ __init static int tracer_alloc_buffers(void)
 	global_trace.ctrl = tracer_enabled;
 	tracing_disabled = 0;
 
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &trace_panic_notifier);
+
+	register_die_notifier(&trace_die_notifier);
+
 	return 0;
 
  free_buffers:
-- 
GitLab


From 7b928c23fa3e9fa37d1d4ba52ba963f41ee5aae0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 15 Aug 2008 17:48:02 +0200
Subject: [PATCH 245/895] ftrace: build fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix:

 In file included from init/main.c:65:
 include/linux/ftrace.h:166: error: expected â€˜,' or â€˜;' before â€˜{' token
 make[1]: *** [init/main.o] Error 1
 make: *** [init/main.o] Error 2

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f7fb92045bf0..ce929cb55435 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -183,7 +183,10 @@ extern void ftrace_dump(void);
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 static inline int
-ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)))
+ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
+
+static inline int
+ftrace_printk(const char *fmt, ...)
 {
 	return 0;
 }
-- 
GitLab


From c5131ad6c3cbe8f6674993e29a76cecf8deb4384 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 15 Aug 2008 18:22:09 +0200
Subject: [PATCH 246/895] ftrace: ftrace_kill_atomic() build fix

fix:

 kernel/built-in.o: In function `ftrace_dump':
 (.text+0x2e2ea): undefined reference to `ftrace_kill_atomic'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ce929cb55435..36c439927ff1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -36,6 +36,7 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1);
 # define register_ftrace_function(ops) do { } while (0)
 # define unregister_ftrace_function(ops) do { } while (0)
 # define clear_ftrace_function(ops) do { } while (0)
+static inline void ftrace_kill_atomic(void) { }
 #endif /* CONFIG_FTRACE */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-- 
GitLab


From 3989cce82b272bd6312555fcc47c11715c157102 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 9 Jun 2008 20:54:22 +0200
Subject: [PATCH 247/895] ftrace: scripts/recordmcount.pl cross-build hack

hack around:

 ld: Relocatable linking with relocations from format elf32-i386 (init/.tmp_gl_calibrate.o) to format elf64-x86-64 (init/.tmp_mx_calibrate.o) i  CC      arch/x86/mm/extable.o
 objcopy: 'init/.tmp_mx_calibrate.o': No such file
 rm: cannot remove `init/.tmp_mx_calibrate.o': No such file or directory
 ld: Relocatable linking with relocations from format elf32-i386 (arch/x86/mm/extable.o) to format elf64-x86-64 (arch/x86/mm/.tmp_mx_extable.o) is not supported
 mv: cannot stat `arch/x86/mm/.tmp_mx_extable.o': No such file or directory
 ld: Relocatable linking with relocations from format elf32-i386 (arch/x86/mm/fault.o) to format elf64-x86-64 (arch/x86/mm/.tmp_mx_fault.o) is not supported

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/recordmcount.pl | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 44b4b23e91b2..e4922a6c963b 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -108,6 +108,20 @@ if ($#ARGV < 6) {
 
 my ($arch, $objdump, $objcopy, $cc, $ld, $nm, $rm, $mv, $inputfile) = @ARGV;
 
+if ($arch eq "i386") {
+  $ld = "ld -m elf_i386";
+  $objdump = "objdump -M i386";
+  $objcopy = "objcopy -O elf32-i386";
+  $cc = "gcc -m32";
+}
+
+if ($arch eq "x86_64") {
+  $ld = "ld -m elf_x86_64";
+  $objdump = "objdump -M x86-64";
+  $objcopy = "objcopy -O elf64-x86-64";
+  $cc = "gcc -m64";
+}
+
 $objdump = "objdump" if ((length $objdump) == 0);
 $objcopy = "objcopy" if ((length $objcopy) == 0);
 $cc = "gcc" if ((length $cc) == 0);
-- 
GitLab


From 98a983aad2e5b3dc83a8a761675445cdd8f3e6bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= <fweisbec@gmail.com>
Date: Fri, 15 Aug 2008 21:08:22 +0200
Subject: [PATCH 248/895] ftrace: fix some mistakes in error messages

This patch fixes some mistakes on the tracer in warning messages when
debugfs fails to create tracing files.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: srostedt@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 97513c8ecd67..896e59f772c9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3030,12 +3030,12 @@ static __init void tracer_init_debugfs(void)
 	entry = debugfs_create_file("available_tracers", 0444, d_tracer,
 				    &global_trace, &show_traces_fops);
 	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
+		pr_warning("Could not create debugfs 'available_tracers' entry\n");
 
 	entry = debugfs_create_file("current_tracer", 0444, d_tracer,
 				    &global_trace, &set_tracer_fops);
 	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
+		pr_warning("Could not create debugfs 'current_tracer' entry\n");
 
 	entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
 				    &tracing_max_latency,
@@ -3048,7 +3048,7 @@ static __init void tracer_init_debugfs(void)
 				    &tracing_thresh, &tracing_max_lat_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs "
-			   "'tracing_threash' entry\n");
+			   "'tracing_thresh' entry\n");
 	entry = debugfs_create_file("README", 0644, d_tracer,
 				    NULL, &tracing_readme_fops);
 	if (!entry)
@@ -3058,13 +3058,13 @@ static __init void tracer_init_debugfs(void)
 				    NULL, &tracing_pipe_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs "
-			   "'tracing_threash' entry\n");
+			   "'trace_pipe' entry\n");
 
 	entry = debugfs_create_file("trace_entries", 0644, d_tracer,
 				    &global_trace, &tracing_entries_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs "
-			   "'tracing_threash' entry\n");
+			   "'trace_entries' entry\n");
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 	entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
-- 
GitLab


From 00fd61aee10533e003f2f00ab7163207660a4051 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 15 Aug 2008 21:40:04 -0400
Subject: [PATCH 249/895] ftrace: do not init module on ftrace disabled

If one of the self tests of ftrace has disabled the function tracer,
do not run the code to convert the mcount calls in modules.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index eadd0eaea9b6..11d94f2dc485 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -307,7 +307,7 @@ void ftrace_release(void *start, unsigned long size)
 	unsigned long e = s + size;
 	int i;
 
-	if (!start)
+	if (ftrace_disabled || !start)
 		return;
 
 	/* No interrupt should call this */
@@ -1567,7 +1567,7 @@ static int ftrace_convert_nops(unsigned long *start,
 
 void ftrace_init_module(unsigned long *start, unsigned long *end)
 {
-	if (start == end)
+	if (ftrace_disabled || start == end)
 		return;
 	ftrace_convert_nops(start, end);
 }
-- 
GitLab


From 99ecdc43bc17faf5fa571db8569df171ecd0e5b8 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 15 Aug 2008 21:40:05 -0400
Subject: [PATCH 250/895] ftrace: add necessary locking for ftrace records

The new design of pre-recorded mcounts and updating the code outside of
kstop_machine has changed the way the records themselves are protected.

This patch uses the ftrace_lock to protect the records. Note, the lock
still does not need to be taken within calls that are only called via
kstop_machine, since the that code can not run while the spin lock is held.

Also removed the hash_lock needed for the daemon when MCOUNT_RECORD is
configured. Also did a slight cleanup of an unused variable.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 44 +++++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 11d94f2dc485..43665add9805 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -81,7 +81,7 @@ void clear_ftrace_function(void)
 
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
-	/* Should never be called by interrupts */
+	/* should not be called from interrupt context */
 	spin_lock(&ftrace_lock);
 
 	ops->next = ftrace_list;
@@ -115,6 +115,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 	struct ftrace_ops **p;
 	int ret = 0;
 
+	/* should not be called from interrupt context */
 	spin_lock(&ftrace_lock);
 
 	/*
@@ -153,6 +154,21 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+#ifndef CONFIG_FTRACE_MCOUNT_RECORD
+/*
+ * The hash lock is only needed when the recording of the mcount
+ * callers are dynamic. That is, by the caller themselves and
+ * not recorded via the compilation.
+ */
+static DEFINE_SPINLOCK(ftrace_hash_lock);
+#define ftrace_hash_lock(flags)	  spin_lock_irqsave(ftrace_hash_lock, flags)
+#define ftrace_hash_unlock(flags) spin_lock_irqsave(ftrace_hash_lock, flags)
+#else
+/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
+#define ftrace_hash_lock(flags)   do { (void)flags; } while (0)
+#define ftrace_hash_unlock(flags) do { } while(0)
+#endif
+
 static struct task_struct *ftraced_task;
 
 enum {
@@ -171,7 +187,6 @@ static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
 
 static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
 
-static DEFINE_SPINLOCK(ftrace_shutdown_lock);
 static DEFINE_MUTEX(ftraced_lock);
 static DEFINE_MUTEX(ftrace_regex_lock);
 
@@ -310,7 +325,7 @@ void ftrace_release(void *start, unsigned long size)
 	if (ftrace_disabled || !start)
 		return;
 
-	/* No interrupt should call this */
+	/* should not be called from interrupt context */
 	spin_lock(&ftrace_lock);
 
 	for (pg = ftrace_pages_start; pg; pg = pg->next) {
@@ -362,7 +377,6 @@ ftrace_record_ip(unsigned long ip)
 	unsigned long flags;
 	unsigned long key;
 	int resched;
-	int atomic;
 	int cpu;
 
 	if (!ftrace_enabled || ftrace_disabled)
@@ -392,9 +406,7 @@ ftrace_record_ip(unsigned long ip)
 	if (ftrace_ip_in_hash(ip, key))
 		goto out;
 
-	atomic = irqs_disabled();
-
-	spin_lock_irqsave(&ftrace_shutdown_lock, flags);
+	ftrace_hash_lock(flags);
 
 	/* This ip may have hit the hash before the lock */
 	if (ftrace_ip_in_hash(ip, key))
@@ -411,7 +423,7 @@ ftrace_record_ip(unsigned long ip)
 	ftraced_trigger = 1;
 
  out_unlock:
-	spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
+	ftrace_hash_unlock(flags);
  out:
 	per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
 
@@ -887,6 +899,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 
 	(*pos)++;
 
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
  retry:
 	if (iter->idx >= iter->pg->index) {
 		if (iter->pg->next) {
@@ -910,6 +924,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 			goto retry;
 		}
 	}
+	spin_unlock(&ftrace_lock);
 
 	iter->pos = *pos;
 
@@ -1023,8 +1038,8 @@ static void ftrace_filter_reset(int enable)
 	unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
 	unsigned i;
 
-	/* keep kstop machine from running */
-	preempt_disable();
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
 	if (enable)
 		ftrace_filtered = 0;
 	pg = ftrace_pages_start;
@@ -1037,7 +1052,7 @@ static void ftrace_filter_reset(int enable)
 		}
 		pg = pg->next;
 	}
-	preempt_enable();
+	spin_unlock(&ftrace_lock);
 }
 
 static int
@@ -1149,8 +1164,8 @@ ftrace_match(unsigned char *buff, int len, int enable)
 		}
 	}
 
-	/* keep kstop machine from running */
-	preempt_disable();
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
 	if (enable)
 		ftrace_filtered = 1;
 	pg = ftrace_pages_start;
@@ -1187,7 +1202,7 @@ ftrace_match(unsigned char *buff, int len, int enable)
 		}
 		pg = pg->next;
 	}
-	preempt_enable();
+	spin_unlock(&ftrace_lock);
 }
 
 static ssize_t
@@ -1551,6 +1566,7 @@ static int ftrace_convert_nops(unsigned long *start,
 	p = start;
 	while (p < end) {
 		addr = ftrace_call_adjust(*p++);
+		/* should not be called from interrupt context */
 		spin_lock(&ftrace_lock);
 		ftrace_record_ip(addr);
 		spin_unlock(&ftrace_lock);
-- 
GitLab


From 3700273586ee6a58b95dd07d9f8a02db4a9b476f Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Mon, 18 Aug 2008 16:24:56 +0800
Subject: [PATCH 251/895] ftrace: fix incorrect comment style of
 __ftrace_enabled_save()

This patch fixes incorrect comment style of __ftrace_enabled_save().

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 36c439927ff1..8b4cf38c80d2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -99,9 +99,11 @@ static inline void tracer_disable(void)
 #endif
 }
 
-/* Ftrace disable/restore without lock. Some synchronization mechanism
+/*
+ * Ftrace disable/restore without lock. Some synchronization mechanism
  * must be used to prevent ftrace_enabled to be changed between
- * disable/restore. */
+ * disable/restore.
+ */
 static inline int __ftrace_enabled_save(void)
 {
 #ifdef CONFIG_FTRACE
-- 
GitLab


From 6a4917e3ae5194a10e0723f96edc854c381e3063 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 18 Aug 2008 15:58:12 -0700
Subject: [PATCH 252/895] ftrace: fix build problem with CONFIG_FTRACE

I'm seeing when I use separate src/build dirs:

make[3]: *** [arch/x86/kernel/time_32.o] Error 1
/bin/sh: scripts/recordmcount.pl: No such file or directory
make[3]: *** [arch/x86/kernel/irq_32.o] Error 1
/bin/sh: scripts/recordmcount.pl: No such file or directory
make[3]: *** [arch/x86/kernel/ldt.o] Error 1
/bin/sh: scripts/recordmcount.pl: No such file or directory
make[3]: *** [arch/x86/kernel/i8259.o] Error 1
/bin/sh: scripts/recordmcount.pl: No such file or directory

This fixes it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/Makefile.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 463ddcc583ed..232485ec5265 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -199,7 +199,7 @@ cmd_modversions =							\
 endif
 
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
-cmd_record_mcount = scripts/recordmcount.pl "$(ARCH)" \
+cmd_record_mcount = $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
 	"$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
 endif
 
-- 
GitLab


From d74fcd1e4e8842d5302cd303ef25cef7e67f68b4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 15 Aug 2008 11:40:24 -0400
Subject: [PATCH 253/895] ftrace: update recordmount.pl arch changes

I'm trying to keep all the arch changes in recordmcount.pl in one place.
I moved your code into that area, by adding the flags to the commands
that were passed in.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/recordmcount.pl | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index e4922a6c963b..36c8355c555e 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -108,20 +108,6 @@ if ($#ARGV < 6) {
 
 my ($arch, $objdump, $objcopy, $cc, $ld, $nm, $rm, $mv, $inputfile) = @ARGV;
 
-if ($arch eq "i386") {
-  $ld = "ld -m elf_i386";
-  $objdump = "objdump -M i386";
-  $objcopy = "objcopy -O elf32-i386";
-  $cc = "gcc -m32";
-}
-
-if ($arch eq "x86_64") {
-  $ld = "ld -m elf_x86_64";
-  $objdump = "objdump -M x86-64";
-  $objcopy = "objcopy -O elf64-x86-64";
-  $cc = "gcc -m64";
-}
-
 $objdump = "objdump" if ((length $objdump) == 0);
 $objcopy = "objcopy" if ((length $objcopy) == 0);
 $cc = "gcc" if ((length $cc) == 0);
@@ -146,11 +132,25 @@ if ($arch eq "x86_64") {
     $function_regex = "<(.*?)>:";
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
     $type = ".quad";
+
+    # force flags for this arch
+    $ld .= " -m elf_x86_64";
+    $objdump .= " -M x86-64";
+    $objcopy .= " -O elf64-x86-64";
+    $cc .= " -m64";
+
 } elsif ($arch eq "i386") {
     $section_regex = "Disassembly of section";
     $function_regex = "<(.*?)>:";
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
     $type = ".long";
+
+    # force flags for this arch
+    $ld .= " -m elf_i386";
+    $objdump .= " -M i386";
+    $objcopy .= " -O elf32-i386";
+    $cc .= " -m32";
+
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }
-- 
GitLab


From 8feff1cacc29e9cfdc6d1ce5f2108db87b91046e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 20 Aug 2008 10:07:35 -0400
Subject: [PATCH 254/895] ftrace: handle weak symbol functions

During tests and checks, I've discovered that there were failures to
convert mcount callers into nops. Looking deeper into these failures,
code that was attempted to be changed was not an mcount caller.
The current code only updates if the code being changed is what it expects,
but I still investigate any time there is a failure.

What was happening is that a weak symbol was being used as a reference
for other mcount callers. That weak symbol was also referenced elsewhere
so the offsets were using the strong symbol and not the function symbol
that it was referenced from.

This patch changes the setting up of the mcount_loc section to search
for a global function that is not weak. It will pick a local over a weak
but if only a weak is found in a section, a warning is printed and the
mcount location is not recorded (just to be safe).

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/recordmcount.pl | 106 ++++++++++++++++++++++++++++++++--------
 1 file changed, 86 insertions(+), 20 deletions(-)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 36c8355c555e..1891cf9743fc 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -119,17 +119,19 @@ $mv = "mv" if ((length $mv) == 0);
 #print STDERR "running: $P '$arch' '$objdump' '$objcopy' '$cc' '$ld' " .
 #    "'$nm' '$rm' '$mv' '$inputfile'\n";
 
-my %locals;
-my %convert;
+my %locals;		# List of local (static) functions
+my %weak;		# List of weak functions
+my %convert;		# List of local functions used that needs conversion
 
 my $type;
 my $section_regex;	# Find the start of a section
-my $function_regex;	# Find the name of a function (return func name)
+my $function_regex;	# Find the name of a function
+			#    (return offset and func name)
 my $mcount_regex;	# Find the call site to mcount (return offset)
 
 if ($arch eq "x86_64") {
     $section_regex = "Disassembly of section";
-    $function_regex = "<(.*?)>:";
+    $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
     $type = ".quad";
 
@@ -141,7 +143,7 @@ if ($arch eq "x86_64") {
 
 } elsif ($arch eq "i386") {
     $section_regex = "Disassembly of section";
-    $function_regex = "<(.*?)>:";
+    $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
     $type = ".long";
 
@@ -158,7 +160,6 @@ if ($arch eq "x86_64") {
 my $text_found = 0;
 my $read_function = 0;
 my $opened = 0;
-my $text = "";
 my $mcount_section = "__mcount_loc";
 
 my $dirname;
@@ -186,46 +187,111 @@ my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
 my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
 
 #
-# Step 1: find all the local symbols (static functions).
+# Step 1: find all the local (static functions) and weak symbols.
+#        't' is local, 'w/W' is weak (we never use a weak function)
 #
 open (IN, "$nm $inputfile|") || die "error running $nm";
 while (<IN>) {
     if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) {
 	$locals{$1} = 1;
+    } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) {
+	$weak{$2} = $1;
     }
 }
 close(IN);
 
+my @offsets;		# Array of offsets of mcount callers
+my $ref_func;		# reference function to use for offsets
+my $offset = 0;		# offset of ref_func to section beginning
+
+##
+# update_funcs - print out the current mcount callers
+#
+#  Go through the list of offsets to callers and write them to
+#  the output file in a format that can be read by an assembler.
+#
+sub update_funcs
+{
+    return if ($#offsets < 0);
+
+    defined($ref_func) || die "No function to reference";
+
+    # A section only had a weak function, to represent it.
+    # Unfortunately, a weak function may be overwritten by another
+    # function of the same name, making all these offsets incorrect.
+    # To be safe, we simply print a warning and bail.
+    if (defined $weak{$ref_func}) {
+	print STDERR
+	    "$inputfile: WARNING: referencing weak function" .
+	    " $ref_func for mcount\n";
+	return;
+    }
+
+    # is this function static? If so, note this fact.
+    if (defined $locals{$ref_func}) {
+	$convert{$ref_func} = 1;
+    }
+
+    # Loop through all the mcount caller offsets and print a reference
+    # to the caller based from the ref_func.
+    for (my $i=0; $i <= $#offsets; $i++) {
+	if (!$opened) {
+	    open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
+	    $opened = 1;
+	    print FILE "\t.section $mcount_section,\"a\",\@progbits\n";
+	}
+	printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
+    }
+}
+
 #
 # Step 2: find the sections and mcount call sites
 #
 open(IN, "$objdump -dr $inputfile|") || die "error running $objdump";
 
+my $text;
+
 while (<IN>) {
     # is it a section?
     if (/$section_regex/) {
 	$read_function = 1;
+	# print out any recorded offsets
+	update_funcs() if ($text_found);
+
+	# reset all markers and arrays
 	$text_found = 0;
+	undef($ref_func);
+	undef(@offsets);
+
     # section found, now is this a start of a function?
     } elsif ($read_function && /$function_regex/) {
-	$read_function = 0;
 	$text_found = 1;
-	$text = $1;
-	# is this function static? If so, note this fact.
-	if (defined $locals{$text}) {
-	    $convert{$text} = 1;
+	$offset = hex $1;
+	$text = $2;
+
+	# if this is either a local function or a weak function
+	# keep looking for functions that are global that
+	# we can use safely.
+	if (!defined($locals{$text}) && !defined($weak{$text})) {
+	    $ref_func = $text;
+	    $read_function = 0;
+	} else {
+	    # if we already have a function, and this is weak, skip it
+	    if (!defined($ref_func) || !defined($weak{$text})) {
+		$ref_func = $text;
+	    }
 	}
-    # is this a call site to mcount? If so, print the offset from the section
-    } elsif ($text_found && /$mcount_regex/) {
-	if (!$opened) {
-	    open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
-	    $opened = 1;
-	    print FILE "\t.section $mcount_section,\"a\",\@progbits\n";
-	}
-	print FILE "\t$type $text + 0x$1\n";
+    }
+
+    # is this a call site to mcount? If so, record it to print later
+    if ($text_found && /$mcount_regex/) {
+	$offsets[$#offsets + 1] = hex $1;
     }
 }
 
+# dump out anymore offsets that may have been found
+update_funcs() if ($text_found);
+
 # If we did not find any mcount callers, we are done (do nothing).
 if (!$opened) {
     exit(0);
-- 
GitLab


From 6f93fc076a464bfe24e8d4c5fea3f6ca5bdb264d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 20 Aug 2008 12:55:07 -0400
Subject: [PATCH 255/895] ftrace: x86 use copy to and from user functions

The modification of code is performed either by kstop_machine, before
SMP starts, or on module code before the module is executed. There is
no reason to do the modifications from assembly. The copy to and from
user functions are sufficient and produces cleaner and easier to read
code.

Thanks to Benjamin Herrenschmidt for suggesting the idea.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 38 +++++++++++++-------------------------
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 4151c91254e8..082d99642622 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -11,6 +11,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
+#include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
@@ -60,11 +61,7 @@ notrace int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
 {
-	unsigned replaced;
-	unsigned old = *(unsigned *)old_code; /* 4 bytes */
-	unsigned new = *(unsigned *)new_code; /* 4 bytes */
-	unsigned char newch = new_code[4];
-	int faulted = 0;
+	unsigned char replaced[MCOUNT_INSN_SIZE];
 
 	/*
 	 * Note: Due to modules and __init, code can
@@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	 *  as well as code changing.
 	 *
 	 * No real locking needed, this code is run through
-	 * kstop_machine.
+	 * kstop_machine, or before SMP starts.
 	 */
-	asm volatile (
-		"1: lock\n"
-		"   cmpxchg %3, (%2)\n"
-		"   jnz 2f\n"
-		"   movb %b4, 4(%2)\n"
-		"2:\n"
-		".section .fixup, \"ax\"\n"
-		"3:	movl $1, %0\n"
-		"	jmp 2b\n"
-		".previous\n"
-		_ASM_EXTABLE(1b, 3b)
-		: "=r"(faulted), "=a"(replaced)
-		: "r"(ip), "r"(new), "c"(newch),
-		  "0"(faulted), "a"(old)
-		: "memory");
-	sync_core();
+	if (__copy_from_user(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
+		return 1;
 
-	if (replaced != old && replaced != new)
-		faulted = 2;
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return 2;
 
-	return faulted;
+	WARN_ON_ONCE(__copy_to_user((char __user *)ip, new_code,
+				    MCOUNT_INSN_SIZE));
+
+	sync_core();
+
+	return 0;
 }
 
 notrace int ftrace_update_ftrace_func(ftrace_func_t func)
-- 
GitLab


From 2d7da80f7138c4276ef4fa0334be400b805d0fbf Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 25 Aug 2008 13:08:44 +1000
Subject: [PATCH 256/895] ftrace: fix build failure

After disabling FTRACE_MCOUNT_RECORD via a patch, a dormant build
failure surfaced:

 kernel/trace/ftrace.c: In function 'ftrace_record_ip':
 kernel/trace/ftrace.c:416: error: incompatible type for argument 1 of '_spin_lock_irqsave'
 kernel/trace/ftrace.c:433: error: incompatible type for argument 1 of '_spin_lock_irqsave'

Introduced by commit 6dad8e07f4c10b17b038e84d29f3ca41c2e55cd0 ("ftrace:
add necessary locking for ftrace records").

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 43665add9805..7599abdf6d4d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -161,8 +161,8 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
  * not recorded via the compilation.
  */
 static DEFINE_SPINLOCK(ftrace_hash_lock);
-#define ftrace_hash_lock(flags)	  spin_lock_irqsave(ftrace_hash_lock, flags)
-#define ftrace_hash_unlock(flags) spin_lock_irqsave(ftrace_hash_lock, flags)
+#define ftrace_hash_lock(flags)	  spin_lock_irqsave(&ftrace_hash_lock, flags)
+#define ftrace_hash_unlock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
 #else
 /* This is protected via the ftrace_lock with MCOUNT_RECORD. */
 #define ftrace_hash_lock(flags)   do { (void)flags; } while (0)
-- 
GitLab


From ac8825ec6d941b6899331b84c7d6bf027c3bb4f1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 25 Aug 2008 08:12:04 +0200
Subject: [PATCH 257/895] ftrace: clean up macro usage

enclose the argument in parenthesis. (especially since we cast it,
which is a high prio operation)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7599abdf6d4d..969a83f75a3e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -165,7 +165,7 @@ static DEFINE_SPINLOCK(ftrace_hash_lock);
 #define ftrace_hash_unlock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
 #else
 /* This is protected via the ftrace_lock with MCOUNT_RECORD. */
-#define ftrace_hash_lock(flags)   do { (void)flags; } while (0)
+#define ftrace_hash_lock(flags)   do { (void)(flags); } while (0)
 #define ftrace_hash_unlock(flags) do { } while(0)
 #endif
 
-- 
GitLab


From f2f8458e751f9ae41dfec3c00a46d3e62dc38f60 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 25 Aug 2008 14:52:11 -0400
Subject: [PATCH 258/895] ftrace: objcopy version test for local symbols

The --globalize-symbols option came out in objcopy version 2.17.
If the kernel is being compiled on a system with a lower version of
objcopy, then we can not use the globalize / localize trick to
link to symbols pointing to local functions.

This patch tests the version of objcopy and will only use the trick
if the version is greater than or equal to 2.17. Otherwise, if an
object has only local functions within a section, it will give a
nice warning and recommend the user to upgrade their objcopy.

Leaving the symbols unrecorded is not that big of a deal, since the
mcount record method changes the actual mcount code to be a simple
"ret" without recording registers or anything.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/recordmcount.pl | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 1891cf9743fc..ee9e12676776 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -186,6 +186,36 @@ if ($filename =~ m,^(.*)(\.\S),) {
 my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
 my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
 
+#
+# --globalize-symbols came out in 2.17, we must test the version
+# of objcopy, and if it is less than 2.17, then we can not
+# record local functions.
+my $use_locals = 01;
+my $local_warn_once = 0;
+my $found_version = 0;
+
+open (IN, "$objcopy --version |") || die "error running $objcopy";
+while (<IN>) {
+    if (/objcopy.*\s(\d+)\.(\d+)/) {
+	my $major = $1;
+	my $minor = $2;
+
+	$found_version = 1;
+	if ($major < 2 ||
+	    ($major == 2 && $minor < 17)) {
+	    $use_locals = 0;
+	}
+	last;
+    }
+}
+close (IN);
+
+if (!$found_version) {
+    print STDERR "WARNING: could not find objcopy version.\n" .
+	"\tDisabling local function references.\n";
+}
+
+
 #
 # Step 1: find all the local (static functions) and weak symbols.
 #        't' is local, 'w/W' is weak (we never use a weak function)
@@ -229,6 +259,17 @@ sub update_funcs
 
     # is this function static? If so, note this fact.
     if (defined $locals{$ref_func}) {
+
+	# only use locals if objcopy supports globalize-symbols
+	if (!$use_locals) {
+	    print STDERR
+		"$inputfile: WARNING: referencing local function " .
+		"$ref_func for mcount\n" .
+		"\tConsider upgrading objcopy to support the globalize-" .
+		"symbols option.\n"
+		if (!$local_warn_once++);
+	    return;
+	}
 	$convert{$ref_func} = 1;
     }
 
-- 
GitLab


From b3a320417484a6d6b9d28098944df58341353992 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 27 Aug 2008 09:08:30 +0200
Subject: [PATCH 259/895] kbuild: ftrace: don't assume that
 scripts/recordmcount.pl is executable

CHK     include/linux/version.h
  CHK     include/linux/utsrelease.h
  CC      scripts/mod/empty.o
/bin/sh: /usr/src/25/scripts/recordmcount.pl: Permission denied

We shouldn't assume that files have their `x' bits set.  There are various
ways in which file permissions get lost, including use of patch(1).

It might not be correct to assume that perl lives in $PATH?

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/Makefile.build | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 232485ec5265..5ed4cbf1e0e1 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -199,8 +199,9 @@ cmd_modversions =							\
 endif
 
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
-cmd_record_mcount = $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
-	"$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
+cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \
+	"$(ARCH)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" \
+	"$(MV)" "$(@)";
 endif
 
 define rule_cc_o_c
-- 
GitLab


From e5a81b629ea8feb9e7530cfac35cfb41c45facf3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 27 Aug 2008 23:31:01 -0400
Subject: [PATCH 260/895] ftrace: add stack tracer

This is another tracer using the ftrace infrastructure, that examines
at each function call the size of the stack. If the stack use is greater
than the previous max it is recorded.

You can always see (and set) the max stack size seen. By setting it
to zero will start the recording again. The backtrace is also available.

For example:

# cat /debug/tracing/stack_max_size
1856

# cat /debug/tracing/stack_trace
[<c027764d>] stack_trace_call+0x8f/0x101
[<c021b966>] ftrace_call+0x5/0x8
[<c02553cc>] clocksource_get_next+0x12/0x48
[<c02542a5>] update_wall_time+0x538/0x6d1
[<c0245913>] do_timer+0x23/0xb0
[<c0257657>] tick_do_update_jiffies64+0xd9/0xf1
[<c02576b9>] tick_sched_timer+0x4a/0xad
[<c0250fe6>] __run_hrtimer+0x3e/0x75
[<c02518ed>] hrtimer_interrupt+0xf1/0x154
[<c022c870>] smp_apic_timer_interrupt+0x71/0x84
[<c021b7e9>] apic_timer_interrupt+0x2d/0x34
[<c0238597>] finish_task_switch+0x29/0xa0
[<c05abd13>] schedule+0x765/0x7be
[<c05abfca>] schedule_timeout+0x1b/0x90
[<c05ab4d4>] wait_for_common+0xab/0x101
[<c05ab5ac>] wait_for_completion+0x12/0x14
[<c033cfc3>] blk_execute_rq+0x84/0x99
[<c0402470>] scsi_execute+0xc2/0x105
[<c040250a>] scsi_execute_req+0x57/0x7f
[<c043afe0>] sr_test_unit_ready+0x3e/0x97
[<c043bbd6>] sr_media_change+0x43/0x205
[<c046b59f>] media_changed+0x48/0x77
[<c046b5ff>] cdrom_media_changed+0x31/0x37
[<c043b091>] sr_block_media_changed+0x16/0x18
[<c02b9e69>] check_disk_change+0x1b/0x63
[<c046f4c3>] cdrom_open+0x7a1/0x806
[<c043b148>] sr_block_open+0x78/0x8d
[<c02ba4c0>] do_open+0x90/0x257
[<c02ba869>] blkdev_open+0x2d/0x56
[<c0296a1f>] __dentry_open+0x14d/0x23c
[<c0296b32>] nameidata_to_filp+0x24/0x38
[<c02a1c68>] do_filp_open+0x347/0x626
[<c02967ef>] do_sys_open+0x47/0xbc
[<c02968b0>] sys_open+0x23/0x2b
[<c021aadd>] sysenter_do_call+0x12/0x26

I've tested this on both x86_64 and i386.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig       |   9 ++
 kernel/trace/Makefile      |   1 +
 kernel/trace/trace_stack.c | 254 +++++++++++++++++++++++++++++++++++++
 3 files changed, 264 insertions(+)
 create mode 100644 kernel/trace/trace_stack.c

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 14d9505178ca..2a22e46390d3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -106,6 +106,15 @@ config CONTEXT_SWITCH_TRACER
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
+config STACK_TRACER
+	bool "Trace max stack"
+	depends on HAVE_FTRACE
+	select FTRACE
+	select STACKTRACE
+	help
+	  This tracer records the max stack of the kernel, and displays
+	  it in debugfs/tracing/stack_trace
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FTRACE
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 71d17de17288..58ec61c44bd6 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_FTRACE) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
new file mode 100644
index 000000000000..4d1e522e3fe8
--- /dev/null
+++ b/kernel/trace/trace_stack.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include "trace.h"
+
+#define STACK_TRACE_ENTRIES 500
+
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES] =
+	{ [0 ... (STACK_TRACE_ENTRIES-1)] = ULONG_MAX };
+static struct stack_trace max_stack_trace = {
+	.max_entries		= STACK_TRACE_ENTRIES,
+	.entries		= stack_dump_trace,
+};
+
+static unsigned long max_stack_size;
+static raw_spinlock_t max_stack_lock =
+	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static int stack_trace_disabled __read_mostly;
+static DEFINE_PER_CPU(int, trace_active);
+
+static inline void check_stack(void)
+{
+	unsigned long this_size;
+	unsigned long flags;
+
+	this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
+	this_size = THREAD_SIZE - this_size;
+
+	if (this_size <= max_stack_size)
+		return;
+
+	raw_local_irq_save(flags);
+	__raw_spin_lock(&max_stack_lock);
+
+	/* a race could have already updated it */
+	if (this_size <= max_stack_size)
+		goto out;
+
+	max_stack_size = this_size;
+
+	max_stack_trace.nr_entries	= 0;
+	max_stack_trace.skip		= 1;
+
+	save_stack_trace(&max_stack_trace);
+
+ out:
+	__raw_spin_unlock(&max_stack_lock);
+	raw_local_irq_restore(flags);
+}
+
+static void
+stack_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+	int cpu, resched;
+
+	if (unlikely(!ftrace_enabled || stack_trace_disabled))
+		return;
+
+	resched = need_resched();
+	preempt_disable_notrace();
+
+	cpu = raw_smp_processor_id();
+	/* no atomic needed, we only modify this variable by this cpu */
+	if (per_cpu(trace_active, cpu)++ != 0)
+		goto out;
+
+	check_stack();
+
+ out:
+	per_cpu(trace_active, cpu)--;
+	/* prevent recursion in schedule */
+	if (resched)
+		preempt_enable_no_resched_notrace();
+	else
+		preempt_enable_notrace();
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+	.func = stack_trace_call,
+};
+
+static ssize_t
+stack_max_size_read(struct file *filp, char __user *ubuf,
+		    size_t count, loff_t *ppos)
+{
+	unsigned long *ptr = filp->private_data;
+	char buf[64];
+	int r;
+
+	r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
+	if (r > sizeof(buf))
+		r = sizeof(buf);
+	return simple_read_from_buffer(ubuf, count, ppos, buf, r);
+}
+
+static ssize_t
+stack_max_size_write(struct file *filp, const char __user *ubuf,
+		     size_t count, loff_t *ppos)
+{
+	long *ptr = filp->private_data;
+	unsigned long val, flags;
+	char buf[64];
+	int ret;
+
+	if (count >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	raw_local_irq_save(flags);
+	__raw_spin_lock(&max_stack_lock);
+	*ptr = val;
+	__raw_spin_unlock(&max_stack_lock);
+	raw_local_irq_restore(flags);
+
+	return count;
+}
+
+static struct file_operations stack_max_size_fops = {
+	.open		= tracing_open_generic,
+	.read		= stack_max_size_read,
+	.write		= stack_max_size_write,
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	unsigned long *t = m->private;
+
+	(*pos)++;
+
+	if (!t || *t == ULONG_MAX)
+		return NULL;
+
+	t++;
+	m->private = t;
+
+	return t;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+	unsigned long *t = m->private;
+	loff_t l = 0;
+
+	local_irq_disable();
+	__raw_spin_lock(&max_stack_lock);
+
+	for (; t && l < *pos; t = t_next(m, t, &l))
+		;
+
+	return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+	__raw_spin_unlock(&max_stack_lock);
+	local_irq_enable();
+}
+
+static int trace_lookup_stack(struct seq_file *m, unsigned long addr)
+{
+#ifdef CONFIG_KALLSYMS
+	char str[KSYM_SYMBOL_LEN];
+
+	sprint_symbol(str, addr);
+
+	return seq_printf(m, "[<%p>] %s\n", (void*)addr, str);
+#else
+	return seq_printf(m, "%p\n", (void*)addr);
+#endif
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+	unsigned long *t = v;
+
+	if (!t || *t == ULONG_MAX)
+		return 0;
+
+	trace_lookup_stack(m, *t);
+
+	return 0;
+}
+
+static struct seq_operations stack_trace_seq_ops = {
+	.start		= t_start,
+	.next		= t_next,
+	.stop		= t_stop,
+	.show		= t_show,
+};
+
+static int stack_trace_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &stack_trace_seq_ops);
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = stack_dump_trace;
+	}
+
+	return ret;
+}
+
+static struct file_operations stack_trace_fops = {
+	.open		= stack_trace_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+static __init int stack_trace_init(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+
+	entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
+				    &max_stack_size, &stack_max_size_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'stack_max_size' entry\n");
+
+	entry = debugfs_create_file("stack_trace", 0444, d_tracer,
+				    NULL, &stack_trace_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'stack_trace' entry\n");
+
+	register_ftrace_function(&trace_ops);
+
+	return 0;
+}
+
+device_initcall(stack_trace_init);
-- 
GitLab


From 3b47bfc1fca01cccad9cce2d18b79b18ef2e4131 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 27 Aug 2008 23:24:15 -0400
Subject: [PATCH 261/895] ftrace: remove direct reference to mcount in trace
 code

The mcount record method of ftrace scans objdump for references to mcount.
Using mcount as the reference to test if the calls to mcount being replaced
are indeed calls to mcount, this use of mcount was also caught as a
location to change. Using a variable that points to the mcount address
moves this reference into the data section that is not scanned, and
we do not use a false location to try and modify.

The warn on code was what was used to detect this bug.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 969a83f75a3e..b69966f0f144 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -36,6 +36,14 @@
 int ftrace_enabled __read_mostly;
 static int last_ftrace_enabled;
 
+/*
+ * Since MCOUNT_ADDR may point to mcount itself, we do not want
+ * to get it confused by reading a reference in the code as we
+ * are parsing on objcopy output of text. Use a variable for
+ * it instead.
+ */
+static unsigned long mcount_addr = MCOUNT_ADDR;
+
 /*
  * ftrace_disabled is set when an anomaly is discovered.
  * ftrace_disabled is much stronger than ftrace_enabled.
@@ -577,7 +585,7 @@ ftrace_code_disable(struct dyn_ftrace *rec)
 	ip = rec->ip;
 
 	nop = ftrace_nop_replace();
-	call = ftrace_call_replace(ip, MCOUNT_ADDR);
+	call = ftrace_call_replace(ip, mcount_addr);
 
 	failed = ftrace_modify_code(ip, call, nop);
 	if (failed) {
-- 
GitLab


From d53475b5aa946752e3306b2ecb5a8c9c51cf8dd0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 27 Aug 2008 13:02:01 -0400
Subject: [PATCH 262/895] ftrace: remove warning of old objcopy and local
 functions

The warning messages about old objcopy and local functions spam the
user quite drastically.  Remove the warning until we can find a nicer
way of tell the user to upgrade their objcopy.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/recordmcount.pl | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index ee9e12676776..f56d760bd589 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -262,12 +262,6 @@ sub update_funcs
 
 	# only use locals if objcopy supports globalize-symbols
 	if (!$use_locals) {
-	    print STDERR
-		"$inputfile: WARNING: referencing local function " .
-		"$ref_func for mcount\n" .
-		"\tConsider upgrading objcopy to support the globalize-" .
-		"symbols option.\n"
-		if (!$local_warn_once++);
 	    return;
 	}
 	$convert{$ref_func} = 1;
-- 
GitLab


From 1b6cced6ec9677fa65471e890dfdcb4bf5387643 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 29 Aug 2008 16:51:43 -0400
Subject: [PATCH 263/895] ftrace: stack trace add indexes

This patch adds indexes into the stack that the functions in the
stack dump were found at. As an added bonus, I also added a diff
to show which function is the most notorious consumer of the stack.

The output now looks like this:

# cat /debug/tracing/stack_trace
        Depth   Size      Location    (48 entries)
        -----   ----      --------
  0)     2476     212   blk_recount_segments+0x39/0x59
  1)     2264      12   bio_phys_segments+0x16/0x1d
  2)     2252      20   blk_rq_bio_prep+0x23/0xaf
  3)     2232      12   init_request_from_bio+0x74/0x77
  4)     2220      56   __make_request+0x294/0x331
  5)     2164     136   generic_make_request+0x34f/0x37d
  6)     2028      56   submit_bio+0xe7/0xef
  7)     1972      28   submit_bh+0xd1/0xf0
  8)     1944     112   block_read_full_page+0x299/0x2a9
  9)     1832       8   blkdev_readpage+0x14/0x16
 10)     1824      28   read_cache_page_async+0x7e/0x109
 11)     1796      16   read_cache_page+0x11/0x49
 12)     1780      32   read_dev_sector+0x3c/0x72
 13)     1748      48   read_lba+0x4d/0xaa
 14)     1700     168   efi_partition+0x85/0x61b
 15)     1532      72   rescan_partitions+0x10e/0x266
 16)     1460      40   do_open+0x1c7/0x24e
 17)     1420     292   __blkdev_get+0x79/0x84
 18)     1128      12   blkdev_get+0x12/0x14
 19)     1116      20   register_disk+0xd1/0x11e
 20)     1096      28   add_disk+0x34/0x90
 21)     1068      52   sd_probe+0x2b1/0x366
 22)     1016      20   driver_probe_device+0xa5/0x120
 23)      996       8   __device_attach+0xd/0xf
 24)      988      32   bus_for_each_drv+0x3e/0x68
 25)      956      24   device_attach+0x56/0x6c
 26)      932      16   bus_attach_device+0x26/0x4d
 27)      916      64   device_add+0x380/0x4b4
 28)      852      28   scsi_sysfs_add_sdev+0xa1/0x1c9
 29)      824     160   scsi_probe_and_add_lun+0x919/0xa2a
 30)      664      36   __scsi_add_device+0x88/0xae
 31)      628      44   ata_scsi_scan_host+0x9e/0x21c
 32)      584      28   ata_host_register+0x1cb/0x1db
 33)      556      24   ata_host_activate+0x98/0xb5
 34)      532     192   ahci_init_one+0x9bd/0x9e9
 35)      340      20   pci_device_probe+0x3e/0x5e
 36)      320      20   driver_probe_device+0xa5/0x120
 37)      300      20   __driver_attach+0x3f/0x5e
 38)      280      36   bus_for_each_dev+0x40/0x62
 39)      244      12   driver_attach+0x19/0x1b
 40)      232      28   bus_add_driver+0x9c/0x1af
 41)      204      28   driver_register+0x76/0xd2
 42)      176      20   __pci_register_driver+0x44/0x71
 43)      156       8   ahci_init+0x14/0x16
 44)      148     100   _stext+0x42/0x122
 45)       48      20   kernel_init+0x175/0x1dc
 46)       28      28   kernel_thread_helper+0x7/0x10

The first column is simply an index starting from the inner most function
and counting down to the outer most.

The next column is the location that the function was found on the stack.

The next column is the size of the stack for that function.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_stack.c | 90 +++++++++++++++++++++++++++++++-------
 1 file changed, 73 insertions(+), 17 deletions(-)

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 4d1e522e3fe8..74c5d9a3afae 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -16,8 +16,10 @@
 
 #define STACK_TRACE_ENTRIES 500
 
-static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES] =
-	{ [0 ... (STACK_TRACE_ENTRIES-1)] = ULONG_MAX };
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
+	 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
+static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
+
 static struct stack_trace max_stack_trace = {
 	.max_entries		= STACK_TRACE_ENTRIES,
 	.entries		= stack_dump_trace,
@@ -32,8 +34,9 @@ static DEFINE_PER_CPU(int, trace_active);
 
 static inline void check_stack(void)
 {
-	unsigned long this_size;
-	unsigned long flags;
+	unsigned long this_size, flags;
+	unsigned long *p, *top, *start;
+	int i;
 
 	this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
 	this_size = THREAD_SIZE - this_size;
@@ -51,10 +54,42 @@ static inline void check_stack(void)
 	max_stack_size = this_size;
 
 	max_stack_trace.nr_entries	= 0;
-	max_stack_trace.skip		= 1;
+	max_stack_trace.skip		= 3;
 
 	save_stack_trace(&max_stack_trace);
 
+	/*
+	 * Now find where in the stack these are.
+	 */
+	i = 0;
+	start = &this_size;
+	top = (unsigned long *)
+		(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
+
+	/*
+	 * Loop through all the entries. One of the entries may
+	 * for some reason be missed on the stack, so we may
+	 * have to account for them. If they are all there, this
+	 * loop will only happen once. This code only takes place
+	 * on a new max, so it is far from a fast path.
+	 */
+	while (i < max_stack_trace.nr_entries) {
+
+		stack_dump_index[i] = this_size;
+		p = start;
+
+		for (; p < top && i < max_stack_trace.nr_entries; p++) {
+			if (*p == stack_dump_trace[i]) {
+				this_size = stack_dump_index[i++] =
+					(top - p) * sizeof(unsigned long);
+				/* Start the search from here */
+				start = p + 1;
+			}
+		}
+
+		i++;
+	}
+
  out:
 	__raw_spin_unlock(&max_stack_lock);
 	raw_local_irq_restore(flags);
@@ -145,22 +180,24 @@ static struct file_operations stack_max_size_fops = {
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	unsigned long *t = m->private;
+	long i = (long)m->private;
 
 	(*pos)++;
 
-	if (!t || *t == ULONG_MAX)
+	i++;
+
+	if (i >= max_stack_trace.nr_entries ||
+	    stack_dump_trace[i] == ULONG_MAX)
 		return NULL;
 
-	t++;
-	m->private = t;
+	m->private = (void *)i;
 
-	return t;
+	return &m->private;
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-	unsigned long *t = m->private;
+	void *t = &m->private;
 	loff_t l = 0;
 
 	local_irq_disable();
@@ -178,14 +215,15 @@ static void t_stop(struct seq_file *m, void *p)
 	local_irq_enable();
 }
 
-static int trace_lookup_stack(struct seq_file *m, unsigned long addr)
+static int trace_lookup_stack(struct seq_file *m, long i)
 {
+	unsigned long addr = stack_dump_trace[i];
 #ifdef CONFIG_KALLSYMS
 	char str[KSYM_SYMBOL_LEN];
 
 	sprint_symbol(str, addr);
 
-	return seq_printf(m, "[<%p>] %s\n", (void*)addr, str);
+	return seq_printf(m, "%s\n", str);
 #else
 	return seq_printf(m, "%p\n", (void*)addr);
 #endif
@@ -193,12 +231,30 @@ static int trace_lookup_stack(struct seq_file *m, unsigned long addr)
 
 static int t_show(struct seq_file *m, void *v)
 {
-	unsigned long *t = v;
+	long i = *(long *)v;
+	int size;
+
+	if (i < 0) {
+		seq_printf(m, "        Depth   Size      Location"
+			   "    (%d entries)\n"
+			   "        -----   ----      --------\n",
+			   max_stack_trace.nr_entries);
+		return 0;
+	}
 
-	if (!t || *t == ULONG_MAX)
+	if (i >= max_stack_trace.nr_entries ||
+	    stack_dump_trace[i] == ULONG_MAX)
 		return 0;
 
-	trace_lookup_stack(m, *t);
+	if (i+1 == max_stack_trace.nr_entries ||
+	    stack_dump_trace[i+1] == ULONG_MAX)
+		size = stack_dump_index[i];
+	else
+		size = stack_dump_index[i] - stack_dump_index[i+1];
+
+	seq_printf(m, "%3ld) %8d   %5d   ", i, stack_dump_index[i], size);
+
+	trace_lookup_stack(m, i);
 
 	return 0;
 }
@@ -217,7 +273,7 @@ static int stack_trace_open(struct inode *inode, struct file *file)
 	ret = seq_open(file, &stack_trace_seq_ops);
 	if (!ret) {
 		struct seq_file *m = file->private_data;
-		m->private = stack_dump_trace;
+		m->private = (void *)-1;
 	}
 
 	return ret;
-- 
GitLab


From 2ff01c6a17391225a18256d510b6e5b4aba40aa1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Sep 2008 15:04:37 +0200
Subject: [PATCH 264/895] stack tracer: depends on DEBUG_KERNEL

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2a22e46390d3..5a9cffb0fafb 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -109,6 +109,7 @@ config CONTEXT_SWITCH_TRACER
 config STACK_TRACER
 	bool "Trace max stack"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select FTRACE
 	select STACKTRACE
 	help
-- 
GitLab


From a6168353d1c0b24b7512e14d1c3e47ed69881a23 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Wed, 20 Aug 2008 16:36:11 -0700
Subject: [PATCH 265/895] ftrace: make output nicely spaced for up to 999 cpus

Currently some of the ftrace output goes skewiff if you have more
than 9 cpus, and some if you have more than 99.

Twiddle with the headers and format strings to make up to 999 cpus
display without causing spacing problems.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Acked-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 896e59f772c9..1801900908e1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1409,21 +1409,21 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
 
 static void print_lat_help_header(struct seq_file *m)
 {
-	seq_puts(m, "#                _------=> CPU#            \n");
-	seq_puts(m, "#               / _-----=> irqs-off        \n");
-	seq_puts(m, "#              | / _----=> need-resched    \n");
-	seq_puts(m, "#              || / _---=> hardirq/softirq \n");
-	seq_puts(m, "#              ||| / _--=> preempt-depth   \n");
-	seq_puts(m, "#              |||| /                      \n");
-	seq_puts(m, "#              |||||     delay             \n");
-	seq_puts(m, "#  cmd     pid ||||| time  |   caller      \n");
-	seq_puts(m, "#     \\   /    |||||   \\   |   /           \n");
+	seq_puts(m, "#                  _------=> CPU#            \n");
+	seq_puts(m, "#                 / _-----=> irqs-off        \n");
+	seq_puts(m, "#                | / _----=> need-resched    \n");
+	seq_puts(m, "#                || / _---=> hardirq/softirq \n");
+	seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
+	seq_puts(m, "#                |||| /                      \n");
+	seq_puts(m, "#                |||||     delay             \n");
+	seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
+	seq_puts(m, "#     \\   /      |||||   \\   |   /           \n");
 }
 
 static void print_func_help_header(struct seq_file *m)
 {
-	seq_puts(m, "#           TASK-PID   CPU#    TIMESTAMP  FUNCTION\n");
-	seq_puts(m, "#              | |      |          |         |\n");
+	seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
+	seq_puts(m, "#              | |       |          |         |\n");
 }
 
 
@@ -1508,7 +1508,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 	comm = trace_find_cmdline(field->pid);
 
 	trace_seq_printf(s, "%8.8s-%-5d ", comm, field->pid);
-	trace_seq_printf(s, "%d", cpu);
+	trace_seq_printf(s, "%3d", cpu);
 	trace_seq_printf(s, "%c%c",
 			(field->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
 			((field->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
@@ -1598,7 +1598,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 
 	if (verbose) {
 		comm = trace_find_cmdline(field->pid);
-		trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
+		trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
 				 " %ld.%03ldms (+%ld.%03ldms): ",
 				 comm,
 				 field->pid, cpu, field->flags,
@@ -1694,7 +1694,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	ret = trace_seq_printf(s, "%16s-%-5d ", comm, field->pid);
 	if (!ret)
 		return 0;
-	ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
+	ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
 	if (!ret)
 		return 0;
 	ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
-- 
GitLab


From 652567aa2000f1d4a1fd434382a30d8dd4a7c980 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 3 Sep 2008 17:42:50 -0400
Subject: [PATCH 266/895] ftrace: binary and not logical for continue test

Peter Zijlstra provided me with a nice brown paper bag while letting me know
that I was doing a logical AND and not a binary one, making a condition
true more often than it should be.

Luckily, a false true is handled by the calling function and no harm is
done. But this needs to be fixed regardless.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1801900908e1..9639e45f0860 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1655,7 +1655,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	case TRACE_PRINT:
 		seq_print_ip_sym(s, field->print.ip, sym_flags);
 		trace_seq_printf(s, ": %s", field->print.buf);
-		if (field->flags && TRACE_FLAG_CONT)
+		if (field->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
 	default:
@@ -1768,7 +1768,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	case TRACE_PRINT:
 		seq_print_ip_sym(s, field->print.ip, sym_flags);
 		trace_seq_printf(s, ": %s", field->print.buf);
-		if (field->flags && TRACE_FLAG_CONT)
+		if (field->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
 	}
@@ -1833,7 +1833,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	case TRACE_PRINT:
 		trace_seq_printf(s, "# %lx %s",
 				 field->print.ip, field->print.buf);
-		if (field->flags && TRACE_FLAG_CONT)
+		if (field->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
 	}
-- 
GitLab


From 5a90f577e5369a84b720ead42e621fcb1b8a8b21 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 3 Sep 2008 17:42:51 -0400
Subject: [PATCH 267/895] ftrace: print continue index fix

An item in the trace buffer that is bigger than one entry may be split
up using the TRACE_CONT entry. This makes it a virtual single entry.
The current code increments the iterator index even while traversing
TRACE_CONT entries, making it look like the iterator is further than
it actually is.

This patch adds code to not increment the iterator index while skipping
over TRACE_CONT entries.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9639e45f0860..d24101cfc425 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1117,9 +1117,8 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
 }
 
 /* Increment the index counter of an iterator by one */
-static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+static void __trace_iterator_increment(struct trace_iterator *iter, int cpu)
 {
-	iter->idx++;
 	iter->next_idx[cpu]++;
 	iter->next_page_idx[cpu]++;
 
@@ -1132,6 +1131,12 @@ static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
 	}
 }
 
+static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+{
+	iter->idx++;
+	__trace_iterator_increment(iter, cpu);
+}
+
 static struct trace_entry *
 trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data,
 		 struct trace_iterator *iter, int cpu)
@@ -1153,7 +1158,7 @@ trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data,
 
 	/* find a real entry */
 	do {
-		trace_iterator_increment(iter, cpu);
+		__trace_iterator_increment(iter, cpu);
 		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
 	} while (ent && ent->type != TRACE_CONT);
 
@@ -1187,7 +1192,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, int inc)
 				ent = trace_entry_next(tr, data, iter, cpu);
 			else {
 				while (ent && ent->type == TRACE_CONT) {
-					trace_iterator_increment(iter, cpu);
+					__trace_iterator_increment(iter, cpu);
 					ent = trace_entry_idx(tr, tr->data[cpu],
 							      iter, cpu);
 				}
@@ -1566,7 +1571,7 @@ trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 
 	do {
 		trace_seq_printf(s, "%s", ent->cont.buf);
-		trace_iterator_increment(iter, iter->cpu);
+		__trace_iterator_increment(iter, iter->cpu);
 		ent = trace_entry_idx(tr, data, iter, iter->cpu);
 	} while (ent && ent->type == TRACE_CONT);
 }
-- 
GitLab


From f09ce573f57ddc35c67b39e51f34545877b30031 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 4 Sep 2008 10:24:14 +0200
Subject: [PATCH 268/895] ftrace: make ftrace_printk usable with the other
 tracers

Currently ftrace_printk only works with the ftrace tracer, switch it to an
iter_ctrl setting so we can make us of them with other tracers too.

[rostedt@redhat.com: tweak to the disable condition]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 8 +++++---
 kernel/trace/trace.h | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d24101cfc425..8a00d6c5c0f5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -235,6 +235,7 @@ static const char *trace_options[] = {
 	"block",
 	"stacktrace",
 	"sched-tree",
+	"ftrace_printk",
 	NULL
 };
 
@@ -3091,9 +3092,10 @@ static __init void tracer_init_debugfs(void)
 
 int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 {
-	struct trace_array *tr = &global_trace;
 	static DEFINE_SPINLOCK(trace_buf_lock);
 	static char trace_buf[TRACE_BUF_SIZE];
+
+	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
 	struct trace_entry *entry;
 	unsigned long flags;
@@ -3101,7 +3103,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 	va_list ap;
 	int cpu, len = 0, write, written = 0;
 
-	if (likely(!ftrace_function_enabled))
+	if (!(trace_flags & TRACE_ITER_PRINTK) || !tr->ctrl || tracing_disabled)
 		return 0;
 
 	local_irq_save(flags);
@@ -3109,7 +3111,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 	data = tr->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
-	if (unlikely(disabled != 1 || !ftrace_function_enabled))
+	if (unlikely(disabled != 1))
 		goto out;
 
 	spin_lock(&trace_buf_lock);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 50b6d7a6f01a..5f54c875c8be 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -356,6 +356,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_BLOCK		= 0x80,
 	TRACE_ITER_STACKTRACE		= 0x100,
 	TRACE_ITER_SCHED_TREE		= 0x200,
+	TRACE_ITER_PRINTK		= 0x400,
 };
 
 #endif /* _LINUX_KERNEL_TRACE_H */
-- 
GitLab


From 80b5e940050c273ba277acbf3a9fbc1d4441e681 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 4 Sep 2008 10:24:16 +0200
Subject: [PATCH 269/895] ftrace: sched_switch: show the wakee's cpu

While profiling the smp behaviour of the scheduler it was needed to know to
which cpu a task got woken.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 12 +++++++++---
 kernel/trace/trace.h |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a00d6c5c0f5..7e6cb4fe62f2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -977,6 +977,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry->field.ctx.next_pid	= next->pid;
 	entry->field.ctx.next_prio	= next->prio;
 	entry->field.ctx.next_state	= next->state;
+	entry->field.ctx.next_cpu	= task_cpu(next);
 	__trace_stack(tr, data, flags, 5);
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -1003,6 +1004,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->field.ctx.next_pid	= wakee->pid;
 	entry->field.ctx.next_prio	= wakee->prio;
 	entry->field.ctx.next_state	= wakee->state;
+	entry->field.ctx.next_cpu	= task_cpu(wakee);
 	__trace_stack(tr, data, flags, 6);
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -1636,10 +1638,11 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 			__ffs(field->ctx.prev_state) + 1 : 0;
 		S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
 		comm = trace_find_cmdline(field->ctx.next_pid);
-		trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
+		trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
 				 field->ctx.prev_pid,
 				 field->ctx.prev_prio,
 				 S, entry->type == TRACE_CTX ? "==>" : "  +",
+				 field->ctx.next_cpu,
 				 field->ctx.next_pid,
 				 field->ctx.next_prio,
 				 T, comm);
@@ -1736,11 +1739,12 @@ static int print_trace_fmt(struct trace_iterator *iter)
 			state_to_char[field->ctx.prev_state] : 'X';
 		T = field->ctx.next_state < sizeof(state_to_char) ?
 			state_to_char[field->ctx.next_state] : 'X';
-		ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
+		ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
 				       field->ctx.prev_pid,
 				       field->ctx.prev_prio,
 				       S,
 				       entry->type == TRACE_CTX ? "==>" : "  +",
+				       field->ctx.next_cpu,
 				       field->ctx.next_pid,
 				       field->ctx.next_prio,
 				       T);
@@ -1817,10 +1821,11 @@ static int print_raw_fmt(struct trace_iterator *iter)
 			state_to_char[field->ctx.next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
-		ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
+		ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
 				       field->ctx.prev_pid,
 				       field->ctx.prev_prio,
 				       S,
+				       field->ctx.next_cpu,
 				       field->ctx.next_pid,
 				       field->ctx.next_prio,
 				       T);
@@ -1893,6 +1898,7 @@ static int print_hex_fmt(struct trace_iterator *iter)
 		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_pid);
 		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, S);
+		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_cpu);
 		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_pid);
 		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, T);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5f54c875c8be..77c265f6a779 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -41,6 +41,7 @@ struct ctx_switch_entry {
 	unsigned int		next_pid;
 	unsigned char		next_prio;
 	unsigned char		next_state;
+	unsigned int		next_cpu;
 };
 
 /*
-- 
GitLab


From d3ee6d992821f471193a7ee7a00af9ebb4bf5d01 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Sep 2008 14:04:51 +0200
Subject: [PATCH 270/895] ftrace: make it depend on DEBUG_KERNEL

make most of the tracers depend on DEBUG_KERNEL - that's their intended
purpose. (most distributions have DEBUG_KERNEL enabled anyway so this is
not a practical limitation - but it simplifies the tracing menu in the
normal case)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5a9cffb0fafb..16e5bb5daaa5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -22,6 +22,7 @@ config TRACING
 config FTRACE
 	bool "Kernel Function Tracer"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select FRAME_POINTER
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
@@ -40,6 +41,7 @@ config IRQSOFF_TRACER
 	depends on TRACE_IRQFLAGS_SUPPORT
 	depends on GENERIC_TIME
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACE_IRQFLAGS
 	select TRACING
 	select TRACER_MAX_TRACE
@@ -63,6 +65,7 @@ config PREEMPT_TRACER
 	depends on GENERIC_TIME
 	depends on PREEMPT
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select TRACER_MAX_TRACE
 	help
@@ -90,6 +93,7 @@ config SYSPROF_TRACER
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
 	select TRACER_MAX_TRACE
@@ -100,6 +104,7 @@ config SCHED_TRACER
 config CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select MARKERS
 	help
@@ -120,6 +125,7 @@ config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FTRACE
 	depends on HAVE_DYNAMIC_FTRACE
+	depends on DEBUG_KERNEL
 	default y
 	help
          This option will modify all the calls to ftrace dynamically
@@ -146,6 +152,7 @@ config FTRACE_SELFTEST
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
 	depends on TRACING
+	depends on DEBUG_KERNEL
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
-- 
GitLab


From c0719e5a4b1ccc04180b7a7b71095c9fb7131919 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Sat, 6 Sep 2008 01:06:03 -0400
Subject: [PATCH 271/895] ftrace: use ftrace_release for all dynamic ftrace
 functions

ftrace_release is necessary for all uses of dynamic ftrace and not just
the archs that have CONFIG_FTRACE_MCOUNT_RECORD defined.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8b4cf38c80d2..5de9903645d5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -77,8 +77,10 @@ extern void mcount_call(void);
 
 extern int skip_trace(unsigned long ip);
 
-void ftrace_disable_daemon(void);
-void ftrace_enable_daemon(void);
+extern void ftrace_release(void *start, unsigned long size);
+
+extern void ftrace_disable_daemon(void);
+extern void ftrace_enable_daemon(void);
 
 #else
 # define skip_trace(ip)				({ 0; })
@@ -86,6 +88,7 @@ void ftrace_enable_daemon(void);
 # define ftrace_set_filter(buf, len, reset)	do { } while (0)
 # define ftrace_disable_daemon()		do { } while (0)
 # define ftrace_enable_daemon()			do { } while (0)
+static inline void ftrace_release(void *start, unsigned long size) { }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
@@ -199,12 +202,10 @@ static inline void ftrace_dump(void) { }
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
 extern void ftrace_init_module(unsigned long *start, unsigned long *end);
-extern void ftrace_release(void *start, unsigned long size);
 #else
 static inline void ftrace_init(void) { }
 static inline void
 ftrace_init_module(unsigned long *start, unsigned long *end) { }
-static inline void ftrace_release(void *start, unsigned long size) { }
 #endif
 
 
-- 
GitLab


From 644f991d4b920ab1f5043509651479420b293490 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Sat, 6 Sep 2008 01:06:04 -0400
Subject: [PATCH 272/895] ftrace: fix unlocking of hash

This must be brown paper bag week for Steven Rostedt!

While working on ftrace for PPC, I discovered that the hash locking done
when CONFIG_FTRACE_MCOUNT_RECORD is not set, is totally incorrect.

With a cut and paste error, I had the hash lock macro to lock for both
hash_lock _and_ hash_unlock!

This bug did not affect x86 since this bug was introduced when
CONFIG_FTRACE_MCOUNT_RECORD was added to x86.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b69966f0f144..c9e09d86e1d8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -170,7 +170,8 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
  */
 static DEFINE_SPINLOCK(ftrace_hash_lock);
 #define ftrace_hash_lock(flags)	  spin_lock_irqsave(&ftrace_hash_lock, flags)
-#define ftrace_hash_unlock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
+#define ftrace_hash_unlock(flags) \
+			spin_unlock_irqrestore(&ftrace_hash_lock, flags)
 #else
 /* This is protected via the ftrace_lock with MCOUNT_RECORD. */
 #define ftrace_hash_lock(flags)   do { (void)(flags); } while (0)
-- 
GitLab


From bbe5c7830c6dbde58726d44ec0337bc8b2d95d37 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Tue, 16 Sep 2008 21:54:16 +0300
Subject: [PATCH 273/895] x86 mmiotrace: fix a rare memory leak

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/mmio-mod.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 635b50e85581..754bd1eaf4f6 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -307,8 +307,10 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size,
 	map.map_id = trace->id;
 
 	spin_lock_irq(&trace_lock);
-	if (!is_enabled())
+	if (!is_enabled()) {
+		kfree(trace);
 		goto not_enabled;
+	}
 
 	mmio_trace_mapping(&map);
 	list_add_tail(&trace->list, &trace_list);
-- 
GitLab


From 45dcd8b8a8ca855591e3ac882d3a7fc255d09d43 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Tue, 16 Sep 2008 21:56:41 +0300
Subject: [PATCH 274/895] ftrace: move mmiotrace functions out of trace.c

Moves the mmiotrace specific functions from trace.c to
trace_mmiotrace.c. Functions trace_wake_up(), tracing_get_trace_entry(),
and tracing_generic_entry_update() are therefore made available outside
trace.c.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c           | 46 ++--------------------------------
 kernel/trace/trace.h           | 15 +++++------
 kernel/trace/trace_mmiotrace.c | 42 +++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 53 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7e6cb4fe62f2..d372bc535963 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -785,7 +785,7 @@ trace_next_page(struct trace_array_cpu *data, void *addr)
 	return page_address(page);
 }
 
-static inline struct trace_entry *
+struct trace_entry *
 tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
 {
 	unsigned long idx, idx_next;
@@ -821,7 +821,7 @@ tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
 	return entry;
 }
 
-static inline void
+void
 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 {
 	struct task_struct *tsk = current;
@@ -865,48 +865,6 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
 		trace_function(tr, data, ip, parent_ip, flags);
 }
 
-#ifdef CONFIG_MMIOTRACE
-void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data,
-						struct mmiotrace_rw *rw)
-{
-	struct trace_entry *entry;
-	unsigned long irq_flags;
-
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-
-	entry				= tracing_get_trace_entry(tr, data);
-	tracing_generic_entry_update(entry, 0);
-	entry->type			= TRACE_MMIO_RW;
-	entry->field.mmiorw		= *rw;
-
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
-
-	trace_wake_up();
-}
-
-void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
-						struct mmiotrace_map *map)
-{
-	struct trace_entry *entry;
-	unsigned long irq_flags;
-
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-
-	entry				= tracing_get_trace_entry(tr, data);
-	tracing_generic_entry_update(entry, 0);
-	entry->type			= TRACE_MMIO_MAP;
-	entry->field.mmiomap		= *map;
-
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
-
-	trace_wake_up();
-}
-#endif
-
 void __trace_stack(struct trace_array *tr,
 		   struct trace_array_cpu *data,
 		   unsigned long flags,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 77c265f6a779..9d39aa00a9c6 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -213,11 +213,17 @@ struct trace_iterator {
 	long			idx;
 };
 
+void trace_wake_up(void);
 void tracing_reset(struct trace_array_cpu *data);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
+struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
+						struct trace_array_cpu *data);
+void tracing_generic_entry_update(struct trace_entry *entry,
+						unsigned long flags);
+
 void ftrace(struct trace_array *tr,
 			    struct trace_array_cpu *data,
 			    unsigned long ip,
@@ -291,15 +297,6 @@ extern unsigned long ftrace_update_tot_cnt;
 extern int DYN_FTRACE_TEST_NAME(void);
 #endif
 
-#ifdef CONFIG_MMIOTRACE
-extern void __trace_mmiotrace_rw(struct trace_array *tr,
-				struct trace_array_cpu *data,
-				struct mmiotrace_rw *rw);
-extern void __trace_mmiotrace_map(struct trace_array *tr,
-				struct trace_array_cpu *data,
-				struct mmiotrace_map *map);
-#endif
-
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 #ifdef CONFIG_FTRACE
 extern int trace_selftest_startup_function(struct tracer *trace,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 9b7a936f4b1f..ef02747b26d9 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -276,6 +276,27 @@ __init static int init_mmio_trace(void)
 }
 device_initcall(init_mmio_trace);
 
+static void __trace_mmiotrace_rw(struct trace_array *tr,
+				struct trace_array_cpu *data,
+				struct mmiotrace_rw *rw)
+{
+	struct trace_entry *entry;
+	unsigned long irq_flags;
+
+	raw_local_irq_save(irq_flags);
+	__raw_spin_lock(&data->lock);
+
+	entry				= tracing_get_trace_entry(tr, data);
+	tracing_generic_entry_update(entry, 0);
+	entry->type			= TRACE_MMIO_RW;
+	entry->field.mmiorw		= *rw;
+
+	__raw_spin_unlock(&data->lock);
+	raw_local_irq_restore(irq_flags);
+
+	trace_wake_up();
+}
+
 void mmio_trace_rw(struct mmiotrace_rw *rw)
 {
 	struct trace_array *tr = mmio_trace_array;
@@ -283,6 +304,27 @@ void mmio_trace_rw(struct mmiotrace_rw *rw)
 	__trace_mmiotrace_rw(tr, data, rw);
 }
 
+static void __trace_mmiotrace_map(struct trace_array *tr,
+				struct trace_array_cpu *data,
+				struct mmiotrace_map *map)
+{
+	struct trace_entry *entry;
+	unsigned long irq_flags;
+
+	raw_local_irq_save(irq_flags);
+	__raw_spin_lock(&data->lock);
+
+	entry				= tracing_get_trace_entry(tr, data);
+	tracing_generic_entry_update(entry, 0);
+	entry->type			= TRACE_MMIO_MAP;
+	entry->field.mmiomap		= *map;
+
+	__raw_spin_unlock(&data->lock);
+	raw_local_irq_restore(irq_flags);
+
+	trace_wake_up();
+}
+
 void mmio_trace_mapping(struct mmiotrace_map *map)
 {
 	struct trace_array *tr = mmio_trace_array;
-- 
GitLab


From 801fe40001dfc263848552fb28924b766ed44ea4 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Tue, 16 Sep 2008 21:58:24 +0300
Subject: [PATCH 275/895] ftrace: add trace_vprintk()

trace_vprintk() for easier implementation of tracer specific *_printk
functions. Add check check for no_tracer, and implement
__ftrace_printk() as a wrapper.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 24 ++++++++++++++++++------
 kernel/trace/trace.h |  1 +
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d372bc535963..406de9cf2820 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3054,7 +3054,7 @@ static __init void tracer_init_debugfs(void)
 	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
 #define TRACE_CONT_BUF_SIZE sizeof(struct trace_field)
 
-int __ftrace_printk(unsigned long ip, const char *fmt, ...)
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 {
 	static DEFINE_SPINLOCK(trace_buf_lock);
 	static char trace_buf[TRACE_BUF_SIZE];
@@ -3064,10 +3064,9 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 	struct trace_entry *entry;
 	unsigned long flags;
 	long disabled;
-	va_list ap;
 	int cpu, len = 0, write, written = 0;
 
-	if (!(trace_flags & TRACE_ITER_PRINTK) || !tr->ctrl || tracing_disabled)
+	if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled)
 		return 0;
 
 	local_irq_save(flags);
@@ -3079,9 +3078,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 		goto out;
 
 	spin_lock(&trace_buf_lock);
-	va_start(ap, fmt);
-	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, ap);
-	va_end(ap);
+	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
 	len = min(len, TRACE_BUF_SIZE-1);
 	trace_buf[len] = 0;
@@ -3120,6 +3117,21 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 
 	return len;
 }
+EXPORT_SYMBOL_GPL(trace_vprintk);
+
+int __ftrace_printk(unsigned long ip, const char *fmt, ...)
+{
+	int ret;
+	va_list ap;
+
+	if (!(trace_flags & TRACE_ITER_PRINTK))
+		return 0;
+
+	va_start(ap, fmt);
+	ret = trace_vprintk(ip, fmt, ap);
+	va_end(ap);
+	return ret;
+}
 EXPORT_SYMBOL_GPL(__ftrace_printk);
 
 static int trace_panic_handler(struct notifier_block *this,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9d39aa00a9c6..be3b3cf95f4b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -333,6 +333,7 @@ extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
 extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
 				 size_t cnt);
 extern long ns2usecs(cycle_t nsec);
+extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
 
 extern unsigned long trace_flags;
 
-- 
GitLab


From 9e57fb35d711331a9b1410c5c56ebeb3733428a0 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Tue, 16 Sep 2008 22:00:34 +0300
Subject: [PATCH 276/895] x86 mmiotrace: implement mmiotrace_printk()

Offer mmiotrace users a function to inject markers from inside the kernel.
This depends on the trace_vprintk() patch.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/mmio-mod.c         | 19 ++++++++++++++++++-
 arch/x86/mm/testmmiotrace.c    |  4 ++++
 include/linux/mmiotrace.h      | 17 +++++++++++++++--
 kernel/trace/trace_mmiotrace.c |  5 +++++
 4 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 754bd1eaf4f6..5e2e2e72ee80 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -75,7 +75,7 @@ static LIST_HEAD(trace_list);		/* struct remap_trace */
  *   and trace_lock.
  * - Routines depending on is_enabled() must take trace_lock.
  * - trace_list users must hold trace_lock.
- * - is_enabled() guarantees that mmio_trace_record is allowed.
+ * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed.
  * - pre/post callbacks assume the effect of is_enabled() being true.
  */
 
@@ -379,6 +379,23 @@ void mmiotrace_iounmap(volatile void __iomem *addr)
 		iounmap_trace_core(addr);
 }
 
+int mmiotrace_printk(const char *fmt, ...)
+{
+	int ret = 0;
+	va_list args;
+	unsigned long flags;
+	va_start(args, fmt);
+
+	spin_lock_irqsave(&trace_lock, flags);
+	if (is_enabled())
+		ret = mmio_trace_printk(fmt, args);
+	spin_unlock_irqrestore(&trace_lock, flags);
+
+	va_end(args);
+	return ret;
+}
+EXPORT_SYMBOL(mmiotrace_printk);
+
 static void clear_trace_list(void)
 {
 	struct remap_trace *trace;
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index d877c5b423ef..ab50a8d7402c 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -3,6 +3,7 @@
  */
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/mmiotrace.h>
 
 #define MODULE_NAME "testmmiotrace"
 
@@ -13,6 +14,7 @@ MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
 static void do_write_test(void __iomem *p)
 {
 	unsigned int i;
+	mmiotrace_printk("Write test.\n");
 	for (i = 0; i < 256; i++)
 		iowrite8(i, p + i);
 	for (i = 1024; i < (5 * 1024); i += 2)
@@ -24,6 +26,7 @@ static void do_write_test(void __iomem *p)
 static void do_read_test(void __iomem *p)
 {
 	unsigned int i;
+	mmiotrace_printk("Read test.\n");
 	for (i = 0; i < 256; i++)
 		ioread8(p + i);
 	for (i = 1024; i < (5 * 1024); i += 2)
@@ -39,6 +42,7 @@ static void do_test(void)
 		pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
 		return;
 	}
+	mmiotrace_printk("ioremap returned %p.\n", p);
 	do_write_test(p);
 	do_read_test(p);
 	iounmap(p);
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 61d19e1b7a0b..60cc3bf5c538 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -34,11 +34,15 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p);
 /* Called from page fault handler. */
 extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
 
-/* Called from ioremap.c */
 #ifdef CONFIG_MMIOTRACE
+/* Called from ioremap.c */
 extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
 							void __iomem *addr);
 extern void mmiotrace_iounmap(volatile void __iomem *addr);
+
+/* For anyone to insert markers. Remember trailing newline. */
+extern int mmiotrace_printk(const char *fmt, ...)
+				__attribute__ ((format (printf, 1, 2)));
 #else
 static inline void mmiotrace_ioremap(resource_size_t offset,
 					unsigned long size, void __iomem *addr)
@@ -48,7 +52,15 @@ static inline void mmiotrace_ioremap(resource_size_t offset,
 static inline void mmiotrace_iounmap(volatile void __iomem *addr)
 {
 }
-#endif /* CONFIG_MMIOTRACE_HOOKS */
+
+static inline int mmiotrace_printk(const char *fmt, ...)
+				__attribute__ ((format (printf, 1, 0)));
+
+static inline int mmiotrace_printk(const char *fmt, ...)
+{
+	return 0;
+}
+#endif /* CONFIG_MMIOTRACE */
 
 enum mm_io_opcode {
 	MMIO_READ = 0x1,     /* struct mmiotrace_rw */
@@ -81,5 +93,6 @@ extern void enable_mmiotrace(void);
 extern void disable_mmiotrace(void);
 extern void mmio_trace_rw(struct mmiotrace_rw *rw);
 extern void mmio_trace_mapping(struct mmiotrace_map *map);
+extern int mmio_trace_printk(const char *fmt, va_list args);
 
 #endif /* MMIOTRACE_H */
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index ef02747b26d9..767d1faf56e5 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -335,3 +335,8 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
 	__trace_mmiotrace_map(tr, data, map);
 	preempt_enable();
 }
+
+int mmio_trace_printk(const char *fmt, va_list args)
+{
+	return trace_vprintk(0, fmt, args);
+}
-- 
GitLab


From fc5e27ae4b45a0619701a83f30d9b7fad7ed9400 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Tue, 16 Sep 2008 22:02:27 +0300
Subject: [PATCH 277/895] mmiotrace: handle TRACE_PRINT entries

Also make trace_seq_print_cont() non-static, and add a newline if the
seq buffer can't hold all data.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c           | 31 +++++++++++--------------------
 kernel/trace/trace.h           | 19 +++++++++++++++++++
 kernel/trace/trace_mmiotrace.c | 23 +++++++++++++++++++++++
 3 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 406de9cf2820..7e7154f77009 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -199,23 +199,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
 	return nsecs / 1000;
 }
 
-/*
- * trace_flag_type is an enumeration that holds different
- * states when a trace occurs. These are:
- *  IRQS_OFF	- interrupts were disabled
- *  NEED_RESCED - reschedule is requested
- *  HARDIRQ	- inside an interrupt handler
- *  SOFTIRQ	- inside a softirq handler
- *  CONT	- multiple entries hold the trace item
- */
-enum trace_flag_type {
-	TRACE_FLAG_IRQS_OFF		= 0x01,
-	TRACE_FLAG_NEED_RESCHED		= 0x02,
-	TRACE_FLAG_HARDIRQ		= 0x04,
-	TRACE_FLAG_SOFTIRQ		= 0x08,
-	TRACE_FLAG_CONT			= 0x10,
-};
-
 /*
  * TRACE_ITER_SYM_MASK masks the options in trace_flags that
  * control the output of kernel symbols.
@@ -1517,12 +1500,16 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
 
 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
 
-static void
-trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
+/*
+ * The message is supposed to contain an ending newline.
+ * If the printing stops prematurely, try to add a newline of our own.
+ */
+void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 {
 	struct trace_array *tr = iter->tr;
 	struct trace_array_cpu *data = tr->data[iter->cpu];
 	struct trace_entry *ent;
+	bool ok = true;
 
 	ent = trace_entry_idx(tr, data, iter, iter->cpu);
 	if (!ent || ent->type != TRACE_CONT) {
@@ -1531,10 +1518,14 @@ trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 	}
 
 	do {
-		trace_seq_printf(s, "%s", ent->cont.buf);
+		if (ok)
+			ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0);
 		__trace_iterator_increment(iter, iter->cpu);
 		ent = trace_entry_idx(tr, data, iter, iter->cpu);
 	} while (ent && ent->type == TRACE_CONT);
+
+	if (!ok)
+		trace_seq_putc(s, '\n');
 }
 
 static int
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index be3b3cf95f4b..648433d18ccb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -71,6 +71,23 @@ struct print_entry {
 	char			buf[];
 };
 
+/*
+ * trace_flag_type is an enumeration that holds different
+ * states when a trace occurs. These are:
+ *  IRQS_OFF	- interrupts were disabled
+ *  NEED_RESCED - reschedule is requested
+ *  HARDIRQ	- inside an interrupt handler
+ *  SOFTIRQ	- inside a softirq handler
+ *  CONT	- multiple entries hold the trace item
+ */
+enum trace_flag_type {
+	TRACE_FLAG_IRQS_OFF		= 0x01,
+	TRACE_FLAG_NEED_RESCHED		= 0x02,
+	TRACE_FLAG_HARDIRQ		= 0x04,
+	TRACE_FLAG_SOFTIRQ		= 0x08,
+	TRACE_FLAG_CONT			= 0x10,
+};
+
 /*
  * The trace field - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
@@ -330,6 +347,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
 
 extern void *head_page(struct trace_array_cpu *data);
 extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
+extern void trace_seq_print_cont(struct trace_seq *s,
+				 struct trace_iterator *iter);
 extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
 				 size_t cnt);
 extern long ns2usecs(cycle_t nsec);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 767d1faf56e5..a108c326f36e 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -245,6 +245,27 @@ static int mmio_print_map(struct trace_iterator *iter)
 	return 0;
 }
 
+static int mmio_print_mark(struct trace_iterator *iter)
+{
+	struct trace_entry *entry = iter->ent;
+	const char *msg		= entry->field.print.buf;
+	struct trace_seq *s	= &iter->seq;
+	unsigned long long t	= ns2usecs(entry->field.t);
+	unsigned long usec_rem	= do_div(t, 1000000ULL);
+	unsigned secs		= (unsigned long)t;
+	int ret;
+
+	/* The trailing newline must be in the message. */
+	ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
+	if (!ret)
+		return 0;
+
+	if (entry->field.flags & TRACE_FLAG_CONT)
+		trace_seq_print_cont(s, iter);
+
+	return 1;
+}
+
 /* return 0 to abort printing without consuming current entry in pipe mode */
 static int mmio_print_line(struct trace_iterator *iter)
 {
@@ -253,6 +274,8 @@ static int mmio_print_line(struct trace_iterator *iter)
 		return mmio_print_rw(iter);
 	case TRACE_MMIO_MAP:
 		return mmio_print_map(iter);
+	case TRACE_PRINT:
+		return mmio_print_mark(iter);
 	default:
 		return 1; /* ignore unknown entries */
 	}
-- 
GitLab


From 4427414170a63331a9cc36b9598502c5cdfe453b Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Tue, 16 Sep 2008 22:03:56 +0300
Subject: [PATCH 278/895] mmiotrace: remove left-over marker cruft

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/mmio-mod.c    | 64 ---------------------------------------
 include/linux/mmiotrace.h |  3 +-
 2 files changed, 1 insertion(+), 66 deletions(-)

diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 5e2e2e72ee80..2c4baa88f2cb 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -56,13 +56,6 @@ struct remap_trace {
 static DEFINE_PER_CPU(struct trap_reason, pf_reason);
 static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
 
-#if 0 /* XXX: no way gather this info anymore */
-/* Access to this is not per-cpu. */
-static DEFINE_PER_CPU(atomic_t, dropped);
-#endif
-
-static struct dentry *marker_file;
-
 static DEFINE_MUTEX(mmiotrace_mutex);
 static DEFINE_SPINLOCK(trace_lock);
 static atomic_t mmiotrace_enabled;
@@ -97,44 +90,6 @@ static bool is_enabled(void)
 	return atomic_read(&mmiotrace_enabled);
 }
 
-#if 0 /* XXX: needs rewrite */
-/*
- * Write callback for the debugfs entry:
- * Read a marker and write it to the mmio trace log
- */
-static ssize_t write_marker(struct file *file, const char __user *buffer,
-						size_t count, loff_t *ppos)
-{
-	char *event = NULL;
-	struct mm_io_header *headp;
-	ssize_t len = (count > 65535) ? 65535 : count;
-
-	event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
-	if (!event)
-		return -ENOMEM;
-
-	headp = (struct mm_io_header *)event;
-	headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
-	headp->data_len = len;
-
-	if (copy_from_user(event + sizeof(*headp), buffer, len)) {
-		kfree(event);
-		return -EFAULT;
-	}
-
-	spin_lock_irq(&trace_lock);
-#if 0 /* XXX: convert this to use tracing */
-	if (is_enabled())
-		relay_write(chan, event, sizeof(*headp) + len);
-	else
-#endif
-		len = -EINVAL;
-	spin_unlock_irq(&trace_lock);
-	kfree(event);
-	return len;
-}
-#endif
-
 static void print_pte(unsigned long address)
 {
 	unsigned int level;
@@ -481,26 +436,12 @@ static void leave_uniprocessor(void)
 }
 #endif
 
-#if 0 /* XXX: out of order */
-static struct file_operations fops_marker = {
-	.owner =	THIS_MODULE,
-	.write =	write_marker
-};
-#endif
-
 void enable_mmiotrace(void)
 {
 	mutex_lock(&mmiotrace_mutex);
 	if (is_enabled())
 		goto out;
 
-#if 0 /* XXX: tracing does not support text entries */
-	marker_file = debugfs_create_file("marker", 0660, dir, NULL,
-								&fops_marker);
-	if (!marker_file)
-		pr_err(NAME "marker file creation failed.\n");
-#endif
-
 	if (nommiotrace)
 		pr_info(NAME "MMIO tracing disabled.\n");
 	enter_uniprocessor();
@@ -525,11 +466,6 @@ void disable_mmiotrace(void)
 
 	clear_trace_list(); /* guarantees: no more kmmio callbacks */
 	leave_uniprocessor();
-	if (marker_file) {
-		debugfs_remove(marker_file);
-		marker_file = NULL;
-	}
-
 	pr_info(NAME "disabled.\n");
 out:
 	mutex_unlock(&mmiotrace_mutex);
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 60cc3bf5c538..139d7c88d9c9 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -67,8 +67,7 @@ enum mm_io_opcode {
 	MMIO_WRITE = 0x2,    /* struct mmiotrace_rw */
 	MMIO_PROBE = 0x3,    /* struct mmiotrace_map */
 	MMIO_UNPROBE = 0x4,  /* struct mmiotrace_map */
-	MMIO_MARKER = 0x5,   /* raw char data */
-	MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */
+	MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */
 };
 
 struct mmiotrace_rw {
-- 
GitLab


From 5bf9a1ee350a10feb94107de32a203d81fbbe706 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Tue, 16 Sep 2008 22:06:42 +0300
Subject: [PATCH 279/895] ftrace: inject markers via trace_marker file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow a user to inject a marker (TRACE_PRINT entry) into the trace ring
buffer. The related file operations are derived from code by FrÃ©dÃ©ric
Weisbecker <fweisbec@gmail.com>.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/tracers/mmiotrace.txt |  5 +-
 kernel/trace/trace.c                | 76 +++++++++++++++++++++++++++--
 kernel/trace/trace.h                |  4 ++
 3 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index a4afb560a45b..5bbbe2096223 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -36,7 +36,7 @@ $ mount -t debugfs debugfs /debug
 $ echo mmiotrace > /debug/tracing/current_tracer
 $ cat /debug/tracing/trace_pipe > mydump.txt &
 Start X or whatever.
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 $ echo none > /debug/tracing/current_tracer
 Check for lost events.
 
@@ -59,9 +59,8 @@ The 'cat' process should stay running (sleeping) in the background.
 Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
 accesses to areas that are ioremapped while mmiotrace is active.
 
-[Unimplemented feature:]
 During tracing you can place comments (markers) into the trace by
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 This makes it easier to see which part of the (huge) trace corresponds to
 which action. It is recommended to place descriptive markers about what you
 do.
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7e7154f77009..eee1fd964898 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2879,6 +2879,66 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static int tracing_open_mark(struct inode *inode, struct file *filp)
+{
+	int ret;
+
+	ret = tracing_open_generic(inode, filp);
+	if (ret)
+		return ret;
+
+	if (current_trace == &no_tracer)
+		return -ENODEV;
+
+	return 0;
+}
+
+static int mark_printk(const char *fmt, ...)
+{
+	int ret;
+	va_list args;
+	va_start(args, fmt);
+	ret = trace_vprintk(0, fmt, args);
+	va_end(args);
+	return ret;
+}
+
+static ssize_t
+tracing_mark_write(struct file *filp, const char __user *ubuf,
+					size_t cnt, loff_t *fpos)
+{
+	char *buf;
+	char *end;
+	struct trace_array *tr = &global_trace;
+
+	if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled)
+		return -EINVAL;
+
+	if (cnt > TRACE_BUF_SIZE)
+		cnt = TRACE_BUF_SIZE;
+
+	buf = kmalloc(cnt + 1, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, ubuf, cnt)) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	/* Cut from the first nil or newline. */
+	buf[cnt] = '\0';
+	end = strchr(buf, '\n');
+	if (end)
+		*end = '\0';
+
+	cnt = mark_printk("%s\n", buf);
+	kfree(buf);
+	*fpos += cnt;
+
+	return cnt;
+}
+
 static struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
@@ -2910,6 +2970,11 @@ static struct file_operations tracing_entries_fops = {
 	.write		= tracing_entries_write,
 };
 
+static struct file_operations tracing_mark_fops = {
+	.open		= tracing_open_mark,
+	.write		= tracing_mark_write,
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 static ssize_t
@@ -3027,6 +3092,12 @@ static __init void tracer_init_debugfs(void)
 		pr_warning("Could not create debugfs "
 			   "'trace_entries' entry\n");
 
+	entry = debugfs_create_file("trace_marker", 0220, d_tracer,
+				    NULL, &tracing_mark_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'trace_marker' entry\n");
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 	entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
 				    &ftrace_update_tot_cnt,
@@ -3040,11 +3111,6 @@ static __init void tracer_init_debugfs(void)
 #endif
 }
 
-#define TRACE_BUF_SIZE 1024
-#define TRACE_PRINT_BUF_SIZE \
-	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
-#define TRACE_CONT_BUF_SIZE sizeof(struct trace_field)
-
 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 {
 	static DEFINE_SPINLOCK(trace_buf_lock);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 648433d18ccb..42f65d0097f0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -124,6 +124,10 @@ struct trace_entry {
 };
 
 #define TRACE_ENTRY_SIZE	sizeof(struct trace_entry)
+#define TRACE_BUF_SIZE		1024
+#define TRACE_PRINT_BUF_SIZE \
+	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
+#define TRACE_CONT_BUF_SIZE	sizeof(struct trace_field)
 
 /*
  * The CPU trace array - it consists of thousands of trace entries
-- 
GitLab


From fb1b6d8b5154c692172a424e45fbd0573295cb93 Mon Sep 17 00:00:00 2001
From: Steven Noonan <steven@uplinklabs.net>
Date: Fri, 19 Sep 2008 03:06:43 -0700
Subject: [PATCH 280/895] ftrace: add nop tracer

A no-op tracer which can serve two purposes:

 1. A template for development of a new tracer.
 2. A convenient way to see ftrace_printk() calls without
    an irrelevant trace making the output messy.

[ mingo@elte.hu: resolved conflicts ]
Signed-off-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig          | 10 ++++++
 kernel/trace/Makefile         |  1 +
 kernel/trace/trace.h          |  4 +++
 kernel/trace/trace_nop.c      | 65 +++++++++++++++++++++++++++++++++++
 kernel/trace/trace_selftest.c |  9 +++++
 5 files changed, 89 insertions(+)
 create mode 100644 kernel/trace/trace_nop.c

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 16e5bb5daaa5..d7b2de744631 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -101,6 +101,16 @@ config SCHED_TRACER
 	  This tracer tracks the latency of the highest priority task
 	  to be scheduled in, starting from the point it has woken up.
 
+config NOP_TRACER
+	bool "NOP Tracer"
+	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer does nothing. The primary purpose for it is to
+	  politely print the output of ftrace_printk() calls without
+	  the overhead of an irrelevant trace taking place.
+
 config CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
 	depends on HAVE_FTRACE
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 58ec61c44bd6..73ba13f5a461 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_FTRACE) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_NOP_TRACER) += trace_nop.o
 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 42f65d0097f0..447d4b9b6391 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -339,6 +339,10 @@ extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
 extern int trace_selftest_startup_wakeup(struct tracer *trace,
 					 struct trace_array *tr);
 #endif
+#ifdef CONFIG_NOP_TRACER
+extern int trace_selftest_startup_nop(struct tracer *trace,
+					 struct trace_array *tr);
+#endif
 #ifdef CONFIG_CONTEXT_SWITCH_TRACER
 extern int trace_selftest_startup_sched_switch(struct tracer *trace,
 					       struct trace_array *tr);
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
new file mode 100644
index 000000000000..dafaefb84038
--- /dev/null
+++ b/kernel/trace/trace_nop.c
@@ -0,0 +1,65 @@
+/*
+ * nop tracer
+ *
+ * Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+
+#include "trace.h"
+
+static struct trace_array	*ctx_trace;
+
+static void start_nop_trace(struct trace_array *tr)
+{
+	/* Nothing to do! */
+}
+
+static void stop_nop_trace(struct trace_array *tr)
+{
+	/* Nothing to do! */
+}
+
+static void nop_trace_init(struct trace_array *tr)
+{
+	ctx_trace = tr;
+
+	if (tr->ctrl)
+		start_nop_trace(tr);
+}
+
+static void nop_trace_reset(struct trace_array *tr)
+{
+	if (tr->ctrl)
+		stop_nop_trace(tr);
+}
+
+static void nop_trace_ctrl_update(struct trace_array *tr)
+{
+	/* When starting a new trace, reset the buffers */
+	if (tr->ctrl)
+		start_nop_trace(tr);
+	else
+		stop_nop_trace(tr);
+}
+
+static struct tracer nop_trace __read_mostly =
+{
+	.name		= "nop",
+	.init		= nop_trace_init,
+	.reset		= nop_trace_reset,
+	.ctrl_update	= nop_trace_ctrl_update,
+#ifdef CONFIG_FTRACE_SELFTEST
+	.selftest	= trace_selftest_startup_nop,
+#endif
+};
+
+__init static int init_nop_trace(void)
+{
+	return register_tracer(&nop_trace);
+}
+device_initcall(init_nop_trace);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 630715bbd572..82db9103b9bc 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -418,6 +418,15 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
 }
 #endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
 
+#ifdef CONFIG_NOP_TRACER
+int
+trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
+{
+	/* What could possibly go wrong? */
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_SCHED_TRACER
 static int trace_wakeup_test_thread(void *data)
 {
-- 
GitLab


From 71c67d58b5660f8e42c7d4c3e77cbc03fac5ed31 Mon Sep 17 00:00:00 2001
From: Steven Noonan <steven@uplinklabs.net>
Date: Sat, 20 Sep 2008 01:00:37 -0700
Subject: [PATCH 281/895] ftrace: mcount_addr defined but not used

When CONFIG_DYNAMIC_FTRACE isn't used, neither is mcount_addr. This
patch eliminates that warning.

Signed-off-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c9e09d86e1d8..7694f3e59797 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -36,14 +36,6 @@
 int ftrace_enabled __read_mostly;
 static int last_ftrace_enabled;
 
-/*
- * Since MCOUNT_ADDR may point to mcount itself, we do not want
- * to get it confused by reading a reference in the code as we
- * are parsing on objcopy output of text. Use a variable for
- * it instead.
- */
-static unsigned long mcount_addr = MCOUNT_ADDR;
-
 /*
  * ftrace_disabled is set when an anomaly is discovered.
  * ftrace_disabled is much stronger than ftrace_enabled.
@@ -178,6 +170,14 @@ static DEFINE_SPINLOCK(ftrace_hash_lock);
 #define ftrace_hash_unlock(flags) do { } while(0)
 #endif
 
+/*
+ * Since MCOUNT_ADDR may point to mcount itself, we do not want
+ * to get it confused by reading a reference in the code as we
+ * are parsing on objcopy output of text. Use a variable for
+ * it instead.
+ */
+static unsigned long mcount_addr = MCOUNT_ADDR;
+
 static struct task_struct *ftraced_task;
 
 enum {
-- 
GitLab


From 8925b394eca0bb0a444a6487d702d0f650e94a10 Mon Sep 17 00:00:00 2001
From: Steven Noonan <steven@uplinklabs.net>
Date: Sat, 20 Sep 2008 01:00:38 -0700
Subject: [PATCH 282/895] trace: remove pointless ifdefs

The functions are already 'extern' anyway, so there's no problem
with linkage. Removing these ifdefs also helps find any potential
compiler errors.

Suggested by Andrew Morton.

Signed-off-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 447d4b9b6391..c8c687088b4d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -319,38 +319,22 @@ extern int DYN_FTRACE_TEST_NAME(void);
 #endif
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
-#ifdef CONFIG_FTRACE
 extern int trace_selftest_startup_function(struct tracer *trace,
 					   struct trace_array *tr);
-#endif
-#ifdef CONFIG_IRQSOFF_TRACER
 extern int trace_selftest_startup_irqsoff(struct tracer *trace,
 					  struct trace_array *tr);
-#endif
-#ifdef CONFIG_PREEMPT_TRACER
 extern int trace_selftest_startup_preemptoff(struct tracer *trace,
 					     struct trace_array *tr);
-#endif
-#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
 extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
 						 struct trace_array *tr);
-#endif
-#ifdef CONFIG_SCHED_TRACER
 extern int trace_selftest_startup_wakeup(struct tracer *trace,
 					 struct trace_array *tr);
-#endif
-#ifdef CONFIG_NOP_TRACER
 extern int trace_selftest_startup_nop(struct tracer *trace,
 					 struct trace_array *tr);
-#endif
-#ifdef CONFIG_CONTEXT_SWITCH_TRACER
 extern int trace_selftest_startup_sched_switch(struct tracer *trace,
 					       struct trace_array *tr);
-#endif
-#ifdef CONFIG_SYSPROF_TRACER
 extern int trace_selftest_startup_sysprof(struct tracer *trace,
 					       struct trace_array *tr);
-#endif
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
-- 
GitLab


From 35cb5ed01261f5669657d2f1720aca902299887d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= <fweisbec@gmail.com>
Date: Sun, 21 Sep 2008 20:10:14 +0200
Subject: [PATCH 283/895] tracing/ftrace: make nop tracer reset previous
 entries

If nop tracer is selected, some old entries from the previous tracer
could still be enqueued. Tracing have to be reset.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_nop.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index dafaefb84038..9fb02c17ad0c 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -26,8 +26,12 @@ static void stop_nop_trace(struct trace_array *tr)
 
 static void nop_trace_init(struct trace_array *tr)
 {
+	int cpu;
 	ctx_trace = tr;
 
+	for_each_online_cpu(cpu)
+		tracing_reset(tr->data[cpu]);
+
 	if (tr->ctrl)
 		start_nop_trace(tr);
 }
-- 
GitLab


From 2a3a4f669df2164288d11406d11d5e4933bf5e53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= <fweisbec@gmail.com>
Date: Sun, 21 Sep 2008 20:12:14 +0200
Subject: [PATCH 284/895] tracing/ftrace: tracing engine depends on Nop Tracer

Now that the nop tracer is used as the default tracer by
replacing the "none" tracer, tracing engine depends on it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d7b2de744631..254328dec672 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,8 +1,13 @@
 #
 # Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
 #
+
+config NOP_TRACER
+	bool
+
 config HAVE_FTRACE
 	bool
+	select NOP_TRACER
 
 config HAVE_DYNAMIC_FTRACE
 	bool
@@ -101,16 +106,6 @@ config SCHED_TRACER
 	  This tracer tracks the latency of the highest priority task
 	  to be scheduled in, starting from the point it has woken up.
 
-config NOP_TRACER
-	bool "NOP Tracer"
-	depends on HAVE_FTRACE
-	depends on DEBUG_KERNEL
-	select TRACING
-	help
-	  This tracer does nothing. The primary purpose for it is to
-	  politely print the output of ftrace_printk() calls without
-	  the overhead of an irrelevant trace taking place.
-
 config CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
 	depends on HAVE_FTRACE
-- 
GitLab


From 43a15386c4faf913f7d70a47748c266d6210cd6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= <fweisbec@gmail.com>
Date: Sun, 21 Sep 2008 20:16:30 +0200
Subject: [PATCH 285/895] tracing/ftrace: replace none tracer by nop tracer

Replace "none" tracer by the recently created "nop" tracer.
Both are pretty similar except that nop accepts TRACE_PRINT
or TRACE_SPECIAL entries.

And as a consequence, changing the size of the ring buffer now
requires that tracing has already been disabled.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c     | 49 ++++++++--------------------------------
 kernel/trace/trace.h     |  2 ++
 kernel/trace/trace_nop.c |  7 +-----
 3 files changed, 12 insertions(+), 46 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index eee1fd964898..f2ef72fa3f17 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -142,24 +142,6 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 /* trace_flags holds iter_ctrl options */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
 
-static notrace void no_trace_init(struct trace_array *tr)
-{
-	int cpu;
-
-	ftrace_function_enabled = 0;
-	if(tr->ctrl)
-		for_each_online_cpu(cpu)
-			tracing_reset(tr->data[cpu]);
-	tracer_enabled = 0;
-}
-
-/* dummy trace to disable tracing */
-static struct tracer no_tracer __read_mostly = {
-	.name		= "none",
-	.init		= no_trace_init
-};
-
-
 /**
  * trace_wake_up - wake up tasks waiting for trace input
  *
@@ -962,7 +944,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 	long disabled;
 	int cpu;
 
-	if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl)
+	if (tracing_disabled || !tr->ctrl)
 		return;
 
 	local_irq_save(flags);
@@ -2795,6 +2777,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	unsigned long val;
 	char buf[64];
 	int i, ret;
+	struct trace_array *tr = filp->private_data;
 
 	if (cnt >= sizeof(buf))
 		return -EINVAL;
@@ -2814,9 +2797,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 
 	mutex_lock(&trace_types_lock);
 
-	if (current_trace != &no_tracer) {
+	if (tr->ctrl) {
 		cnt = -EBUSY;
-		pr_info("ftrace: set current_tracer to none"
+		pr_info("ftrace: please disable tracing"
 			" before modifying buffer size\n");
 		goto out;
 	}
@@ -2879,20 +2862,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
-static int tracing_open_mark(struct inode *inode, struct file *filp)
-{
-	int ret;
-
-	ret = tracing_open_generic(inode, filp);
-	if (ret)
-		return ret;
-
-	if (current_trace == &no_tracer)
-		return -ENODEV;
-
-	return 0;
-}
-
 static int mark_printk(const char *fmt, ...)
 {
 	int ret;
@@ -2911,7 +2880,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 	char *end;
 	struct trace_array *tr = &global_trace;
 
-	if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled)
+	if (!tr->ctrl || tracing_disabled)
 		return -EINVAL;
 
 	if (cnt > TRACE_BUF_SIZE)
@@ -2971,7 +2940,7 @@ static struct file_operations tracing_entries_fops = {
 };
 
 static struct file_operations tracing_mark_fops = {
-	.open		= tracing_open_mark,
+	.open		= tracing_open_generic,
 	.write		= tracing_mark_write,
 };
 
@@ -3123,7 +3092,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	long disabled;
 	int cpu, len = 0, write, written = 0;
 
-	if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled)
+	if (!tr->ctrl || tracing_disabled)
 		return 0;
 
 	local_irq_save(flags);
@@ -3539,8 +3508,8 @@ __init static int tracer_alloc_buffers(void)
 
 	trace_init_cmdlines();
 
-	register_tracer(&no_tracer);
-	current_trace = &no_tracer;
+	register_tracer(&nop_trace);
+	current_trace = &nop_trace;
 
 	/* All seems OK, enable tracing */
 	global_trace.ctrl = tracer_enabled;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c8c687088b4d..cb2c3fb7dd54 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -369,4 +369,6 @@ enum trace_iterator_flags {
 	TRACE_ITER_PRINTK		= 0x400,
 };
 
+extern struct tracer nop_trace;
+
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 9fb02c17ad0c..16c9ba060bab 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -51,7 +51,7 @@ static void nop_trace_ctrl_update(struct trace_array *tr)
 		stop_nop_trace(tr);
 }
 
-static struct tracer nop_trace __read_mostly =
+struct tracer nop_trace __read_mostly =
 {
 	.name		= "nop",
 	.init		= nop_trace_init,
@@ -62,8 +62,3 @@ static struct tracer nop_trace __read_mostly =
 #endif
 };
 
-__init static int init_nop_trace(void)
-{
-	return register_tracer(&nop_trace);
-}
-device_initcall(init_nop_trace);
-- 
GitLab


From 05736a427f7e16be948ccbf39782bd3a6ae16b14 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 22 Sep 2008 14:55:47 -0700
Subject: [PATCH 286/895] ftrace: warn on failure to disable mcount callers

With the recent updates to ftrace, there should not be any failures when
modifying the code. If there is, then we need to warn about it.

This patch has a cleaned up version of the code that I used to discover
that the weak symbols were causing failures.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7694f3e59797..4dda4f60a2a9 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -576,6 +576,16 @@ static void ftrace_shutdown_replenish(void)
 	ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
 }
 
+static void print_ip_ins(const char *fmt, unsigned char *p)
+{
+	int i;
+
+	printk(KERN_CONT "%s", fmt);
+
+	for (i = 0; i < MCOUNT_INSN_SIZE; i++)
+		printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+}
+
 static int
 ftrace_code_disable(struct dyn_ftrace *rec)
 {
@@ -590,6 +600,23 @@ ftrace_code_disable(struct dyn_ftrace *rec)
 
 	failed = ftrace_modify_code(ip, call, nop);
 	if (failed) {
+		switch (failed) {
+		case 1:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace faulted on modifying ");
+			print_ip_sym(ip);
+			break;
+		case 2:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace failed to modify ");
+			print_ip_sym(ip);
+			print_ip_ins(" expected: ", call);
+			print_ip_ins(" actual: ", (unsigned char *)ip);
+			print_ip_ins(" replace: ", nop);
+			printk(KERN_CONT "\n");
+			break;
+		}
+
 		rec->flags |= FTRACE_FL_FAILED;
 		return 0;
 	}
-- 
GitLab


From 37a52f5ef120b93734bb2461744512b55695f69c Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 23 Sep 2008 15:05:46 -0700
Subject: [PATCH 287/895] x86: suppress trivial sparse signedness warnings

Could just as easily change the three casts to cast to the correct
type...this patch changes the type of ftrace_nop instead.

Supresses sparse warnings:

 arch/x86/kernel/ftrace.c:157:14: warning: incorrect type in assignment (different signedness)
 arch/x86/kernel/ftrace.c:157:14:    expected long *static [toplevel] ftrace_nop
 arch/x86/kernel/ftrace.c:157:14:    got unsigned long *<noident>
 arch/x86/kernel/ftrace.c:161:14: warning: incorrect type in assignment (different signedness)
 arch/x86/kernel/ftrace.c:161:14:    expected long *static [toplevel] ftrace_nop
 arch/x86/kernel/ftrace.c:161:14:    got unsigned long *<noident>
 arch/x86/kernel/ftrace.c:165:14: warning: incorrect type in assignment (different signedness)
 arch/x86/kernel/ftrace.c:165:14:    expected long *static [toplevel] ftrace_nop
 arch/x86/kernel/ftrace.c:165:14:    got unsigned long *<noident>

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 082d99642622..66d900248fc2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -22,7 +22,7 @@
 
 
 /* Long is fine, even if it is only 4 bytes ;-) */
-static long *ftrace_nop;
+static unsigned long *ftrace_nop;
 
 union ftrace_code_union {
 	char code[MCOUNT_INSN_SIZE];
-- 
GitLab


From ac2b86fdef5b44f194eefaa6b7b6aea9423d1bc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= <fweisbec@gmail.com>
Date: Wed, 24 Sep 2008 16:31:56 +0100
Subject: [PATCH 288/895] x86/ftrace: use uaccess in atomic context

With latest -tip I get this bug:

[   49.439988] in_atomic():0, irqs_disabled():1
[   49.440118] INFO: lockdep is turned off.
[   49.440118] Pid: 2814, comm: modprobe Tainted: G        W 2.6.27-rc7 #4
[   49.440118]  [<c01215e1>] __might_sleep+0xe1/0x120
[   49.440118]  [<c01148ea>] ftrace_modify_code+0x2a/0xd0
[   49.440118]  [<c01148a2>] ? ftrace_test_p6nop+0x0/0xa
[   49.440118]  [<c016e80e>] __ftrace_update_code+0xfe/0x2f0
[   49.440118]  [<c01148a2>] ? ftrace_test_p6nop+0x0/0xa
[   49.440118]  [<c016f190>] ftrace_convert_nops+0x50/0x80
[   49.440118]  [<c016f1d6>] ftrace_init_module+0x16/0x20
[   49.440118]  [<c015498b>] load_module+0x185b/0x1d30
[   49.440118]  [<c01767a0>] ? find_get_page+0x0/0xf0
[   49.440118]  [<c02463c0>] ? sprintf+0x0/0x30
[   49.440118]  [<c034e012>] ? mutex_lock_interruptible_nested+0x1f2/0x350
[   49.440118]  [<c0154eb3>] sys_init_module+0x53/0x1b0
[   49.440118]  [<c0352340>] ? do_page_fault+0x0/0x740
[   49.440118]  [<c0104012>] syscall_call+0x7/0xb
[   49.440118]  =======================

It is because ftrace_modify_code() calls copy_to_user and
copy_from_user.
These functions have been inserted after guessing that there
couldn't be any race condition but copy_[to/from]_user might
sleep and __ftrace_update_code is called with local_irq_saved.

These function have been inserted since this commit:
d5e92e8978fd2574e415dc2792c5eb592978243d:
"ftrace: x86 use copy from user function"

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 66d900248fc2..222507e8157b 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -71,13 +71,13 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	 * No real locking needed, this code is run through
 	 * kstop_machine, or before SMP starts.
 	 */
-	if (__copy_from_user(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
+	if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
 		return 1;
 
 	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
 		return 2;
 
-	WARN_ON_ONCE(__copy_to_user((char __user *)ip, new_code,
+	WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
 				    MCOUNT_INSN_SIZE));
 
 	sync_core();
-- 
GitLab


From d74185ed27651ad8d920b37d7851306ad01f7d6f Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Mon, 29 Sep 2008 16:00:05 +0800
Subject: [PATCH 289/895] markers: fix unregister bug and reenter bug

unregister bug:

codes using makers are typically calling marker_probe_unregister()
and then destroying the data that marker_probe_func needs(or
unloading this module). This is bug when the corresponding
marker_probe_func is still running(on other cpus),
it is using the destroying/ed data.

we should call synchronize_sched() after marker_update_probes().

reenter bug:

marker_probe_register(), marker_probe_unregister() and
marker_probe_unregister_private_data() are not reentrant safe
functions. these 3 functions release markers_mutex and then
require it again and do "entry->oldptr = old; ...", but entry->oldptr
maybe is using now for these 3 functions may reenter when markers_mutex
is released.

we use synchronize_sched() instead of call_rcu_sched() to fix
this bug. actually we can do:
"
if (entry->rcu_pending)
		rcu_barrier_sched();
"
after require markers_mutex again. but synchronize_sched()
is better and simpler. For these 3 functions are not critical path.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/marker.c | 52 ++++++-------------------------------------------
 1 file changed, 6 insertions(+), 46 deletions(-)

diff --git a/kernel/marker.c b/kernel/marker.c
index 7d1faecd7a51..9f76c4a622ec 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -60,9 +60,6 @@ struct marker_entry {
 	struct marker_probe_closure single;
 	struct marker_probe_closure *multi;
 	int refcount;	/* Number of times armed. 0 if disarmed. */
-	struct rcu_head rcu;
-	void *oldptr;
-	unsigned char rcu_pending:1;
 	unsigned char ptype:1;
 	char name[0];	/* Contains name'\0'format'\0' */
 };
@@ -199,16 +196,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
 
-static void free_old_closure(struct rcu_head *head)
-{
-	struct marker_entry *entry = container_of(head,
-		struct marker_entry, rcu);
-	kfree(entry->oldptr);
-	/* Make sure we free the data before setting the pending flag to 0 */
-	smp_wmb();
-	entry->rcu_pending = 0;
-}
-
 static void debug_print_probes(struct marker_entry *entry)
 {
 	int i;
@@ -417,7 +404,6 @@ static struct marker_entry *add_marker(const char *name, const char *format)
 	e->multi = NULL;
 	e->ptype = 0;
 	e->refcount = 0;
-	e->rcu_pending = 0;
 	hlist_add_head(&e->hlist, head);
 	return e;
 }
@@ -447,9 +433,6 @@ static int remove_marker(const char *name)
 	if (e->single.func != __mark_empty_function)
 		return -EBUSY;
 	hlist_del(&e->hlist);
-	/* Make sure the call_rcu has been executed */
-	if (e->rcu_pending)
-		rcu_barrier_sched();
 	kfree(e);
 	return 0;
 }
@@ -479,12 +462,8 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
 	e->multi = (*entry)->multi;
 	e->ptype = (*entry)->ptype;
 	e->refcount = (*entry)->refcount;
-	e->rcu_pending = 0;
 	hlist_add_before(&e->hlist, &(*entry)->hlist);
 	hlist_del(&(*entry)->hlist);
-	/* Make sure the call_rcu has been executed */
-	if ((*entry)->rcu_pending)
-		rcu_barrier_sched();
 	kfree(*entry);
 	*entry = e;
 	trace_mark(core_marker_format, "name %s format %s",
@@ -658,12 +637,6 @@ int marker_probe_register(const char *name, const char *format,
 			goto end;
 		}
 	}
-	/*
-	 * If we detect that a call_rcu is pending for this marker,
-	 * make sure it's executed now.
-	 */
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
 	old = marker_entry_add_probe(entry, probe, probe_private);
 	if (IS_ERR(old)) {
 		ret = PTR_ERR(old);
@@ -671,14 +644,11 @@ int marker_probe_register(const char *name, const char *format,
 	}
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
+	synchronize_sched();
+	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	WARN_ON(!entry);
-	entry->oldptr = old;
-	entry->rcu_pending = 1;
-	/* write rcu_pending before calling the RCU callback */
-	smp_wmb();
-	call_rcu_sched(&entry->rcu, free_old_closure);
 end:
 	mutex_unlock(&markers_mutex);
 	return ret;
@@ -708,20 +678,15 @@ int marker_probe_unregister(const char *name,
 	entry = get_marker(name);
 	if (!entry)
 		goto end;
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
 	old = marker_entry_remove_probe(entry, probe, probe_private);
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
+	synchronize_sched();
+	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	if (!entry)
 		goto end;
-	entry->oldptr = old;
-	entry->rcu_pending = 1;
-	/* write rcu_pending before calling the RCU callback */
-	smp_wmb();
-	call_rcu_sched(&entry->rcu, free_old_closure);
 	remove_marker(name);	/* Ignore busy error message */
 	ret = 0;
 end:
@@ -787,19 +752,14 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
 		ret = -ENOENT;
 		goto end;
 	}
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
 	old = marker_entry_remove_probe(entry, NULL, probe_private);
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
+	synchronize_sched();
+	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker_from_private_data(probe, probe_private);
 	WARN_ON(!entry);
-	entry->oldptr = old;
-	entry->rcu_pending = 1;
-	/* write rcu_pending before calling the RCU callback */
-	smp_wmb();
-	call_rcu_sched(&entry->rcu, free_old_closure);
 	remove_marker(entry->name);	/* Ignore busy error message */
 end:
 	mutex_unlock(&markers_mutex);
-- 
GitLab


From ca2db6cf30d6a45798b7a760d0c4f7735b16799d Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Tue, 30 Sep 2008 01:49:39 -0400
Subject: [PATCH 290/895] tracepoints: use rcu sched

Make tracepoints use rcu sched. (cleanup)

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/tracepoint.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index c7c62a4a75f5..db39961a0d03 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -80,10 +80,7 @@ static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
 	entry->rcu_pending = 1;
 	/* write rcu_pending before calling the RCU callback */
 	smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
-	synchronize_sched();	/* Until we have the call_rcu_sched() */
-#endif
-	call_rcu(&entry->rcu, free_old_closure);
+	call_rcu_sched(&entry->rcu, free_old_closure);
 }
 
 static void debug_print_probes(struct tracepoint_entry *entry)
@@ -245,9 +242,9 @@ static int remove_tracepoint(const char *name)
 	if (e->refcount)
 		return -EBUSY;
 	hlist_del(&e->hlist);
-	/* Make sure the call_rcu has been executed */
+	/* Make sure the call_rcu_sched has been executed */
 	if (e->rcu_pending)
-		rcu_barrier();
+		rcu_barrier_sched();
 	kfree(e);
 	return 0;
 }
@@ -344,11 +341,11 @@ int tracepoint_probe_register(const char *name, void *probe)
 		}
 	}
 	/*
-	 * If we detect that a call_rcu is pending for this tracepoint,
+	 * If we detect that a call_rcu_sched is pending for this tracepoint,
 	 * make sure it's executed now.
 	 */
 	if (entry->rcu_pending)
-		rcu_barrier();
+		rcu_barrier_sched();
 	old = tracepoint_entry_add_probe(entry, probe);
 	if (IS_ERR(old)) {
 		ret = PTR_ERR(old);
@@ -387,7 +384,7 @@ int tracepoint_probe_unregister(const char *name, void *probe)
 	if (!entry)
 		goto end;
 	if (entry->rcu_pending)
-		rcu_barrier();
+		rcu_barrier_sched();
 	old = tracepoint_entry_remove_probe(entry, probe);
 	mutex_unlock(&tracepoints_mutex);
 	tracepoint_update_probes();		/* may update entry */
-- 
GitLab


From 9a1e9693f534174945154197fec4ec92f168ce21 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Tue, 30 Sep 2008 01:51:12 -0400
Subject: [PATCH 291/895] tracepoints: fix reentrancy

The tracepoints had the same problem markers did have wrt reentrancy. Apply a
similar fix using a rcu_barrier after each tracepoint mutex lock.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/tracepoint.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index db39961a0d03..f2b7c28a4708 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -356,6 +356,8 @@ int tracepoint_probe_register(const char *name, void *probe)
 	mutex_lock(&tracepoints_mutex);
 	entry = get_tracepoint(name);
 	WARN_ON(!entry);
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	tracepoint_entry_free_old(entry, old);
 end:
 	mutex_unlock(&tracepoints_mutex);
@@ -392,6 +394,8 @@ int tracepoint_probe_unregister(const char *name, void *probe)
 	entry = get_tracepoint(name);
 	if (!entry)
 		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	tracepoint_entry_free_old(entry, old);
 	remove_tracepoint(name);	/* Ignore busy error message */
 	ret = 0;
-- 
GitLab


From e98d0eabef2748d88fa58760d104e8e68517406b Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Mon, 29 Sep 2008 11:05:13 -0400
Subject: [PATCH 292/895] markers: marker_synchronize_unregister()

Create marker_synchronize_unregister() which must be called before the end of
exit() to make sure every probe callers have exited the non preemptible section
and thus are not executing the probe code anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/marker.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 1290653f9241..889196c7fbb1 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -160,4 +160,11 @@ extern int marker_probe_unregister_private_data(marker_probe_func *probe,
 extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
 	int num);
 
+/*
+ * marker_synchronize_unregister must be called between the last marker probe
+ * unregistration and the end of module exit to make sure there is no caller
+ * executing a probe when it is freed.
+ */
+#define marker_synchronize_unregister() synchronize_sched()
+
 #endif
-- 
GitLab


From e2d3b75dbc486253c910722486ac64087f96c59f Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Mon, 29 Sep 2008 11:08:03 -0400
Subject: [PATCH 293/895] markers: fix unregister bug and reenter bug, cleanup

Use the new rcu_read_lock_sched/unlock_sched() in marker code around the call
site instead of preempt_disable/enable(). It helps reviewing the code more
easily.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/marker.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/marker.c b/kernel/marker.c
index 9f76c4a622ec..fe5ca72f9659 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -100,11 +100,11 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
 	char ptype;
 
 	/*
-	 * preempt_disable does two things : disabling preemption to make sure
-	 * the teardown of the callbacks can be done correctly when they are in
-	 * modules and they insure RCU read coherency.
+	 * rcu_read_lock_sched does two things : disabling preemption to make
+	 * sure the teardown of the callbacks can be done correctly when they
+	 * are in modules and they insure RCU read coherency.
 	 */
-	preempt_disable();
+	rcu_read_lock_sched();
 	ptype = mdata->ptype;
 	if (likely(!ptype)) {
 		marker_probe_func *func;
@@ -142,7 +142,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
 			va_end(args);
 		}
 	}
-	preempt_enable();
+	rcu_read_unlock_sched();
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb);
 
@@ -159,7 +159,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 	va_list args;	/* not initialized */
 	char ptype;
 
-	preempt_disable();
+	rcu_read_lock_sched();
 	ptype = mdata->ptype;
 	if (likely(!ptype)) {
 		marker_probe_func *func;
@@ -192,7 +192,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 			multi[i].func(multi[i].probe_private, call_private,
 				mdata->format, &args);
 	}
-	preempt_enable();
+	rcu_read_unlock_sched();
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
 
@@ -539,7 +539,7 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
  * Disable a marker and its probe callback.
  * Note: only waiting an RCU period after setting elem->call to the empty
  * function insures that the original callback is not used anymore. This insured
- * by preempt_disable around the call site.
+ * by rcu_read_lock_sched around the call site.
  */
 static void disable_marker(struct marker *elem)
 {
-- 
GitLab


From 531d297569014e8f889b167a2d15d72429faead1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Mon, 29 Sep 2008 11:09:15 -0400
Subject: [PATCH 294/895] markers: probe example, fix teardown

Need a marker_synchronize_unregister() before the end of exit() to make sure
every probe callers have exited the non preemptible section and thus are not
executing the probe code anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 samples/markers/probe-example.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
index c8e099d4d1fd..2dfb3b32937e 100644
--- a/samples/markers/probe-example.c
+++ b/samples/markers/probe-example.c
@@ -81,6 +81,7 @@ static void __exit probe_fini(void)
 			probe_array[i].probe_func, &probe_array[i]);
 	printk(KERN_INFO "Number of event b : %u\n",
 			atomic_read(&eventb_count));
+	marker_synchronize_unregister();
 }
 
 module_init(probe_init);
-- 
GitLab


From 91a8d46c47e7eb1c53c181e4328a3cfa45ae4ad3 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Mon, 29 Sep 2008 11:10:34 -0400
Subject: [PATCH 295/895] markers: documentation fix for teardown

Document the need for a marker_synchronize_unregister() before the end of
exit() to make sure every probe callers have exited the non preemptible
section and thus are not executing the probe code anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/markers.txt | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index d9f50a19fa0c..089f6138fcd9 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -50,10 +50,12 @@ Connecting a function (probe) to a marker is done by providing a probe (function
 to call) for the specific marker through marker_probe_register() and can be
 activated by calling marker_arm(). Marker deactivation can be done by calling
 marker_disarm() as many times as marker_arm() has been called. Removing a probe
-is done through marker_probe_unregister(); it will disarm the probe and make
-sure there is no caller left using the probe when it returns. Probe removal is
-preempt-safe because preemption is disabled around the probe call. See the
-"Probe example" section below for a sample probe module.
+is done through marker_probe_unregister(); it will disarm the probe.
+marker_synchronize_unregister() must be called before the end of the module exit
+function to make sure there is no caller left using the probe. This, and the
+fact that preemption is disabled around the probe call, make sure that probe
+removal and module unload are safe. See the "Probe example" section below for a
+sample probe module.
 
 The marker mechanism supports inserting multiple instances of the same marker.
 Markers can be put in inline functions, inlined static functions, and
-- 
GitLab


From 5b9261d93e5fa3db4995d5b77b5ed365166e001c Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Mon, 29 Sep 2008 11:11:47 -0400
Subject: [PATCH 296/895] sputrace: use marker_synchronize_unregister()

We need a marker_synchronize_unregister() before the end of exit() to make sure
every probe callers have exited the non preemptible section and thus are not
executing the probe code anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/platforms/cell/spufs/sputrace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 92d20e993ede..2ece399f2862 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -232,6 +232,7 @@ static void __exit sputrace_exit(void)
 
 	remove_proc_entry("sputrace", NULL);
 	kfree(sputrace_log);
+	marker_synchronize_unregister();
 }
 
 module_init(sputrace_init);
-- 
GitLab


From ed86a59071a4ccd0e9bcefc61e013759a21eda78 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Wed, 1 Oct 2008 12:03:25 -0400
Subject: [PATCH 297/895] markers: re-enable fast batch registration

Lai Jiangshan discovered a reentrancy issue with markers and fixed it by
adding synchronize_sched() calls at each registration/unregistraiton.

It works, but it removes the ability to do batch
registration/unregistration and can cause registration of ~100 markers
to take about 30 seconds on a loaded machine (synchronize_sched() is
much slower on such workloads).

This patch implements a version of the fix which won't slow down marker batch
registration/unregistration. It also go back to the original non-synchronized
reg/unreg.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/marker.c | 58 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/kernel/marker.c b/kernel/marker.c
index fe5ca72f9659..05a25776f71f 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -60,6 +60,9 @@ struct marker_entry {
 	struct marker_probe_closure single;
 	struct marker_probe_closure *multi;
 	int refcount;	/* Number of times armed. 0 if disarmed. */
+	struct rcu_head rcu;
+	void *oldptr;
+	unsigned char rcu_pending:1;
 	unsigned char ptype:1;
 	char name[0];	/* Contains name'\0'format'\0' */
 };
@@ -196,6 +199,16 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
 
+static void free_old_closure(struct rcu_head *head)
+{
+	struct marker_entry *entry = container_of(head,
+		struct marker_entry, rcu);
+	kfree(entry->oldptr);
+	/* Make sure we free the data before setting the pending flag to 0 */
+	smp_wmb();
+	entry->rcu_pending = 0;
+}
+
 static void debug_print_probes(struct marker_entry *entry)
 {
 	int i;
@@ -404,6 +417,7 @@ static struct marker_entry *add_marker(const char *name, const char *format)
 	e->multi = NULL;
 	e->ptype = 0;
 	e->refcount = 0;
+	e->rcu_pending = 0;
 	hlist_add_head(&e->hlist, head);
 	return e;
 }
@@ -433,6 +447,9 @@ static int remove_marker(const char *name)
 	if (e->single.func != __mark_empty_function)
 		return -EBUSY;
 	hlist_del(&e->hlist);
+	/* Make sure the call_rcu has been executed */
+	if (e->rcu_pending)
+		rcu_barrier_sched();
 	kfree(e);
 	return 0;
 }
@@ -462,8 +479,12 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
 	e->multi = (*entry)->multi;
 	e->ptype = (*entry)->ptype;
 	e->refcount = (*entry)->refcount;
+	e->rcu_pending = 0;
 	hlist_add_before(&e->hlist, &(*entry)->hlist);
 	hlist_del(&(*entry)->hlist);
+	/* Make sure the call_rcu has been executed */
+	if ((*entry)->rcu_pending)
+		rcu_barrier_sched();
 	kfree(*entry);
 	*entry = e;
 	trace_mark(core_marker_format, "name %s format %s",
@@ -637,6 +658,12 @@ int marker_probe_register(const char *name, const char *format,
 			goto end;
 		}
 	}
+	/*
+	 * If we detect that a call_rcu is pending for this marker,
+	 * make sure it's executed now.
+	 */
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	old = marker_entry_add_probe(entry, probe, probe_private);
 	if (IS_ERR(old)) {
 		ret = PTR_ERR(old);
@@ -644,11 +671,16 @@ int marker_probe_register(const char *name, const char *format,
 	}
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
-	synchronize_sched();
-	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	WARN_ON(!entry);
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu_sched(&entry->rcu, free_old_closure);
 end:
 	mutex_unlock(&markers_mutex);
 	return ret;
@@ -678,15 +710,22 @@ int marker_probe_unregister(const char *name,
 	entry = get_marker(name);
 	if (!entry)
 		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	old = marker_entry_remove_probe(entry, probe, probe_private);
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
-	synchronize_sched();
-	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	if (!entry)
 		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu_sched(&entry->rcu, free_old_closure);
 	remove_marker(name);	/* Ignore busy error message */
 	ret = 0;
 end:
@@ -752,14 +791,21 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
 		ret = -ENOENT;
 		goto end;
 	}
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	old = marker_entry_remove_probe(entry, NULL, probe_private);
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
-	synchronize_sched();
-	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker_from_private_data(probe, probe_private);
 	WARN_ON(!entry);
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu_sched(&entry->rcu, free_old_closure);
 	remove_marker(entry->name);	/* Ignore busy error message */
 end:
 	mutex_unlock(&markers_mutex);
-- 
GitLab


From 53c8c8fdfd2d2d515bdcb3d0f2a11d1f3f42ece1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 3 Oct 2008 11:52:54 -0400
Subject: [PATCH 298/895] markers: turn marker_synchronize_unregister() into an
 inline

Turn marker synchronize unregister into a static inline. There is no
reason to keep it as a macro over a static inline.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/marker.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c7fbb1..38e32e781ed7 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -13,6 +13,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/rcupdate.h>
 
 struct module;
 struct marker;
@@ -165,6 +166,9 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
  * unregistration and the end of module exit to make sure there is no caller
  * executing a probe when it is freed.
  */
-#define marker_synchronize_unregister() synchronize_sched()
+static inline void marker_synchronize_unregister(void)
+{
+	synchronize_sched();
+}
 
 #endif
-- 
GitLab


From 48043bcdf8f398d28e75ffed6ee85208d751f87f Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Wed, 8 Oct 2008 10:23:36 +0800
Subject: [PATCH 299/895] markers: fix unchecked format

when the second, third... probe is registered, its format is
not checked, this patch fixes it.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/marker.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/kernel/marker.c b/kernel/marker.c
index 05a25776f71f..3b75d0e8b5a4 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -653,11 +653,17 @@ int marker_probe_register(const char *name, const char *format,
 	entry = get_marker(name);
 	if (!entry) {
 		entry = add_marker(name, format);
-		if (IS_ERR(entry)) {
+		if (IS_ERR(entry))
 			ret = PTR_ERR(entry);
-			goto end;
-		}
+	} else if (format) {
+		if (!entry->format)
+			ret = marker_set_format(&entry, format);
+		else if (strcmp(entry->format, format))
+			ret = -EPERM;
 	}
+	if (ret)
+		goto end;
+
 	/*
 	 * If we detect that a call_rcu is pending for this marker,
 	 * make sure it's executed now.
-- 
GitLab


From 1b7ae37c030a9fbbb5ebbf5d7bbfd7208cf805b7 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Fri, 10 Oct 2008 14:43:57 +0800
Subject: [PATCH 300/895] markers: bit-field is not thread-safe nor smp-safe

bit-field is not thread-safe nor smp-safe.

struct marker_entry.rcu_pending is not protected by any lock
in rcu-callback free_old_closure().
so we must turn it into a safe type.

detail:

I suppose rcu_pending and ptype are store in struct marker_entry.tmp1

free_old_closure() side:           change ptype side:

                                |  load struct marker_entry.tmp1
--------------------------------|--------------------------------
                                |  change ptype bit in tmp1
load struct marker_entry.tmp1   |
change rcu_pending bit in tmp1  |
store tmp1                      |
--------------------------------|--------------------------------
                                |  store tmp1

now this result equals that free_old_closure() do not change rcu_pending
bit, bug! This bug will cause redundant rcu_barrier_sched() called.
not too harmful.

----- corresponding:

free_old_closure() side:           change ptype side:

load struct marker_entry.tmp1   |
--------------------------------|--------------------------------
                                |  load struct marker_entry.tmp1
change rcu_pending bit in tmp1  |
                                |  change ptype bit in tmp1
                                |  store tmp1
--------------------------------|--------------------------------
store tmp1                      |

now this result equals that change ptype side do not change ptype
bit, bug! this bug cause marker_probe_cb() access to invalid memory.
oops!

see also: http://en.wikipedia.org/wiki/Bit_field

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/marker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/marker.c b/kernel/marker.c
index 3b75d0e8b5a4..e9c6b2bc9400 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -62,7 +62,7 @@ struct marker_entry {
 	int refcount;	/* Number of times armed. 0 if disarmed. */
 	struct rcu_head rcu;
 	void *oldptr;
-	unsigned char rcu_pending:1;
+	int rcu_pending;
 	unsigned char ptype:1;
 	char name[0];	/* Contains name'\0'format'\0' */
 };
-- 
GitLab


From aa5d9151f745b6ee6a236a1f109118034277eb92 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 13 Sep 2008 09:36:06 -0700
Subject: [PATCH 301/895] tracing/fastboot: add a script to visualize the
 kernel boot process / time

When optimizing the kernel boot time, it's very valuable to visualize
what is going on at which time. In addition, with the fastboot asynchronous
initcall level, it's very valuable to see which initcall gets run where
and when.

This patch adds a script to turn a dmesg into a SVG graph (that can be
shown with tools such as InkScape, Gimp or Firefox) and a small change
to the initcall code to print the PID of the thread calling the initcall
(so that the script can work out the parallelism).

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
---
 init/main.c          |   3 +-
 scripts/bootgraph.pl | 138 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100644 scripts/bootgraph.pl

diff --git a/init/main.c b/init/main.c
index ded1fae965ab..16abba05c826 100644
--- a/init/main.c
+++ b/init/main.c
@@ -711,7 +711,8 @@ int do_one_initcall(initcall_t fn)
 	int result;
 
 	if (initcall_debug) {
-		printk("calling  %pF\n", fn);
+		printk("calling  %pF", fn);
+		printk(" @ %i\n",  task_pid_nr(current));
 		t0 = ktime_get();
 	}
 
diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
new file mode 100644
index 000000000000..d459b8bdef02
--- /dev/null
+++ b/scripts/bootgraph.pl
@@ -0,0 +1,138 @@
+#!/usr/bin/perl
+
+# Copyright 2008, Intel Corporation
+#
+# This file is part of the Linux kernel
+#
+# This program file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program in a file named COPYING; if not, write to the
+# Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301 USA
+#
+# Authors:
+# 	Arjan van de Ven <arjan@linux.intel.com>
+
+
+#
+# This script turns a dmesg output into a SVG graphic that shows which
+# functions take how much time. You can view SVG graphics with various
+# programs, including Inkscape, The Gimp and Firefox.
+#
+#
+# For this script to work, the kernel needs to be compiled with the
+# CONFIG_PRINTK_TIME configuration option enabled, and with
+# "initcall_debug" passed on the kernel command line.
+#
+# usage:
+# 	dmesg | perl scripts/bootgraph.pl > output.svg
+#
+
+my @rows;
+my %start, %end, %row;
+my $done = 0;
+my $rowcount = 0;
+my $maxtime = 0;
+my $count = 0;
+while (<>) {
+	my $line = $_;
+	if ($line =~ /([0-9\.]+)\] calling  ([a-zA-Z\_]+)\+/) {
+		my $func = $2;
+		if ($done == 0) {
+			$start{$func} = $1;
+		}
+		$row{$func} = 1;
+		if ($line =~ /\@ ([0-9]+)/) {
+			my $pid = $1;
+			if (!defined($rows[$pid])) {
+				$rowcount = $rowcount + 1;
+				$rows[$pid] = $rowcount;
+			}
+			$row{$func} = $rows[$pid];
+		}
+		$count = $count + 1;
+	}
+
+	if ($line =~ /([0-9\.]+)\] initcall ([a-zA-Z\_]+)\+.*returned/) {
+		if ($done == 0) {
+			$end{$2} = $1;
+			$maxtime = $1;
+		}
+	}
+	if ($line =~ /Write protecting the/) {
+		$done = 1;
+	}
+}
+
+if ($count == 0) {
+	print "No data found in the dmesg. Make sure CONFIG_PRINTK_TIME is enabled and\n";
+	print "that initcall_debug is passed on the kernel command line.\n\n";
+	print "Usage: \n";
+	print "      dmesg | perl scripts/bootgraph.pl > output.svg\n\n";
+	exit;
+}
+
+print "<?xml version=\"1.0\" standalone=\"no\"?> \n";
+print "<svg width=\"1000\" height=\"100%\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n";
+
+my @styles;
+
+$styles[0] = "fill:rgb(0,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[1] = "fill:rgb(0,255,0);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[2] = "fill:rgb(255,0,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[3] = "fill:rgb(255,255,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[4] = "fill:rgb(255,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[5] = "fill:rgb(0,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[6] = "fill:rgb(0,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[7] = "fill:rgb(0,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[8] = "fill:rgb(255,0,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[9] = "fill:rgb(255,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[10] = "fill:rgb(255,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[11] = "fill:rgb(128,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+
+my $mult = 950.0 / $maxtime;
+my $threshold = 0.0500 / $maxtime;
+my $stylecounter = 0;
+while (($key,$value) = each %start) {
+	my $duration = $end{$key} - $start{$key};
+
+	if ($duration >= $threshold) {
+		my $s, $s2, $e, $y;
+		$s = $value * $mult;
+		$s2 = $s + 6;
+		$e = $end{$key} * $mult;
+		$w = $e - $s;
+
+		$y = $row{$key} * 150;
+		$y2 = $y + 4;
+
+		$style = $styles[$stylecounter];
+		$stylecounter = $stylecounter + 1;
+		if ($stylecounter > 11) {
+			$stylecounter = 0;
+		};
+
+		print "<rect x=\"$s\" width=\"$w\" y=\"$y\" height=\"145\" style=\"$style\"/>\n";
+		print "<text transform=\"translate($s2,$y2) rotate(90)\">$key</text>\n";
+	}
+}
+
+
+# print the time line on top
+my $time = 0.0;
+while ($time < $maxtime) {
+	my $s2 = $time * $mult;
+	print "<text transform=\"translate($s2,89) rotate(90)\">$time</text>\n";
+	$time = $time + 0.1;
+}
+
+print "</svg>\n";
-- 
GitLab


From d13744cd6e3fef373a3fe656ac349b4e7c49ff79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= <fweisbec@gmail.com>
Date: Tue, 23 Sep 2008 11:32:08 +0100
Subject: [PATCH 302/895] tracing/ftrace: add the boot tracer

Add the boot/initcall tracer.

It's primary purpose is to be able to trace the initcalls.

It is intended to be used with scripts/bootgraph.pl after some small
improvements.

Note that it is not active after its init. To avoid tracing (and so
crashing) before the whole tracing engine init, you have to explicitly
call start_boot_trace() after do_pre_smp_initcalls() to enable it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h    |  19 +++++++
 kernel/trace/trace.h      |   4 ++
 kernel/trace/trace_boot.c | 101 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 124 insertions(+)
 create mode 100644 kernel/trace/trace_boot.c

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 5de9903645d5..91954eb6460f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -5,6 +5,8 @@
 
 #include <linux/linkage.h>
 #include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/types.h>
 
 extern int ftrace_enabled;
 extern int
@@ -209,4 +211,21 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
 #endif
 
 
+struct boot_trace {
+	pid_t			caller;
+	initcall_t		func;
+	int			result;
+	unsigned long long	duration;
+};
+
+#ifdef CONFIG_BOOT_TRACER
+extern void trace_boot(struct boot_trace *it);
+extern void start_boot_trace(void);
+#else
+static inline void trace_boot(struct boot_trace *it) { }
+static inline void start_boot_trace(void) { }
+#endif
+
+
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cb2c3fb7dd54..b28bf8812efc 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/clocksource.h>
 #include <linux/mmiotrace.h>
+#include <linux/ftrace.h>
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
@@ -19,6 +20,7 @@ enum trace_type {
 	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
+	TRACE_BOOT,
 
 	__TRACE_LAST_TYPE
 };
@@ -30,6 +32,7 @@ struct ftrace_entry {
 	unsigned long		ip;
 	unsigned long		parent_ip;
 };
+extern struct tracer boot_tracer;
 
 /*
  * Context switch trace entry - which task (and prio) we switched from/to:
@@ -108,6 +111,7 @@ struct trace_field {
 		struct print_entry		print;
 		struct mmiotrace_rw		mmiorw;
 		struct mmiotrace_map		mmiomap;
+		struct boot_trace		initcall;
 	};
 };
 
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
new file mode 100644
index 000000000000..c65ef8ffd6b4
--- /dev/null
+++ b/kernel/trace/trace_boot.c
@@ -0,0 +1,101 @@
+/*
+ * ring buffer based initcalls tracer
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+
+#include "trace.h"
+
+static struct trace_array *boot_trace;
+static int trace_boot_enabled;
+
+
+/* Should be started after do_pre_smp_initcalls() in init/main.c */
+void start_boot_trace(void)
+{
+	trace_boot_enabled = 1;
+}
+
+void stop_boot_trace(struct trace_array *tr)
+{
+	trace_boot_enabled = 0;
+}
+
+static void boot_trace_init(struct trace_array *tr)
+{
+	int cpu;
+	boot_trace = tr;
+
+	trace_boot_enabled = 0;
+
+	for_each_cpu_mask(cpu, cpu_possible_map)
+		tracing_reset(tr->data[cpu]);
+}
+
+static void boot_trace_ctrl_update(struct trace_array *tr)
+{
+	if (tr->ctrl)
+		start_boot_trace();
+	else
+		stop_boot_trace(tr);
+}
+
+static int initcall_print_line(struct trace_iterator *iter)
+{
+	int ret = 1;
+	struct trace_entry *entry = iter->ent;
+	struct boot_trace *it = &entry->field.initcall;
+	struct trace_seq *s = &iter->seq;
+
+	if (iter->ent->type == TRACE_BOOT)
+		ret = trace_seq_printf(s, "%pF called from %i "
+				       "returned %d after %lld msecs\n",
+				       it->func, it->caller, it->result,
+				       it->duration);
+	if (ret)
+		return 1;
+	return 0;
+}
+
+struct tracer boot_tracer __read_mostly =
+{
+	.name		= "initcall",
+	.init		= boot_trace_init,
+	.reset		= stop_boot_trace,
+	.ctrl_update	= boot_trace_ctrl_update,
+	.print_line	= initcall_print_line,
+};
+
+
+void trace_boot(struct boot_trace *it)
+{
+	struct trace_entry *entry;
+	struct trace_array_cpu *data;
+	unsigned long irq_flags;
+	struct trace_array *tr = boot_trace;
+
+	if (!trace_boot_enabled)
+		return;
+
+	preempt_disable();
+	data = tr->data[smp_processor_id()];
+
+	raw_local_irq_save(irq_flags);
+	__raw_spin_lock(&data->lock);
+
+	entry = tracing_get_trace_entry(tr, data);
+	tracing_generic_entry_update(entry, 0);
+	entry->type = TRACE_BOOT;
+	entry->field.initcall = *it;
+
+	__raw_spin_unlock(&data->lock);
+	raw_local_irq_restore(irq_flags);
+	trace_wake_up();
+
+	preempt_enable();
+}
-- 
GitLab


From b5ad384e79add1d87fff54070000dadcf218ffab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= <fweisbec@gmail.com>
Date: Tue, 23 Sep 2008 11:34:32 +0100
Subject: [PATCH 303/895] tracing/ftrace: make tracing suitable to run the boot
 tracer

The tracing engine have now to be init in early_initcall to set the
boot tracer. Only the debugfs settings will be initialized at
fs_initcall time.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f2ef72fa3f17..6ada059832a6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2990,7 +2990,7 @@ struct dentry *tracing_init_dentry(void)
 #include "trace_selftest.c"
 #endif
 
-static __init void tracer_init_debugfs(void)
+static __init int tracer_init_debugfs(void)
 {
 	struct dentry *d_tracer;
 	struct dentry *entry;
@@ -3078,6 +3078,7 @@ static __init void tracer_init_debugfs(void)
 #ifdef CONFIG_SYSPROF_TRACER
 	init_tracer_sysprof_debugfs(d_tracer);
 #endif
+	return 0;
 }
 
 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
@@ -3504,17 +3505,20 @@ __init static int tracer_alloc_buffers(void)
 		pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
 	pr_info("   actual entries %ld\n", global_trace.entries);
 
-	tracer_init_debugfs();
-
 	trace_init_cmdlines();
 
 	register_tracer(&nop_trace);
+#ifdef CONFIG_BOOT_TRACER
+	register_tracer(&boot_tracer);
+	current_trace = &boot_tracer;
+	current_trace->init(&global_trace);
+#else
 	current_trace = &nop_trace;
+#endif
 
 	/* All seems OK, enable tracing */
 	global_trace.ctrl = tracer_enabled;
 	tracing_disabled = 0;
-
 	atomic_notifier_chain_register(&panic_notifier_list,
 				       &trace_panic_notifier);
 
@@ -3548,4 +3552,5 @@ __init static int tracer_alloc_buffers(void)
 	}
 	return ret;
 }
-fs_initcall(tracer_alloc_buffers);
+early_initcall(tracer_alloc_buffers);
+fs_initcall(tracer_init_debugfs);
-- 
GitLab


From 1f5c2abbdeb2bb07b20c6a66bfecefe6c867b1ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= <fweisbec@gmail.com>
Date: Tue, 23 Sep 2008 11:36:20 +0100
Subject: [PATCH 304/895] tracing/ftrace: give an entry on the config for boot
 tracer

Bring the entry to choose the boot tracer on the kernel config.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig  | 12 ++++++++++++
 kernel/trace/Makefile |  1 +
 2 files changed, 13 insertions(+)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 254328dec672..81a17ef6b942 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -116,6 +116,18 @@ config CONTEXT_SWITCH_TRACER
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
+config BOOT_TRACER
+	bool "Trace boot initcalls"
+	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer helps developers to optimize boot times: it records
+	  the timings of the initcalls. Its aim is to be parsed by the
+	  /scripts/bootgraph.pl tool to produce pretty graphics about
+	  boot inefficiencies, giving a visual representation of the
+	  delays during initcalls.
+
 config STACK_TRACER
 	bool "Trace max stack"
 	depends on HAVE_FTRACE
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 73ba13f5a461..35a07f7cfa86 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -22,5 +22,6 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
 obj-$(CONFIG_NOP_TRACER) += trace_nop.o
 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 
 libftrace-y := ftrace.o
-- 
GitLab


From 3bf77af6e1fef1124bf71d81f9f84885f0ee0dea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= <fweisbec@gmail.com>
Date: Tue, 23 Sep 2008 11:38:18 +0100
Subject: [PATCH 305/895] tracing/ftrace: launch boot tracing after pre-smp
 initcalls

Launch the boot tracing inside the initcall_debug area. Old printk
have not been removed to keep the old way of initcall tracing for
backward compatibility.

[ mingo@elte.hu: resolved conflicts ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/main.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/init/main.c b/init/main.c
index 16abba05c826..1e39a1eab190 100644
--- a/init/main.c
+++ b/init/main.c
@@ -709,10 +709,12 @@ int do_one_initcall(initcall_t fn)
 	ktime_t t0, t1, delta;
 	char msgbuf[64];
 	int result;
+	struct boot_trace it;
 
 	if (initcall_debug) {
-		printk("calling  %pF", fn);
-		printk(" @ %i\n",  task_pid_nr(current));
+		it.caller = task_pid_nr(current);
+		it.func = fn;
+		printk("calling  %pF @ %i\n", fn, it.caller);
 		t0 = ktime_get();
 	}
 
@@ -721,10 +723,11 @@ int do_one_initcall(initcall_t fn)
 	if (initcall_debug) {
 		t1 = ktime_get();
 		delta = ktime_sub(t1, t0);
-
-		printk("initcall %pF returned %d after %Ld msecs\n",
-			fn, result,
-			(unsigned long long) delta.tv64 >> 20);
+		it.result = result;
+		it.duration = (unsigned long long) delta.tv64 >> 20;
+		printk("initcall %pF returned %d after %Ld msecs\n", fn,
+			result, it.duration);
+		trace_boot(&it);
 	}
 
 	msgbuf[0] = 0;
@@ -859,6 +862,7 @@ static int __init kernel_init(void * unused)
 	smp_prepare_cpus(setup_max_cpus);
 
 	do_pre_smp_initcalls();
+	start_boot_trace();
 
 	smp_init();
 	sched_init_smp();
-- 
GitLab


From 3ce2b9200da8b7170cc7463b7ee4212fad7b291e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= <fweisbec@gmail.com>
Date: Wed, 24 Sep 2008 10:36:09 +0100
Subject: [PATCH 306/895] ftrace/fastboot: disable tracers self-tests when boot
 tracer is selected

The tracing engine resets the ring buffer and the tracers touch it
too during self-tests. These self-tests happen during tracers registering
and work against boot tracing which is logging initcalls.

We have to disable tracing self-tests if the boot-tracer is selected.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 81a17ef6b942..4feb3c81f94d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -126,7 +126,9 @@ config BOOT_TRACER
 	  the timings of the initcalls. Its aim is to be parsed by the
 	  /scripts/bootgraph.pl tool to produce pretty graphics about
 	  boot inefficiencies, giving a visual representation of the
-	  delays during initcalls.
+	  delays during initcalls. Note that tracers self tests can't
+	  be enabled if this tracer is selected since only one tracer
+	  should touch the tracing buffer at a time.
 
 config STACK_TRACER
 	bool "Trace max stack"
@@ -168,8 +170,7 @@ config FTRACE_SELFTEST
 
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
-	depends on TRACING
-	depends on DEBUG_KERNEL
+	depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
-- 
GitLab


From 7c572ac0cf5e8cd8e17f084bc6c253296cc42279 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= <fweisbec@gmail.com>
Date: Thu, 25 Sep 2008 13:25:30 +0100
Subject: [PATCH 307/895] tracing/ftrace: don't consume unhandled entries by
 boot tracer

When the boot tracer can't handle an entry output, it returns 1.
It should return 0 to relay on other output functions.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_boot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index c65ef8ffd6b4..d5c9e2e4a9c4 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -47,7 +47,7 @@ static void boot_trace_ctrl_update(struct trace_array *tr)
 
 static int initcall_print_line(struct trace_iterator *iter)
 {
-	int ret = 1;
+	int ret = 0;
 	struct trace_entry *entry = iter->ent;
 	struct boot_trace *it = &entry->field.initcall;
 	struct trace_seq *s = &iter->seq;
-- 
GitLab


From 5aa60c6073456812251caf9177cb921b2de68f77 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 29 Sep 2008 23:02:37 -0400
Subject: [PATCH 308/895] ftrace: give time for wakeup test to run

It is possible that the testing thread in the ftrace wakeup test does not
run before we stop the trace. This will cause the trace to fail since nothing
will be in the buffers.

This patch adds a small wait in the wakeup test to allow for the woken task
to run and be traced.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_selftest.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 82db9103b9bc..5ebd4b135498 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -498,6 +498,9 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
 
 	wake_up_process(p);
 
+	/* give a little time to let the thread wake up */
+	msleep(100);
+
 	/* stop the tracing. */
 	tr->ctrl = 0;
 	trace->ctrl_update(tr);
-- 
GitLab


From 7a8e76a3829f1067b70f715771ff88baf2fbf3c3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 29 Sep 2008 23:02:38 -0400
Subject: [PATCH 309/895] tracing: unified trace buffer

This is a unified tracing buffer that implements a ring buffer that
hopefully everyone will eventually be able to use.

The events recorded into the buffer have the following structure:

  struct ring_buffer_event {
	u32 type:2, len:3, time_delta:27;
	u32 array[];
  };

The minimum size of an event is 8 bytes. All events are 4 byte
aligned inside the buffer.

There are 4 types (all internal use for the ring buffer, only
the data type is exported to the interface users).

 RINGBUF_TYPE_PADDING: this type is used to note extra space at the end
	of a buffer page.

 RINGBUF_TYPE_TIME_EXTENT: This type is used when the time between events
	is greater than the 27 bit delta can hold. We add another
	32 bits, and record that in its own event (8 byte size).

 RINGBUF_TYPE_TIME_STAMP: (Not implemented yet). This will hold data to
	help keep the buffer timestamps in sync.

RINGBUF_TYPE_DATA: The event actually holds user data.

The "len" field is only three bits. Since the data must be
4 byte aligned, this field is shifted left by 2, giving a
max length of 28 bytes. If the data load is greater than 28
bytes, the first array field holds the full length of the
data load and the len field is set to zero.

Example, data size of 7 bytes:

	type = RINGBUF_TYPE_DATA
	len = 2
	time_delta: <time-stamp> - <prev_event-time-stamp>
	array[0..1]: <7 bytes of data> <1 byte empty>

This event is saved in 12 bytes of the buffer.

An event with 82 bytes of data:

	type = RINGBUF_TYPE_DATA
	len = 0
	time_delta: <time-stamp> - <prev_event-time-stamp>
	array[0]: 84 (Note the alignment)
	array[1..14]: <82 bytes of data> <2 bytes empty>

The above event is saved in 92 bytes (if my math is correct).
82 bytes of data, 2 bytes empty, 4 byte header, 4 byte length.

Do not reference the above event struct directly. Use the following
functions to gain access to the event table, since the
ring_buffer_event structure may change in the future.

ring_buffer_event_length(event): get the length of the event.
	This is the size of the memory used to record this
	event, and not the size of the data pay load.

ring_buffer_time_delta(event): get the time delta of the event
	This returns the delta time stamp since the last event.
	Note: Even though this is in the header, there should
		be no reason to access this directly, accept
		for debugging.

ring_buffer_event_data(event): get the data from the event
	This is the function to use to get the actual data
	from the event. Note, it is only a pointer to the
	data inside the buffer. This data must be copied to
	another location otherwise you risk it being written
	over in the buffer.

ring_buffer_lock: A way to lock the entire buffer.
ring_buffer_unlock: unlock the buffer.

ring_buffer_alloc: create a new ring buffer. Can choose between
	overwrite or consumer/producer mode. Overwrite will
	overwrite old data, where as consumer producer will
	throw away new data if the consumer catches up with the
	producer.  The consumer/producer is the default.

ring_buffer_free: free the ring buffer.

ring_buffer_resize: resize the buffer. Changes the size of each cpu
	buffer. Note, it is up to the caller to provide that
	the buffer is not being used while this is happening.
	This requirement may go away but do not count on it.

ring_buffer_lock_reserve: locks the ring buffer and allocates an
	entry on the buffer to write to.
ring_buffer_unlock_commit: unlocks the ring buffer and commits it to
	the buffer.

ring_buffer_write: writes some data into the ring buffer.

ring_buffer_peek: Look at a next item in the cpu buffer.
ring_buffer_consume: get the next item in the cpu buffer and
	consume it. That is, this function increments the head
	pointer.

ring_buffer_read_start: Start an iterator of a cpu buffer.
	For now, this disables the cpu buffer, until you issue
	a finish. This is just because we do not want the iterator
	to be overwritten. This restriction may change in the future.
	But note, this is used for static reading of a buffer which
	is usually done "after" a trace. Live readings would want
	to use the ring_buffer_consume above, which will not
	disable the ring buffer.

ring_buffer_read_finish: Finishes the read iterator and reenables
	the ring buffer.

ring_buffer_iter_peek: Look at the next item in the cpu iterator.
ring_buffer_read: Read the iterator and increment it.
ring_buffer_iter_reset: Reset the iterator to point to the beginning
	of the cpu buffer.
ring_buffer_iter_empty: Returns true if the iterator is at the end
	of the cpu buffer.

ring_buffer_size: returns the size in bytes of each cpu buffer.
	Note, the real size is this times the number of CPUs.

ring_buffer_reset_cpu: Sets the cpu buffer to empty
ring_buffer_reset: sets all cpu buffers to empty

ring_buffer_swap_cpu: swaps a cpu buffer from one buffer with a
	cpu buffer of another buffer. This is handy when you
	want to take a snap shot of a running trace on just one
	cpu. Having a backup buffer, to swap with facilitates this.
	Ftrace max latencies use this.

ring_buffer_empty: Returns true if the ring buffer is empty.
ring_buffer_empty_cpu: Returns true if the cpu buffer is empty.

ring_buffer_record_disable: disable all cpu buffers (read only)
ring_buffer_record_disable_cpu: disable a single cpu buffer (read only)
ring_buffer_record_enable: enable all cpu buffers.
ring_buffer_record_enabl_cpu: enable a single cpu buffer.

ring_buffer_entries: The number of entries in a ring buffer.
ring_buffer_overruns: The number of entries removed due to writing wrap.

ring_buffer_time_stamp: Get the time stamp used by the ring buffer
ring_buffer_normalize_time_stamp: normalize the ring buffer time stamp
	into nanosecs.

I still need to implement the GTOD feature. But we need support from
the cpu frequency infrastructure.  But this can be done at a later
time without affecting the ring buffer interface.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ring_buffer.h |  130 +++
 kernel/trace/Kconfig        |    4 +
 kernel/trace/Makefile       |    1 +
 kernel/trace/ring_buffer.c  | 1672 +++++++++++++++++++++++++++++++++++
 4 files changed, 1807 insertions(+)
 create mode 100644 include/linux/ring_buffer.h
 create mode 100644 kernel/trace/ring_buffer.c

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
new file mode 100644
index 000000000000..c52375b8330d
--- /dev/null
+++ b/include/linux/ring_buffer.h
@@ -0,0 +1,130 @@
+#ifndef _LINUX_RING_BUFFER_H
+#define _LINUX_RING_BUFFER_H
+
+#include <linux/mm.h>
+#include <linux/seq_file.h>
+
+struct ring_buffer;
+struct ring_buffer_iter;
+
+/*
+ * Don't reference this struct directly, use functions below.
+ */
+struct ring_buffer_event {
+	u32		type:2, len:3, time_delta:27;
+	u32		array[];
+};
+
+/**
+ * enum ring_buffer_type - internal ring buffer types
+ *
+ * @RINGBUF_TYPE_PADDING:	Left over page padding
+ *				 array is ignored
+ *				 size is variable depending on how much
+ *				  padding is needed
+ *
+ * @RINGBUF_TYPE_TIME_EXTEND:	Extend the time delta
+ *				 array[0] = time delta (28 .. 59)
+ *				 size = 8 bytes
+ *
+ * @RINGBUF_TYPE_TIME_STAMP:	Sync time stamp with external clock
+ *				 array[0] = tv_nsec
+ *				 array[1] = tv_sec
+ *				 size = 16 bytes
+ *
+ * @RINGBUF_TYPE_DATA:		Data record
+ *				 If len is zero:
+ *				  array[0] holds the actual length
+ *				  array[1..(length+3)/4-1] holds data
+ *				 else
+ *				  length = len << 2
+ *				  array[0..(length+3)/4] holds data
+ */
+enum ring_buffer_type {
+	RINGBUF_TYPE_PADDING,
+	RINGBUF_TYPE_TIME_EXTEND,
+	/* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
+	RINGBUF_TYPE_TIME_STAMP,
+	RINGBUF_TYPE_DATA,
+};
+
+unsigned ring_buffer_event_length(struct ring_buffer_event *event);
+void *ring_buffer_event_data(struct ring_buffer_event *event);
+
+/**
+ * ring_buffer_event_time_delta - return the delta timestamp of the event
+ * @event: the event to get the delta timestamp of
+ *
+ * The delta timestamp is the 27 bit timestamp since the last event.
+ */
+static inline unsigned
+ring_buffer_event_time_delta(struct ring_buffer_event *event)
+{
+	return event->time_delta;
+}
+
+void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags);
+void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags);
+
+/*
+ * size is in bytes for each per CPU buffer.
+ */
+struct ring_buffer *
+ring_buffer_alloc(unsigned long size, unsigned flags);
+void ring_buffer_free(struct ring_buffer *buffer);
+
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
+
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct ring_buffer *buffer,
+			 unsigned long length,
+			 unsigned long *flags);
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+			      struct ring_buffer_event *event,
+			      unsigned long flags);
+int ring_buffer_write(struct ring_buffer *buffer,
+		      unsigned long length, void *data);
+
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts);
+struct ring_buffer_event *
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts);
+
+struct ring_buffer_iter *
+ring_buffer_read_start(struct ring_buffer *buffer, int cpu);
+void ring_buffer_read_finish(struct ring_buffer_iter *iter);
+
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts);
+struct ring_buffer_event *
+ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts);
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter);
+int ring_buffer_iter_empty(struct ring_buffer_iter *iter);
+
+unsigned long ring_buffer_size(struct ring_buffer *buffer);
+
+void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu);
+void ring_buffer_reset(struct ring_buffer *buffer);
+
+int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
+			 struct ring_buffer *buffer_b, int cpu);
+
+int ring_buffer_empty(struct ring_buffer *buffer);
+int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu);
+
+void ring_buffer_record_disable(struct ring_buffer *buffer);
+void ring_buffer_record_enable(struct ring_buffer *buffer);
+void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
+void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
+
+unsigned long ring_buffer_entries(struct ring_buffer *buffer);
+unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
+
+u64 ring_buffer_time_stamp(int cpu);
+void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
+
+enum ring_buffer_flags {
+	RB_FL_OVERWRITE		= 1 << 0,
+};
+
+#endif /* _LINUX_RING_BUFFER_H */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4feb3c81f94d..396aea11398e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -18,9 +18,13 @@ config HAVE_FTRACE_MCOUNT_RECORD
 config TRACER_MAX_TRACE
 	bool
 
+config RING_BUFFER
+	bool
+
 config TRACING
 	bool
 	select DEBUG_FS
+	select RING_BUFFER
 	select STACKTRACE
 	select TRACEPOINTS
 
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 35a07f7cfa86..a85dfba88ba0 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -11,6 +11,7 @@ obj-y += trace_selftest_dynamic.o
 endif
 
 obj-$(CONFIG_FTRACE) += libftrace.o
+obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
new file mode 100644
index 000000000000..830a2930dd91
--- /dev/null
+++ b/kernel/trace/ring_buffer.c
@@ -0,0 +1,1672 @@
+/*
+ * Generic ring buffer
+ *
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/ring_buffer.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>	/* used for sched_clock() (for now) */
+#include <linux/init.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+
+/* Up this if you want to test the TIME_EXTENTS and normalization */
+#define DEBUG_SHIFT 0
+
+/* FIXME!!! */
+u64 ring_buffer_time_stamp(int cpu)
+{
+	/* shift to debug/test normalization and TIME_EXTENTS */
+	return sched_clock() << DEBUG_SHIFT;
+}
+
+void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
+{
+	/* Just stupid testing the normalize function and deltas */
+	*ts >>= DEBUG_SHIFT;
+}
+
+#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
+#define RB_ALIGNMENT_SHIFT	2
+#define RB_ALIGNMENT		(1 << RB_ALIGNMENT_SHIFT)
+#define RB_MAX_SMALL_DATA	28
+
+enum {
+	RB_LEN_TIME_EXTEND = 8,
+	RB_LEN_TIME_STAMP = 16,
+};
+
+/* inline for ring buffer fast paths */
+static inline unsigned
+rb_event_length(struct ring_buffer_event *event)
+{
+	unsigned length;
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		/* undefined */
+		return -1;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		return RB_LEN_TIME_EXTEND;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		return RB_LEN_TIME_STAMP;
+
+	case RINGBUF_TYPE_DATA:
+		if (event->len)
+			length = event->len << RB_ALIGNMENT_SHIFT;
+		else
+			length = event->array[0];
+		return length + RB_EVNT_HDR_SIZE;
+	default:
+		BUG();
+	}
+	/* not hit */
+	return 0;
+}
+
+/**
+ * ring_buffer_event_length - return the length of the event
+ * @event: the event to get the length of
+ */
+unsigned ring_buffer_event_length(struct ring_buffer_event *event)
+{
+	return rb_event_length(event);
+}
+
+/* inline for ring buffer fast paths */
+static inline void *
+rb_event_data(struct ring_buffer_event *event)
+{
+	BUG_ON(event->type != RINGBUF_TYPE_DATA);
+	/* If length is in len field, then array[0] has the data */
+	if (event->len)
+		return (void *)&event->array[0];
+	/* Otherwise length is in array[0] and array[1] has the data */
+	return (void *)&event->array[1];
+}
+
+/**
+ * ring_buffer_event_data - return the data of the event
+ * @event: the event to get the data from
+ */
+void *ring_buffer_event_data(struct ring_buffer_event *event)
+{
+	return rb_event_data(event);
+}
+
+#define for_each_buffer_cpu(buffer, cpu)		\
+	for_each_cpu_mask(cpu, buffer->cpumask)
+
+#define TS_SHIFT	27
+#define TS_MASK		((1ULL << TS_SHIFT) - 1)
+#define TS_DELTA_TEST	(~TS_MASK)
+
+/*
+ * This hack stolen from mm/slob.c.
+ * We can store per page timing information in the page frame of the page.
+ * Thanks to Peter Zijlstra for suggesting this idea.
+ */
+struct buffer_page {
+	union {
+		struct {
+			unsigned long	 flags;		/* mandatory */
+			atomic_t	 _count;	/* mandatory */
+			u64		 time_stamp;	/* page time stamp */
+			unsigned	 size;		/* size of page data */
+			struct list_head list;		/* list of free pages */
+		};
+		struct page page;
+	};
+};
+
+/*
+ * We need to fit the time_stamp delta into 27 bits.
+ */
+static inline int test_time_stamp(u64 delta)
+{
+	if (delta & TS_DELTA_TEST)
+		return 1;
+	return 0;
+}
+
+#define BUF_PAGE_SIZE PAGE_SIZE
+
+/*
+ * head_page == tail_page && head == tail then buffer is empty.
+ */
+struct ring_buffer_per_cpu {
+	int				cpu;
+	struct ring_buffer		*buffer;
+	spinlock_t			lock;
+	struct lock_class_key		lock_key;
+	struct list_head		pages;
+	unsigned long			head;	/* read from head */
+	unsigned long			tail;	/* write to tail */
+	struct buffer_page		*head_page;
+	struct buffer_page		*tail_page;
+	unsigned long			overrun;
+	unsigned long			entries;
+	u64				write_stamp;
+	u64				read_stamp;
+	atomic_t			record_disabled;
+};
+
+struct ring_buffer {
+	unsigned long			size;
+	unsigned			pages;
+	unsigned			flags;
+	int				cpus;
+	cpumask_t			cpumask;
+	atomic_t			record_disabled;
+
+	struct mutex			mutex;
+
+	struct ring_buffer_per_cpu	**buffers;
+};
+
+struct ring_buffer_iter {
+	struct ring_buffer_per_cpu	*cpu_buffer;
+	unsigned long			head;
+	struct buffer_page		*head_page;
+	u64				read_stamp;
+};
+
+#define RB_WARN_ON(buffer, cond)			\
+	if (unlikely(cond)) {				\
+		atomic_inc(&buffer->record_disabled);	\
+		WARN_ON(1);				\
+		return -1;				\
+	}
+
+/**
+ * check_pages - integrity check of buffer pages
+ * @cpu_buffer: CPU buffer with pages to test
+ *
+ * As a safty measure we check to make sure the data pages have not
+ * been corrupted.
+ */
+static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct list_head *head = &cpu_buffer->pages;
+	struct buffer_page *page, *tmp;
+
+	RB_WARN_ON(cpu_buffer, head->next->prev != head);
+	RB_WARN_ON(cpu_buffer, head->prev->next != head);
+
+	list_for_each_entry_safe(page, tmp, head, list) {
+		RB_WARN_ON(cpu_buffer, page->list.next->prev != &page->list);
+		RB_WARN_ON(cpu_buffer, page->list.prev->next != &page->list);
+	}
+
+	return 0;
+}
+
+static unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->head_page->size;
+}
+
+static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+			     unsigned nr_pages)
+{
+	struct list_head *head = &cpu_buffer->pages;
+	struct buffer_page *page, *tmp;
+	unsigned long addr;
+	LIST_HEAD(pages);
+	unsigned i;
+
+	for (i = 0; i < nr_pages; i++) {
+		addr = __get_free_page(GFP_KERNEL);
+		if (!addr)
+			goto free_pages;
+		page = (struct buffer_page *)virt_to_page(addr);
+		list_add(&page->list, &pages);
+	}
+
+	list_splice(&pages, head);
+
+	rb_check_pages(cpu_buffer);
+
+	return 0;
+
+ free_pages:
+	list_for_each_entry_safe(page, tmp, &pages, list) {
+		list_del_init(&page->list);
+		__free_page(&page->page);
+	}
+	return -ENOMEM;
+}
+
+static struct ring_buffer_per_cpu *
+rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int ret;
+
+	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
+				  GFP_KERNEL, cpu_to_node(cpu));
+	if (!cpu_buffer)
+		return NULL;
+
+	cpu_buffer->cpu = cpu;
+	cpu_buffer->buffer = buffer;
+	spin_lock_init(&cpu_buffer->lock);
+	INIT_LIST_HEAD(&cpu_buffer->pages);
+
+	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
+	if (ret < 0)
+		goto fail_free_buffer;
+
+	cpu_buffer->head_page
+		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->tail_page
+		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+
+	return cpu_buffer;
+
+ fail_free_buffer:
+	kfree(cpu_buffer);
+	return NULL;
+}
+
+static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct list_head *head = &cpu_buffer->pages;
+	struct buffer_page *page, *tmp;
+
+	list_for_each_entry_safe(page, tmp, head, list) {
+		list_del_init(&page->list);
+		__free_page(&page->page);
+	}
+	kfree(cpu_buffer);
+}
+
+/**
+ * ring_buffer_alloc - allocate a new ring_buffer
+ * @size: the size in bytes that is needed.
+ * @flags: attributes to set for the ring buffer.
+ *
+ * Currently the only flag that is available is the RB_FL_OVERWRITE
+ * flag. This flag means that the buffer will overwrite old data
+ * when the buffer wraps. If this flag is not set, the buffer will
+ * drop data when the tail hits the head.
+ */
+struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+{
+	struct ring_buffer *buffer;
+	int bsize;
+	int cpu;
+
+	/* keep it in its own cache line */
+	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
+			 GFP_KERNEL);
+	if (!buffer)
+		return NULL;
+
+	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+	buffer->flags = flags;
+
+	/* need at least two pages */
+	if (buffer->pages == 1)
+		buffer->pages++;
+
+	buffer->cpumask = cpu_possible_map;
+	buffer->cpus = nr_cpu_ids;
+
+	bsize = sizeof(void *) * nr_cpu_ids;
+	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
+				  GFP_KERNEL);
+	if (!buffer->buffers)
+		goto fail_free_buffer;
+
+	for_each_buffer_cpu(buffer, cpu) {
+		buffer->buffers[cpu] =
+			rb_allocate_cpu_buffer(buffer, cpu);
+		if (!buffer->buffers[cpu])
+			goto fail_free_buffers;
+	}
+
+	mutex_init(&buffer->mutex);
+
+	return buffer;
+
+ fail_free_buffers:
+	for_each_buffer_cpu(buffer, cpu) {
+		if (buffer->buffers[cpu])
+			rb_free_cpu_buffer(buffer->buffers[cpu]);
+	}
+	kfree(buffer->buffers);
+
+ fail_free_buffer:
+	kfree(buffer);
+	return NULL;
+}
+
+/**
+ * ring_buffer_free - free a ring buffer.
+ * @buffer: the buffer to free.
+ */
+void
+ring_buffer_free(struct ring_buffer *buffer)
+{
+	int cpu;
+
+	for_each_buffer_cpu(buffer, cpu)
+		rb_free_cpu_buffer(buffer->buffers[cpu]);
+
+	kfree(buffer);
+}
+
+static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
+
+static void
+rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
+{
+	struct buffer_page *page;
+	struct list_head *p;
+	unsigned i;
+
+	atomic_inc(&cpu_buffer->record_disabled);
+	synchronize_sched();
+
+	for (i = 0; i < nr_pages; i++) {
+		BUG_ON(list_empty(&cpu_buffer->pages));
+		p = cpu_buffer->pages.next;
+		page = list_entry(p, struct buffer_page, list);
+		list_del_init(&page->list);
+		__free_page(&page->page);
+	}
+	BUG_ON(list_empty(&cpu_buffer->pages));
+
+	rb_reset_cpu(cpu_buffer);
+
+	rb_check_pages(cpu_buffer);
+
+	atomic_dec(&cpu_buffer->record_disabled);
+
+}
+
+static void
+rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
+		struct list_head *pages, unsigned nr_pages)
+{
+	struct buffer_page *page;
+	struct list_head *p;
+	unsigned i;
+
+	atomic_inc(&cpu_buffer->record_disabled);
+	synchronize_sched();
+
+	for (i = 0; i < nr_pages; i++) {
+		BUG_ON(list_empty(pages));
+		p = pages->next;
+		page = list_entry(p, struct buffer_page, list);
+		list_del_init(&page->list);
+		list_add_tail(&page->list, &cpu_buffer->pages);
+	}
+	rb_reset_cpu(cpu_buffer);
+
+	rb_check_pages(cpu_buffer);
+
+	atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_resize - resize the ring buffer
+ * @buffer: the buffer to resize.
+ * @size: the new size.
+ *
+ * The tracer is responsible for making sure that the buffer is
+ * not being used while changing the size.
+ * Note: We may be able to change the above requirement by using
+ *  RCU synchronizations.
+ *
+ * Minimum size is 2 * BUF_PAGE_SIZE.
+ *
+ * Returns -1 on failure.
+ */
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned nr_pages, rm_pages, new_pages;
+	struct buffer_page *page, *tmp;
+	unsigned long buffer_size;
+	unsigned long addr;
+	LIST_HEAD(pages);
+	int i, cpu;
+
+	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+	size *= BUF_PAGE_SIZE;
+	buffer_size = buffer->pages * BUF_PAGE_SIZE;
+
+	/* we need a minimum of two pages */
+	if (size < BUF_PAGE_SIZE * 2)
+		size = BUF_PAGE_SIZE * 2;
+
+	if (size == buffer_size)
+		return size;
+
+	mutex_lock(&buffer->mutex);
+
+	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+
+	if (size < buffer_size) {
+
+		/* easy case, just free pages */
+		BUG_ON(nr_pages >= buffer->pages);
+
+		rm_pages = buffer->pages - nr_pages;
+
+		for_each_buffer_cpu(buffer, cpu) {
+			cpu_buffer = buffer->buffers[cpu];
+			rb_remove_pages(cpu_buffer, rm_pages);
+		}
+		goto out;
+	}
+
+	/*
+	 * This is a bit more difficult. We only want to add pages
+	 * when we can allocate enough for all CPUs. We do this
+	 * by allocating all the pages and storing them on a local
+	 * link list. If we succeed in our allocation, then we
+	 * add these pages to the cpu_buffers. Otherwise we just free
+	 * them all and return -ENOMEM;
+	 */
+	BUG_ON(nr_pages <= buffer->pages);
+	new_pages = nr_pages - buffer->pages;
+
+	for_each_buffer_cpu(buffer, cpu) {
+		for (i = 0; i < new_pages; i++) {
+			addr = __get_free_page(GFP_KERNEL);
+			if (!addr)
+				goto free_pages;
+			page = (struct buffer_page *)virt_to_page(addr);
+			list_add(&page->list, &pages);
+		}
+	}
+
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		rb_insert_pages(cpu_buffer, &pages, new_pages);
+	}
+
+	BUG_ON(!list_empty(&pages));
+
+ out:
+	buffer->pages = nr_pages;
+	mutex_unlock(&buffer->mutex);
+
+	return size;
+
+ free_pages:
+	list_for_each_entry_safe(page, tmp, &pages, list) {
+		list_del_init(&page->list);
+		__free_page(&page->page);
+	}
+	return -ENOMEM;
+}
+
+static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->head_page == cpu_buffer->tail_page &&
+		cpu_buffer->head == cpu_buffer->tail;
+}
+
+static inline int rb_null_event(struct ring_buffer_event *event)
+{
+	return event->type == RINGBUF_TYPE_PADDING;
+}
+
+static inline void *rb_page_index(struct buffer_page *page, unsigned index)
+{
+	void *addr = page_address(&page->page);
+
+	return addr + index;
+}
+
+static inline struct ring_buffer_event *
+rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return rb_page_index(cpu_buffer->head_page,
+			     cpu_buffer->head);
+}
+
+static inline struct ring_buffer_event *
+rb_iter_head_event(struct ring_buffer_iter *iter)
+{
+	return rb_page_index(iter->head_page,
+			     iter->head);
+}
+
+/*
+ * When the tail hits the head and the buffer is in overwrite mode,
+ * the head jumps to the next page and all content on the previous
+ * page is discarded. But before doing so, we update the overrun
+ * variable of the buffer.
+ */
+static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct ring_buffer_event *event;
+	unsigned long head;
+
+	for (head = 0; head < rb_head_size(cpu_buffer);
+	     head += rb_event_length(event)) {
+
+		event = rb_page_index(cpu_buffer->head_page, head);
+		BUG_ON(rb_null_event(event));
+		/* Only count data entries */
+		if (event->type != RINGBUF_TYPE_DATA)
+			continue;
+		cpu_buffer->overrun++;
+		cpu_buffer->entries--;
+	}
+}
+
+static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
+			       struct buffer_page **page)
+{
+	struct list_head *p = (*page)->list.next;
+
+	if (p == &cpu_buffer->pages)
+		p = p->next;
+
+	*page = list_entry(p, struct buffer_page, list);
+}
+
+static inline void
+rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
+{
+	cpu_buffer->tail_page->time_stamp = *ts;
+	cpu_buffer->write_stamp = *ts;
+}
+
+static void rb_reset_read_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	cpu_buffer->read_stamp = cpu_buffer->head_page->time_stamp;
+	cpu_buffer->head = 0;
+}
+
+static void
+rb_reset_iter_read_page(struct ring_buffer_iter *iter)
+{
+	iter->read_stamp = iter->head_page->time_stamp;
+	iter->head = 0;
+}
+
+/**
+ * ring_buffer_update_event - update event type and data
+ * @event: the even to update
+ * @type: the type of event
+ * @length: the size of the event field in the ring buffer
+ *
+ * Update the type and data fields of the event. The length
+ * is the actual size that is written to the ring buffer,
+ * and with this, we can determine what to place into the
+ * data field.
+ */
+static inline void
+rb_update_event(struct ring_buffer_event *event,
+			 unsigned type, unsigned length)
+{
+	event->type = type;
+
+	switch (type) {
+
+	case RINGBUF_TYPE_PADDING:
+		break;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		event->len =
+			(RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
+			>> RB_ALIGNMENT_SHIFT;
+		break;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		event->len =
+			(RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
+			>> RB_ALIGNMENT_SHIFT;
+		break;
+
+	case RINGBUF_TYPE_DATA:
+		length -= RB_EVNT_HDR_SIZE;
+		if (length > RB_MAX_SMALL_DATA) {
+			event->len = 0;
+			event->array[0] = length;
+		} else
+			event->len =
+				(length + (RB_ALIGNMENT-1))
+				>> RB_ALIGNMENT_SHIFT;
+		break;
+	default:
+		BUG();
+	}
+}
+
+static inline unsigned rb_calculate_event_length(unsigned length)
+{
+	struct ring_buffer_event event; /* Used only for sizeof array */
+
+	/* zero length can cause confusions */
+	if (!length)
+		length = 1;
+
+	if (length > RB_MAX_SMALL_DATA)
+		length += sizeof(event.array[0]);
+
+	length += RB_EVNT_HDR_SIZE;
+	length = ALIGN(length, RB_ALIGNMENT);
+
+	return length;
+}
+
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+		  unsigned type, unsigned long length, u64 *ts)
+{
+	struct buffer_page *head_page, *tail_page;
+	unsigned long tail;
+	struct ring_buffer *buffer = cpu_buffer->buffer;
+	struct ring_buffer_event *event;
+
+	tail_page = cpu_buffer->tail_page;
+	head_page = cpu_buffer->head_page;
+	tail = cpu_buffer->tail;
+
+	if (tail + length > BUF_PAGE_SIZE) {
+		struct buffer_page *next_page = tail_page;
+
+		rb_inc_page(cpu_buffer, &next_page);
+
+		if (next_page == head_page) {
+			if (!(buffer->flags & RB_FL_OVERWRITE))
+				return NULL;
+
+			/* count overflows */
+			rb_update_overflow(cpu_buffer);
+
+			rb_inc_page(cpu_buffer, &head_page);
+			cpu_buffer->head_page = head_page;
+			rb_reset_read_page(cpu_buffer);
+		}
+
+		if (tail != BUF_PAGE_SIZE) {
+			event = rb_page_index(tail_page, tail);
+			/* page padding */
+			event->type = RINGBUF_TYPE_PADDING;
+		}
+
+		tail_page->size = tail;
+		tail_page = next_page;
+		tail_page->size = 0;
+		tail = 0;
+		cpu_buffer->tail_page = tail_page;
+		cpu_buffer->tail = tail;
+		rb_add_stamp(cpu_buffer, ts);
+	}
+
+	BUG_ON(tail + length > BUF_PAGE_SIZE);
+
+	event = rb_page_index(tail_page, tail);
+	rb_update_event(event, type, length);
+
+	return event;
+}
+
+static int
+rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+		  u64 *ts, u64 *delta)
+{
+	struct ring_buffer_event *event;
+	static int once;
+
+	if (unlikely(*delta > (1ULL << 59) && !once++)) {
+		printk(KERN_WARNING "Delta way too big! %llu"
+		       " ts=%llu write stamp = %llu\n",
+		       *delta, *ts, cpu_buffer->write_stamp);
+		WARN_ON(1);
+	}
+
+	/*
+	 * The delta is too big, we to add a
+	 * new timestamp.
+	 */
+	event = __rb_reserve_next(cpu_buffer,
+				  RINGBUF_TYPE_TIME_EXTEND,
+				  RB_LEN_TIME_EXTEND,
+				  ts);
+	if (!event)
+		return -1;
+
+	/* check to see if we went to the next page */
+	if (cpu_buffer->tail) {
+		/* Still on same page, update timestamp */
+		event->time_delta = *delta & TS_MASK;
+		event->array[0] = *delta >> TS_SHIFT;
+		/* commit the time event */
+		cpu_buffer->tail +=
+			rb_event_length(event);
+		cpu_buffer->write_stamp = *ts;
+		*delta = 0;
+	}
+
+	return 0;
+}
+
+static struct ring_buffer_event *
+rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
+		      unsigned type, unsigned long length)
+{
+	struct ring_buffer_event *event;
+	u64 ts, delta;
+
+	ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+
+	if (cpu_buffer->tail) {
+		delta = ts - cpu_buffer->write_stamp;
+
+		if (test_time_stamp(delta)) {
+			int ret;
+
+			ret = rb_add_time_stamp(cpu_buffer, &ts, &delta);
+			if (ret < 0)
+				return NULL;
+		}
+	} else {
+		rb_add_stamp(cpu_buffer, &ts);
+		delta = 0;
+	}
+
+	event = __rb_reserve_next(cpu_buffer, type, length, &ts);
+	if (!event)
+		return NULL;
+
+	/* If the reserve went to the next page, our delta is zero */
+	if (!cpu_buffer->tail)
+		delta = 0;
+
+	event->time_delta = delta;
+
+	return event;
+}
+
+/**
+ * ring_buffer_lock_reserve - reserve a part of the buffer
+ * @buffer: the ring buffer to reserve from
+ * @length: the length of the data to reserve (excluding event header)
+ * @flags: a pointer to save the interrupt flags
+ *
+ * Returns a reseverd event on the ring buffer to copy directly to.
+ * The user of this interface will need to get the body to write into
+ * and can use the ring_buffer_event_data() interface.
+ *
+ * The length is the length of the data needed, not the event length
+ * which also includes the event header.
+ *
+ * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
+ * If NULL is returned, then nothing has been allocated or locked.
+ */
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct ring_buffer *buffer,
+			 unsigned long length,
+			 unsigned long *flags)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+	int cpu;
+
+	if (atomic_read(&buffer->record_disabled))
+		return NULL;
+
+	raw_local_irq_save(*flags);
+	cpu = raw_smp_processor_id();
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		goto out_irq;
+
+	cpu_buffer = buffer->buffers[cpu];
+	spin_lock(&cpu_buffer->lock);
+
+	if (atomic_read(&cpu_buffer->record_disabled))
+		goto no_record;
+
+	length = rb_calculate_event_length(length);
+	if (length > BUF_PAGE_SIZE)
+		return NULL;
+
+	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+	if (!event)
+		goto no_record;
+
+	return event;
+
+ no_record:
+	spin_unlock(&cpu_buffer->lock);
+ out_irq:
+	local_irq_restore(*flags);
+	return NULL;
+}
+
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+		      struct ring_buffer_event *event)
+{
+	cpu_buffer->tail += rb_event_length(event);
+	cpu_buffer->tail_page->size = cpu_buffer->tail;
+	cpu_buffer->write_stamp += event->time_delta;
+	cpu_buffer->entries++;
+}
+
+/**
+ * ring_buffer_unlock_commit - commit a reserved
+ * @buffer: The buffer to commit to
+ * @event: The event pointer to commit.
+ * @flags: the interrupt flags received from ring_buffer_lock_reserve.
+ *
+ * This commits the data to the ring buffer, and releases any locks held.
+ *
+ * Must be paired with ring_buffer_lock_reserve.
+ */
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+			      struct ring_buffer_event *event,
+			      unsigned long flags)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu = raw_smp_processor_id();
+
+	cpu_buffer = buffer->buffers[cpu];
+
+	assert_spin_locked(&cpu_buffer->lock);
+
+	rb_commit(cpu_buffer, event);
+
+	spin_unlock(&cpu_buffer->lock);
+	raw_local_irq_restore(flags);
+
+	return 0;
+}
+
+/**
+ * ring_buffer_write - write data to the buffer without reserving
+ * @buffer: The ring buffer to write to.
+ * @length: The length of the data being written (excluding the event header)
+ * @data: The data to write to the buffer.
+ *
+ * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
+ * one function. If you already have the data to write to the buffer, it
+ * may be easier to simply call this function.
+ *
+ * Note, like ring_buffer_lock_reserve, the length is the length of the data
+ * and not the length of the event which would hold the header.
+ */
+int ring_buffer_write(struct ring_buffer *buffer,
+			unsigned long length,
+			void *data)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+	unsigned long event_length, flags;
+	void *body;
+	int ret = -EBUSY;
+	int cpu;
+
+	if (atomic_read(&buffer->record_disabled))
+		return -EBUSY;
+
+	local_irq_save(flags);
+	cpu = raw_smp_processor_id();
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		goto out_irq;
+
+	cpu_buffer = buffer->buffers[cpu];
+	spin_lock(&cpu_buffer->lock);
+
+	if (atomic_read(&cpu_buffer->record_disabled))
+		goto out;
+
+	event_length = rb_calculate_event_length(length);
+	event = rb_reserve_next_event(cpu_buffer,
+				      RINGBUF_TYPE_DATA, event_length);
+	if (!event)
+		goto out;
+
+	body = rb_event_data(event);
+
+	memcpy(body, data, length);
+
+	rb_commit(cpu_buffer, event);
+
+	ret = 0;
+ out:
+	spin_unlock(&cpu_buffer->lock);
+ out_irq:
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+/**
+ * ring_buffer_lock - lock the ring buffer
+ * @buffer: The ring buffer to lock
+ * @flags: The place to store the interrupt flags
+ *
+ * This locks all the per CPU buffers.
+ *
+ * Must be unlocked by ring_buffer_unlock.
+ */
+void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	local_irq_save(*flags);
+
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		spin_lock(&cpu_buffer->lock);
+	}
+}
+
+/**
+ * ring_buffer_unlock - unlock a locked buffer
+ * @buffer: The locked buffer to unlock
+ * @flags: The interrupt flags received by ring_buffer_lock
+ */
+void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	for (cpu = buffer->cpus - 1; cpu >= 0; cpu--) {
+		if (!cpu_isset(cpu, buffer->cpumask))
+			continue;
+		cpu_buffer = buffer->buffers[cpu];
+		spin_unlock(&cpu_buffer->lock);
+	}
+
+	local_irq_restore(flags);
+}
+
+/**
+ * ring_buffer_record_disable - stop all writes into the buffer
+ * @buffer: The ring buffer to stop writes to.
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable(struct ring_buffer *buffer)
+{
+	atomic_inc(&buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_enable - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truely enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable(struct ring_buffer *buffer)
+{
+	atomic_dec(&buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
+ * @buffer: The ring buffer to stop writes to.
+ * @cpu: The CPU buffer to stop
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return;
+
+	cpu_buffer = buffer->buffers[cpu];
+	atomic_inc(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_enable_cpu - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes
+ * @cpu: The CPU to enable.
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truely enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return;
+
+	cpu_buffer = buffer->buffers[cpu];
+	atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the entries from.
+ */
+unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return cpu_buffer->entries;
+}
+
+/**
+ * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return cpu_buffer->overrun;
+}
+
+/**
+ * ring_buffer_entries - get the number of entries in a buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of entries in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_entries(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long entries = 0;
+	int cpu;
+
+	/* if you care about this being correct, lock the buffer */
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		entries += cpu_buffer->entries;
+	}
+
+	return entries;
+}
+
+/**
+ * ring_buffer_overrun_cpu - get the number of overruns in buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of overruns in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long overruns = 0;
+	int cpu;
+
+	/* if you care about this being correct, lock the buffer */
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		overruns += cpu_buffer->overrun;
+	}
+
+	return overruns;
+}
+
+/**
+ * ring_buffer_iter_reset - reset an iterator
+ * @iter: The iterator to reset
+ *
+ * Resets the iterator, so that it will start from the beginning
+ * again.
+ */
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+	iter->head_page = cpu_buffer->head_page;
+	iter->head = cpu_buffer->head;
+	rb_reset_iter_read_page(iter);
+}
+
+/**
+ * ring_buffer_iter_empty - check if an iterator has no more to read
+ * @iter: The iterator to check
+ */
+int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	cpu_buffer = iter->cpu_buffer;
+
+	return iter->head_page == cpu_buffer->tail_page &&
+		iter->head == cpu_buffer->tail;
+}
+
+static void
+rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+		     struct ring_buffer_event *event)
+{
+	u64 delta;
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		return;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		delta = event->array[0];
+		delta <<= TS_SHIFT;
+		delta += event->time_delta;
+		cpu_buffer->read_stamp += delta;
+		return;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		return;
+
+	case RINGBUF_TYPE_DATA:
+		cpu_buffer->read_stamp += event->time_delta;
+		return;
+
+	default:
+		BUG();
+	}
+	return;
+}
+
+static void
+rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
+			  struct ring_buffer_event *event)
+{
+	u64 delta;
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		return;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		delta = event->array[0];
+		delta <<= TS_SHIFT;
+		delta += event->time_delta;
+		iter->read_stamp += delta;
+		return;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		return;
+
+	case RINGBUF_TYPE_DATA:
+		iter->read_stamp += event->time_delta;
+		return;
+
+	default:
+		BUG();
+	}
+	return;
+}
+
+static void rb_advance_head(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct ring_buffer_event *event;
+	unsigned length;
+
+	/*
+	 * Check if we are at the end of the buffer.
+	 */
+	if (cpu_buffer->head >= cpu_buffer->head_page->size) {
+		BUG_ON(cpu_buffer->head_page == cpu_buffer->tail_page);
+		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+		rb_reset_read_page(cpu_buffer);
+		return;
+	}
+
+	event = rb_head_event(cpu_buffer);
+
+	if (event->type == RINGBUF_TYPE_DATA)
+		cpu_buffer->entries--;
+
+	length = rb_event_length(event);
+
+	/*
+	 * This should not be called to advance the header if we are
+	 * at the tail of the buffer.
+	 */
+	BUG_ON((cpu_buffer->head_page == cpu_buffer->tail_page) &&
+	       (cpu_buffer->head + length > cpu_buffer->tail));
+
+	rb_update_read_stamp(cpu_buffer, event);
+
+	cpu_buffer->head += length;
+
+	/* check for end of page */
+	if ((cpu_buffer->head >= cpu_buffer->head_page->size) &&
+	    (cpu_buffer->head_page != cpu_buffer->tail_page))
+		rb_advance_head(cpu_buffer);
+}
+
+static void rb_advance_iter(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer *buffer;
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+	unsigned length;
+
+	cpu_buffer = iter->cpu_buffer;
+	buffer = cpu_buffer->buffer;
+
+	/*
+	 * Check if we are at the end of the buffer.
+	 */
+	if (iter->head >= iter->head_page->size) {
+		BUG_ON(iter->head_page == cpu_buffer->tail_page);
+		rb_inc_page(cpu_buffer, &iter->head_page);
+		rb_reset_iter_read_page(iter);
+		return;
+	}
+
+	event = rb_iter_head_event(iter);
+
+	length = rb_event_length(event);
+
+	/*
+	 * This should not be called to advance the header if we are
+	 * at the tail of the buffer.
+	 */
+	BUG_ON((iter->head_page == cpu_buffer->tail_page) &&
+	       (iter->head + length > cpu_buffer->tail));
+
+	rb_update_iter_read_stamp(iter, event);
+
+	iter->head += length;
+
+	/* check for end of page padding */
+	if ((iter->head >= iter->head_page->size) &&
+	    (iter->head_page != cpu_buffer->tail_page))
+		rb_advance_iter(iter);
+}
+
+/**
+ * ring_buffer_peek - peek at the next event to be read
+ * @buffer: The ring buffer to read
+ * @cpu: The cpu to peak at
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not consume the data.
+ */
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return NULL;
+
+	cpu_buffer = buffer->buffers[cpu];
+
+ again:
+	if (rb_per_cpu_empty(cpu_buffer))
+		return NULL;
+
+	event = rb_head_event(cpu_buffer);
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+		rb_reset_read_page(cpu_buffer);
+		goto again;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		/* Internal data, OK to advance */
+		rb_advance_head(cpu_buffer);
+		goto again;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		rb_advance_head(cpu_buffer);
+		goto again;
+
+	case RINGBUF_TYPE_DATA:
+		if (ts) {
+			*ts = cpu_buffer->read_stamp + event->time_delta;
+			ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+		}
+		return event;
+
+	default:
+		BUG();
+	}
+
+	return NULL;
+}
+
+/**
+ * ring_buffer_iter_peek - peek at the next event to be read
+ * @iter: The ring buffer iterator
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not increment the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+{
+	struct ring_buffer *buffer;
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+
+	if (ring_buffer_iter_empty(iter))
+		return NULL;
+
+	cpu_buffer = iter->cpu_buffer;
+	buffer = cpu_buffer->buffer;
+
+ again:
+	if (rb_per_cpu_empty(cpu_buffer))
+		return NULL;
+
+	event = rb_iter_head_event(iter);
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		rb_inc_page(cpu_buffer, &iter->head_page);
+		rb_reset_iter_read_page(iter);
+		goto again;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		/* Internal data, OK to advance */
+		rb_advance_iter(iter);
+		goto again;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		rb_advance_iter(iter);
+		goto again;
+
+	case RINGBUF_TYPE_DATA:
+		if (ts) {
+			*ts = iter->read_stamp + event->time_delta;
+			ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+		}
+		return event;
+
+	default:
+		BUG();
+	}
+
+	return NULL;
+}
+
+/**
+ * ring_buffer_consume - return an event and consume it
+ * @buffer: The ring buffer to get the next event from
+ *
+ * Returns the next event in the ring buffer, and that event is consumed.
+ * Meaning, that sequential reads will keep returning a different event,
+ * and eventually empty the ring buffer if the producer is slower.
+ */
+struct ring_buffer_event *
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return NULL;
+
+	event = ring_buffer_peek(buffer, cpu, ts);
+	if (!event)
+		return NULL;
+
+	cpu_buffer = buffer->buffers[cpu];
+	rb_advance_head(cpu_buffer);
+
+	return event;
+}
+
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @buffer: The ring buffer to read from
+ * @cpu: The cpu buffer to iterate over
+ *
+ * This starts up an iteration through the buffer. It also disables
+ * the recording to the buffer until the reading is finished.
+ * This prevents the reading from being corrupted. This is not
+ * a consuming read, so a producer is not expected.
+ *
+ * Must be paired with ring_buffer_finish.
+ */
+struct ring_buffer_iter *
+ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_iter *iter;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return NULL;
+
+	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return NULL;
+
+	cpu_buffer = buffer->buffers[cpu];
+
+	iter->cpu_buffer = cpu_buffer;
+
+	atomic_inc(&cpu_buffer->record_disabled);
+	synchronize_sched();
+
+	spin_lock(&cpu_buffer->lock);
+	iter->head = cpu_buffer->head;
+	iter->head_page = cpu_buffer->head_page;
+	rb_reset_iter_read_page(iter);
+	spin_unlock(&cpu_buffer->lock);
+
+	return iter;
+}
+
+/**
+ * ring_buffer_finish - finish reading the iterator of the buffer
+ * @iter: The iterator retrieved by ring_buffer_start
+ *
+ * This re-enables the recording to the buffer, and frees the
+ * iterator.
+ */
+void
+ring_buffer_read_finish(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+	atomic_dec(&cpu_buffer->record_disabled);
+	kfree(iter);
+}
+
+/**
+ * ring_buffer_read - read the next item in the ring buffer by the iterator
+ * @iter: The ring buffer iterator
+ * @ts: The time stamp of the event read.
+ *
+ * This reads the next event in the ring buffer and increments the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
+{
+	struct ring_buffer_event *event;
+
+	event = ring_buffer_iter_peek(iter, ts);
+	if (!event)
+		return NULL;
+
+	rb_advance_iter(iter);
+
+	return event;
+}
+
+/**
+ * ring_buffer_size - return the size of the ring buffer (in bytes)
+ * @buffer: The ring buffer.
+ */
+unsigned long ring_buffer_size(struct ring_buffer *buffer)
+{
+	return BUF_PAGE_SIZE * buffer->pages;
+}
+
+static void
+rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	cpu_buffer->head_page
+		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->tail_page
+		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+
+	cpu_buffer->head = cpu_buffer->tail = 0;
+	cpu_buffer->overrun = 0;
+	cpu_buffer->entries = 0;
+}
+
+/**
+ * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
+ * @buffer: The ring buffer to reset a per cpu buffer of
+ * @cpu: The CPU buffer to be reset
+ */
+void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+	unsigned long flags;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return;
+
+	raw_local_irq_save(flags);
+	spin_lock(&cpu_buffer->lock);
+
+	rb_reset_cpu(cpu_buffer);
+
+	spin_unlock(&cpu_buffer->lock);
+	raw_local_irq_restore(flags);
+}
+
+/**
+ * ring_buffer_reset - reset a ring buffer
+ * @buffer: The ring buffer to reset all cpu buffers
+ */
+void ring_buffer_reset(struct ring_buffer *buffer)
+{
+	unsigned long flags;
+	int cpu;
+
+	ring_buffer_lock(buffer, &flags);
+
+	for_each_buffer_cpu(buffer, cpu)
+		rb_reset_cpu(buffer->buffers[cpu]);
+
+	ring_buffer_unlock(buffer, flags);
+}
+
+/**
+ * rind_buffer_empty - is the ring buffer empty?
+ * @buffer: The ring buffer to test
+ */
+int ring_buffer_empty(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* yes this is racy, but if you don't like the race, lock the buffer */
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		if (!rb_per_cpu_empty(cpu_buffer))
+			return 0;
+	}
+	return 1;
+}
+
+/**
+ * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
+ * @buffer: The ring buffer
+ * @cpu: The CPU buffer to test
+ */
+int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return 1;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return rb_per_cpu_empty(cpu_buffer);
+}
+
+/**
+ * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
+ * @buffer_a: One buffer to swap with
+ * @buffer_b: The other buffer to swap with
+ *
+ * This function is useful for tracers that want to take a "snapshot"
+ * of a CPU buffer and has another back up buffer lying around.
+ * it is expected that the tracer handles the cpu buffer not being
+ * used at the moment.
+ */
+int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
+			 struct ring_buffer *buffer_b, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer_a;
+	struct ring_buffer_per_cpu *cpu_buffer_b;
+
+	if (!cpu_isset(cpu, buffer_a->cpumask) ||
+	    !cpu_isset(cpu, buffer_b->cpumask))
+		return -EINVAL;
+
+	/* At least make sure the two buffers are somewhat the same */
+	if (buffer_a->size != buffer_b->size ||
+	    buffer_a->pages != buffer_b->pages)
+		return -EINVAL;
+
+	cpu_buffer_a = buffer_a->buffers[cpu];
+	cpu_buffer_b = buffer_b->buffers[cpu];
+
+	/*
+	 * We can't do a synchronize_sched here because this
+	 * function can be called in atomic context.
+	 * Normally this will be called from the same CPU as cpu.
+	 * If not it's up to the caller to protect this.
+	 */
+	atomic_inc(&cpu_buffer_a->record_disabled);
+	atomic_inc(&cpu_buffer_b->record_disabled);
+
+	buffer_a->buffers[cpu] = cpu_buffer_b;
+	buffer_b->buffers[cpu] = cpu_buffer_a;
+
+	cpu_buffer_b->buffer = buffer_a;
+	cpu_buffer_a->buffer = buffer_b;
+
+	atomic_dec(&cpu_buffer_a->record_disabled);
+	atomic_dec(&cpu_buffer_b->record_disabled);
+
+	return 0;
+}
+
-- 
GitLab


From a7b1374333407f409cf8df7e623b12490f073c84 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 29 Sep 2008 23:02:39 -0400
Subject: [PATCH 310/895] ring_buffer: add paranoid check for buffer page

If for some strange reason the buffer_page gets bigger, or the page struct
gets smaller, I want to know this ASAP.  The best way is to not let the
kernel compile.

This patch adds code to test the size of the struct buffer_page against the
page struct and will cause compile issues if the buffer_page ever gets bigger
than the page struct.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 830a2930dd91..95ca9338cb6c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -289,6 +289,12 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	kfree(cpu_buffer);
 }
 
+/*
+ * Causes compile errors if the struct buffer_page gets bigger
+ * than the struct page.
+ */
+extern int ring_buffer_page_too_big(void);
+
 /**
  * ring_buffer_alloc - allocate a new ring_buffer
  * @size: the size in bytes that is needed.
@@ -305,6 +311,11 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	int bsize;
 	int cpu;
 
+	/* Paranoid! Optimizes out when all is well */
+	if (sizeof(struct buffer_page) > sizeof(struct page))
+		ring_buffer_page_too_big();
+
+
 	/* keep it in its own cache line */
 	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
 			 GFP_KERNEL);
-- 
GitLab


From ed56829cb3195de499f97fa6108fe9134319bae6 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 29 Sep 2008 23:02:40 -0400
Subject: [PATCH 311/895] ring_buffer: reset buffer page when freeing

Mathieu Desnoyers pointed out that the freeing of the page frame needs
to be reset otherwise we might trigger BUG_ON in the page free code.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 95ca9338cb6c..cfa711374d9a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -127,6 +127,17 @@ struct buffer_page {
 	};
 };
 
+/*
+ * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
+ * this issue out.
+ */
+static inline void free_buffer_page(struct buffer_page *bpage)
+{
+	reset_page_mapcount(&bpage->page);
+	bpage->page.mapping = NULL;
+	__free_page(&bpage->page);
+}
+
 /*
  * We need to fit the time_stamp delta into 27 bits.
  */
@@ -240,7 +251,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
  free_pages:
 	list_for_each_entry_safe(page, tmp, &pages, list) {
 		list_del_init(&page->list);
-		__free_page(&page->page);
+		free_buffer_page(page);
 	}
 	return -ENOMEM;
 }
@@ -284,7 +295,7 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 
 	list_for_each_entry_safe(page, tmp, head, list) {
 		list_del_init(&page->list);
-		__free_page(&page->page);
+		free_buffer_page(page);
 	}
 	kfree(cpu_buffer);
 }
@@ -393,7 +404,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 		p = cpu_buffer->pages.next;
 		page = list_entry(p, struct buffer_page, list);
 		list_del_init(&page->list);
-		__free_page(&page->page);
+		free_buffer_page(page);
 	}
 	BUG_ON(list_empty(&cpu_buffer->pages));
 
@@ -520,7 +531,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
  free_pages:
 	list_for_each_entry_safe(page, tmp, &pages, list) {
 		list_del_init(&page->list);
-		__free_page(&page->page);
+		free_buffer_page(page);
 	}
 	return -ENOMEM;
 }
-- 
GitLab


From 3928a8a2d98081d1bc3c0a84a2d70e29b90ecf1c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 29 Sep 2008 23:02:41 -0400
Subject: [PATCH 312/895] ftrace: make work with new ring buffer

This patch ports ftrace over to the new ring buffer.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c              | 932 ++++++++----------------------
 kernel/trace/trace.h              |  22 +-
 kernel/trace/trace_boot.c         |  16 +-
 kernel/trace/trace_functions.c    |   2 +-
 kernel/trace/trace_irqsoff.c      |   6 +-
 kernel/trace/trace_mmiotrace.c    |  40 +-
 kernel/trace/trace_nop.c          |   2 +-
 kernel/trace/trace_sched_switch.c |   2 +-
 kernel/trace/trace_sched_wakeup.c |   2 +-
 kernel/trace/trace_selftest.c     |  60 +-
 kernel/trace/trace_sysprof.c      |   2 +-
 11 files changed, 288 insertions(+), 798 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6ada059832a6..ef80793858b8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -33,25 +33,22 @@
 #include <linux/writeback.h>
 
 #include <linux/stacktrace.h>
+#include <linux/ring_buffer.h>
 
 #include "trace.h"
 
+#define TRACE_BUFFER_FLAGS	(RB_FL_OVERWRITE)
+
 unsigned long __read_mostly	tracing_max_latency = (cycle_t)ULONG_MAX;
 unsigned long __read_mostly	tracing_thresh;
 
-static unsigned long __read_mostly	tracing_nr_buffers;
 static cpumask_t __read_mostly		tracing_buffer_mask;
 
 #define for_each_tracing_cpu(cpu)	\
 	for_each_cpu_mask(cpu, tracing_buffer_mask)
 
-static int trace_alloc_page(void);
-static int trace_free_page(void);
-
 static int tracing_disabled = 1;
 
-static unsigned long tracing_pages_allocated;
-
 long
 ns2usecs(cycle_t nsec)
 {
@@ -62,7 +59,9 @@ ns2usecs(cycle_t nsec)
 
 cycle_t ftrace_now(int cpu)
 {
-	return cpu_clock(cpu);
+	u64 ts = ring_buffer_time_stamp(cpu);
+	ring_buffer_normalize_time_stamp(cpu, &ts);
+	return ts;
 }
 
 /*
@@ -102,18 +101,18 @@ static int			tracer_enabled = 1;
 int				ftrace_function_enabled;
 
 /*
- * trace_nr_entries is the number of entries that is allocated
- * for a buffer. Note, the number of entries is always rounded
- * to ENTRIES_PER_PAGE.
+ * trace_buf_size is the size in bytes that is allocated
+ * for a buffer. Note, the number of bytes is always rounded
+ * to page size.
  *
  * This number is purposely set to a low number of 16384.
  * If the dump on oops happens, it will be much appreciated
  * to not have to wait for all that output. Anyway this can be
  * boot time and run time configurable.
  */
-#define TRACE_ENTRIES_DEFAULT	16384UL
+#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
 
-static unsigned long		trace_nr_entries = TRACE_ENTRIES_DEFAULT;
+static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 
 /* trace_types holds a link list of available tracers. */
 static struct tracer		*trace_types __read_mostly;
@@ -158,23 +157,21 @@ void trace_wake_up(void)
 		wake_up(&trace_wait);
 }
 
-#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
-
-static int __init set_nr_entries(char *str)
+static int __init set_buf_size(char *str)
 {
-	unsigned long nr_entries;
+	unsigned long buf_size;
 	int ret;
 
 	if (!str)
 		return 0;
-	ret = strict_strtoul(str, 0, &nr_entries);
+	ret = strict_strtoul(str, 0, &buf_size);
 	/* nr_entries can not be zero */
-	if (ret < 0 || nr_entries == 0)
+	if (ret < 0 || buf_size == 0)
 		return 0;
-	trace_nr_entries = nr_entries;
+	trace_buf_size = buf_size;
 	return 1;
 }
-__setup("trace_entries=", set_nr_entries);
+__setup("trace_buf_size=", set_buf_size);
 
 unsigned long nsecs_to_usecs(unsigned long nsecs)
 {
@@ -243,54 +240,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	tracing_record_cmdline(current);
 }
 
-#define CHECK_COND(cond)			\
-	if (unlikely(cond)) {			\
-		tracing_disabled = 1;		\
-		WARN_ON(1);			\
-		return -1;			\
-	}
-
-/**
- * check_pages - integrity check of trace buffers
- *
- * As a safty measure we check to make sure the data pages have not
- * been corrupted.
- */
-int check_pages(struct trace_array_cpu *data)
-{
-	struct page *page, *tmp;
-
-	CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
-	CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
-
-	list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
-		CHECK_COND(page->lru.next->prev != &page->lru);
-		CHECK_COND(page->lru.prev->next != &page->lru);
-	}
-
-	return 0;
-}
-
-/**
- * head_page - page address of the first page in per_cpu buffer.
- *
- * head_page returns the page address of the first page in
- * a per_cpu buffer. This also preforms various consistency
- * checks to make sure the buffer has not been corrupted.
- */
-void *head_page(struct trace_array_cpu *data)
-{
-	struct page *page;
-
-	if (list_empty(&data->trace_pages))
-		return NULL;
-
-	page = list_entry(data->trace_pages.next, struct page, lru);
-	BUG_ON(&page->lru == &data->trace_pages);
-
-	return page_address(page);
-}
-
 /**
  * trace_seq_printf - sequence printing of trace information
  * @s: trace sequence descriptor
@@ -437,34 +386,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
 	trace_seq_reset(s);
 }
 
-/*
- * flip the trace buffers between two trace descriptors.
- * This usually is the buffers between the global_trace and
- * the max_tr to record a snapshot of a current trace.
- *
- * The ftrace_max_lock must be held.
- */
-static void
-flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
-{
-	struct list_head flip_pages;
-
-	INIT_LIST_HEAD(&flip_pages);
-
-	memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
-		sizeof(struct trace_array_cpu) -
-		offsetof(struct trace_array_cpu, trace_head_idx));
-
-	check_pages(tr1);
-	check_pages(tr2);
-	list_splice_init(&tr1->trace_pages, &flip_pages);
-	list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
-	list_splice_init(&flip_pages, &tr2->trace_pages);
-	BUG_ON(!list_empty(&flip_pages));
-	check_pages(tr1);
-	check_pages(tr2);
-}
-
 /**
  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
  * @tr: tracer
@@ -477,17 +398,15 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
 void
 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-	struct trace_array_cpu *data;
-	int i;
+	struct ring_buffer *buf = tr->buffer;
 
 	WARN_ON_ONCE(!irqs_disabled());
 	__raw_spin_lock(&ftrace_max_lock);
-	/* clear out all the previous traces */
-	for_each_tracing_cpu(i) {
-		data = tr->data[i];
-		flip_trace(max_tr.data[i], data);
-		tracing_reset(data);
-	}
+
+	tr->buffer = max_tr.buffer;
+	max_tr.buffer = buf;
+
+	ring_buffer_reset(tr->buffer);
 
 	__update_max_tr(tr, tsk, cpu);
 	__raw_spin_unlock(&ftrace_max_lock);
@@ -504,16 +423,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 void
 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-	struct trace_array_cpu *data = tr->data[cpu];
-	int i;
+	int ret;
 
 	WARN_ON_ONCE(!irqs_disabled());
 	__raw_spin_lock(&ftrace_max_lock);
-	for_each_tracing_cpu(i)
-		tracing_reset(max_tr.data[i]);
 
-	flip_trace(max_tr.data[cpu], data);
-	tracing_reset(data);
+	ring_buffer_reset(max_tr.buffer);
+	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
+
+	WARN_ON_ONCE(ret);
 
 	__update_max_tr(tr, tsk, cpu);
 	__raw_spin_unlock(&ftrace_max_lock);
@@ -550,7 +468,6 @@ int register_tracer(struct tracer *type)
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 	if (type->selftest) {
 		struct tracer *saved_tracer = current_trace;
-		struct trace_array_cpu *data;
 		struct trace_array *tr = &global_trace;
 		int saved_ctrl = tr->ctrl;
 		int i;
@@ -562,10 +479,7 @@ int register_tracer(struct tracer *type)
 		 * If we fail, we do not register this tracer.
 		 */
 		for_each_tracing_cpu(i) {
-			data = tr->data[i];
-			if (!head_page(data))
-				continue;
-			tracing_reset(data);
+			tracing_reset(tr, i);
 		}
 		current_trace = type;
 		tr->ctrl = 0;
@@ -581,10 +495,7 @@ int register_tracer(struct tracer *type)
 		}
 		/* Only reset on passing, to avoid touching corrupted buffers */
 		for_each_tracing_cpu(i) {
-			data = tr->data[i];
-			if (!head_page(data))
-				continue;
-			tracing_reset(data);
+			tracing_reset(tr, i);
 		}
 		printk(KERN_CONT "PASSED\n");
 	}
@@ -630,13 +541,9 @@ void unregister_tracer(struct tracer *type)
 	mutex_unlock(&trace_types_lock);
 }
 
-void tracing_reset(struct trace_array_cpu *data)
+void tracing_reset(struct trace_array *tr, int cpu)
 {
-	data->trace_idx = 0;
-	data->overrun = 0;
-	data->trace_head = data->trace_tail = head_page(data);
-	data->trace_head_idx = 0;
-	data->trace_tail_idx = 0;
+	ring_buffer_reset_cpu(tr->buffer, cpu);
 }
 
 #define SAVED_CMDLINES 128
@@ -722,70 +629,6 @@ void tracing_record_cmdline(struct task_struct *tsk)
 	trace_save_cmdline(tsk);
 }
 
-static inline struct list_head *
-trace_next_list(struct trace_array_cpu *data, struct list_head *next)
-{
-	/*
-	 * Roundrobin - but skip the head (which is not a real page):
-	 */
-	next = next->next;
-	if (unlikely(next == &data->trace_pages))
-		next = next->next;
-	BUG_ON(next == &data->trace_pages);
-
-	return next;
-}
-
-static inline void *
-trace_next_page(struct trace_array_cpu *data, void *addr)
-{
-	struct list_head *next;
-	struct page *page;
-
-	page = virt_to_page(addr);
-
-	next = trace_next_list(data, &page->lru);
-	page = list_entry(next, struct page, lru);
-
-	return page_address(page);
-}
-
-struct trace_entry *
-tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
-{
-	unsigned long idx, idx_next;
-	struct trace_entry *entry;
-
-	data->trace_idx++;
-	idx = data->trace_head_idx;
-	idx_next = idx + 1;
-
-	BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
-
-	entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
-
-	if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
-		data->trace_head = trace_next_page(data, data->trace_head);
-		idx_next = 0;
-	}
-
-	if (data->trace_head == data->trace_tail &&
-	    idx_next == data->trace_tail_idx) {
-		/* overrun */
-		data->overrun++;
-		data->trace_tail_idx++;
-		if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
-			data->trace_tail =
-				trace_next_page(data, data->trace_tail);
-			data->trace_tail_idx = 0;
-		}
-	}
-
-	data->trace_head_idx = idx_next;
-
-	return entry;
-}
-
 void
 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 {
@@ -796,7 +639,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 
 	entry->field.preempt_count	= pc & 0xff;
 	entry->field.pid		= (tsk) ? tsk->pid : 0;
-	entry->field.t			= ftrace_now(raw_smp_processor_id());
 	entry->field.flags =
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
@@ -808,18 +650,20 @@ void
 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	       unsigned long ip, unsigned long parent_ip, unsigned long flags)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry				= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, flags);
 	entry->type			= TRACE_FN;
 	entry->field.fn.ip		= ip;
 	entry->field.fn.parent_ip	= parent_ip;
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
 
 void
@@ -835,13 +679,19 @@ void __trace_stack(struct trace_array *tr,
 		   unsigned long flags,
 		   int skip)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	struct stack_trace trace;
+	unsigned long irq_flags;
 
 	if (!(trace_flags & TRACE_ITER_STACKTRACE))
 		return;
 
-	entry			= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, flags);
 	entry->type		= TRACE_STACK;
 
@@ -853,28 +703,31 @@ void __trace_stack(struct trace_array *tr,
 	trace.entries		= entry->field.stack.caller;
 
 	save_stack_trace(&trace);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
 
 void
 __trace_special(void *__tr, void *__data,
 		unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
+	struct ring_buffer_event *event;
 	struct trace_array_cpu *data = __data;
 	struct trace_array *tr = __tr;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry				= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, 0);
 	entry->type			= TRACE_SPECIAL;
 	entry->field.special.arg1	= arg1;
 	entry->field.special.arg2	= arg2;
 	entry->field.special.arg3	= arg3;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, irq_flags, 4);
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
 
 	trace_wake_up();
 }
@@ -886,12 +739,15 @@ tracing_sched_switch_trace(struct trace_array *tr,
 			   struct task_struct *next,
 			   unsigned long flags)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry				= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, flags);
 	entry->type			= TRACE_CTX;
 	entry->field.ctx.prev_pid	= prev->pid;
@@ -901,9 +757,8 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry->field.ctx.next_prio	= next->prio;
 	entry->field.ctx.next_state	= next->state;
 	entry->field.ctx.next_cpu	= task_cpu(next);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, flags, 5);
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
 }
 
 void
@@ -913,12 +768,15 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 			   struct task_struct *curr,
 			   unsigned long flags)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, flags);
 	entry->type		= TRACE_WAKE;
 	entry->field.ctx.prev_pid	= curr->pid;
@@ -928,9 +786,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->field.ctx.next_prio	= wakee->prio;
 	entry->field.ctx.next_state	= wakee->state;
 	entry->field.ctx.next_cpu	= task_cpu(wakee);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, flags, 6);
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
 
 	trace_wake_up();
 }
@@ -1011,183 +868,77 @@ enum trace_file_type {
 	TRACE_FILE_LAT_FMT	= 1,
 };
 
-/* Return the current entry.  */
-static struct trace_entry *
-trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
-		struct trace_iterator *iter, int cpu)
-{
-	struct page *page;
-	struct trace_entry *array;
-
-	if (iter->next_idx[cpu] >= tr->entries ||
-	    iter->next_idx[cpu] >= data->trace_idx ||
-	    (data->trace_head == data->trace_tail &&
-	     data->trace_head_idx == data->trace_tail_idx))
-		return NULL;
-
-	if (!iter->next_page[cpu]) {
-		/* Initialize the iterator for this cpu trace buffer */
-		WARN_ON(!data->trace_tail);
-		page = virt_to_page(data->trace_tail);
-		iter->next_page[cpu] = &page->lru;
-		iter->next_page_idx[cpu] = data->trace_tail_idx;
-	}
-
-	page = list_entry(iter->next_page[cpu], struct page, lru);
-	BUG_ON(&data->trace_pages == &page->lru);
-
-	array = page_address(page);
-
-	WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
-	return &array[iter->next_page_idx[cpu]];
-}
-
-/* Increment the index counter of an iterator by one */
-static void __trace_iterator_increment(struct trace_iterator *iter, int cpu)
-{
-	iter->next_idx[cpu]++;
-	iter->next_page_idx[cpu]++;
-
-	if (iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE) {
-		struct trace_array_cpu *data = iter->tr->data[cpu];
-
-		iter->next_page_idx[cpu] = 0;
-		iter->next_page[cpu] =
-			trace_next_list(data, iter->next_page[cpu]);
-	}
-}
-
 static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
 {
 	iter->idx++;
-	__trace_iterator_increment(iter, cpu);
+	ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
 }
 
 static struct trace_entry *
-trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data,
-		 struct trace_iterator *iter, int cpu)
+peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
 {
-	struct list_head *next_page;
-	struct trace_entry *ent;
-	int idx, next_idx, next_page_idx;
-
-	ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
-
-	if (likely(!ent || ent->type != TRACE_CONT))
-		return ent;
-
-	/* save the iterator details */
-	idx		= iter->idx;
-	next_idx	= iter->next_idx[cpu];
-	next_page_idx	= iter->next_page_idx[cpu];
-	next_page	= iter->next_page[cpu];
-
-	/* find a real entry */
-	do {
-		__trace_iterator_increment(iter, cpu);
-		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
-	} while (ent && ent->type != TRACE_CONT);
-
-	/* reset the iterator */
-	iter->idx			= idx;
-	iter->next_idx[cpu]		= next_idx;
-	iter->next_page_idx[cpu]	= next_page_idx;
-	iter->next_page[cpu]		= next_page;
+	struct ring_buffer_event *event;
+	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
 
-	return ent;
+	event = ring_buffer_iter_peek(buf_iter, ts);
+	return event ? ring_buffer_event_data(event) : NULL;
 }
-
 static struct trace_entry *
-__find_next_entry(struct trace_iterator *iter, int *ent_cpu, int inc)
+__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
-	struct trace_array *tr = iter->tr;
+	struct ring_buffer *buffer = iter->tr->buffer;
 	struct trace_entry *ent, *next = NULL;
+	u64 next_ts = 0, ts;
 	int next_cpu = -1;
 	int cpu;
 
 	for_each_tracing_cpu(cpu) {
-		if (!head_page(tr->data[cpu]))
-			continue;
 
-		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+		if (ring_buffer_empty_cpu(buffer, cpu))
+			continue;
 
-		if (ent && ent->type == TRACE_CONT) {
-			struct trace_array_cpu *data = tr->data[cpu];
-
-			if (!inc)
-				ent = trace_entry_next(tr, data, iter, cpu);
-			else {
-				while (ent && ent->type == TRACE_CONT) {
-					__trace_iterator_increment(iter, cpu);
-					ent = trace_entry_idx(tr, tr->data[cpu],
-							      iter, cpu);
-				}
-			}
-		}
+		ent = peek_next_entry(iter, cpu, &ts);
 
 		/*
 		 * Pick the entry with the smallest timestamp:
 		 */
-		if (ent && (!next || ent->field.t < next->field.t)) {
+		if (ent && (!next || ts < next_ts)) {
 			next = ent;
 			next_cpu = cpu;
+			next_ts = ts;
 		}
 	}
 
 	if (ent_cpu)
 		*ent_cpu = next_cpu;
 
+	if (ent_ts)
+		*ent_ts = next_ts;
+
 	return next;
 }
 
 /* Find the next real entry, without updating the iterator itself */
 static struct trace_entry *
-find_next_entry(struct trace_iterator *iter, int *ent_cpu)
+find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
-	return __find_next_entry(iter, ent_cpu, 0);
+	return __find_next_entry(iter, ent_cpu, ent_ts);
 }
 
 /* Find the next real entry, and increment the iterator to the next entry */
 static void *find_next_entry_inc(struct trace_iterator *iter)
 {
-	struct trace_entry *next;
-	int next_cpu = -1;
-
-	next = __find_next_entry(iter, &next_cpu, 1);
-
-	iter->prev_ent = iter->ent;
-	iter->prev_cpu = iter->cpu;
+	iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
 
-	iter->ent = next;
-	iter->cpu = next_cpu;
-
-	if (next)
+	if (iter->ent)
 		trace_iterator_increment(iter, iter->cpu);
 
-	return next ? iter : NULL;
+	return iter->ent ? iter : NULL;
 }
 
 static void trace_consume(struct trace_iterator *iter)
 {
-	struct trace_array_cpu *data = iter->tr->data[iter->cpu];
-	struct trace_entry *ent;
-
- again:
-	data->trace_tail_idx++;
-	if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
-		data->trace_tail = trace_next_page(data, data->trace_tail);
-		data->trace_tail_idx = 0;
-	}
-
-	/* Check if we empty it, then reset the index */
-	if (data->trace_head == data->trace_tail &&
-	    data->trace_head_idx == data->trace_tail_idx)
-		data->trace_idx = 0;
-
-	ent = trace_entry_idx(iter->tr, iter->tr->data[iter->cpu],
-			      iter, iter->cpu);
-	if (ent && ent->type == TRACE_CONT)
-		goto again;
+	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
 }
 
 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1220,7 +971,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 	struct trace_iterator *iter = m->private;
 	void *p = NULL;
 	loff_t l = 0;
-	int i;
+	int cpu;
 
 	mutex_lock(&trace_types_lock);
 
@@ -1239,12 +990,9 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		iter->ent = NULL;
 		iter->cpu = 0;
 		iter->idx = -1;
-		iter->prev_ent = NULL;
-		iter->prev_cpu = -1;
 
-		for_each_tracing_cpu(i) {
-			iter->next_idx[i] = 0;
-			iter->next_page[i] = NULL;
+		for_each_tracing_cpu(cpu) {
+			ring_buffer_iter_reset(iter->buffer_iter[cpu]);
 		}
 
 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
@@ -1365,23 +1113,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 	struct trace_array *tr = iter->tr;
 	struct trace_array_cpu *data = tr->data[tr->cpu];
 	struct tracer *type = current_trace;
-	unsigned long total   = 0;
-	unsigned long entries = 0;
-	int cpu;
+	unsigned long total;
+	unsigned long entries;
 	const char *name = "preemption";
 
 	if (type)
 		name = type->name;
 
-	for_each_tracing_cpu(cpu) {
-		if (head_page(tr->data[cpu])) {
-			total += tr->data[cpu]->trace_idx;
-			if (tr->data[cpu]->trace_idx > tr->entries)
-				entries += tr->entries;
-			else
-				entries += tr->data[cpu]->trace_idx;
-		}
-	}
+	entries = ring_buffer_entries(iter->tr->buffer);
+	total = entries +
+		ring_buffer_overruns(iter->tr->buffer);
 
 	seq_printf(m, "%s latency trace v1.1.5 on %s\n",
 		   name, UTS_RELEASE);
@@ -1468,7 +1209,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 unsigned long preempt_mark_thresh = 100;
 
 static void
-lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
+lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
 		    unsigned long rel_usecs)
 {
 	trace_seq_printf(s, " %4lldus", abs_usecs);
@@ -1488,12 +1229,10 @@ static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
  */
 void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 {
-	struct trace_array *tr = iter->tr;
-	struct trace_array_cpu *data = tr->data[iter->cpu];
 	struct trace_entry *ent;
 	bool ok = true;
 
-	ent = trace_entry_idx(tr, data, iter, iter->cpu);
+	ent = peek_next_entry(iter, iter->cpu, NULL);
 	if (!ent || ent->type != TRACE_CONT) {
 		trace_seq_putc(s, '\n');
 		return;
@@ -1502,8 +1241,8 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 	do {
 		if (ok)
 			ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0);
-		__trace_iterator_increment(iter, iter->cpu);
-		ent = trace_entry_idx(tr, data, iter, iter->cpu);
+		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+		ent = peek_next_entry(iter, iter->cpu, NULL);
 	} while (ent && ent->type == TRACE_CONT);
 
 	if (!ok)
@@ -1515,25 +1254,26 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 {
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
-	struct trace_entry *next_entry = find_next_entry(iter, NULL);
+	struct trace_entry *next_entry;
 	unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
 	struct trace_entry *entry = iter->ent;
 	struct trace_field *field = &entry->field;
 	unsigned long abs_usecs;
 	unsigned long rel_usecs;
+	u64 next_ts;
 	char *comm;
 	int S, T;
 	int i;
 	unsigned state;
 
-	if (!next_entry)
-		next_entry = entry;
-
 	if (entry->type == TRACE_CONT)
 		return 1;
 
-	rel_usecs = ns2usecs(next_entry->field.t - entry->field.t);
-	abs_usecs = ns2usecs(entry->field.t - iter->tr->time_start);
+	next_entry = find_next_entry(iter, NULL, &next_ts);
+	if (!next_entry)
+		next_ts = iter->ts;
+	rel_usecs = ns2usecs(next_ts - iter->ts);
+	abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
 
 	if (verbose) {
 		comm = trace_find_cmdline(field->pid);
@@ -1542,7 +1282,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 				 comm,
 				 field->pid, cpu, field->flags,
 				 field->preempt_count, trace_idx,
-				 ns2usecs(field->t),
+				 ns2usecs(iter->ts),
 				 abs_usecs/1000,
 				 abs_usecs % 1000, rel_usecs/1000,
 				 rel_usecs % 1000);
@@ -1627,7 +1367,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 
 	comm = trace_find_cmdline(iter->ent->field.pid);
 
-	t = ns2usecs(field->t);
+	t = ns2usecs(iter->ts);
 	usec_rem = do_div(t, 1000000ULL);
 	secs = (unsigned long)t;
 
@@ -1732,7 +1472,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	field = &entry->field;
 
 	ret = trace_seq_printf(s, "%d %d %llu ",
-		field->pid, iter->cpu, field->t);
+		field->pid, iter->cpu, iter->ts);
 	if (!ret)
 		return 0;
 
@@ -1811,7 +1551,7 @@ static int print_hex_fmt(struct trace_iterator *iter)
 
 	SEQ_PUT_HEX_FIELD_RET(s, field->pid);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
-	SEQ_PUT_HEX_FIELD_RET(s, field->t);
+	SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
 	case TRACE_FN:
@@ -1861,7 +1601,7 @@ static int print_bin_fmt(struct trace_iterator *iter)
 
 	SEQ_PUT_FIELD_RET(s, field->pid);
 	SEQ_PUT_FIELD_RET(s, field->cpu);
-	SEQ_PUT_FIELD_RET(s, field->t);
+	SEQ_PUT_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
 	case TRACE_FN:
@@ -1888,15 +1628,10 @@ static int print_bin_fmt(struct trace_iterator *iter)
 
 static int trace_empty(struct trace_iterator *iter)
 {
-	struct trace_array_cpu *data;
 	int cpu;
 
 	for_each_tracing_cpu(cpu) {
-		data = iter->tr->data[cpu];
-
-		if (head_page(data) && data->trace_idx &&
-		    (data->trace_tail != data->trace_head ||
-		     data->trace_tail_idx != data->trace_head_idx))
+		if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
 			return 0;
 	}
 	return 1;
@@ -1961,6 +1696,8 @@ static struct trace_iterator *
 __tracing_open(struct inode *inode, struct file *file, int *ret)
 {
 	struct trace_iterator *iter;
+	struct seq_file *m;
+	int cpu;
 
 	if (tracing_disabled) {
 		*ret = -ENODEV;
@@ -1981,28 +1718,43 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
 	iter->trace = current_trace;
 	iter->pos = -1;
 
+	for_each_tracing_cpu(cpu) {
+		iter->buffer_iter[cpu] =
+			ring_buffer_read_start(iter->tr->buffer, cpu);
+		if (!iter->buffer_iter[cpu])
+			goto fail_buffer;
+	}
+
 	/* TODO stop tracer */
 	*ret = seq_open(file, &tracer_seq_ops);
-	if (!*ret) {
-		struct seq_file *m = file->private_data;
-		m->private = iter;
+	if (*ret)
+		goto fail_buffer;
 
-		/* stop the trace while dumping */
-		if (iter->tr->ctrl) {
-			tracer_enabled = 0;
-			ftrace_function_enabled = 0;
-		}
+	m = file->private_data;
+	m->private = iter;
 
-		if (iter->trace && iter->trace->open)
-			iter->trace->open(iter);
-	} else {
-		kfree(iter);
-		iter = NULL;
+	/* stop the trace while dumping */
+	if (iter->tr->ctrl) {
+		tracer_enabled = 0;
+		ftrace_function_enabled = 0;
 	}
+
+	if (iter->trace && iter->trace->open)
+			iter->trace->open(iter);
+
 	mutex_unlock(&trace_types_lock);
 
  out:
 	return iter;
+
+ fail_buffer:
+	for_each_tracing_cpu(cpu) {
+		if (iter->buffer_iter[cpu])
+			ring_buffer_read_finish(iter->buffer_iter[cpu]);
+	}
+	mutex_unlock(&trace_types_lock);
+
+	return ERR_PTR(-ENOMEM);
 }
 
 int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -2018,8 +1770,14 @@ int tracing_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = (struct seq_file *)file->private_data;
 	struct trace_iterator *iter = m->private;
+	int cpu;
 
 	mutex_lock(&trace_types_lock);
+	for_each_tracing_cpu(cpu) {
+		if (iter->buffer_iter[cpu])
+			ring_buffer_read_finish(iter->buffer_iter[cpu]);
+	}
+
 	if (iter->trace && iter->trace->close)
 		iter->trace->close(iter);
 
@@ -2526,6 +2284,7 @@ static atomic_t tracing_reader;
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
 	struct trace_iterator *iter;
+	int cpu;
 
 	if (tracing_disabled)
 		return -ENODEV;
@@ -2546,17 +2305,38 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	iter->trace = current_trace;
 	filp->private_data = iter;
 
+	for_each_tracing_cpu(cpu) {
+		iter->buffer_iter[cpu] =
+			ring_buffer_read_start(iter->tr->buffer, cpu);
+		if (!iter->buffer_iter[cpu])
+			goto fail_buffer;
+	}
+
 	if (iter->trace->pipe_open)
 		iter->trace->pipe_open(iter);
 	mutex_unlock(&trace_types_lock);
 
 	return 0;
+
+ fail_buffer:
+	for_each_tracing_cpu(cpu) {
+		if (iter->buffer_iter[cpu])
+			ring_buffer_read_finish(iter->buffer_iter[cpu]);
+	}
+	mutex_unlock(&trace_types_lock);
+
+	return -ENOMEM;
 }
 
 static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
 	struct trace_iterator *iter = file->private_data;
+	int cpu;
 
+	for_each_tracing_cpu(cpu) {
+		if (iter->buffer_iter[cpu])
+			ring_buffer_read_finish(iter->buffer_iter[cpu]);
+	}
 	kfree(iter);
 	atomic_dec(&tracing_reader);
 
@@ -2592,13 +2372,10 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
-	struct trace_array_cpu *data;
-	static cpumask_t mask;
 	unsigned long flags;
 #ifdef CONFIG_FTRACE
 	int ftrace_save;
 #endif
-	int cpu;
 	ssize_t sret;
 
 	/* return any leftover data */
@@ -2687,32 +2464,13 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 	 * and then release the locks again.
 	 */
 
-	cpus_clear(mask);
-	local_irq_save(flags);
+	local_irq_disable();
 #ifdef CONFIG_FTRACE
 	ftrace_save = ftrace_enabled;
 	ftrace_enabled = 0;
 #endif
 	smp_wmb();
-	for_each_tracing_cpu(cpu) {
-		data = iter->tr->data[cpu];
-
-		if (!head_page(data) || !data->trace_idx)
-			continue;
-
-		atomic_inc(&data->disabled);
-		cpu_set(cpu, mask);
-	}
-
-	for_each_cpu_mask(cpu, mask) {
-		data = iter->tr->data[cpu];
-		__raw_spin_lock(&data->lock);
-
-		if (data->overrun > iter->last_overrun[cpu])
-			iter->overrun[cpu] +=
-				data->overrun - iter->last_overrun[cpu];
-		iter->last_overrun[cpu] = data->overrun;
-	}
+	ring_buffer_lock(iter->tr->buffer, &flags);
 
 	while (find_next_entry_inc(iter) != NULL) {
 		int ret;
@@ -2731,19 +2489,11 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 			break;
 	}
 
-	for_each_cpu_mask(cpu, mask) {
-		data = iter->tr->data[cpu];
-		__raw_spin_unlock(&data->lock);
-	}
-
-	for_each_cpu_mask(cpu, mask) {
-		data = iter->tr->data[cpu];
-		atomic_dec(&data->disabled);
-	}
+	ring_buffer_unlock(iter->tr->buffer, flags);
 #ifdef CONFIG_FTRACE
 	ftrace_enabled = ftrace_save;
 #endif
-	local_irq_restore(flags);
+	local_irq_enable();
 
 	/* Now copy what we have to the user */
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
@@ -2776,7 +2526,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 {
 	unsigned long val;
 	char buf[64];
-	int i, ret;
+	int ret;
 	struct trace_array *tr = filp->private_data;
 
 	if (cnt >= sizeof(buf))
@@ -2804,52 +2554,31 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 		goto out;
 	}
 
-	if (val > global_trace.entries) {
-		long pages_requested;
-		unsigned long freeable_pages;
-
-		/* make sure we have enough memory before mapping */
-		pages_requested =
-			(val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
-
-		/* account for each buffer (and max_tr) */
-		pages_requested *= tracing_nr_buffers * 2;
-
-		/* Check for overflow */
-		if (pages_requested < 0) {
-			cnt = -ENOMEM;
-			goto out;
-		}
-
-		freeable_pages = determine_dirtyable_memory();
-
-		/* we only allow to request 1/4 of useable memory */
-		if (pages_requested >
-		    ((freeable_pages + tracing_pages_allocated) / 4)) {
-			cnt = -ENOMEM;
+	if (val != global_trace.entries) {
+		ret = ring_buffer_resize(global_trace.buffer, val);
+		if (ret < 0) {
+			cnt = ret;
 			goto out;
 		}
 
-		while (global_trace.entries < val) {
-			if (trace_alloc_page()) {
-				cnt = -ENOMEM;
-				goto out;
+		ret = ring_buffer_resize(max_tr.buffer, val);
+		if (ret < 0) {
+			int r;
+			cnt = ret;
+			r = ring_buffer_resize(global_trace.buffer,
+					       global_trace.entries);
+			if (r < 0) {
+				/* AARGH! We are left with different
+				 * size max buffer!!!! */
+				WARN_ON(1);
+				tracing_disabled = 1;
 			}
-			/* double check that we don't go over the known pages */
-			if (tracing_pages_allocated > pages_requested)
-				break;
+			goto out;
 		}
 
-	} else {
-		/* include the number of entries in val (inc of page entries) */
-		while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
-			trace_free_page();
+		global_trace.entries = val;
 	}
 
-	/* check integrity */
-	for_each_tracing_cpu(i)
-		check_pages(global_trace.data[i]);
-
 	filp->f_pos += cnt;
 
 	/* If check pages failed, return ENOMEM */
@@ -3086,10 +2815,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	static DEFINE_SPINLOCK(trace_buf_lock);
 	static char trace_buf[TRACE_BUF_SIZE];
 
+	struct ring_buffer_event *event;
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
 	struct trace_entry *entry;
-	unsigned long flags;
+	unsigned long flags, irq_flags;
 	long disabled;
 	int cpu, len = 0, write, written = 0;
 
@@ -3110,8 +2840,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	len = min(len, TRACE_BUF_SIZE-1);
 	trace_buf[len] = 0;
 
-	__raw_spin_lock(&data->lock);
-	entry				= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		goto out_unlock;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, flags);
 	entry->type			= TRACE_PRINT;
 	entry->field.print.ip		= ip;
@@ -3121,21 +2854,27 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	memcpy(&entry->field.print.buf, trace_buf, write);
 	entry->field.print.buf[write] = 0;
 	written = write;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	if (written != len)
 		entry->field.flags |= TRACE_FLAG_CONT;
 
 	while (written != len) {
-		entry = tracing_get_trace_entry(tr, data);
+		event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+						 &irq_flags);
+		if (!event)
+			goto out_unlock;
+		entry	= ring_buffer_event_data(event);
 
 		entry->type = TRACE_CONT;
 		write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1));
 		memcpy(&entry->cont.buf, trace_buf+written, write);
 		entry->cont.buf[write] = 0;
 		written += write;
+		ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	}
-	__raw_spin_unlock(&data->lock);
 
+ out_unlock:
 	spin_unlock(&trace_buf_lock);
 
  out:
@@ -3227,12 +2966,10 @@ void ftrace_dump(void)
 	static DEFINE_SPINLOCK(ftrace_dump_lock);
 	/* use static because iter can be a bit big for the stack */
 	static struct trace_iterator iter;
-	struct trace_array_cpu *data;
 	static cpumask_t mask;
 	static int dump_ran;
-	unsigned long flags;
+	unsigned long flags, irq_flags;
 	int cnt = 0;
-	int cpu;
 
 	/* only one dump */
 	spin_lock_irqsave(&ftrace_dump_lock, flags);
@@ -3258,25 +2995,7 @@ void ftrace_dump(void)
 
 	cpus_clear(mask);
 
-	for_each_tracing_cpu(cpu) {
-		data = iter.tr->data[cpu];
-
-		if (!head_page(data) || !data->trace_idx)
-			continue;
-
-		atomic_inc(&data->disabled);
-		cpu_set(cpu, mask);
-	}
-
-	for_each_cpu_mask(cpu, mask) {
-		data = iter.tr->data[cpu];
-		__raw_spin_lock(&data->lock);
-
-		if (data->overrun > iter.last_overrun[cpu])
-			iter.overrun[cpu] +=
-				data->overrun - iter.last_overrun[cpu];
-		iter.last_overrun[cpu] = data->overrun;
-	}
+	ring_buffer_lock(iter.tr->buffer, &irq_flags);
 
 	while (!trace_empty(&iter)) {
 
@@ -3305,205 +3024,47 @@ void ftrace_dump(void)
 	else
 		printk(KERN_TRACE "---------------------------------\n");
 
-	for_each_cpu_mask(cpu, mask) {
-		data = iter.tr->data[cpu];
-		__raw_spin_unlock(&data->lock);
-	}
-
-	for_each_cpu_mask(cpu, mask) {
-		data = iter.tr->data[cpu];
-		atomic_dec(&data->disabled);
-	}
-
+	ring_buffer_unlock(iter.tr->buffer, irq_flags);
 
  out:
 	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
 }
 
-static int trace_alloc_page(void)
+__init static int tracer_alloc_buffers(void)
 {
 	struct trace_array_cpu *data;
-	struct page *page, *tmp;
-	LIST_HEAD(pages);
-	void *array;
-	unsigned pages_allocated = 0;
 	int i;
 
-	/* first allocate a page for each CPU */
-	for_each_tracing_cpu(i) {
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_pages;
-		}
-
-		pages_allocated++;
-		page = virt_to_page(array);
-		list_add(&page->lru, &pages);
+	/* TODO: make the number of buffers hot pluggable with CPUS */
+	tracing_buffer_mask = cpu_possible_map;
 
-/* Only allocate if we are actually using the max trace */
-#ifdef CONFIG_TRACER_MAX_TRACE
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_pages;
-		}
-		pages_allocated++;
-		page = virt_to_page(array);
-		list_add(&page->lru, &pages);
-#endif
+	global_trace.buffer = ring_buffer_alloc(trace_buf_size,
+						   TRACE_BUFFER_FLAGS);
+	if (!global_trace.buffer) {
+		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
+		WARN_ON(1);
+		return 0;
 	}
-
-	/* Now that we successfully allocate a page per CPU, add them */
-	for_each_tracing_cpu(i) {
-		data = global_trace.data[i];
-		page = list_entry(pages.next, struct page, lru);
-		list_del_init(&page->lru);
-		list_add_tail(&page->lru, &data->trace_pages);
-		ClearPageLRU(page);
+	global_trace.entries = ring_buffer_size(global_trace.buffer);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-		data = max_tr.data[i];
-		page = list_entry(pages.next, struct page, lru);
-		list_del_init(&page->lru);
-		list_add_tail(&page->lru, &data->trace_pages);
-		SetPageLRU(page);
-#endif
-	}
-	tracing_pages_allocated += pages_allocated;
-	global_trace.entries += ENTRIES_PER_PAGE;
-
-	return 0;
-
- free_pages:
-	list_for_each_entry_safe(page, tmp, &pages, lru) {
-		list_del_init(&page->lru);
-		__free_page(page);
+	max_tr.buffer = ring_buffer_alloc(trace_buf_size,
+					     TRACE_BUFFER_FLAGS);
+	if (!max_tr.buffer) {
+		printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
+		WARN_ON(1);
+		ring_buffer_free(global_trace.buffer);
+		return 0;
 	}
-	return -ENOMEM;
-}
-
-static int trace_free_page(void)
-{
-	struct trace_array_cpu *data;
-	struct page *page;
-	struct list_head *p;
-	int i;
-	int ret = 0;
-
-	/* free one page from each buffer */
-	for_each_tracing_cpu(i) {
-		data = global_trace.data[i];
-		p = data->trace_pages.next;
-		if (p == &data->trace_pages) {
-			/* should never happen */
-			WARN_ON(1);
-			tracing_disabled = 1;
-			ret = -1;
-			break;
-		}
-		page = list_entry(p, struct page, lru);
-		ClearPageLRU(page);
-		list_del(&page->lru);
-		tracing_pages_allocated--;
-		tracing_pages_allocated--;
-		__free_page(page);
-
-		tracing_reset(data);
-
-#ifdef CONFIG_TRACER_MAX_TRACE
-		data = max_tr.data[i];
-		p = data->trace_pages.next;
-		if (p == &data->trace_pages) {
-			/* should never happen */
-			WARN_ON(1);
-			tracing_disabled = 1;
-			ret = -1;
-			break;
-		}
-		page = list_entry(p, struct page, lru);
-		ClearPageLRU(page);
-		list_del(&page->lru);
-		__free_page(page);
-
-		tracing_reset(data);
+	max_tr.entries = ring_buffer_size(max_tr.buffer);
+	WARN_ON(max_tr.entries != global_trace.entries);
 #endif
-	}
-	global_trace.entries -= ENTRIES_PER_PAGE;
-
-	return ret;
-}
-
-__init static int tracer_alloc_buffers(void)
-{
-	struct trace_array_cpu *data;
-	void *array;
-	struct page *page;
-	int pages = 0;
-	int ret = -ENOMEM;
-	int i;
-
-	/* TODO: make the number of buffers hot pluggable with CPUS */
-	tracing_nr_buffers = num_possible_cpus();
-	tracing_buffer_mask = cpu_possible_map;
 
 	/* Allocate the first page for all buffers */
 	for_each_tracing_cpu(i) {
 		data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
 		max_tr.data[i] = &per_cpu(max_data, i);
-
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_buffers;
-		}
-
-		/* set the array to the list */
-		INIT_LIST_HEAD(&data->trace_pages);
-		page = virt_to_page(array);
-		list_add(&page->lru, &data->trace_pages);
-		/* use the LRU flag to differentiate the two buffers */
-		ClearPageLRU(page);
-
-		data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
-		max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
-
-/* Only allocate if we are actually using the max trace */
-#ifdef CONFIG_TRACER_MAX_TRACE
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_buffers;
-		}
-
-		INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
-		page = virt_to_page(array);
-		list_add(&page->lru, &max_tr.data[i]->trace_pages);
-		SetPageLRU(page);
-#endif
-	}
-
-	/*
-	 * Since we allocate by orders of pages, we may be able to
-	 * round up a bit.
-	 */
-	global_trace.entries = ENTRIES_PER_PAGE;
-	pages++;
-
-	while (global_trace.entries < trace_nr_entries) {
-		if (trace_alloc_page())
-			break;
-		pages++;
 	}
-	max_tr.entries = global_trace.entries;
-
-	pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
-		pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
-	pr_info("   actual entries %ld\n", global_trace.entries);
 
 	trace_init_cmdlines();
 
@@ -3519,38 +3080,13 @@ __init static int tracer_alloc_buffers(void)
 	/* All seems OK, enable tracing */
 	global_trace.ctrl = tracer_enabled;
 	tracing_disabled = 0;
+
 	atomic_notifier_chain_register(&panic_notifier_list,
 				       &trace_panic_notifier);
 
 	register_die_notifier(&trace_die_notifier);
 
 	return 0;
-
- free_buffers:
-	for (i-- ; i >= 0; i--) {
-		struct page *page, *tmp;
-		struct trace_array_cpu *data = global_trace.data[i];
-
-		if (data) {
-			list_for_each_entry_safe(page, tmp,
-						 &data->trace_pages, lru) {
-				list_del_init(&page->lru);
-				__free_page(page);
-			}
-		}
-
-#ifdef CONFIG_TRACER_MAX_TRACE
-		data = max_tr.data[i];
-		if (data) {
-			list_for_each_entry_safe(page, tmp,
-						 &data->trace_pages, lru) {
-				list_del_init(&page->lru);
-				__free_page(page);
-			}
-		}
-#endif
-	}
-	return ret;
 }
 early_initcall(tracer_alloc_buffers);
 fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b28bf8812efc..f6965f775b43 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -5,6 +5,7 @@
 #include <asm/atomic.h>
 #include <linux/sched.h>
 #include <linux/clocksource.h>
+#include <linux/ring_buffer.h>
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 
@@ -102,7 +103,6 @@ struct trace_field {
 	char			flags;
 	char			preempt_count;
 	int			pid;
-	cycle_t			t;
 	union {
 		struct ftrace_entry		fn;
 		struct ctx_switch_entry		ctx;
@@ -139,16 +139,9 @@ struct trace_entry {
  * the trace, etc.)
  */
 struct trace_array_cpu {
-	struct list_head	trace_pages;
 	atomic_t		disabled;
-	raw_spinlock_t		lock;
-	struct lock_class_key	lock_key;
 
 	/* these fields get copied into max-trace: */
-	unsigned		trace_head_idx;
-	unsigned		trace_tail_idx;
-	void			*trace_head; /* producer */
-	void			*trace_tail; /* consumer */
 	unsigned long		trace_idx;
 	unsigned long		overrun;
 	unsigned long		saved_latency;
@@ -172,6 +165,7 @@ struct trace_iterator;
  * They have on/off state as well:
  */
 struct trace_array {
+	struct ring_buffer	*buffer;
 	unsigned long		entries;
 	long			ctrl;
 	int			cpu;
@@ -219,27 +213,21 @@ struct trace_iterator {
 	struct trace_array	*tr;
 	struct tracer		*trace;
 	void			*private;
-	long			last_overrun[NR_CPUS];
-	long			overrun[NR_CPUS];
+	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
 
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
 	struct trace_entry	*ent;
 	int			cpu;
-
-	struct trace_entry	*prev_ent;
-	int			prev_cpu;
+	u64			ts;
 
 	unsigned long		iter_flags;
 	loff_t			pos;
-	unsigned long		next_idx[NR_CPUS];
-	struct list_head	*next_page[NR_CPUS];
-	unsigned		next_page_idx[NR_CPUS];
 	long			idx;
 };
 
 void trace_wake_up(void);
-void tracing_reset(struct trace_array_cpu *data);
+void tracing_reset(struct trace_array *tr, int cpu);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d5c9e2e4a9c4..3657eec6b87d 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -34,7 +34,7 @@ static void boot_trace_init(struct trace_array *tr)
 	trace_boot_enabled = 0;
 
 	for_each_cpu_mask(cpu, cpu_possible_map)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static void boot_trace_ctrl_update(struct trace_array *tr)
@@ -74,6 +74,7 @@ struct tracer boot_tracer __read_mostly =
 
 void trace_boot(struct boot_trace *it)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	struct trace_array_cpu *data;
 	unsigned long irq_flags;
@@ -85,17 +86,18 @@ void trace_boot(struct boot_trace *it)
 	preempt_disable();
 	data = tr->data[smp_processor_id()];
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-
-	entry = tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		goto out;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, 0);
 	entry->type = TRACE_BOOT;
 	entry->field.initcall = *it;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
 	trace_wake_up();
 
+ out:
 	preempt_enable();
 }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 312144897970..e90eb0c2c56c 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -23,7 +23,7 @@ static void function_reset(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static void start_function_trace(struct trace_array *tr)
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index ece6cfb649fa..37ad49407f27 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -173,7 +173,7 @@ out_unlock:
 out:
 	data->critical_sequence = max_sequence;
 	data->preempt_timestamp = ftrace_now(cpu);
-	tracing_reset(data);
+	tracing_reset(tr, cpu);
 	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
 }
 
@@ -203,7 +203,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
 	data->critical_sequence = max_sequence;
 	data->preempt_timestamp = ftrace_now(cpu);
 	data->critical_start = parent_ip ? : ip;
-	tracing_reset(data);
+	tracing_reset(tr, cpu);
 
 	local_save_flags(flags);
 
@@ -234,7 +234,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
 
 	data = tr->data[cpu];
 
-	if (unlikely(!data) || unlikely(!head_page(data)) ||
+	if (unlikely(!data) ||
 	    !data->critical_start || atomic_read(&data->disabled))
 		return;
 
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index a108c326f36e..bdbf09d8413c 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -27,7 +27,7 @@ static void mmio_reset_data(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static void mmio_trace_init(struct trace_array *tr)
@@ -130,10 +130,14 @@ static unsigned long count_overruns(struct trace_iterator *iter)
 {
 	int cpu;
 	unsigned long cnt = 0;
+/* FIXME: */
+#if 0
 	for_each_online_cpu(cpu) {
 		cnt += iter->overrun[cpu];
 		iter->overrun[cpu] = 0;
 	}
+#endif
+	(void)cpu;
 	return cnt;
 }
 
@@ -176,7 +180,7 @@ static int mmio_print_rw(struct trace_iterator *iter)
 	struct trace_entry *entry = iter->ent;
 	struct mmiotrace_rw *rw	= &entry->field.mmiorw;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->field.t);
+	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
@@ -218,7 +222,7 @@ static int mmio_print_map(struct trace_iterator *iter)
 	struct trace_entry *entry = iter->ent;
 	struct mmiotrace_map *m	= &entry->field.mmiomap;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->field.t);
+	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
@@ -250,7 +254,7 @@ static int mmio_print_mark(struct trace_iterator *iter)
 	struct trace_entry *entry = iter->ent;
 	const char *msg		= entry->field.print.buf;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->field.t);
+	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret;
@@ -303,19 +307,19 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 				struct trace_array_cpu *data,
 				struct mmiotrace_rw *rw)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-
-	entry				= tracing_get_trace_entry(tr, data);
+	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, 0);
 	entry->type			= TRACE_MMIO_RW;
 	entry->field.mmiorw		= *rw;
-
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	trace_wake_up();
 }
@@ -331,19 +335,19 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 				struct trace_array_cpu *data,
 				struct mmiotrace_map *map)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-
-	entry				= tracing_get_trace_entry(tr, data);
+	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, 0);
 	entry->type			= TRACE_MMIO_MAP;
 	entry->field.mmiomap		= *map;
-
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	trace_wake_up();
 }
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 16c9ba060bab..4592b4862515 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -30,7 +30,7 @@ static void nop_trace_init(struct trace_array *tr)
 	ctx_trace = tr;
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 
 	if (tr->ctrl)
 		start_nop_trace(tr);
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 789e927abc9c..e0b06db0f7af 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -81,7 +81,7 @@ static void sched_switch_reset(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static int tracing_sched_register(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 08206b4e29c4..01e75e0639b7 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -191,7 +191,7 @@ static void __wakeup_reset(struct trace_array *tr)
 
 	for_each_possible_cpu(cpu) {
 		data = tr->data[cpu];
-		tracing_reset(data);
+		tracing_reset(tr, cpu);
 	}
 
 	wakeup_cpu = -1;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 5ebd4b135498..09cf230d7eca 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -18,58 +18,20 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 	return 0;
 }
 
-static int
-trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
+static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
 {
-	struct trace_entry *entries;
-	struct page *page;
-	int idx = 0;
-	int i;
+	struct ring_buffer_event *event;
+	struct trace_entry *entry;
 
-	BUG_ON(list_empty(&data->trace_pages));
-	page = list_entry(data->trace_pages.next, struct page, lru);
-	entries = page_address(page);
+	while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
+		entry = ring_buffer_event_data(event);
 
-	check_pages(data);
-	if (head_page(data) != entries)
-		goto failed;
-
-	/*
-	 * The starting trace buffer always has valid elements,
-	 * if any element exists.
-	 */
-	entries = head_page(data);
-
-	for (i = 0; i < tr->entries; i++) {
-
-		if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
+		if (!trace_valid_entry(entry)) {
 			printk(KERN_CONT ".. invalid entry %d ",
-				entries[idx].type);
+				entry->type);
 			goto failed;
 		}
-
-		idx++;
-		if (idx >= ENTRIES_PER_PAGE) {
-			page = virt_to_page(entries);
-			if (page->lru.next == &data->trace_pages) {
-				if (i != tr->entries - 1) {
-					printk(KERN_CONT ".. entries buffer mismatch");
-					goto failed;
-				}
-			} else {
-				page = list_entry(page->lru.next, struct page, lru);
-				entries = page_address(page);
-			}
-			idx = 0;
-		}
-	}
-
-	page = virt_to_page(entries);
-	if (page->lru.next != &data->trace_pages) {
-		printk(KERN_CONT ".. too many entries");
-		goto failed;
 	}
-
 	return 0;
 
  failed:
@@ -91,13 +53,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
 	/* Don't allow flipping of max traces now */
 	raw_local_irq_save(flags);
 	__raw_spin_lock(&ftrace_max_lock);
-	for_each_possible_cpu(cpu) {
-		if (!head_page(tr->data[cpu]))
-			continue;
 
-		cnt += tr->data[cpu]->trace_idx;
+	cnt = ring_buffer_entries(tr->buffer);
 
-		ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
+	for_each_possible_cpu(cpu) {
+		ret = trace_test_buffer_cpu(tr, cpu);
 		if (ret)
 			break;
 	}
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index db58fb66a135..9587d3bcba55 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -241,7 +241,7 @@ static void stack_reset(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static void start_stack_trace(struct trace_array *tr)
-- 
GitLab


From 777e208d40d0953efc6fb4ab58590da3f7d8f02d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 29 Sep 2008 23:02:42 -0400
Subject: [PATCH 313/895] ftrace: take advantage of variable length entries

Now that the underlining ring buffer for ftrace now hold variable length
entries, we can take advantage of this by only storing the size of the
actual event into the buffer. This happens to increase the number of
entries in the buffer dramatically.

We can also get rid of the "trace_cont" operation, but I'm keeping that
until we have no more users. Some of the ftrace tracers can now change
their code to adapt to this new feature.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c           | 439 ++++++++++++++++++---------------
 kernel/trace/trace.h           |  81 +++---
 kernel/trace/trace_boot.c      |  13 +-
 kernel/trace/trace_mmiotrace.c |  31 +--
 4 files changed, 301 insertions(+), 263 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ef80793858b8..ed9e47c18810 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -637,9 +637,9 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 
 	pc = preempt_count();
 
-	entry->field.preempt_count	= pc & 0xff;
-	entry->field.pid		= (tsk) ? tsk->pid : 0;
-	entry->field.flags =
+	entry->preempt_count		= pc & 0xff;
+	entry->pid			= (tsk) ? tsk->pid : 0;
+	entry->flags =
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
@@ -651,7 +651,7 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	       unsigned long ip, unsigned long parent_ip, unsigned long flags)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct ftrace_entry *entry;
 	unsigned long irq_flags;
 
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -659,10 +659,10 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, flags);
-	entry->type			= TRACE_FN;
-	entry->field.fn.ip		= ip;
-	entry->field.fn.parent_ip	= parent_ip;
+	tracing_generic_entry_update(&entry->ent, flags);
+	entry->ent.type			= TRACE_FN;
+	entry->ip			= ip;
+	entry->parent_ip		= parent_ip;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
 
@@ -680,7 +680,7 @@ void __trace_stack(struct trace_array *tr,
 		   int skip)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct stack_entry *entry;
 	struct stack_trace trace;
 	unsigned long irq_flags;
 
@@ -692,15 +692,15 @@ void __trace_stack(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_STACK;
+	tracing_generic_entry_update(&entry->ent, flags);
+	entry->ent.type		= TRACE_STACK;
 
-	memset(&entry->field.stack, 0, sizeof(entry->field.stack));
+	memset(&entry->caller, 0, sizeof(entry->caller));
 
 	trace.nr_entries	= 0;
 	trace.max_entries	= FTRACE_STACK_ENTRIES;
 	trace.skip		= skip;
-	trace.entries		= entry->field.stack.caller;
+	trace.entries		= entry->caller;
 
 	save_stack_trace(&trace);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
@@ -713,7 +713,7 @@ __trace_special(void *__tr, void *__data,
 	struct ring_buffer_event *event;
 	struct trace_array_cpu *data = __data;
 	struct trace_array *tr = __tr;
-	struct trace_entry *entry;
+	struct special_entry *entry;
 	unsigned long irq_flags;
 
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -721,11 +721,11 @@ __trace_special(void *__tr, void *__data,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, 0);
-	entry->type			= TRACE_SPECIAL;
-	entry->field.special.arg1	= arg1;
-	entry->field.special.arg2	= arg2;
-	entry->field.special.arg3	= arg3;
+	tracing_generic_entry_update(&entry->ent, 0);
+	entry->ent.type			= TRACE_SPECIAL;
+	entry->arg1			= arg1;
+	entry->arg2			= arg2;
+	entry->arg3			= arg3;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, irq_flags, 4);
 
@@ -740,7 +740,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 			   unsigned long flags)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct ctx_switch_entry *entry;
 	unsigned long irq_flags;
 
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -748,15 +748,15 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, flags);
-	entry->type			= TRACE_CTX;
-	entry->field.ctx.prev_pid	= prev->pid;
-	entry->field.ctx.prev_prio	= prev->prio;
-	entry->field.ctx.prev_state	= prev->state;
-	entry->field.ctx.next_pid	= next->pid;
-	entry->field.ctx.next_prio	= next->prio;
-	entry->field.ctx.next_state	= next->state;
-	entry->field.ctx.next_cpu	= task_cpu(next);
+	tracing_generic_entry_update(&entry->ent, flags);
+	entry->ent.type			= TRACE_CTX;
+	entry->prev_pid			= prev->pid;
+	entry->prev_prio		= prev->prio;
+	entry->prev_state		= prev->state;
+	entry->next_pid			= next->pid;
+	entry->next_prio		= next->prio;
+	entry->next_state		= next->state;
+	entry->next_cpu	= task_cpu(next);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, flags, 5);
 }
@@ -769,7 +769,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 			   unsigned long flags)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct ctx_switch_entry *entry;
 	unsigned long irq_flags;
 
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -777,15 +777,15 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_WAKE;
-	entry->field.ctx.prev_pid	= curr->pid;
-	entry->field.ctx.prev_prio	= curr->prio;
-	entry->field.ctx.prev_state	= curr->state;
-	entry->field.ctx.next_pid	= wakee->pid;
-	entry->field.ctx.next_prio	= wakee->prio;
-	entry->field.ctx.next_state	= wakee->state;
-	entry->field.ctx.next_cpu	= task_cpu(wakee);
+	tracing_generic_entry_update(&entry->ent, flags);
+	entry->ent.type			= TRACE_WAKE;
+	entry->prev_pid			= curr->pid;
+	entry->prev_prio		= curr->prio;
+	entry->prev_state		= curr->state;
+	entry->next_pid			= wakee->pid;
+	entry->next_prio		= wakee->prio;
+	entry->next_state		= wakee->state;
+	entry->next_cpu			= task_cpu(wakee);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, flags, 6);
 
@@ -1173,20 +1173,19 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 static void
 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 {
-	struct trace_field *field = &entry->field;
 	int hardirq, softirq;
 	char *comm;
 
-	comm = trace_find_cmdline(field->pid);
+	comm = trace_find_cmdline(entry->pid);
 
-	trace_seq_printf(s, "%8.8s-%-5d ", comm, field->pid);
+	trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
 	trace_seq_printf(s, "%3d", cpu);
 	trace_seq_printf(s, "%c%c",
-			(field->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
-			((field->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
+			(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
+			((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
 
-	hardirq = field->flags & TRACE_FLAG_HARDIRQ;
-	softirq = field->flags & TRACE_FLAG_SOFTIRQ;
+	hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
+	softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
 	if (hardirq && softirq) {
 		trace_seq_putc(s, 'H');
 	} else {
@@ -1200,8 +1199,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 		}
 	}
 
-	if (field->preempt_count)
-		trace_seq_printf(s, "%x", field->preempt_count);
+	if (entry->preempt_count)
+		trace_seq_printf(s, "%x", entry->preempt_count);
 	else
 		trace_seq_puts(s, ".");
 }
@@ -1230,6 +1229,7 @@ static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
 void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 {
 	struct trace_entry *ent;
+	struct trace_field_cont *cont;
 	bool ok = true;
 
 	ent = peek_next_entry(iter, iter->cpu, NULL);
@@ -1239,8 +1239,9 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 	}
 
 	do {
+		cont = (struct trace_field_cont *)ent;
 		if (ok)
-			ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0);
+			ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
 		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
 		ent = peek_next_entry(iter, iter->cpu, NULL);
 	} while (ent && ent->type == TRACE_CONT);
@@ -1257,7 +1258,6 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	struct trace_entry *next_entry;
 	unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
 	struct trace_entry *entry = iter->ent;
-	struct trace_field *field = &entry->field;
 	unsigned long abs_usecs;
 	unsigned long rel_usecs;
 	u64 next_ts;
@@ -1276,12 +1276,12 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
 
 	if (verbose) {
-		comm = trace_find_cmdline(field->pid);
+		comm = trace_find_cmdline(entry->pid);
 		trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
 				 " %ld.%03ldms (+%ld.%03ldms): ",
 				 comm,
-				 field->pid, cpu, field->flags,
-				 field->preempt_count, trace_idx,
+				 entry->pid, cpu, entry->flags,
+				 entry->preempt_count, trace_idx,
 				 ns2usecs(iter->ts),
 				 abs_usecs/1000,
 				 abs_usecs % 1000, rel_usecs/1000,
@@ -1291,53 +1291,69 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		lat_print_timestamp(s, abs_usecs, rel_usecs);
 	}
 	switch (entry->type) {
-	case TRACE_FN:
-		seq_print_ip_sym(s, field->fn.ip, sym_flags);
+	case TRACE_FN: {
+		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+
+		seq_print_ip_sym(s, field->ip, sym_flags);
 		trace_seq_puts(s, " (");
-		if (kretprobed(field->fn.parent_ip))
+		if (kretprobed(field->parent_ip))
 			trace_seq_puts(s, KRETPROBE_MSG);
 		else
-			seq_print_ip_sym(s, field->fn.parent_ip, sym_flags);
+			seq_print_ip_sym(s, field->parent_ip, sym_flags);
 		trace_seq_puts(s, ")\n");
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		T = field->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.next_state] : 'X';
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field =
+			(struct ctx_switch_entry *)entry;
+
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
 
-		state = field->ctx.prev_state ?
-			__ffs(field->ctx.prev_state) + 1 : 0;
+		state = field->prev_state ?
+			__ffs(field->prev_state) + 1 : 0;
 		S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
-		comm = trace_find_cmdline(field->ctx.next_pid);
+		comm = trace_find_cmdline(field->next_pid);
 		trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
-				 field->ctx.prev_pid,
-				 field->ctx.prev_prio,
+				 field->prev_pid,
+				 field->prev_prio,
 				 S, entry->type == TRACE_CTX ? "==>" : "  +",
-				 field->ctx.next_cpu,
-				 field->ctx.next_pid,
-				 field->ctx.next_prio,
+				 field->next_cpu,
+				 field->next_pid,
+				 field->next_prio,
 				 T, comm);
 		break;
-	case TRACE_SPECIAL:
+	}
+	case TRACE_SPECIAL: {
+		struct special_entry *field = (struct special_entry *)entry;
+
 		trace_seq_printf(s, "# %ld %ld %ld\n",
-				 field->special.arg1,
-				 field->special.arg2,
-				 field->special.arg3);
+				 field->arg1,
+				 field->arg2,
+				 field->arg3);
 		break;
-	case TRACE_STACK:
+	}
+	case TRACE_STACK: {
+		struct stack_entry *field = (struct stack_entry *)entry;
+
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i)
 				trace_seq_puts(s, " <= ");
-			seq_print_ip_sym(s, field->stack.caller[i], sym_flags);
+			seq_print_ip_sym(s, field->caller[i], sym_flags);
 		}
 		trace_seq_puts(s, "\n");
 		break;
-	case TRACE_PRINT:
-		seq_print_ip_sym(s, field->print.ip, sym_flags);
-		trace_seq_printf(s, ": %s", field->print.buf);
-		if (field->flags & TRACE_FLAG_CONT)
+	}
+	case TRACE_PRINT: {
+		struct print_entry *field = (struct print_entry *)entry;
+
+		seq_print_ip_sym(s, field->ip, sym_flags);
+		trace_seq_printf(s, ": %s", field->buf);
+		if (entry->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
+	}
 	default:
 		trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	}
@@ -1349,7 +1365,6 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
 	struct trace_entry *entry;
-	struct trace_field *field;
 	unsigned long usec_rem;
 	unsigned long long t;
 	unsigned long secs;
@@ -1363,15 +1378,13 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	if (entry->type == TRACE_CONT)
 		return 1;
 
-	field = &entry->field;
-
-	comm = trace_find_cmdline(iter->ent->field.pid);
+	comm = trace_find_cmdline(iter->ent->pid);
 
 	t = ns2usecs(iter->ts);
 	usec_rem = do_div(t, 1000000ULL);
 	secs = (unsigned long)t;
 
-	ret = trace_seq_printf(s, "%16s-%-5d ", comm, field->pid);
+	ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 	if (!ret)
 		return 0;
 	ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
@@ -1382,20 +1395,22 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		return 0;
 
 	switch (entry->type) {
-	case TRACE_FN:
-		ret = seq_print_ip_sym(s, field->fn.ip, sym_flags);
+	case TRACE_FN: {
+		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+
+		ret = seq_print_ip_sym(s, field->ip, sym_flags);
 		if (!ret)
 			return 0;
 		if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
-						field->fn.parent_ip) {
+						field->parent_ip) {
 			ret = trace_seq_printf(s, " <-");
 			if (!ret)
 				return 0;
-			if (kretprobed(field->fn.parent_ip))
+			if (kretprobed(field->parent_ip))
 				ret = trace_seq_puts(s, KRETPROBE_MSG);
 			else
 				ret = seq_print_ip_sym(s,
-						       field->fn.parent_ip,
+						       field->parent_ip,
 						       sym_flags);
 			if (!ret)
 				return 0;
@@ -1404,40 +1419,50 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		if (!ret)
 			return 0;
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		S = field->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.prev_state] : 'X';
-		T = field->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.next_state] : 'X';
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field =
+			(struct ctx_switch_entry *)entry;
+
+		S = field->prev_state < sizeof(state_to_char) ?
+			state_to_char[field->prev_state] : 'X';
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
 		ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
-				       field->ctx.prev_pid,
-				       field->ctx.prev_prio,
+				       field->prev_pid,
+				       field->prev_prio,
 				       S,
 				       entry->type == TRACE_CTX ? "==>" : "  +",
-				       field->ctx.next_cpu,
-				       field->ctx.next_pid,
-				       field->ctx.next_prio,
+				       field->next_cpu,
+				       field->next_pid,
+				       field->next_prio,
 				       T);
 		if (!ret)
 			return 0;
 		break;
-	case TRACE_SPECIAL:
+	}
+	case TRACE_SPECIAL: {
+		struct special_entry *field = (struct special_entry *)entry;
+
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 field->special.arg1,
-				 field->special.arg2,
-				 field->special.arg3);
+				 field->arg1,
+				 field->arg2,
+				 field->arg3);
 		if (!ret)
 			return 0;
 		break;
-	case TRACE_STACK:
+	}
+	case TRACE_STACK: {
+		struct stack_entry *field = (struct stack_entry *)entry;
+
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i) {
 				ret = trace_seq_puts(s, " <= ");
 				if (!ret)
 					return 0;
 			}
-			ret = seq_print_ip_sym(s, field->stack.caller[i],
+			ret = seq_print_ip_sym(s, field->caller[i],
 					       sym_flags);
 			if (!ret)
 				return 0;
@@ -1446,13 +1471,17 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		if (!ret)
 			return 0;
 		break;
-	case TRACE_PRINT:
-		seq_print_ip_sym(s, field->print.ip, sym_flags);
-		trace_seq_printf(s, ": %s", field->print.buf);
-		if (field->flags & TRACE_FLAG_CONT)
+	}
+	case TRACE_PRINT: {
+		struct print_entry *field = (struct print_entry *)entry;
+
+		seq_print_ip_sym(s, field->ip, sym_flags);
+		trace_seq_printf(s, ": %s", field->buf);
+		if (entry->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
 	}
+	}
 	return 1;
 }
 
@@ -1460,7 +1489,6 @@ static int print_raw_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
-	struct trace_field *field;
 	int ret;
 	int S, T;
 
@@ -1469,56 +1497,66 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	if (entry->type == TRACE_CONT)
 		return 1;
 
-	field = &entry->field;
-
 	ret = trace_seq_printf(s, "%d %d %llu ",
-		field->pid, iter->cpu, iter->ts);
+		entry->pid, iter->cpu, iter->ts);
 	if (!ret)
 		return 0;
 
 	switch (entry->type) {
-	case TRACE_FN:
+	case TRACE_FN: {
+		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+
 		ret = trace_seq_printf(s, "%x %x\n",
-					field->fn.ip,
-					field->fn.parent_ip);
+					field->ip,
+					field->parent_ip);
 		if (!ret)
 			return 0;
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		S = field->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.prev_state] : 'X';
-		T = field->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.next_state] : 'X';
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field =
+			(struct ctx_switch_entry *)entry;
+
+		S = field->prev_state < sizeof(state_to_char) ?
+			state_to_char[field->prev_state] : 'X';
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
 		ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
-				       field->ctx.prev_pid,
-				       field->ctx.prev_prio,
+				       field->prev_pid,
+				       field->prev_prio,
 				       S,
-				       field->ctx.next_cpu,
-				       field->ctx.next_pid,
-				       field->ctx.next_prio,
+				       field->next_cpu,
+				       field->next_pid,
+				       field->next_prio,
 				       T);
 		if (!ret)
 			return 0;
 		break;
+	}
 	case TRACE_SPECIAL:
-	case TRACE_STACK:
+	case TRACE_STACK: {
+		struct special_entry *field = (struct special_entry *)entry;
+
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 field->special.arg1,
-				 field->special.arg2,
-				 field->special.arg3);
+				 field->arg1,
+				 field->arg2,
+				 field->arg3);
 		if (!ret)
 			return 0;
 		break;
-	case TRACE_PRINT:
-		trace_seq_printf(s, "# %lx %s",
-				 field->print.ip, field->print.buf);
-		if (field->flags & TRACE_FLAG_CONT)
+	}
+	case TRACE_PRINT: {
+		struct print_entry *field = (struct print_entry *)entry;
+
+		trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
+		if (entry->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
 	}
+	}
 	return 1;
 }
 
@@ -1539,7 +1577,6 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	struct trace_seq *s = &iter->seq;
 	unsigned char newline = '\n';
 	struct trace_entry *entry;
-	struct trace_field *field;
 	int S, T;
 
 	entry = iter->ent;
@@ -1547,40 +1584,48 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	if (entry->type == TRACE_CONT)
 		return 1;
 
-	field = &entry->field;
-
-	SEQ_PUT_HEX_FIELD_RET(s, field->pid);
+	SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
-	case TRACE_FN:
-		SEQ_PUT_HEX_FIELD_RET(s, field->fn.ip);
-		SEQ_PUT_HEX_FIELD_RET(s, field->fn.parent_ip);
+	case TRACE_FN: {
+		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+
+		SEQ_PUT_HEX_FIELD_RET(s, field->ip);
+		SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		S = field->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.prev_state] : 'X';
-		T = field->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.next_state] : 'X';
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field =
+			(struct ctx_switch_entry *)entry;
+
+		S = field->prev_state < sizeof(state_to_char) ?
+			state_to_char[field->prev_state] : 'X';
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
-		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_prio);
+		SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
+		SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, S);
-		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_cpu);
-		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_prio);
+		SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
+		SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
+		SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, T);
 		break;
+	}
 	case TRACE_SPECIAL:
-	case TRACE_STACK:
-		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg1);
-		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg2);
-		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg3);
+	case TRACE_STACK: {
+		struct special_entry *field = (struct special_entry *)entry;
+
+		SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
+		SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
+		SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
 		break;
 	}
+	}
 	SEQ_PUT_FIELD_RET(s, newline);
 
 	return 1;
@@ -1590,39 +1635,46 @@ static int print_bin_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
-	struct trace_field *field;
 
 	entry = iter->ent;
 
 	if (entry->type == TRACE_CONT)
 		return 1;
 
-	field = &entry->field;
-
-	SEQ_PUT_FIELD_RET(s, field->pid);
-	SEQ_PUT_FIELD_RET(s, field->cpu);
+	SEQ_PUT_FIELD_RET(s, entry->pid);
+	SEQ_PUT_FIELD_RET(s, iter->cpu);
 	SEQ_PUT_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
-	case TRACE_FN:
-		SEQ_PUT_FIELD_RET(s, field->fn.ip);
-		SEQ_PUT_FIELD_RET(s, field->fn.parent_ip);
+	case TRACE_FN: {
+		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+
+		SEQ_PUT_FIELD_RET(s, field->ip);
+		SEQ_PUT_FIELD_RET(s, field->parent_ip);
 		break;
-	case TRACE_CTX:
-		SEQ_PUT_FIELD_RET(s, field->ctx.prev_pid);
-		SEQ_PUT_FIELD_RET(s, field->ctx.prev_prio);
-		SEQ_PUT_FIELD_RET(s, field->ctx.prev_state);
-		SEQ_PUT_FIELD_RET(s, field->ctx.next_pid);
-		SEQ_PUT_FIELD_RET(s, field->ctx.next_prio);
-		SEQ_PUT_FIELD_RET(s, field->ctx.next_state);
+	}
+	case TRACE_CTX: {
+		struct ctx_switch_entry *field =
+			(struct ctx_switch_entry *)entry;
+
+		SEQ_PUT_FIELD_RET(s, field->prev_pid);
+		SEQ_PUT_FIELD_RET(s, field->prev_prio);
+		SEQ_PUT_FIELD_RET(s, field->prev_state);
+		SEQ_PUT_FIELD_RET(s, field->next_pid);
+		SEQ_PUT_FIELD_RET(s, field->next_prio);
+		SEQ_PUT_FIELD_RET(s, field->next_state);
 		break;
+	}
 	case TRACE_SPECIAL:
-	case TRACE_STACK:
-		SEQ_PUT_FIELD_RET(s, field->special.arg1);
-		SEQ_PUT_FIELD_RET(s, field->special.arg2);
-		SEQ_PUT_FIELD_RET(s, field->special.arg3);
+	case TRACE_STACK: {
+		struct special_entry *field = (struct special_entry *)entry;
+
+		SEQ_PUT_FIELD_RET(s, field->arg1);
+		SEQ_PUT_FIELD_RET(s, field->arg2);
+		SEQ_PUT_FIELD_RET(s, field->arg3);
 		break;
 	}
+	}
 	return 1;
 }
 
@@ -2818,10 +2870,10 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	struct ring_buffer_event *event;
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
-	struct trace_entry *entry;
+	struct print_entry *entry;
 	unsigned long flags, irq_flags;
 	long disabled;
-	int cpu, len = 0, write, written = 0;
+	int cpu, len = 0, size;
 
 	if (!tr->ctrl || tracing_disabled)
 		return 0;
@@ -2840,40 +2892,19 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	len = min(len, TRACE_BUF_SIZE-1);
 	trace_buf[len] = 0;
 
-	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
-					 &irq_flags);
+	size = sizeof(*entry) + len + 1;
+	event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
 	if (!event)
 		goto out_unlock;
-	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, flags);
-	entry->type			= TRACE_PRINT;
-	entry->field.print.ip		= ip;
+	entry = ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, flags);
+	entry->ent.type			= TRACE_PRINT;
+	entry->ip			= ip;
 
-	write = min(len, (int)(TRACE_PRINT_BUF_SIZE-1));
-
-	memcpy(&entry->field.print.buf, trace_buf, write);
-	entry->field.print.buf[write] = 0;
-	written = write;
+	memcpy(&entry->buf, trace_buf, len);
+	entry->buf[len] = 0;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
-	if (written != len)
-		entry->field.flags |= TRACE_FLAG_CONT;
-
-	while (written != len) {
-		event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
-						 &irq_flags);
-		if (!event)
-			goto out_unlock;
-		entry	= ring_buffer_event_data(event);
-
-		entry->type = TRACE_CONT;
-		write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1));
-		memcpy(&entry->cont.buf, trace_buf+written, write);
-		entry->cont.buf[write] = 0;
-		written += write;
-		ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-	}
-
  out_unlock:
 	spin_unlock(&trace_buf_lock);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f6965f775b43..e541a6b7e312 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -26,10 +26,25 @@ enum trace_type {
 	__TRACE_LAST_TYPE
 };
 
+/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+	unsigned char		type;
+	unsigned char		cpu;
+	unsigned char		flags;
+	unsigned char		preempt_count;
+	int			pid;
+};
+
 /*
  * Function trace entry - function address and parent function addres:
  */
 struct ftrace_entry {
+	struct trace_entry	ent;
 	unsigned long		ip;
 	unsigned long		parent_ip;
 };
@@ -39,6 +54,7 @@ extern struct tracer boot_tracer;
  * Context switch trace entry - which task (and prio) we switched from/to:
  */
 struct ctx_switch_entry {
+	struct trace_entry	ent;
 	unsigned int		prev_pid;
 	unsigned char		prev_prio;
 	unsigned char		prev_state;
@@ -52,6 +68,7 @@ struct ctx_switch_entry {
  * Special (free-form) trace entry:
  */
 struct special_entry {
+	struct trace_entry	ent;
 	unsigned long		arg1;
 	unsigned long		arg2;
 	unsigned long		arg3;
@@ -64,6 +81,7 @@ struct special_entry {
 #define FTRACE_STACK_ENTRIES	8
 
 struct stack_entry {
+	struct trace_entry	ent;
 	unsigned long		caller[FTRACE_STACK_ENTRIES];
 };
 
@@ -71,10 +89,34 @@ struct stack_entry {
  * ftrace_printk entry:
  */
 struct print_entry {
+	struct trace_entry	ent;
 	unsigned long		ip;
 	char			buf[];
 };
 
+#define TRACE_OLD_SIZE		88
+
+struct trace_field_cont {
+	unsigned char		type;
+	/* Temporary till we get rid of this completely */
+	char			buf[TRACE_OLD_SIZE - 1];
+};
+
+struct trace_mmiotrace_rw {
+	struct trace_entry	ent;
+	struct mmiotrace_rw	rw;
+};
+
+struct trace_mmiotrace_map {
+	struct trace_entry	ent;
+	struct mmiotrace_map	map;
+};
+
+struct trace_boot {
+	struct trace_entry	ent;
+	struct boot_trace	initcall;
+};
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
@@ -92,46 +134,7 @@ enum trace_flag_type {
 	TRACE_FLAG_CONT			= 0x10,
 };
 
-/*
- * The trace field - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
- */
-struct trace_field {
-	char			cpu;
-	char			flags;
-	char			preempt_count;
-	int			pid;
-	union {
-		struct ftrace_entry		fn;
-		struct ctx_switch_entry		ctx;
-		struct special_entry		special;
-		struct stack_entry		stack;
-		struct print_entry		print;
-		struct mmiotrace_rw		mmiorw;
-		struct mmiotrace_map		mmiomap;
-		struct boot_trace		initcall;
-	};
-};
-
-struct trace_field_cont {
-	char				buf[sizeof(struct trace_field)];
-};
-
-struct trace_entry {
-	char 				type;
-	union {
-		struct trace_field	field;
-		struct trace_field_cont	cont;
-	};
-};
-
-#define TRACE_ENTRY_SIZE	sizeof(struct trace_entry)
 #define TRACE_BUF_SIZE		1024
-#define TRACE_PRINT_BUF_SIZE \
-	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
-#define TRACE_CONT_BUF_SIZE	sizeof(struct trace_field)
 
 /*
  * The CPU trace array - it consists of thousands of trace entries
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 3657eec6b87d..fa8cca1be115 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -49,10 +49,11 @@ static int initcall_print_line(struct trace_iterator *iter)
 {
 	int ret = 0;
 	struct trace_entry *entry = iter->ent;
-	struct boot_trace *it = &entry->field.initcall;
+	struct trace_boot *field = (struct trace_boot *)entry;
+	struct boot_trace *it = &field->initcall;
 	struct trace_seq *s = &iter->seq;
 
-	if (iter->ent->type == TRACE_BOOT)
+	if (entry->type == TRACE_BOOT)
 		ret = trace_seq_printf(s, "%pF called from %i "
 				       "returned %d after %lld msecs\n",
 				       it->func, it->caller, it->result,
@@ -75,7 +76,7 @@ struct tracer boot_tracer __read_mostly =
 void trace_boot(struct boot_trace *it)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct trace_boot *entry;
 	struct trace_array_cpu *data;
 	unsigned long irq_flags;
 	struct trace_array *tr = boot_trace;
@@ -91,9 +92,9 @@ void trace_boot(struct boot_trace *it)
 	if (!event)
 		goto out;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, 0);
-	entry->type = TRACE_BOOT;
-	entry->field.initcall = *it;
+	tracing_generic_entry_update(&entry->ent, 0);
+	entry->ent.type = TRACE_BOOT;
+	entry->initcall = *it;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	trace_wake_up();
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index bdbf09d8413c..3df441ea2749 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -178,14 +178,16 @@ print_out:
 static int mmio_print_rw(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_rw *rw	= &entry->field.mmiorw;
+	struct trace_mmiotrace_rw *field =
+		(struct trace_mmiotrace_rw *)entry;
+	struct mmiotrace_rw *rw	= &field->rw;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
-	switch (entry->field.mmiorw.opcode) {
+	switch (rw->opcode) {
 	case MMIO_READ:
 		ret = trace_seq_printf(s,
 			"R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -220,14 +222,14 @@ static int mmio_print_rw(struct trace_iterator *iter)
 static int mmio_print_map(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_map *m	= &entry->field.mmiomap;
+	struct mmiotrace_map *m	= (struct mmiotrace_map *)entry;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
-	switch (entry->field.mmiorw.opcode) {
+	switch (m->opcode) {
 	case MMIO_PROBE:
 		ret = trace_seq_printf(s,
 			"MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
@@ -252,7 +254,8 @@ static int mmio_print_map(struct trace_iterator *iter)
 static int mmio_print_mark(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	const char *msg		= entry->field.print.buf;
+	struct print_entry *print = (struct print_entry *)entry;
+	const char *msg		= print->buf;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
@@ -264,7 +267,7 @@ static int mmio_print_mark(struct trace_iterator *iter)
 	if (!ret)
 		return 0;
 
-	if (entry->field.flags & TRACE_FLAG_CONT)
+	if (entry->flags & TRACE_FLAG_CONT)
 		trace_seq_print_cont(s, iter);
 
 	return 1;
@@ -308,7 +311,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 				struct mmiotrace_rw *rw)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct trace_mmiotrace_rw *entry;
 	unsigned long irq_flags;
 
 	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -316,9 +319,9 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, 0);
-	entry->type			= TRACE_MMIO_RW;
-	entry->field.mmiorw		= *rw;
+	tracing_generic_entry_update(&entry->ent, 0);
+	entry->ent.type			= TRACE_MMIO_RW;
+	entry->rw			= *rw;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	trace_wake_up();
@@ -336,7 +339,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 				struct mmiotrace_map *map)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct trace_mmiotrace_map *entry;
 	unsigned long irq_flags;
 
 	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -344,9 +347,9 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, 0);
-	entry->type			= TRACE_MMIO_MAP;
-	entry->field.mmiomap		= *map;
+	tracing_generic_entry_update(&entry->ent, 0);
+	entry->ent.type			= TRACE_MMIO_MAP;
+	entry->map			= *map;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	trace_wake_up();
-- 
GitLab


From 2c4f035f6c3e8fda661eb6105aa51ef07aa71607 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 29 Sep 2008 20:18:34 +0200
Subject: [PATCH 314/895] tracing/ftrace: change the type of the print_line
 callback

We need a kind of disambiguation when a print_line callback
returns 0.

_There is not enough space to print all the entry.
 Please flush the seq and retry.
_I can't handle this type of entry

This patch changes the type of this callback for better information.

Also some changes have been made in this V2.

_ Only relay to default functions after the print_line callback fails.
_ This patch doesn't fix the issue with the broken pipe (see patch 2/4 for that)

Some things are still in discussion:

_ Find better names for the enum print_line_t values
_ Change the type of print_trace_line into boolean.

Patches to change that can be sent later.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Pekka Paalanen <pq@iki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 77 +++++++++++++++++++++++---------------------
 kernel/trace/trace.h | 10 +++++-
 2 files changed, 50 insertions(+), 37 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ed9e47c18810..b38a4bb40548 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1250,7 +1250,7 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 		trace_seq_putc(s, '\n');
 }
 
-static int
+static enum print_line_t
 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 {
 	struct trace_seq *s = &iter->seq;
@@ -1267,7 +1267,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	unsigned state;
 
 	if (entry->type == TRACE_CONT)
-		return 1;
+		return TRACE_TYPE_HANDLED;
 
 	next_entry = find_next_entry(iter, NULL, &next_ts);
 	if (!next_entry)
@@ -1357,10 +1357,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	default:
 		trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	}
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-static int print_trace_fmt(struct trace_iterator *iter)
+static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1376,7 +1376,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	entry = iter->ent;
 
 	if (entry->type == TRACE_CONT)
-		return 1;
+		return TRACE_TYPE_HANDLED;
 
 	comm = trace_find_cmdline(iter->ent->pid);
 
@@ -1386,13 +1386,13 @@ static int print_trace_fmt(struct trace_iterator *iter)
 
 	ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 	ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 	ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 
 	switch (entry->type) {
 	case TRACE_FN: {
@@ -1400,12 +1400,12 @@ static int print_trace_fmt(struct trace_iterator *iter)
 
 		ret = seq_print_ip_sym(s, field->ip, sym_flags);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
 						field->parent_ip) {
 			ret = trace_seq_printf(s, " <-");
 			if (!ret)
-				return 0;
+				return TRACE_TYPE_PARTIAL_LINE;
 			if (kretprobed(field->parent_ip))
 				ret = trace_seq_puts(s, KRETPROBE_MSG);
 			else
@@ -1413,11 +1413,11 @@ static int print_trace_fmt(struct trace_iterator *iter)
 						       field->parent_ip,
 						       sym_flags);
 			if (!ret)
-				return 0;
+				return TRACE_TYPE_PARTIAL_LINE;
 		}
 		ret = trace_seq_printf(s, "\n");
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_CTX:
@@ -1439,7 +1439,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 				       field->next_prio,
 				       T);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_SPECIAL: {
@@ -1450,7 +1450,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 				 field->arg2,
 				 field->arg3);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_STACK: {
@@ -1460,16 +1460,16 @@ static int print_trace_fmt(struct trace_iterator *iter)
 			if (i) {
 				ret = trace_seq_puts(s, " <= ");
 				if (!ret)
-					return 0;
+					return TRACE_TYPE_PARTIAL_LINE;
 			}
 			ret = seq_print_ip_sym(s, field->caller[i],
 					       sym_flags);
 			if (!ret)
-				return 0;
+				return TRACE_TYPE_PARTIAL_LINE;
 		}
 		ret = trace_seq_puts(s, "\n");
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_PRINT: {
@@ -1482,10 +1482,10 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		break;
 	}
 	}
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-static int print_raw_fmt(struct trace_iterator *iter)
+static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
@@ -1495,12 +1495,12 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	entry = iter->ent;
 
 	if (entry->type == TRACE_CONT)
-		return 1;
+		return TRACE_TYPE_HANDLED;
 
 	ret = trace_seq_printf(s, "%d %d %llu ",
 		entry->pid, iter->cpu, iter->ts);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 
 	switch (entry->type) {
 	case TRACE_FN: {
@@ -1510,7 +1510,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 					field->ip,
 					field->parent_ip);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_CTX:
@@ -1533,7 +1533,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 				       field->next_prio,
 				       T);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_SPECIAL:
@@ -1545,7 +1545,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 				 field->arg2,
 				 field->arg3);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_PRINT: {
@@ -1557,7 +1557,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 		break;
 	}
 	}
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
 #define SEQ_PUT_FIELD_RET(s, x)				\
@@ -1572,7 +1572,7 @@ do {							\
 		return 0;				\
 } while (0)
 
-static int print_hex_fmt(struct trace_iterator *iter)
+static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	unsigned char newline = '\n';
@@ -1582,7 +1582,7 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	entry = iter->ent;
 
 	if (entry->type == TRACE_CONT)
-		return 1;
+		return TRACE_TYPE_HANDLED;
 
 	SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
@@ -1628,10 +1628,10 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	}
 	SEQ_PUT_FIELD_RET(s, newline);
 
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-static int print_bin_fmt(struct trace_iterator *iter)
+static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
@@ -1639,7 +1639,7 @@ static int print_bin_fmt(struct trace_iterator *iter)
 	entry = iter->ent;
 
 	if (entry->type == TRACE_CONT)
-		return 1;
+		return TRACE_TYPE_HANDLED;
 
 	SEQ_PUT_FIELD_RET(s, entry->pid);
 	SEQ_PUT_FIELD_RET(s, iter->cpu);
@@ -1686,13 +1686,18 @@ static int trace_empty(struct trace_iterator *iter)
 		if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
 			return 0;
 	}
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-static int print_trace_line(struct trace_iterator *iter)
+static enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
-	if (iter->trace && iter->trace->print_line)
-		return iter->trace->print_line(iter);
+	enum print_line_t ret;
+
+	if (iter->trace && iter->trace->print_line) {
+		ret = iter->trace->print_line(iter);
+		if (ret != TRACE_TYPE_UNHANDLED)
+			return ret;
+	}
 
 	if (trace_flags & TRACE_ITER_BIN)
 		return print_bin_fmt(iter);
@@ -2525,11 +2530,11 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 	ring_buffer_lock(iter->tr->buffer, &flags);
 
 	while (find_next_entry_inc(iter) != NULL) {
-		int ret;
+		enum print_line_t ret;
 		int len = iter->seq.len;
 
 		ret = print_trace_line(iter);
-		if (!ret) {
+		if (ret == TRACE_TYPE_PARTIAL_LINE) {
 			/* don't print partial lines */
 			iter->seq.len = len;
 			break;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e541a6b7e312..a921ba5d292d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -177,6 +177,14 @@ struct trace_array {
 	struct trace_array_cpu	*data[NR_CPUS];
 };
 
+
+/* Return values for print_line callback */
+enum print_line_t {
+	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
+	TRACE_TYPE_HANDLED	= 1,
+	TRACE_TYPE_UNHANDLED	= 2	/* Relay to other output functions */
+};
+
 /*
  * A specific tracer, represented by methods that operate on a trace array:
  */
@@ -197,7 +205,7 @@ struct tracer {
 	int			(*selftest)(struct tracer *trace,
 					    struct trace_array *tr);
 #endif
-	int			(*print_line)(struct trace_iterator *iter);
+	enum print_line_t	(*print_line)(struct trace_iterator *iter);
 	struct tracer		*next;
 	int			print_max;
 };
-- 
GitLab


From 9ff4b9744c187cae58c3774361ea090addbc4130 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Mon, 29 Sep 2008 20:23:48 +0200
Subject: [PATCH 315/895] tracing/ftrace: fix pipe breaking

This patch fixes a bug which break the pipe when the seq is empty.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b38a4bb40548..6a1c76bb56ba 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2439,7 +2439,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
 	if (sret != -EBUSY)
 		return sret;
-	sret = 0;
 
 	trace_seq_reset(&iter->seq);
 
@@ -2450,6 +2449,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 			goto out;
 	}
 
+waitagain:
+	sret = 0;
 	while (trace_empty(iter)) {
 
 		if ((filp->f_flags & O_NONBLOCK)) {
@@ -2556,8 +2557,13 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
 	if (iter->seq.readpos >= iter->seq.len)
 		trace_seq_reset(&iter->seq);
+
+	/*
+	 * If there was nothing to send to user, inspite of consuming trace
+	 * entries, go back to wait for more entries.
+	 */
 	if (sret == -EBUSY)
-		sret = 0;
+		goto waitagain;
 
 out:
 	mutex_unlock(&trace_types_lock);
-- 
GitLab


From 07f4e4f790895d55f46aaf89e7437da4a585989c Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 29 Sep 2008 20:27:42 +0200
Subject: [PATCH 316/895] tracing/ftrace: adapt mmiotrace to the new type of
 print_line

Adapt mmiotrace to the new print_line type.
By default, it ignores (and consumes) types it doesn't support.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Pekka Paalanen <pq@iki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_mmiotrace.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 3df441ea2749..1a266aa08e1a 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -175,7 +175,7 @@ print_out:
 	return (ret == -EBUSY) ? 0 : ret;
 }
 
-static int mmio_print_rw(struct trace_iterator *iter)
+static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
 	struct trace_mmiotrace_rw *field =
@@ -215,11 +215,11 @@ static int mmio_print_rw(struct trace_iterator *iter)
 		break;
 	}
 	if (ret)
-		return 1;
-	return 0;
+		return TRACE_TYPE_HANDLED;
+	return TRACE_TYPE_PARTIAL_LINE;
 }
 
-static int mmio_print_map(struct trace_iterator *iter)
+static enum print_line_t mmio_print_map(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
 	struct mmiotrace_map *m	= (struct mmiotrace_map *)entry;
@@ -227,7 +227,7 @@ static int mmio_print_map(struct trace_iterator *iter)
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
-	int ret = 1;
+	int ret;
 
 	switch (m->opcode) {
 	case MMIO_PROBE:
@@ -247,11 +247,11 @@ static int mmio_print_map(struct trace_iterator *iter)
 		break;
 	}
 	if (ret)
-		return 1;
-	return 0;
+		return TRACE_TYPE_HANDLED;
+	return TRACE_TYPE_PARTIAL_LINE;
 }
 
-static int mmio_print_mark(struct trace_iterator *iter)
+static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
 	struct print_entry *print = (struct print_entry *)entry;
@@ -265,16 +265,15 @@ static int mmio_print_mark(struct trace_iterator *iter)
 	/* The trailing newline must be in the message. */
 	ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 
 	if (entry->flags & TRACE_FLAG_CONT)
 		trace_seq_print_cont(s, iter);
 
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-/* return 0 to abort printing without consuming current entry in pipe mode */
-static int mmio_print_line(struct trace_iterator *iter)
+static enum print_line_t mmio_print_line(struct trace_iterator *iter)
 {
 	switch (iter->ent->type) {
 	case TRACE_MMIO_RW:
@@ -284,7 +283,7 @@ static int mmio_print_line(struct trace_iterator *iter)
 	case TRACE_PRINT:
 		return mmio_print_mark(iter);
 	default:
-		return 1; /* ignore unknown entries */
+		return TRACE_TYPE_HANDLED; /* ignore unknown entries */
 	}
 }
 
-- 
GitLab


From 9e9efffb7848fe53c334996819139b431b983ac2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 29 Sep 2008 20:31:58 +0200
Subject: [PATCH 317/895] tracing/ftrace: adapt the boot tracer to the new
 print_line type

This patch adapts the boot tracer to the new type of the
print_line callback.

It still relays entries it doesn't support to default output
functions.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Pekka Paalanen <pq@iki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_boot.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index fa8cca1be115..43bde20b95bd 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -45,22 +45,25 @@ static void boot_trace_ctrl_update(struct trace_array *tr)
 		stop_boot_trace(tr);
 }
 
-static int initcall_print_line(struct trace_iterator *iter)
+static enum print_line_t initcall_print_line(struct trace_iterator *iter)
 {
-	int ret = 0;
+	int ret;
 	struct trace_entry *entry = iter->ent;
 	struct trace_boot *field = (struct trace_boot *)entry;
 	struct boot_trace *it = &field->initcall;
 	struct trace_seq *s = &iter->seq;
 
-	if (entry->type == TRACE_BOOT)
+	if (entry->type == TRACE_BOOT) {
 		ret = trace_seq_printf(s, "%pF called from %i "
 				       "returned %d after %lld msecs\n",
 				       it->func, it->caller, it->result,
 				       it->duration);
-	if (ret)
-		return 1;
-	return 0;
+		if (ret)
+			return TRACE_TYPE_HANDLED;
+		else
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+	return TRACE_TYPE_UNHANDLED;
 }
 
 struct tracer boot_tracer __read_mostly =
-- 
GitLab


From 70255b5e3f1bd1a5af5b1e425ec2c4db7c735112 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 1 Oct 2008 00:29:52 -0400
Subject: [PATCH 318/895] ring_buffer: remove raw from local_irq_save

The raw_local_irq_save causes issues with lockdep. We don't need it
so replace them with local_irq_save.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cfa711374d9a..8e7392fd0db9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -847,7 +847,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 	if (atomic_read(&buffer->record_disabled))
 		return NULL;
 
-	raw_local_irq_save(*flags);
+	local_irq_save(*flags);
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
@@ -909,7 +909,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 	rb_commit(cpu_buffer, event);
 
 	spin_unlock(&cpu_buffer->lock);
-	raw_local_irq_restore(flags);
+	local_irq_restore(flags);
 
 	return 0;
 }
@@ -1583,13 +1583,13 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return;
 
-	raw_local_irq_save(flags);
+	local_irq_save(flags);
 	spin_lock(&cpu_buffer->lock);
 
 	rb_reset_cpu(cpu_buffer);
 
 	spin_unlock(&cpu_buffer->lock);
-	raw_local_irq_restore(flags);
+	local_irq_restore(flags);
 }
 
 /**
-- 
GitLab


From d769041f865330034131525ee6a7f72eb4af2a24 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 1 Oct 2008 00:29:53 -0400
Subject: [PATCH 319/895] ring_buffer: implement new locking

The old "lock always" scheme had issues with lockdep, and was not very
efficient anyways.

This patch does a new design to be partially lockless on writes.
Writes will add new entries to the per cpu pages by simply disabling
interrupts. When a write needs to go to another page than it will
grab the lock.

A new "read page" has been added so that the reader can pull out a page
from the ring buffer to read without worrying about the writer writing over
it. This allows us to not take the lock for all reads. The lock is
now only taken when a read needs to go to a new page.

This is far from lockless, and interrupts still need to be disabled,
but it is a step towards a more lockless solution, and it also
solves a lot of the issues that were noticed by the first conversion
of ftrace to the ring buffers.

Note: the ring_buffer_{un}lock API has been removed.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ring_buffer.h |   3 -
 kernel/trace/ring_buffer.c  | 298 ++++++++++++++++++++----------------
 kernel/trace/trace.c        | 113 +++++++++-----
 3 files changed, 247 insertions(+), 167 deletions(-)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index c52375b8330d..536b0ca46a03 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -63,9 +63,6 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event)
 	return event->time_delta;
 }
 
-void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags);
-void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags);
-
 /*
  * size is in bytes for each per CPU buffer.
  */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8e7392fd0db9..9631abf2ae29 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -161,8 +161,10 @@ struct ring_buffer_per_cpu {
 	struct list_head		pages;
 	unsigned long			head;	/* read from head */
 	unsigned long			tail;	/* write to tail */
+	unsigned long			reader;
 	struct buffer_page		*head_page;
 	struct buffer_page		*tail_page;
+	struct buffer_page		*reader_page;
 	unsigned long			overrun;
 	unsigned long			entries;
 	u64				write_stamp;
@@ -260,6 +262,7 @@ static struct ring_buffer_per_cpu *
 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long addr;
 	int ret;
 
 	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
@@ -272,9 +275,16 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	spin_lock_init(&cpu_buffer->lock);
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
+	addr = __get_free_page(GFP_KERNEL);
+	if (!addr)
+		goto fail_free_buffer;
+	cpu_buffer->reader_page = (struct buffer_page *)virt_to_page(addr);
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	cpu_buffer->reader_page->size = 0;
+
 	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
 	if (ret < 0)
-		goto fail_free_buffer;
+		goto fail_free_reader;
 
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
@@ -283,6 +293,9 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 
 	return cpu_buffer;
 
+ fail_free_reader:
+	free_buffer_page(cpu_buffer->reader_page);
+
  fail_free_buffer:
 	kfree(cpu_buffer);
 	return NULL;
@@ -293,6 +306,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	struct list_head *head = &cpu_buffer->pages;
 	struct buffer_page *page, *tmp;
 
+	list_del_init(&cpu_buffer->reader_page->list);
+	free_buffer_page(cpu_buffer->reader_page);
+
 	list_for_each_entry_safe(page, tmp, head, list) {
 		list_del_init(&page->list);
 		free_buffer_page(page);
@@ -538,8 +554,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return cpu_buffer->head_page == cpu_buffer->tail_page &&
-		cpu_buffer->head == cpu_buffer->tail;
+	return (cpu_buffer->reader == cpu_buffer->reader_page->size &&
+		(cpu_buffer->tail_page == cpu_buffer->reader_page ||
+		 (cpu_buffer->tail_page == cpu_buffer->head_page &&
+		  cpu_buffer->head == cpu_buffer->tail)));
 }
 
 static inline int rb_null_event(struct ring_buffer_event *event)
@@ -555,10 +573,10 @@ static inline void *rb_page_index(struct buffer_page *page, unsigned index)
 }
 
 static inline struct ring_buffer_event *
-rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return rb_page_index(cpu_buffer->head_page,
-			     cpu_buffer->head);
+	return rb_page_index(cpu_buffer->reader_page,
+			     cpu_buffer->reader);
 }
 
 static inline struct ring_buffer_event *
@@ -610,15 +628,32 @@ rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
 	cpu_buffer->write_stamp = *ts;
 }
 
-static void rb_reset_read_page(struct ring_buffer_per_cpu *cpu_buffer)
+static void rb_reset_head_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	cpu_buffer->read_stamp = cpu_buffer->head_page->time_stamp;
 	cpu_buffer->head = 0;
 }
 
-static void
-rb_reset_iter_read_page(struct ring_buffer_iter *iter)
+static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
+	cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
+	cpu_buffer->reader = 0;
+}
+
+static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+	/*
+	 * The iterator could be on the reader page (it starts there).
+	 * But the head could have moved, since the reader was
+	 * found. Check for this case and assign the iterator
+	 * to the head page instead of next.
+	 */
+	if (iter->head_page == cpu_buffer->reader_page)
+		iter->head_page = cpu_buffer->head_page;
+	else
+		rb_inc_page(cpu_buffer, &iter->head_page);
+
 	iter->read_stamp = iter->head_page->time_stamp;
 	iter->head = 0;
 }
@@ -693,30 +728,39 @@ static struct ring_buffer_event *
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		  unsigned type, unsigned long length, u64 *ts)
 {
-	struct buffer_page *head_page, *tail_page;
+	struct buffer_page *tail_page, *head_page, *reader_page;
 	unsigned long tail;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
 
+	/* No locking needed for tail page */
 	tail_page = cpu_buffer->tail_page;
-	head_page = cpu_buffer->head_page;
 	tail = cpu_buffer->tail;
 
 	if (tail + length > BUF_PAGE_SIZE) {
 		struct buffer_page *next_page = tail_page;
 
+		spin_lock(&cpu_buffer->lock);
 		rb_inc_page(cpu_buffer, &next_page);
 
+		head_page = cpu_buffer->head_page;
+		reader_page = cpu_buffer->reader_page;
+
+		/* we grabbed the lock before incrementing */
+		WARN_ON(next_page == reader_page);
+
 		if (next_page == head_page) {
-			if (!(buffer->flags & RB_FL_OVERWRITE))
+			if (!(buffer->flags & RB_FL_OVERWRITE)) {
+				spin_unlock(&cpu_buffer->lock);
 				return NULL;
+			}
 
 			/* count overflows */
 			rb_update_overflow(cpu_buffer);
 
 			rb_inc_page(cpu_buffer, &head_page);
 			cpu_buffer->head_page = head_page;
-			rb_reset_read_page(cpu_buffer);
+			rb_reset_head_page(cpu_buffer);
 		}
 
 		if (tail != BUF_PAGE_SIZE) {
@@ -732,6 +776,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		cpu_buffer->tail_page = tail_page;
 		cpu_buffer->tail = tail;
 		rb_add_stamp(cpu_buffer, ts);
+		spin_unlock(&cpu_buffer->lock);
 	}
 
 	BUG_ON(tail + length > BUF_PAGE_SIZE);
@@ -802,7 +847,9 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 				return NULL;
 		}
 	} else {
+		spin_lock(&cpu_buffer->lock);
 		rb_add_stamp(cpu_buffer, &ts);
+		spin_unlock(&cpu_buffer->lock);
 		delta = 0;
 	}
 
@@ -851,13 +898,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
-		goto out_irq;
+		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
-	spin_lock(&cpu_buffer->lock);
 
 	if (atomic_read(&cpu_buffer->record_disabled))
-		goto no_record;
+		goto out;
 
 	length = rb_calculate_event_length(length);
 	if (length > BUF_PAGE_SIZE)
@@ -865,13 +911,11 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 
 	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
 	if (!event)
-		goto no_record;
+		goto out;
 
 	return event;
 
- no_record:
-	spin_unlock(&cpu_buffer->lock);
- out_irq:
+ out:
 	local_irq_restore(*flags);
 	return NULL;
 }
@@ -904,11 +948,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	cpu_buffer = buffer->buffers[cpu];
 
-	assert_spin_locked(&cpu_buffer->lock);
-
 	rb_commit(cpu_buffer, event);
 
-	spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
 
 	return 0;
@@ -945,10 +986,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
-		goto out_irq;
+		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
-	spin_lock(&cpu_buffer->lock);
 
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
@@ -967,55 +1007,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
 	ret = 0;
  out:
-	spin_unlock(&cpu_buffer->lock);
- out_irq:
 	local_irq_restore(flags);
 
 	return ret;
 }
 
-/**
- * ring_buffer_lock - lock the ring buffer
- * @buffer: The ring buffer to lock
- * @flags: The place to store the interrupt flags
- *
- * This locks all the per CPU buffers.
- *
- * Must be unlocked by ring_buffer_unlock.
- */
-void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags)
-{
-	struct ring_buffer_per_cpu *cpu_buffer;
-	int cpu;
-
-	local_irq_save(*flags);
-
-	for_each_buffer_cpu(buffer, cpu) {
-		cpu_buffer = buffer->buffers[cpu];
-		spin_lock(&cpu_buffer->lock);
-	}
-}
-
-/**
- * ring_buffer_unlock - unlock a locked buffer
- * @buffer: The locked buffer to unlock
- * @flags: The interrupt flags received by ring_buffer_lock
- */
-void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags)
-{
-	struct ring_buffer_per_cpu *cpu_buffer;
-	int cpu;
-
-	for (cpu = buffer->cpus - 1; cpu >= 0; cpu--) {
-		if (!cpu_isset(cpu, buffer->cpumask))
-			continue;
-		cpu_buffer = buffer->buffers[cpu];
-		spin_unlock(&cpu_buffer->lock);
-	}
-
-	local_irq_restore(flags);
-}
-
 /**
  * ring_buffer_record_disable - stop all writes into the buffer
  * @buffer: The ring buffer to stop writes to.
@@ -1169,9 +1165,18 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 
-	iter->head_page = cpu_buffer->head_page;
-	iter->head = cpu_buffer->head;
-	rb_reset_iter_read_page(iter);
+	/* Iterator usage is expected to have record disabled */
+	if (list_empty(&cpu_buffer->reader_page->list)) {
+		iter->head_page = cpu_buffer->head_page;
+		iter->head = cpu_buffer->head;
+	} else {
+		iter->head_page = cpu_buffer->reader_page;
+		iter->head = cpu_buffer->reader;
+	}
+	if (iter->head)
+		iter->read_stamp = cpu_buffer->read_stamp;
+	else
+		iter->read_stamp = iter->head_page->time_stamp;
 }
 
 /**
@@ -1250,43 +1255,84 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 	return;
 }
 
-static void rb_advance_head(struct ring_buffer_per_cpu *cpu_buffer)
+static struct buffer_page *
+rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	struct ring_buffer_event *event;
-	unsigned length;
+	struct buffer_page *reader = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+ again:
+	reader = cpu_buffer->reader_page;
+
+	/* If there's more to read, return this page */
+	if (cpu_buffer->reader < reader->size)
+		goto out;
+
+	/* Never should we have an index greater than the size */
+	WARN_ON(cpu_buffer->reader > reader->size);
+
+	/* check if we caught up to the tail */
+	reader = NULL;
+	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+		goto out;
 
 	/*
-	 * Check if we are at the end of the buffer.
+	 * Splice the empty reader page into the list around the head.
+	 * Reset the reader page to size zero.
 	 */
-	if (cpu_buffer->head >= cpu_buffer->head_page->size) {
-		BUG_ON(cpu_buffer->head_page == cpu_buffer->tail_page);
-		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
-		rb_reset_read_page(cpu_buffer);
-		return;
-	}
 
-	event = rb_head_event(cpu_buffer);
+	reader = cpu_buffer->head_page;
+	cpu_buffer->reader_page->list.next = reader->list.next;
+	cpu_buffer->reader_page->list.prev = reader->list.prev;
+	cpu_buffer->reader_page->size = 0;
 
-	if (event->type == RINGBUF_TYPE_DATA)
-		cpu_buffer->entries--;
-
-	length = rb_event_length(event);
+	/* Make the reader page now replace the head */
+	reader->list.prev->next = &cpu_buffer->reader_page->list;
+	reader->list.next->prev = &cpu_buffer->reader_page->list;
 
 	/*
-	 * This should not be called to advance the header if we are
-	 * at the tail of the buffer.
+	 * If the tail is on the reader, then we must set the head
+	 * to the inserted page, otherwise we set it one before.
 	 */
-	BUG_ON((cpu_buffer->head_page == cpu_buffer->tail_page) &&
-	       (cpu_buffer->head + length > cpu_buffer->tail));
+	cpu_buffer->head_page = cpu_buffer->reader_page;
 
-	rb_update_read_stamp(cpu_buffer, event);
+	if (cpu_buffer->tail_page != reader)
+		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+
+	/* Finally update the reader page to the new head */
+	cpu_buffer->reader_page = reader;
+	rb_reset_reader_page(cpu_buffer);
+
+	goto again;
+
+ out:
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+	return reader;
+}
+
+static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct ring_buffer_event *event;
+	struct buffer_page *reader;
+	unsigned length;
+
+	reader = rb_get_reader_page(cpu_buffer);
 
-	cpu_buffer->head += length;
+	/* This function should not be called when buffer is empty */
+	BUG_ON(!reader);
 
-	/* check for end of page */
-	if ((cpu_buffer->head >= cpu_buffer->head_page->size) &&
-	    (cpu_buffer->head_page != cpu_buffer->tail_page))
-		rb_advance_head(cpu_buffer);
+	event = rb_reader_event(cpu_buffer);
+
+	if (event->type == RINGBUF_TYPE_DATA)
+		cpu_buffer->entries--;
+
+	rb_update_read_stamp(cpu_buffer, event);
+
+	length = rb_event_length(event);
+	cpu_buffer->reader += length;
 }
 
 static void rb_advance_iter(struct ring_buffer_iter *iter)
@@ -1304,8 +1350,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 */
 	if (iter->head >= iter->head_page->size) {
 		BUG_ON(iter->head_page == cpu_buffer->tail_page);
-		rb_inc_page(cpu_buffer, &iter->head_page);
-		rb_reset_iter_read_page(iter);
+		rb_inc_iter(iter);
 		return;
 	}
 
@@ -1344,6 +1389,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
+	struct buffer_page *reader;
 
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
@@ -1351,25 +1397,26 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	cpu_buffer = buffer->buffers[cpu];
 
  again:
-	if (rb_per_cpu_empty(cpu_buffer))
+	reader = rb_get_reader_page(cpu_buffer);
+	if (!reader)
 		return NULL;
 
-	event = rb_head_event(cpu_buffer);
+	event = rb_reader_event(cpu_buffer);
 
 	switch (event->type) {
 	case RINGBUF_TYPE_PADDING:
-		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
-		rb_reset_read_page(cpu_buffer);
-		goto again;
+		WARN_ON(1);
+		rb_advance_reader(cpu_buffer);
+		return NULL;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
 		/* Internal data, OK to advance */
-		rb_advance_head(cpu_buffer);
+		rb_advance_reader(cpu_buffer);
 		goto again;
 
 	case RINGBUF_TYPE_TIME_STAMP:
 		/* FIXME: not implemented */
-		rb_advance_head(cpu_buffer);
+		rb_advance_reader(cpu_buffer);
 		goto again;
 
 	case RINGBUF_TYPE_DATA:
@@ -1415,8 +1462,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
 	switch (event->type) {
 	case RINGBUF_TYPE_PADDING:
-		rb_inc_page(cpu_buffer, &iter->head_page);
-		rb_reset_iter_read_page(iter);
+		rb_inc_iter(iter);
 		goto again;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
@@ -1465,7 +1511,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 		return NULL;
 
 	cpu_buffer = buffer->buffers[cpu];
-	rb_advance_head(cpu_buffer);
+	rb_advance_reader(cpu_buffer);
 
 	return event;
 }
@@ -1487,6 +1533,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
+	unsigned long flags;
 
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
@@ -1502,11 +1549,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	atomic_inc(&cpu_buffer->record_disabled);
 	synchronize_sched();
 
-	spin_lock(&cpu_buffer->lock);
-	iter->head = cpu_buffer->head;
-	iter->head_page = cpu_buffer->head_page;
-	rb_reset_iter_read_page(iter);
-	spin_unlock(&cpu_buffer->lock);
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
+	ring_buffer_iter_reset(iter);
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 
 	return iter;
 }
@@ -1562,10 +1607,14 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
-	cpu_buffer->tail_page
-		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->head_page->size = 0;
+	cpu_buffer->tail_page = cpu_buffer->head_page;
+	cpu_buffer->tail_page->size = 0;
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	cpu_buffer->reader_page->size = 0;
+
+	cpu_buffer->head = cpu_buffer->tail = cpu_buffer->reader = 0;
 
-	cpu_buffer->head = cpu_buffer->tail = 0;
 	cpu_buffer->overrun = 0;
 	cpu_buffer->entries = 0;
 }
@@ -1583,13 +1632,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return;
 
-	local_irq_save(flags);
-	spin_lock(&cpu_buffer->lock);
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
 
 	rb_reset_cpu(cpu_buffer);
 
-	spin_unlock(&cpu_buffer->lock);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 }
 
 /**
@@ -1598,15 +1645,10 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
  */
 void ring_buffer_reset(struct ring_buffer *buffer)
 {
-	unsigned long flags;
 	int cpu;
 
-	ring_buffer_lock(buffer, &flags);
-
 	for_each_buffer_cpu(buffer, cpu)
-		rb_reset_cpu(buffer->buffers[cpu]);
-
-	ring_buffer_unlock(buffer, flags);
+		ring_buffer_reset_cpu(buffer, cpu);
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6a1c76bb56ba..b542f8837801 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -42,6 +42,20 @@
 unsigned long __read_mostly	tracing_max_latency = (cycle_t)ULONG_MAX;
 unsigned long __read_mostly	tracing_thresh;
 
+static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+
+static inline void ftrace_disable_cpu(void)
+{
+	preempt_disable();
+	local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+}
+
+static inline void ftrace_enable_cpu(void)
+{
+	local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+	preempt_enable();
+}
+
 static cpumask_t __read_mostly		tracing_buffer_mask;
 
 #define for_each_tracing_cpu(cpu)	\
@@ -406,7 +420,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	tr->buffer = max_tr.buffer;
 	max_tr.buffer = buf;
 
+	ftrace_disable_cpu();
 	ring_buffer_reset(tr->buffer);
+	ftrace_enable_cpu();
 
 	__update_max_tr(tr, tsk, cpu);
 	__raw_spin_unlock(&ftrace_max_lock);
@@ -428,9 +444,13 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	WARN_ON_ONCE(!irqs_disabled());
 	__raw_spin_lock(&ftrace_max_lock);
 
+	ftrace_disable_cpu();
+
 	ring_buffer_reset(max_tr.buffer);
 	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
 
+	ftrace_enable_cpu();
+
 	WARN_ON_ONCE(ret);
 
 	__update_max_tr(tr, tsk, cpu);
@@ -543,7 +563,9 @@ void unregister_tracer(struct tracer *type)
 
 void tracing_reset(struct trace_array *tr, int cpu)
 {
+	ftrace_disable_cpu();
 	ring_buffer_reset_cpu(tr->buffer, cpu);
+	ftrace_enable_cpu();
 }
 
 #define SAVED_CMDLINES 128
@@ -654,6 +676,10 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	struct ftrace_entry *entry;
 	unsigned long irq_flags;
 
+	/* If we are reading the ring buffer, don't trace */
+	if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+		return;
+
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
 					 &irq_flags);
 	if (!event)
@@ -870,8 +896,14 @@ enum trace_file_type {
 
 static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
 {
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
+
 	iter->idx++;
-	ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+	if (iter->buffer_iter[iter->cpu])
+		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+
+	ftrace_enable_cpu();
 }
 
 static struct trace_entry *
@@ -880,9 +912,19 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
 	struct ring_buffer_event *event;
 	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
 
-	event = ring_buffer_iter_peek(buf_iter, ts);
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
+
+	if (buf_iter)
+		event = ring_buffer_iter_peek(buf_iter, ts);
+	else
+		event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
+
+	ftrace_enable_cpu();
+
 	return event ? ring_buffer_event_data(event) : NULL;
 }
+
 static struct trace_entry *
 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
@@ -938,7 +980,10 @@ static void *find_next_entry_inc(struct trace_iterator *iter)
 
 static void trace_consume(struct trace_iterator *iter)
 {
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
 	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+	ftrace_enable_cpu();
 }
 
 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -991,10 +1036,14 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		iter->cpu = 0;
 		iter->idx = -1;
 
+		ftrace_disable_cpu();
+
 		for_each_tracing_cpu(cpu) {
 			ring_buffer_iter_reset(iter->buffer_iter[cpu]);
 		}
 
+		ftrace_enable_cpu();
+
 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
 			;
 
@@ -1242,7 +1291,16 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 		cont = (struct trace_field_cont *)ent;
 		if (ok)
 			ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
-		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+
+		ftrace_disable_cpu();
+
+		if (iter->buffer_iter[iter->cpu])
+			ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+		else
+			ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+
+		ftrace_enable_cpu();
+
 		ent = peek_next_entry(iter, iter->cpu, NULL);
 	} while (ent && ent->type == TRACE_CONT);
 
@@ -1683,9 +1741,15 @@ static int trace_empty(struct trace_iterator *iter)
 	int cpu;
 
 	for_each_tracing_cpu(cpu) {
-		if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
-			return 0;
+		if (iter->buffer_iter[cpu]) {
+			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+				return 0;
+		} else {
+			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+				return 0;
+		}
 	}
+
 	return TRACE_TYPE_HANDLED;
 }
 
@@ -1776,8 +1840,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
 	iter->pos = -1;
 
 	for_each_tracing_cpu(cpu) {
+
 		iter->buffer_iter[cpu] =
 			ring_buffer_read_start(iter->tr->buffer, cpu);
+
 		if (!iter->buffer_iter[cpu])
 			goto fail_buffer;
 	}
@@ -2341,7 +2407,6 @@ static atomic_t tracing_reader;
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
 	struct trace_iterator *iter;
-	int cpu;
 
 	if (tracing_disabled)
 		return -ENODEV;
@@ -2362,38 +2427,17 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	iter->trace = current_trace;
 	filp->private_data = iter;
 
-	for_each_tracing_cpu(cpu) {
-		iter->buffer_iter[cpu] =
-			ring_buffer_read_start(iter->tr->buffer, cpu);
-		if (!iter->buffer_iter[cpu])
-			goto fail_buffer;
-	}
-
 	if (iter->trace->pipe_open)
 		iter->trace->pipe_open(iter);
 	mutex_unlock(&trace_types_lock);
 
 	return 0;
-
- fail_buffer:
-	for_each_tracing_cpu(cpu) {
-		if (iter->buffer_iter[cpu])
-			ring_buffer_read_finish(iter->buffer_iter[cpu]);
-	}
-	mutex_unlock(&trace_types_lock);
-
-	return -ENOMEM;
 }
 
 static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
 	struct trace_iterator *iter = file->private_data;
-	int cpu;
 
-	for_each_tracing_cpu(cpu) {
-		if (iter->buffer_iter[cpu])
-			ring_buffer_read_finish(iter->buffer_iter[cpu]);
-	}
 	kfree(iter);
 	atomic_dec(&tracing_reader);
 
@@ -2429,7 +2473,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
-	unsigned long flags;
 #ifdef CONFIG_FTRACE
 	int ftrace_save;
 #endif
@@ -2528,7 +2571,6 @@ waitagain:
 	ftrace_enabled = 0;
 #endif
 	smp_wmb();
-	ring_buffer_lock(iter->tr->buffer, &flags);
 
 	while (find_next_entry_inc(iter) != NULL) {
 		enum print_line_t ret;
@@ -2547,7 +2589,6 @@ waitagain:
 			break;
 	}
 
-	ring_buffer_unlock(iter->tr->buffer, flags);
 #ifdef CONFIG_FTRACE
 	ftrace_enabled = ftrace_save;
 #endif
@@ -3010,8 +3051,8 @@ void ftrace_dump(void)
 	static struct trace_iterator iter;
 	static cpumask_t mask;
 	static int dump_ran;
-	unsigned long flags, irq_flags;
-	int cnt = 0;
+	unsigned long flags;
+	int cnt = 0, cpu;
 
 	/* only one dump */
 	spin_lock_irqsave(&ftrace_dump_lock, flags);
@@ -3023,6 +3064,10 @@ void ftrace_dump(void)
 	/* No turning back! */
 	ftrace_kill_atomic();
 
+	for_each_tracing_cpu(cpu) {
+		atomic_inc(&global_trace.data[cpu]->disabled);
+	}
+
 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
 
 	iter.tr = &global_trace;
@@ -3037,8 +3082,6 @@ void ftrace_dump(void)
 
 	cpus_clear(mask);
 
-	ring_buffer_lock(iter.tr->buffer, &irq_flags);
-
 	while (!trace_empty(&iter)) {
 
 		if (!cnt)
@@ -3066,8 +3109,6 @@ void ftrace_dump(void)
 	else
 		printk(KERN_TRACE "---------------------------------\n");
 
-	ring_buffer_unlock(iter.tr->buffer, irq_flags);
-
  out:
 	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
 }
-- 
GitLab


From 797d3712a9dd75c720558612be05f42c031a7bb5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 30 Sep 2008 18:13:45 +0200
Subject: [PATCH 320/895] tracing/ftrace: adapt mmiotrace to the new type of
 print_line, fix

Correct the value's type of trace_empty function

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b542f8837801..c1634068adfa 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1750,7 +1750,7 @@ static int trace_empty(struct trace_iterator *iter)
 		}
 	}
 
-	return TRACE_TYPE_HANDLED;
+	return 1;
 }
 
 static enum print_line_t print_trace_line(struct trace_iterator *iter)
-- 
GitLab


From 7104f300c5a69b46dda00d898034dd05c9f21739 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 1 Oct 2008 10:52:51 -0400
Subject: [PATCH 321/895] ftrace: type cast filter+verifier

The mmiotrace map had a bug that would typecast the entry from
the trace to the wrong type. That is a known danger of C typecasts,
there's absolutely zero checking done on them.

Help that problem a bit by using a GCC extension to implement a
type filter that restricts the types that a trace record can be
cast into, and by adding a dynamic check (in debug mode) to verify
the type of the entry.

This patch adds a macro to assign all entries of ftrace using the type
of the variable and checking the entry id. The typecasts are now done
in the macro for only those types that it knows about, which should
be all the types that are allowed to be read from the tracer.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c           | 85 ++++++++++++++++++++++++----------
 kernel/trace/trace.h           | 42 +++++++++++++++++
 kernel/trace/trace_mmiotrace.c | 14 ++++--
 3 files changed, 112 insertions(+), 29 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c1634068adfa..948f7d821c62 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1350,7 +1350,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	}
 	switch (entry->type) {
 	case TRACE_FN: {
-		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
 
 		seq_print_ip_sym(s, field->ip, sym_flags);
 		trace_seq_puts(s, " (");
@@ -1363,8 +1365,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	}
 	case TRACE_CTX:
 	case TRACE_WAKE: {
-		struct ctx_switch_entry *field =
-			(struct ctx_switch_entry *)entry;
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
 
 		T = field->next_state < sizeof(state_to_char) ?
 			state_to_char[field->next_state] : 'X';
@@ -1384,7 +1387,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		break;
 	}
 	case TRACE_SPECIAL: {
-		struct special_entry *field = (struct special_entry *)entry;
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
 
 		trace_seq_printf(s, "# %ld %ld %ld\n",
 				 field->arg1,
@@ -1393,7 +1398,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		break;
 	}
 	case TRACE_STACK: {
-		struct stack_entry *field = (struct stack_entry *)entry;
+		struct stack_entry *field;
+
+		trace_assign_type(field, entry);
 
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i)
@@ -1404,7 +1411,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		break;
 	}
 	case TRACE_PRINT: {
-		struct print_entry *field = (struct print_entry *)entry;
+		struct print_entry *field;
+
+		trace_assign_type(field, entry);
 
 		seq_print_ip_sym(s, field->ip, sym_flags);
 		trace_seq_printf(s, ": %s", field->buf);
@@ -1454,7 +1463,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 
 	switch (entry->type) {
 	case TRACE_FN: {
-		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
 
 		ret = seq_print_ip_sym(s, field->ip, sym_flags);
 		if (!ret)
@@ -1480,8 +1491,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_CTX:
 	case TRACE_WAKE: {
-		struct ctx_switch_entry *field =
-			(struct ctx_switch_entry *)entry;
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
 
 		S = field->prev_state < sizeof(state_to_char) ?
 			state_to_char[field->prev_state] : 'X';
@@ -1501,7 +1513,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_SPECIAL: {
-		struct special_entry *field = (struct special_entry *)entry;
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
 
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
 				 field->arg1,
@@ -1512,7 +1526,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_STACK: {
-		struct stack_entry *field = (struct stack_entry *)entry;
+		struct stack_entry *field;
+
+		trace_assign_type(field, entry);
 
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i) {
@@ -1531,7 +1547,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_PRINT: {
-		struct print_entry *field = (struct print_entry *)entry;
+		struct print_entry *field;
+
+		trace_assign_type(field, entry);
 
 		seq_print_ip_sym(s, field->ip, sym_flags);
 		trace_seq_printf(s, ": %s", field->buf);
@@ -1562,7 +1580,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 
 	switch (entry->type) {
 	case TRACE_FN: {
-		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
 
 		ret = trace_seq_printf(s, "%x %x\n",
 					field->ip,
@@ -1573,8 +1593,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_CTX:
 	case TRACE_WAKE: {
-		struct ctx_switch_entry *field =
-			(struct ctx_switch_entry *)entry;
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
 
 		S = field->prev_state < sizeof(state_to_char) ?
 			state_to_char[field->prev_state] : 'X';
@@ -1596,7 +1617,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_SPECIAL:
 	case TRACE_STACK: {
-		struct special_entry *field = (struct special_entry *)entry;
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
 
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
 				 field->arg1,
@@ -1607,7 +1630,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_PRINT: {
-		struct print_entry *field = (struct print_entry *)entry;
+		struct print_entry *field;
+
+		trace_assign_type(field, entry);
 
 		trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
 		if (entry->flags & TRACE_FLAG_CONT)
@@ -1648,7 +1673,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 
 	switch (entry->type) {
 	case TRACE_FN: {
-		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
 
 		SEQ_PUT_HEX_FIELD_RET(s, field->ip);
 		SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
@@ -1656,8 +1683,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_CTX:
 	case TRACE_WAKE: {
-		struct ctx_switch_entry *field =
-			(struct ctx_switch_entry *)entry;
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
 
 		S = field->prev_state < sizeof(state_to_char) ?
 			state_to_char[field->prev_state] : 'X';
@@ -1676,7 +1704,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_SPECIAL:
 	case TRACE_STACK: {
-		struct special_entry *field = (struct special_entry *)entry;
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
 
 		SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
 		SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
@@ -1705,15 +1735,18 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 
 	switch (entry->type) {
 	case TRACE_FN: {
-		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
 
 		SEQ_PUT_FIELD_RET(s, field->ip);
 		SEQ_PUT_FIELD_RET(s, field->parent_ip);
 		break;
 	}
 	case TRACE_CTX: {
-		struct ctx_switch_entry *field =
-			(struct ctx_switch_entry *)entry;
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
 
 		SEQ_PUT_FIELD_RET(s, field->prev_pid);
 		SEQ_PUT_FIELD_RET(s, field->prev_prio);
@@ -1725,7 +1758,9 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_SPECIAL:
 	case TRACE_STACK: {
-		struct special_entry *field = (struct special_entry *)entry;
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
 
 		SEQ_PUT_FIELD_RET(s, field->arg1);
 		SEQ_PUT_FIELD_RET(s, field->arg2);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a921ba5d292d..f02042d0d828 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -177,6 +177,48 @@ struct trace_array {
 	struct trace_array_cpu	*data[NR_CPUS];
 };
 
+#define FTRACE_CMP_TYPE(var, type) \
+	__builtin_types_compatible_p(typeof(var), type *)
+
+#undef IF_ASSIGN
+#define IF_ASSIGN(var, entry, etype, id)		\
+	if (FTRACE_CMP_TYPE(var, etype)) {		\
+		var = (typeof(var))(entry);		\
+		WARN_ON(id && (entry)->type != id);	\
+		break;					\
+	}
+
+/* Will cause compile errors if type is not found. */
+extern void __ftrace_bad_type(void);
+
+/*
+ * The trace_assign_type is a verifier that the entry type is
+ * the same as the type being assigned. To add new types simply
+ * add a line with the following format:
+ *
+ * IF_ASSIGN(var, ent, type, id);
+ *
+ *  Where "type" is the trace type that includes the trace_entry
+ *  as the "ent" item. And "id" is the trace identifier that is
+ *  used in the trace_type enum.
+ *
+ *  If the type can have more than one id, then use zero.
+ */
+#define trace_assign_type(var, ent)					\
+	do {								\
+		IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN);	\
+		IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);	\
+		IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
+		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
+		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
+		IF_ASSIGN(var, ent, struct special_entry, 0);		\
+		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
+			  TRACE_MMIO_RW);				\
+		IF_ASSIGN(var, ent, struct trace_mmiotrace_map,		\
+			  TRACE_MMIO_MAP);				\
+		IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT);	\
+		__ftrace_bad_type();					\
+	} while (0)
 
 /* Return values for print_line callback */
 enum print_line_t {
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 1a266aa08e1a..0e819f47bb7a 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -178,15 +178,17 @@ print_out:
 static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct trace_mmiotrace_rw *field =
-		(struct trace_mmiotrace_rw *)entry;
-	struct mmiotrace_rw *rw	= &field->rw;
+	struct trace_mmiotrace_rw *field;
+	struct mmiotrace_rw *rw;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
+	trace_assign_type(field, entry);
+	rw = &field->rw;
+
 	switch (rw->opcode) {
 	case MMIO_READ:
 		ret = trace_seq_printf(s,
@@ -222,13 +224,17 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
 static enum print_line_t mmio_print_map(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_map *m	= (struct mmiotrace_map *)entry;
+	struct trace_mmiotrace_map *field;
+	struct mmiotrace_map *m;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret;
 
+	trace_assign_type(field, entry);
+	m = &field->map;
+
 	switch (m->opcode) {
 	case MMIO_PROBE:
 		ret = trace_seq_printf(s,
-- 
GitLab


From e4c2ce82ca2710e17cb4df8eb2b249fa2eb5af30 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 1 Oct 2008 11:14:54 -0400
Subject: [PATCH 322/895] ring_buffer: allocate buffer page pointer

The current method of overlaying the page frame as the buffer page pointer
can be very dangerous and limits our ability to do other things with
a page from the buffer, like send it off to disk.

This patch allocates the buffer_page instead of overlaying the page's
page frame. The use of the buffer_page has hardly changed due to this.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 54 ++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9631abf2ae29..98145718988d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -115,16 +115,10 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
  * Thanks to Peter Zijlstra for suggesting this idea.
  */
 struct buffer_page {
-	union {
-		struct {
-			unsigned long	 flags;		/* mandatory */
-			atomic_t	 _count;	/* mandatory */
-			u64		 time_stamp;	/* page time stamp */
-			unsigned	 size;		/* size of page data */
-			struct list_head list;		/* list of free pages */
-		};
-		struct page page;
-	};
+	u64		 time_stamp;	/* page time stamp */
+	unsigned	 size;		/* size of page data */
+	struct list_head list;		/* list of free pages */
+	void *page;			/* Actual data page */
 };
 
 /*
@@ -133,9 +127,9 @@ struct buffer_page {
  */
 static inline void free_buffer_page(struct buffer_page *bpage)
 {
-	reset_page_mapcount(&bpage->page);
-	bpage->page.mapping = NULL;
-	__free_page(&bpage->page);
+	if (bpage->page)
+		__free_page(bpage->page);
+	kfree(bpage);
 }
 
 /*
@@ -237,11 +231,16 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	unsigned i;
 
 	for (i = 0; i < nr_pages; i++) {
+		page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+				    GFP_KERNEL, cpu_to_node(cpu));
+		if (!page)
+			goto free_pages;
+		list_add(&page->list, &pages);
+
 		addr = __get_free_page(GFP_KERNEL);
 		if (!addr)
 			goto free_pages;
-		page = (struct buffer_page *)virt_to_page(addr);
-		list_add(&page->list, &pages);
+		page->page = (void *)addr;
 	}
 
 	list_splice(&pages, head);
@@ -262,6 +261,7 @@ static struct ring_buffer_per_cpu *
 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	struct buffer_page *page;
 	unsigned long addr;
 	int ret;
 
@@ -275,10 +275,17 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	spin_lock_init(&cpu_buffer->lock);
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
+	page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+			    GFP_KERNEL, cpu_to_node(cpu));
+	if (!page)
+		goto fail_free_buffer;
+
+	cpu_buffer->reader_page = page;
 	addr = __get_free_page(GFP_KERNEL);
 	if (!addr)
-		goto fail_free_buffer;
-	cpu_buffer->reader_page = (struct buffer_page *)virt_to_page(addr);
+		goto fail_free_reader;
+	page->page = (void *)addr;
+
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
 	cpu_buffer->reader_page->size = 0;
 
@@ -523,11 +530,16 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
 	for_each_buffer_cpu(buffer, cpu) {
 		for (i = 0; i < new_pages; i++) {
+			page = kzalloc_node(ALIGN(sizeof(*page),
+						  cache_line_size()),
+					    GFP_KERNEL, cpu_to_node(cpu));
+			if (!page)
+				goto free_pages;
+			list_add(&page->list, &pages);
 			addr = __get_free_page(GFP_KERNEL);
 			if (!addr)
 				goto free_pages;
-			page = (struct buffer_page *)virt_to_page(addr);
-			list_add(&page->list, &pages);
+			page->page = (void *)addr;
 		}
 	}
 
@@ -567,9 +579,7 @@ static inline int rb_null_event(struct ring_buffer_event *event)
 
 static inline void *rb_page_index(struct buffer_page *page, unsigned index)
 {
-	void *addr = page_address(&page->page);
-
-	return addr + index;
+	return page->page + index;
 }
 
 static inline struct ring_buffer_event *
-- 
GitLab


From 38697053fa006411224a1790e2adb8216440ab0f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 1 Oct 2008 13:14:09 -0400
Subject: [PATCH 323/895] ftrace: preempt disable over interrupt disable

With the new ring buffer infrastructure in ftrace, I'm trying to make
ftrace a little more light weight.

This patch converts a lot of the local_irq_save/restore into
preempt_disable/enable.  The original preempt count in a lot of cases
has to be sent in as a parameter so that it can be recorded correctly.
Some places were recording it incorrectly before anyway.

This is also laying the ground work to make ftrace a little bit
more reentrant, and remove all locking. The function tracers must
still protect from reentrancy.

Note: All the function tracers must be careful when using preempt_disable.
  It must do the following:

  resched = need_resched();
  preempt_disable_notrace();
  [...]
  if (resched)
	preempt_enable_no_resched_notrace();
  else
	preempt_enable_notrace();

The reason is that if this function traces schedule() itself, the
preempt_enable_notrace() will cause a schedule, which will lead
us into a recursive failure.

If we needed to reschedule before calling preempt_disable, we
should have already scheduled. Since we did not, this is most
likely that we should not and are probably inside a schedule
function.

If resched was not set, we still need to catch the need resched
flag being set when preemption was off and the if case at the
end will catch that for us.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c              | 123 +++++++++++++++---------------
 kernel/trace/trace.h              |  13 ++--
 kernel/trace/trace_boot.c         |   2 +-
 kernel/trace/trace_irqsoff.c      |  13 ++--
 kernel/trace/trace_mmiotrace.c    |   4 +-
 kernel/trace/trace_sched_switch.c |   9 ++-
 kernel/trace/trace_sched_wakeup.c |  13 +++-
 7 files changed, 97 insertions(+), 80 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 948f7d821c62..1cd2e8143bb4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -652,12 +652,10 @@ void tracing_record_cmdline(struct task_struct *tsk)
 }
 
 void
-tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
+tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
+			     int pc)
 {
 	struct task_struct *tsk = current;
-	unsigned long pc;
-
-	pc = preempt_count();
 
 	entry->preempt_count		= pc & 0xff;
 	entry->pid			= (tsk) ? tsk->pid : 0;
@@ -670,7 +668,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 
 void
 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
-	       unsigned long ip, unsigned long parent_ip, unsigned long flags)
+	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
+	       int pc)
 {
 	struct ring_buffer_event *event;
 	struct ftrace_entry *entry;
@@ -685,7 +684,7 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, flags);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
 	entry->ent.type			= TRACE_FN;
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
@@ -694,16 +693,17 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 
 void
 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
-       unsigned long ip, unsigned long parent_ip, unsigned long flags)
+       unsigned long ip, unsigned long parent_ip, unsigned long flags,
+       int pc)
 {
 	if (likely(!atomic_read(&data->disabled)))
-		trace_function(tr, data, ip, parent_ip, flags);
+		trace_function(tr, data, ip, parent_ip, flags, pc);
 }
 
-void __trace_stack(struct trace_array *tr,
-		   struct trace_array_cpu *data,
-		   unsigned long flags,
-		   int skip)
+static void ftrace_trace_stack(struct trace_array *tr,
+			       struct trace_array_cpu *data,
+			       unsigned long flags,
+			       int skip, int pc)
 {
 	struct ring_buffer_event *event;
 	struct stack_entry *entry;
@@ -718,7 +718,7 @@ void __trace_stack(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, flags);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
 	entry->ent.type		= TRACE_STACK;
 
 	memset(&entry->caller, 0, sizeof(entry->caller));
@@ -732,9 +732,18 @@ void __trace_stack(struct trace_array *tr,
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
 
-void
-__trace_special(void *__tr, void *__data,
-		unsigned long arg1, unsigned long arg2, unsigned long arg3)
+void __trace_stack(struct trace_array *tr,
+		   struct trace_array_cpu *data,
+		   unsigned long flags,
+		   int skip)
+{
+	ftrace_trace_stack(tr, data, flags, skip, preempt_count());
+}
+
+static void
+ftrace_trace_special(void *__tr, void *__data,
+		     unsigned long arg1, unsigned long arg2, unsigned long arg3,
+		     int pc)
 {
 	struct ring_buffer_event *event;
 	struct trace_array_cpu *data = __data;
@@ -747,23 +756,30 @@ __trace_special(void *__tr, void *__data,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0);
+	tracing_generic_entry_update(&entry->ent, 0, pc);
 	entry->ent.type			= TRACE_SPECIAL;
 	entry->arg1			= arg1;
 	entry->arg2			= arg2;
 	entry->arg3			= arg3;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-	__trace_stack(tr, data, irq_flags, 4);
+	ftrace_trace_stack(tr, data, irq_flags, 4, pc);
 
 	trace_wake_up();
 }
 
+void
+__trace_special(void *__tr, void *__data,
+		unsigned long arg1, unsigned long arg2, unsigned long arg3)
+{
+	ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
+}
+
 void
 tracing_sched_switch_trace(struct trace_array *tr,
 			   struct trace_array_cpu *data,
 			   struct task_struct *prev,
 			   struct task_struct *next,
-			   unsigned long flags)
+			   unsigned long flags, int pc)
 {
 	struct ring_buffer_event *event;
 	struct ctx_switch_entry *entry;
@@ -774,7 +790,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, flags);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
 	entry->ent.type			= TRACE_CTX;
 	entry->prev_pid			= prev->pid;
 	entry->prev_prio		= prev->prio;
@@ -784,7 +800,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry->next_state		= next->state;
 	entry->next_cpu	= task_cpu(next);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-	__trace_stack(tr, data, flags, 5);
+	ftrace_trace_stack(tr, data, flags, 5, pc);
 }
 
 void
@@ -792,7 +808,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 			   struct trace_array_cpu *data,
 			   struct task_struct *wakee,
 			   struct task_struct *curr,
-			   unsigned long flags)
+			   unsigned long flags, int pc)
 {
 	struct ring_buffer_event *event;
 	struct ctx_switch_entry *entry;
@@ -803,7 +819,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, flags);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
 	entry->ent.type			= TRACE_WAKE;
 	entry->prev_pid			= curr->pid;
 	entry->prev_prio		= curr->prio;
@@ -813,7 +829,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->next_state		= wakee->state;
 	entry->next_cpu			= task_cpu(wakee);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-	__trace_stack(tr, data, flags, 6);
+	ftrace_trace_stack(tr, data, flags, 6, pc);
 
 	trace_wake_up();
 }
@@ -823,23 +839,24 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
-	unsigned long flags;
 	long disabled;
 	int cpu;
+	int pc;
 
 	if (tracing_disabled || !tr->ctrl)
 		return;
 
-	local_irq_save(flags);
+	pc = preempt_count();
+	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		__trace_special(tr, data, arg1, arg2, arg3);
+		ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
 
 	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	preempt_enable_notrace();
 }
 
 #ifdef CONFIG_FTRACE
@@ -850,7 +867,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	long disabled;
-	int cpu;
+	int cpu, resched;
+	int pc;
 
 	if (unlikely(!ftrace_function_enabled))
 		return;
@@ -858,16 +876,22 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
 	if (skip_trace(ip))
 		return;
 
-	local_irq_save(flags);
+	pc = preempt_count();
+	resched = need_resched();
+	preempt_disable_notrace();
+	local_save_flags(flags);
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		trace_function(tr, data, ip, parent_ip, flags);
+		trace_function(tr, data, ip, parent_ip, flags, pc);
 
 	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	if (resched)
+		preempt_enable_no_resched_notrace();
+	else
+		preempt_enable_notrace();
 }
 
 static struct ftrace_ops trace_ops __read_mostly =
@@ -2508,9 +2532,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
-#ifdef CONFIG_FTRACE
-	int ftrace_save;
-#endif
 	ssize_t sret;
 
 	/* return any leftover data */
@@ -2593,20 +2614,6 @@ waitagain:
 	       offsetof(struct trace_iterator, seq));
 	iter->pos = -1;
 
-	/*
-	 * We need to stop all tracing on all CPUS to read the
-	 * the next buffer. This is a bit expensive, but is
-	 * not done often. We fill all what we can read,
-	 * and then release the locks again.
-	 */
-
-	local_irq_disable();
-#ifdef CONFIG_FTRACE
-	ftrace_save = ftrace_enabled;
-	ftrace_enabled = 0;
-#endif
-	smp_wmb();
-
 	while (find_next_entry_inc(iter) != NULL) {
 		enum print_line_t ret;
 		int len = iter->seq.len;
@@ -2624,11 +2631,6 @@ waitagain:
 			break;
 	}
 
-#ifdef CONFIG_FTRACE
-	ftrace_enabled = ftrace_save;
-#endif
-	local_irq_enable();
-
 	/* Now copy what we have to the user */
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
 	if (iter->seq.readpos >= iter->seq.len)
@@ -2960,12 +2962,13 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	struct print_entry *entry;
 	unsigned long flags, irq_flags;
 	long disabled;
-	int cpu, len = 0, size;
+	int cpu, len = 0, size, pc;
 
 	if (!tr->ctrl || tracing_disabled)
 		return 0;
 
-	local_irq_save(flags);
+	pc = preempt_count();
+	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
@@ -2973,7 +2976,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	if (unlikely(disabled != 1))
 		goto out;
 
-	spin_lock(&trace_buf_lock);
+	spin_lock_irqsave(&trace_buf_lock, flags);
 	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
 	len = min(len, TRACE_BUF_SIZE-1);
@@ -2984,7 +2987,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	if (!event)
 		goto out_unlock;
 	entry = ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, flags);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
 	entry->ent.type			= TRACE_PRINT;
 	entry->ip			= ip;
 
@@ -2993,11 +2996,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
  out_unlock:
-	spin_unlock(&trace_buf_lock);
+	spin_unlock_irqrestore(&trace_buf_lock, flags);
 
  out:
 	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	preempt_enable_notrace();
 
 	return len;
 }
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f02042d0d828..f1f99572cde7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -288,35 +288,36 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
 void tracing_generic_entry_update(struct trace_entry *entry,
-						unsigned long flags);
+				  unsigned long flags,
+				  int pc);
 
 void ftrace(struct trace_array *tr,
 			    struct trace_array_cpu *data,
 			    unsigned long ip,
 			    unsigned long parent_ip,
-			    unsigned long flags);
+			    unsigned long flags, int pc);
 void tracing_sched_switch_trace(struct trace_array *tr,
 				struct trace_array_cpu *data,
 				struct task_struct *prev,
 				struct task_struct *next,
-				unsigned long flags);
+				unsigned long flags, int pc);
 void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
 				struct trace_array_cpu *data,
 				struct task_struct *wakee,
 				struct task_struct *cur,
-				unsigned long flags);
+				unsigned long flags, int pc);
 void trace_special(struct trace_array *tr,
 		   struct trace_array_cpu *data,
 		   unsigned long arg1,
 		   unsigned long arg2,
-		   unsigned long arg3);
+		   unsigned long arg3, int pc);
 void trace_function(struct trace_array *tr,
 		    struct trace_array_cpu *data,
 		    unsigned long ip,
 		    unsigned long parent_ip,
-		    unsigned long flags);
+		    unsigned long flags, int pc);
 
 void tracing_start_cmdline_record(void);
 void tracing_stop_cmdline_record(void);
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 43bde20b95bd..f2dac6f1cf06 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -95,7 +95,7 @@ void trace_boot(struct boot_trace *it)
 	if (!event)
 		goto out;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
 	entry->ent.type = TRACE_BOOT;
 	entry->initcall = *it;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 37ad49407f27..f925dbbff2a6 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		trace_function(tr, data, ip, parent_ip, flags);
+		trace_function(tr, data, ip, parent_ip, flags, preempt_count());
 
 	atomic_dec(&data->disabled);
 }
@@ -130,6 +130,7 @@ check_critical_timing(struct trace_array *tr,
 	unsigned long latency, t0, t1;
 	cycle_t T0, T1, delta;
 	unsigned long flags;
+	int pc;
 
 	/*
 	 * usecs conversion is slow so we try to delay the conversion
@@ -144,13 +145,15 @@ check_critical_timing(struct trace_array *tr,
 	if (!report_latency(delta))
 		goto out;
 
+	pc = preempt_count();
+
 	spin_lock_irqsave(&max_trace_lock, flags);
 
 	/* check if we are still the max latency */
 	if (!report_latency(delta))
 		goto out_unlock;
 
-	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
 
 	latency = nsecs_to_usecs(delta);
 
@@ -174,7 +177,7 @@ out:
 	data->critical_sequence = max_sequence;
 	data->preempt_timestamp = ftrace_now(cpu);
 	tracing_reset(tr, cpu);
-	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
 }
 
 static inline void
@@ -207,7 +210,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
 
 	local_save_flags(flags);
 
-	trace_function(tr, data, ip, parent_ip, flags);
+	trace_function(tr, data, ip, parent_ip, flags, preempt_count());
 
 	per_cpu(tracing_cpu, cpu) = 1;
 
@@ -241,7 +244,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
 	atomic_inc(&data->disabled);
 
 	local_save_flags(flags);
-	trace_function(tr, data, ip, parent_ip, flags);
+	trace_function(tr, data, ip, parent_ip, flags, preempt_count());
 	check_critical_timing(tr, data, parent_ip ? : ip, cpu);
 	data->critical_start = 0;
 	atomic_dec(&data->disabled);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 0e819f47bb7a..f28484618ff0 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -324,7 +324,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0);
+	tracing_generic_entry_update(&entry->ent, 0, preempt_count());
 	entry->ent.type			= TRACE_MMIO_RW;
 	entry->rw			= *rw;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
@@ -352,7 +352,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0);
+	tracing_generic_entry_update(&entry->ent, 0, preempt_count());
 	entry->ent.type			= TRACE_MMIO_MAP;
 	entry->map			= *map;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index e0b06db0f7af..c7fa08a5b7f4 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -26,6 +26,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 	unsigned long flags;
 	long disabled;
 	int cpu;
+	int pc;
 
 	if (!atomic_read(&sched_ref))
 		return;
@@ -36,13 +37,14 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 	if (!tracer_enabled)
 		return;
 
+	pc = preempt_count();
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = ctx_trace->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		tracing_sched_switch_trace(ctx_trace, data, prev, next, flags);
+		tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
 
 	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
@@ -54,11 +56,12 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	long disabled;
-	int cpu;
+	int cpu, pc;
 
 	if (!likely(tracer_enabled))
 		return;
 
+	pc = preempt_count();
 	tracing_record_cmdline(current);
 
 	local_irq_save(flags);
@@ -68,7 +71,7 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 
 	if (likely(disabled == 1))
 		tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
-			flags);
+					   flags, pc);
 
 	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 01e75e0639b7..fe4a252c2363 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -44,10 +44,12 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
 	long disabled;
 	int resched;
 	int cpu;
+	int pc;
 
 	if (likely(!wakeup_task))
 		return;
 
+	pc = preempt_count();
 	resched = need_resched();
 	preempt_disable_notrace();
 
@@ -70,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
 	if (task_cpu(wakeup_task) != cpu)
 		goto unlock;
 
-	trace_function(tr, data, ip, parent_ip, flags);
+	trace_function(tr, data, ip, parent_ip, flags, pc);
 
  unlock:
 	__raw_spin_unlock(&wakeup_lock);
@@ -121,6 +123,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	unsigned long flags;
 	long disabled;
 	int cpu;
+	int pc;
 
 	tracing_record_cmdline(prev);
 
@@ -139,6 +142,8 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	if (next != wakeup_task)
 		return;
 
+	pc = preempt_count();
+
 	/* The task we are waiting for is waking up */
 	data = wakeup_trace->data[wakeup_cpu];
 
@@ -155,7 +160,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	if (unlikely(!tracer_enabled || next != wakeup_task))
 		goto out_unlock;
 
-	trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+	trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
 	/*
 	 * usecs conversion is slow so we try to delay the conversion
@@ -220,6 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p)
 	int cpu = smp_processor_id();
 	unsigned long flags;
 	long disabled;
+	int pc;
 
 	if (likely(!tracer_enabled))
 		return;
@@ -232,6 +238,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p)
 			p->prio >= current->prio)
 		return;
 
+	pc = preempt_count();
 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
 	if (unlikely(disabled != 1))
 		goto out;
@@ -256,7 +263,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p)
 
 	wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
 	trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
-		       CALLER_ADDR1, CALLER_ADDR2, flags);
+		       CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
 out_locked:
 	__raw_spin_unlock(&wakeup_lock);
-- 
GitLab


From 77ae11f63befb7fc41ec256f1fcb72ca7e4160d5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 2 Oct 2008 11:04:14 +0200
Subject: [PATCH 324/895] ring-buffer: fix build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix:

 kernel/trace/ring_buffer.c: In function â€˜rb_allocate_pagesâ€™:
 kernel/trace/ring_buffer.c:235: error: â€˜cpuâ€™ undeclared (first use in this function)
 kernel/trace/ring_buffer.c:235: error: (Each undeclared identifier is reported only once
 kernel/trace/ring_buffer.c:235: error: for each function it appears in.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 98145718988d..54a30986493a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -232,7 +232,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 	for (i = 0; i < nr_pages; i++) {
 		page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
-				    GFP_KERNEL, cpu_to_node(cpu));
+				    GFP_KERNEL, cpu_to_node(i));
 		if (!page)
 			goto free_pages;
 		list_add(&page->list, &pages);
-- 
GitLab


From cb5ab74204a6e2579d1119bf1348eb806526b12b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 2 Oct 2008 12:59:20 +0200
Subject: [PATCH 325/895] tracing/fastboot: change the printing of boot tracer
 according to bootgraph.pl

Change the boot tracer printing to make it parsable for
the scripts/bootgraph.pl script.

We have now to output two lines for each initcall, according to the
printk in do_one_initcall() in init/main.c
We need now the call's time and the return's time.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h    |  2 ++
 init/main.c               | 20 +++++++++-----------
 kernel/trace/trace_boot.c | 22 +++++++++++++++-------
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 91954eb6460f..4455490d91bd 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -216,6 +216,8 @@ struct boot_trace {
 	initcall_t		func;
 	int			result;
 	unsigned long long	duration;
+	ktime_t			calltime;
+	ktime_t			rettime;
 };
 
 #ifdef CONFIG_BOOT_TRACER
diff --git a/init/main.c b/init/main.c
index 1e39a1eab190..61eb66159391 100644
--- a/init/main.c
+++ b/init/main.c
@@ -706,34 +706,32 @@ __setup("initcall_debug", initcall_debug_setup);
 int do_one_initcall(initcall_t fn)
 {
 	int count = preempt_count();
-	ktime_t t0, t1, delta;
+	ktime_t delta;
 	char msgbuf[64];
-	int result;
 	struct boot_trace it;
 
 	if (initcall_debug) {
 		it.caller = task_pid_nr(current);
 		it.func = fn;
 		printk("calling  %pF @ %i\n", fn, it.caller);
-		t0 = ktime_get();
+		it.calltime = ktime_get();
 	}
 
-	result = fn();
+	it.result = fn();
 
 	if (initcall_debug) {
-		t1 = ktime_get();
-		delta = ktime_sub(t1, t0);
-		it.result = result;
+		it.rettime = ktime_get();
+		delta = ktime_sub(it.rettime, it.calltime);
 		it.duration = (unsigned long long) delta.tv64 >> 20;
 		printk("initcall %pF returned %d after %Ld msecs\n", fn,
-			result, it.duration);
+			it.result, it.duration);
 		trace_boot(&it);
 	}
 
 	msgbuf[0] = 0;
 
-	if (result && result != -ENODEV && initcall_debug)
-		sprintf(msgbuf, "error code %d ", result);
+	if (it.result && it.result != -ENODEV && initcall_debug)
+		sprintf(msgbuf, "error code %d ", it.result);
 
 	if (preempt_count() != count) {
 		strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -747,7 +745,7 @@ int do_one_initcall(initcall_t fn)
 		printk("initcall %pF returned with %s\n", fn, msgbuf);
 	}
 
-	return result;
+	return it.result;
 }
 
 
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index f2dac6f1cf06..7c15f3e68ba3 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -52,16 +52,24 @@ static enum print_line_t initcall_print_line(struct trace_iterator *iter)
 	struct trace_boot *field = (struct trace_boot *)entry;
 	struct boot_trace *it = &field->initcall;
 	struct trace_seq *s = &iter->seq;
+	struct timespec calltime = ktime_to_timespec(it->calltime);
+	struct timespec rettime = ktime_to_timespec(it->rettime);
 
 	if (entry->type == TRACE_BOOT) {
-		ret = trace_seq_printf(s, "%pF called from %i "
-				       "returned %d after %lld msecs\n",
-				       it->func, it->caller, it->result,
-				       it->duration);
-		if (ret)
-			return TRACE_TYPE_HANDLED;
-		else
+		ret = trace_seq_printf(s, "[%5ld.%06ld] calling  %pF @ %i\n",
+					  calltime.tv_sec,
+					  calltime.tv_nsec,
+					  it->func, it->caller);
+		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
+		ret = trace_seq_printf(s, "[%5ld.%06ld] initcall %pF "
+					  "returned %d after %lld msecs\n",
+					  rettime.tv_sec,
+					  rettime.tv_nsec,
+					  it->func, it->result, it->duration);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		return TRACE_TYPE_HANDLED;
 	}
 	return TRACE_TYPE_UNHANDLED;
 }
-- 
GitLab


From 5601020feb0c3010e9e3e0131e9697ac6a06777b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 2 Oct 2008 13:26:05 +0200
Subject: [PATCH 326/895] tracing/fastboot: get the initcall name before it
 disappears

After some initcall traces, some initcall names may be inconsistent.
That's because these functions will disappear from the .init section
and also their name from the symbols table.

So we have to copy the name of the function in a buffer large enough
during the trace appending. It is not costly for the ring_buffer because
the number of initcall entries is commonly not really large.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h    |  7 ++++---
 init/main.c               |  3 +--
 kernel/trace/trace_boot.c | 14 ++++++++++----
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4455490d91bd..e672e51c40a9 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -7,6 +7,7 @@
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/kallsyms.h>
 
 extern int ftrace_enabled;
 extern int
@@ -213,7 +214,7 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
 
 struct boot_trace {
 	pid_t			caller;
-	initcall_t		func;
+	char 			func[KSYM_NAME_LEN];
 	int			result;
 	unsigned long long	duration;
 	ktime_t			calltime;
@@ -221,10 +222,10 @@ struct boot_trace {
 };
 
 #ifdef CONFIG_BOOT_TRACER
-extern void trace_boot(struct boot_trace *it);
+extern void trace_boot(struct boot_trace *it, initcall_t fn);
 extern void start_boot_trace(void);
 #else
-static inline void trace_boot(struct boot_trace *it) { }
+static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
 static inline void start_boot_trace(void) { }
 #endif
 
diff --git a/init/main.c b/init/main.c
index 61eb66159391..8e96a0ef17f4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -712,7 +712,6 @@ int do_one_initcall(initcall_t fn)
 
 	if (initcall_debug) {
 		it.caller = task_pid_nr(current);
-		it.func = fn;
 		printk("calling  %pF @ %i\n", fn, it.caller);
 		it.calltime = ktime_get();
 	}
@@ -725,7 +724,7 @@ int do_one_initcall(initcall_t fn)
 		it.duration = (unsigned long long) delta.tv64 >> 20;
 		printk("initcall %pF returned %d after %Ld msecs\n", fn,
 			it.result, it.duration);
-		trace_boot(&it);
+		trace_boot(&it, fn);
 	}
 
 	msgbuf[0] = 0;
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 7c15f3e68ba3..b9dc2c0093ab 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/debugfs.h>
 #include <linux/ftrace.h>
+#include <linux/kallsyms.h>
 
 #include "trace.h"
 
@@ -56,17 +57,19 @@ static enum print_line_t initcall_print_line(struct trace_iterator *iter)
 	struct timespec rettime = ktime_to_timespec(it->rettime);
 
 	if (entry->type == TRACE_BOOT) {
-		ret = trace_seq_printf(s, "[%5ld.%06ld] calling  %pF @ %i\n",
+		ret = trace_seq_printf(s, "[%5ld.%06ld] calling  %s @ %i\n",
 					  calltime.tv_sec,
 					  calltime.tv_nsec,
 					  it->func, it->caller);
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
-		ret = trace_seq_printf(s, "[%5ld.%06ld] initcall %pF "
+
+		ret = trace_seq_printf(s, "[%5ld.%06ld] initcall %s "
 					  "returned %d after %lld msecs\n",
 					  rettime.tv_sec,
 					  rettime.tv_nsec,
 					  it->func, it->result, it->duration);
+
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 		return TRACE_TYPE_HANDLED;
@@ -83,8 +86,7 @@ struct tracer boot_tracer __read_mostly =
 	.print_line	= initcall_print_line,
 };
 
-
-void trace_boot(struct boot_trace *it)
+void trace_boot(struct boot_trace *it, initcall_t fn)
 {
 	struct ring_buffer_event *event;
 	struct trace_boot *entry;
@@ -95,6 +97,10 @@ void trace_boot(struct boot_trace *it)
 	if (!trace_boot_enabled)
 		return;
 
+	/* Get its name now since this function could
+	 * disappear because it is in the .init section.
+	 */
+	sprint_symbol(it->func, (unsigned long)fn);
 	preempt_disable();
 	data = tr->data[smp_processor_id()];
 
-- 
GitLab


From 3e1932ad59726d794a865cc159c0593d54bf0cb6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 2 Oct 2008 17:45:47 +0200
Subject: [PATCH 327/895] tracing/fastboot: build fix

fix:

 In file included from kernel/sysctl.c:52:
 include/linux/ftrace.h:217: error: 'KSYM_NAME_LEN' undeclared here (not in a function)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e672e51c40a9..deded114dffd 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1,14 +1,14 @@
 #ifndef _LINUX_FTRACE_H
 #define _LINUX_FTRACE_H
 
-#ifdef CONFIG_FTRACE
-
 #include <linux/linkage.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kallsyms.h>
 
+#ifdef CONFIG_FTRACE
+
 extern int ftrace_enabled;
 extern int
 ftrace_enable_sysctl(struct ctl_table *table, int write,
-- 
GitLab


From eb7fa935274bb233686fdf7a53f40c5d9ee76ed6 Mon Sep 17 00:00:00 2001
From: Steven Noonan <steven@uplinklabs.net>
Date: Thu, 2 Oct 2008 12:00:07 -0700
Subject: [PATCH 328/895] ftrace: ktime.h not included in ftrace.h

Including <linux/ktime.h> eliminates the following error:

include/linux/ftrace.h:220: error: expected specifier-qualifier-list
before 'ktime_t'

Signed-off-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index deded114dffd..ed53265d1f63 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -3,6 +3,7 @@
 
 #include <linux/linkage.h>
 #include <linux/fs.h>
+#include <linux/ktime.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kallsyms.h>
-- 
GitLab


From aa1e0e3bcf95ce684d005bedb16e5d4559455685 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 2 Oct 2008 19:18:09 -0400
Subject: [PATCH 329/895] ring_buffer: map to cpu not page

My original patch had a compile bug when NUMA was configured. I
referenced cpu when it should have been cpu_buffer->cpu.

Ingo quickly fixed this bug by replacing cpu with 'i' because that
was the loop counter. Unfortunately, the 'i' was the counter of
pages, not CPUs. This caused a crash when the number of pages allocated
for the buffers exceeded the number of pages, which would usually
be the case.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 54a30986493a..6b8dac02364f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -232,7 +232,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 	for (i = 0; i < nr_pages; i++) {
 		page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
-				    GFP_KERNEL, cpu_to_node(i));
+				    GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
 		if (!page)
 			goto free_pages;
 		list_add(&page->list, &pages);
-- 
GitLab


From 6450c1d3213e27b0dcbf34cce7ad1ae74244c520 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 2 Oct 2008 19:23:04 -0400
Subject: [PATCH 330/895] ftrace: move pc counter in irqtrace

The assigning of the pc counter is in the wrong spot in the
check_critical_timing function. The pc variable is used in the
out jump.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_irqsoff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index f925dbbff2a6..a7db7f040ae0 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -142,11 +142,11 @@ check_critical_timing(struct trace_array *tr,
 
 	local_save_flags(flags);
 
+	pc = preempt_count();
+
 	if (!report_latency(delta))
 		goto out;
 
-	pc = preempt_count();
-
 	spin_lock_irqsave(&max_trace_lock, flags);
 
 	/* check if we are still the max latency */
-- 
GitLab


From 097d036a2f25eecc42435c57e010aaf4a2eed2d9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 3 Oct 2008 15:39:21 +0200
Subject: [PATCH 331/895] tracing/fastboot: only trace non-module initcalls

At this time, only built-in initcalls interest us.
We can't really produce a relevant graph if we include
the modules initcall too.

I had good results after this patch (see svg in attachment).

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h    |  2 ++
 init/main.c               |  1 +
 kernel/trace/trace_boot.c | 11 ++++++++---
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ed53265d1f63..5812dba4ee24 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -225,9 +225,11 @@ struct boot_trace {
 #ifdef CONFIG_BOOT_TRACER
 extern void trace_boot(struct boot_trace *it, initcall_t fn);
 extern void start_boot_trace(void);
+extern void stop_boot_trace(void);
 #else
 static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
 static inline void start_boot_trace(void) { }
+static inline void stop_boot_trace(void) { }
 #endif
 
 
diff --git a/init/main.c b/init/main.c
index 8e96a0ef17f4..e7939de80f3e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -886,6 +886,7 @@ static int __init kernel_init(void * unused)
 	 * we're essentially up and running. Get rid of the
 	 * initmem segments and start the user-mode stuff..
 	 */
+	stop_boot_trace();
 	init_post();
 	return 0;
 }
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index b9dc2c0093ab..a7efe3559654 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -22,11 +22,16 @@ void start_boot_trace(void)
 	trace_boot_enabled = 1;
 }
 
-void stop_boot_trace(struct trace_array *tr)
+void stop_boot_trace(void)
 {
 	trace_boot_enabled = 0;
 }
 
+void reset_boot_trace(struct trace_array *tr)
+{
+	stop_boot_trace();
+}
+
 static void boot_trace_init(struct trace_array *tr)
 {
 	int cpu;
@@ -43,7 +48,7 @@ static void boot_trace_ctrl_update(struct trace_array *tr)
 	if (tr->ctrl)
 		start_boot_trace();
 	else
-		stop_boot_trace(tr);
+		stop_boot_trace();
 }
 
 static enum print_line_t initcall_print_line(struct trace_iterator *iter)
@@ -81,7 +86,7 @@ struct tracer boot_tracer __read_mostly =
 {
 	.name		= "initcall",
 	.init		= boot_trace_init,
-	.reset		= stop_boot_trace,
+	.reset		= reset_boot_trace,
 	.ctrl_update	= boot_trace_ctrl_update,
 	.print_line	= initcall_print_line,
 };
-- 
GitLab


From 6f807acd27734197b11d42829d3cbb9c0937b572 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Sat, 4 Oct 2008 02:00:58 -0400
Subject: [PATCH 332/895] ring-buffer: move page indexes into page headers

Remove the global head and tail indexes and move them into the
page header. Each page will now keep track of where the last
write and read was made. We also rename the head and tail to read
and write for better clarification.

This patch is needed for future enhancements to move the ring buffer
to a lockless solution.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 75 +++++++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 34 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6b8dac02364f..09d4f0d879a7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -117,6 +117,8 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
 struct buffer_page {
 	u64		 time_stamp;	/* page time stamp */
 	unsigned	 size;		/* size of page data */
+	unsigned	 write;		/* index for next write */
+	unsigned	 read;		/* index for next read */
 	struct list_head list;		/* list of free pages */
 	void *page;			/* Actual data page */
 };
@@ -153,11 +155,8 @@ struct ring_buffer_per_cpu {
 	spinlock_t			lock;
 	struct lock_class_key		lock_key;
 	struct list_head		pages;
-	unsigned long			head;	/* read from head */
-	unsigned long			tail;	/* write to tail */
-	unsigned long			reader;
-	struct buffer_page		*head_page;
-	struct buffer_page		*tail_page;
+	struct buffer_page		*head_page;	/* read from head */
+	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*reader_page;
 	unsigned long			overrun;
 	unsigned long			entries;
@@ -566,10 +565,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return (cpu_buffer->reader == cpu_buffer->reader_page->size &&
+	return cpu_buffer->reader_page->read == cpu_buffer->reader_page->size &&
 		(cpu_buffer->tail_page == cpu_buffer->reader_page ||
 		 (cpu_buffer->tail_page == cpu_buffer->head_page &&
-		  cpu_buffer->head == cpu_buffer->tail)));
+		  cpu_buffer->head_page->read ==
+		  cpu_buffer->tail_page->write));
 }
 
 static inline int rb_null_event(struct ring_buffer_event *event)
@@ -577,7 +577,7 @@ static inline int rb_null_event(struct ring_buffer_event *event)
 	return event->type == RINGBUF_TYPE_PADDING;
 }
 
-static inline void *rb_page_index(struct buffer_page *page, unsigned index)
+static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
 {
 	return page->page + index;
 }
@@ -585,15 +585,21 @@ static inline void *rb_page_index(struct buffer_page *page, unsigned index)
 static inline struct ring_buffer_event *
 rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return rb_page_index(cpu_buffer->reader_page,
-			     cpu_buffer->reader);
+	return __rb_page_index(cpu_buffer->reader_page,
+			       cpu_buffer->reader_page->read);
+}
+
+static inline struct ring_buffer_event *
+rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return __rb_page_index(cpu_buffer->head_page,
+			       cpu_buffer->head_page->read);
 }
 
 static inline struct ring_buffer_event *
 rb_iter_head_event(struct ring_buffer_iter *iter)
 {
-	return rb_page_index(iter->head_page,
-			     iter->head);
+	return __rb_page_index(iter->head_page, iter->head);
 }
 
 /*
@@ -610,7 +616,7 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
 	for (head = 0; head < rb_head_size(cpu_buffer);
 	     head += rb_event_length(event)) {
 
-		event = rb_page_index(cpu_buffer->head_page, head);
+		event = __rb_page_index(cpu_buffer->head_page, head);
 		BUG_ON(rb_null_event(event));
 		/* Only count data entries */
 		if (event->type != RINGBUF_TYPE_DATA)
@@ -640,13 +646,13 @@ rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
 
 static void rb_reset_head_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	cpu_buffer->head = 0;
+	cpu_buffer->head_page->read = 0;
 }
 
 static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
-	cpu_buffer->reader = 0;
+	cpu_buffer->reader_page->read = 0;
 }
 
 static inline void rb_inc_iter(struct ring_buffer_iter *iter)
@@ -743,9 +749,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
 
-	/* No locking needed for tail page */
 	tail_page = cpu_buffer->tail_page;
-	tail = cpu_buffer->tail;
+	tail = cpu_buffer->tail_page->write;
 
 	if (tail + length > BUF_PAGE_SIZE) {
 		struct buffer_page *next_page = tail_page;
@@ -774,7 +779,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		}
 
 		if (tail != BUF_PAGE_SIZE) {
-			event = rb_page_index(tail_page, tail);
+			event = __rb_page_index(tail_page, tail);
 			/* page padding */
 			event->type = RINGBUF_TYPE_PADDING;
 		}
@@ -784,14 +789,14 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		tail_page->size = 0;
 		tail = 0;
 		cpu_buffer->tail_page = tail_page;
-		cpu_buffer->tail = tail;
+		cpu_buffer->tail_page->write = tail;
 		rb_add_stamp(cpu_buffer, ts);
 		spin_unlock(&cpu_buffer->lock);
 	}
 
 	BUG_ON(tail + length > BUF_PAGE_SIZE);
 
-	event = rb_page_index(tail_page, tail);
+	event = __rb_page_index(tail_page, tail);
 	rb_update_event(event, type, length);
 
 	return event;
@@ -823,12 +828,12 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		return -1;
 
 	/* check to see if we went to the next page */
-	if (cpu_buffer->tail) {
+	if (cpu_buffer->tail_page->write) {
 		/* Still on same page, update timestamp */
 		event->time_delta = *delta & TS_MASK;
 		event->array[0] = *delta >> TS_SHIFT;
 		/* commit the time event */
-		cpu_buffer->tail +=
+		cpu_buffer->tail_page->write +=
 			rb_event_length(event);
 		cpu_buffer->write_stamp = *ts;
 		*delta = 0;
@@ -846,7 +851,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 	ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 
-	if (cpu_buffer->tail) {
+	if (cpu_buffer->tail_page->write) {
 		delta = ts - cpu_buffer->write_stamp;
 
 		if (test_time_stamp(delta)) {
@@ -868,7 +873,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 		return NULL;
 
 	/* If the reserve went to the next page, our delta is zero */
-	if (!cpu_buffer->tail)
+	if (!cpu_buffer->tail_page->write)
 		delta = 0;
 
 	event->time_delta = delta;
@@ -933,8 +938,8 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 		      struct ring_buffer_event *event)
 {
-	cpu_buffer->tail += rb_event_length(event);
-	cpu_buffer->tail_page->size = cpu_buffer->tail;
+	cpu_buffer->tail_page->write += rb_event_length(event);
+	cpu_buffer->tail_page->size = cpu_buffer->tail_page->write;
 	cpu_buffer->write_stamp += event->time_delta;
 	cpu_buffer->entries++;
 }
@@ -1178,10 +1183,10 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 	/* Iterator usage is expected to have record disabled */
 	if (list_empty(&cpu_buffer->reader_page->list)) {
 		iter->head_page = cpu_buffer->head_page;
-		iter->head = cpu_buffer->head;
+		iter->head = cpu_buffer->head_page->read;
 	} else {
 		iter->head_page = cpu_buffer->reader_page;
-		iter->head = cpu_buffer->reader;
+		iter->head = cpu_buffer->reader_page->read;
 	}
 	if (iter->head)
 		iter->read_stamp = cpu_buffer->read_stamp;
@@ -1200,7 +1205,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
 	cpu_buffer = iter->cpu_buffer;
 
 	return iter->head_page == cpu_buffer->tail_page &&
-		iter->head == cpu_buffer->tail;
+		iter->head == cpu_buffer->tail_page->write;
 }
 
 static void
@@ -1277,11 +1282,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	reader = cpu_buffer->reader_page;
 
 	/* If there's more to read, return this page */
-	if (cpu_buffer->reader < reader->size)
+	if (cpu_buffer->reader_page->read < reader->size)
 		goto out;
 
 	/* Never should we have an index greater than the size */
-	WARN_ON(cpu_buffer->reader > reader->size);
+	WARN_ON(cpu_buffer->reader_page->read > reader->size);
 
 	/* check if we caught up to the tail */
 	reader = NULL;
@@ -1342,7 +1347,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 	rb_update_read_stamp(cpu_buffer, event);
 
 	length = rb_event_length(event);
-	cpu_buffer->reader += length;
+	cpu_buffer->reader_page->read += length;
 }
 
 static void rb_advance_iter(struct ring_buffer_iter *iter)
@@ -1373,7 +1378,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 * at the tail of the buffer.
 	 */
 	BUG_ON((iter->head_page == cpu_buffer->tail_page) &&
-	       (iter->head + length > cpu_buffer->tail));
+	       (iter->head + length > cpu_buffer->tail_page->write));
 
 	rb_update_iter_read_stamp(iter, event);
 
@@ -1623,7 +1628,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
 	cpu_buffer->reader_page->size = 0;
 
-	cpu_buffer->head = cpu_buffer->tail = cpu_buffer->reader = 0;
+	cpu_buffer->head_page->read = 0;
+	cpu_buffer->tail_page->write = 0;
+	cpu_buffer->reader_page->read = 0;
 
 	cpu_buffer->overrun = 0;
 	cpu_buffer->entries = 0;
-- 
GitLab


From bf41a158cacba6ca5fc6407a54e7ad8ce1567e2e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Sat, 4 Oct 2008 02:00:59 -0400
Subject: [PATCH 333/895] ring-buffer: make reentrant

This patch replaces the local_irq_save/restore with preempt_disable/
enable. This allows for interrupts to enter while recording.
To write to the ring buffer, you must reserve data, and then
commit it. During this time, an interrupt may call a trace function
that will also record into the buffer before the commit is made.

The interrupt will reserve its entry after the first entry, even
though the first entry did not finish yet.

The time stamp delta of the interrupt entry will be zero, since
in the view of the trace, the interrupt happened during the
first field anyway.

Locking still takes place when the tail/write moves from one page
to the next. The reader always takes the locks.

A new page pointer is added, called the commit. The write/tail will
always point to the end of all entries. The commit field will
point to the last committed entry. Only this commit entry may
update the write time stamp.

The reader can only go up to the commit. It cannot go past it.

If a lot of interrupts come in during a commit that fills up the
buffer, and it happens to make it all the way around the buffer
back to the commit, then a warning is printed and new events will
be dropped.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 487 ++++++++++++++++++++++++++++---------
 1 file changed, 374 insertions(+), 113 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 09d4f0d879a7..94af1fe56bb4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -116,8 +116,8 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
  */
 struct buffer_page {
 	u64		 time_stamp;	/* page time stamp */
-	unsigned	 size;		/* size of page data */
-	unsigned	 write;		/* index for next write */
+	local_t		 write;		/* index for next write */
+	local_t		 commit;	/* write commited index */
 	unsigned	 read;		/* index for next read */
 	struct list_head list;		/* list of free pages */
 	void *page;			/* Actual data page */
@@ -157,6 +157,7 @@ struct ring_buffer_per_cpu {
 	struct list_head		pages;
 	struct buffer_page		*head_page;	/* read from head */
 	struct buffer_page		*tail_page;	/* write to tail */
+	struct buffer_page		*commit_page;	/* commited pages */
 	struct buffer_page		*reader_page;
 	unsigned long			overrun;
 	unsigned long			entries;
@@ -185,12 +186,32 @@ struct ring_buffer_iter {
 	u64				read_stamp;
 };
 
-#define RB_WARN_ON(buffer, cond)			\
-	if (unlikely(cond)) {				\
-		atomic_inc(&buffer->record_disabled);	\
-		WARN_ON(1);				\
-		return -1;				\
-	}
+#define RB_WARN_ON(buffer, cond)				\
+	do {							\
+		if (unlikely(cond)) {				\
+			atomic_inc(&buffer->record_disabled);	\
+			WARN_ON(1);				\
+		}						\
+	} while (0)
+
+#define RB_WARN_ON_RET(buffer, cond)				\
+	do {							\
+		if (unlikely(cond)) {				\
+			atomic_inc(&buffer->record_disabled);	\
+			WARN_ON(1);				\
+			return -1;				\
+		}						\
+	} while (0)
+
+#define RB_WARN_ON_ONCE(buffer, cond)				\
+	do {							\
+		static int once;				\
+		if (unlikely(cond) && !once) {			\
+			once++;					\
+			atomic_inc(&buffer->record_disabled);	\
+			WARN_ON(1);				\
+		}						\
+	} while (0)
 
 /**
  * check_pages - integrity check of buffer pages
@@ -204,22 +225,19 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 	struct list_head *head = &cpu_buffer->pages;
 	struct buffer_page *page, *tmp;
 
-	RB_WARN_ON(cpu_buffer, head->next->prev != head);
-	RB_WARN_ON(cpu_buffer, head->prev->next != head);
+	RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
+	RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
 
 	list_for_each_entry_safe(page, tmp, head, list) {
-		RB_WARN_ON(cpu_buffer, page->list.next->prev != &page->list);
-		RB_WARN_ON(cpu_buffer, page->list.prev->next != &page->list);
+		RB_WARN_ON_RET(cpu_buffer,
+			       page->list.next->prev != &page->list);
+		RB_WARN_ON_RET(cpu_buffer,
+			       page->list.prev->next != &page->list);
 	}
 
 	return 0;
 }
 
-static unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
-{
-	return cpu_buffer->head_page->size;
-}
-
 static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 			     unsigned nr_pages)
 {
@@ -286,7 +304,6 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	page->page = (void *)addr;
 
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
-	cpu_buffer->reader_page->size = 0;
 
 	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
 	if (ret < 0)
@@ -294,8 +311,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
-	cpu_buffer->tail_page
-		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
 
 	return cpu_buffer;
 
@@ -563,15 +579,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	return -ENOMEM;
 }
 
-static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
-{
-	return cpu_buffer->reader_page->read == cpu_buffer->reader_page->size &&
-		(cpu_buffer->tail_page == cpu_buffer->reader_page ||
-		 (cpu_buffer->tail_page == cpu_buffer->head_page &&
-		  cpu_buffer->head_page->read ==
-		  cpu_buffer->tail_page->write));
-}
-
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
 	return event->type == RINGBUF_TYPE_PADDING;
@@ -602,6 +609,33 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
 	return __rb_page_index(iter->head_page, iter->head);
 }
 
+static inline unsigned rb_page_write(struct buffer_page *bpage)
+{
+	return local_read(&bpage->write);
+}
+
+static inline unsigned rb_page_commit(struct buffer_page *bpage)
+{
+	return local_read(&bpage->commit);
+}
+
+/* Size is determined by what has been commited */
+static inline unsigned rb_page_size(struct buffer_page *bpage)
+{
+	return rb_page_commit(bpage);
+}
+
+static inline unsigned
+rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return rb_page_commit(cpu_buffer->commit_page);
+}
+
+static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return rb_page_commit(cpu_buffer->head_page);
+}
+
 /*
  * When the tail hits the head and the buffer is in overwrite mode,
  * the head jumps to the next page and all content on the previous
@@ -637,16 +671,76 @@ static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
 	*page = list_entry(p, struct buffer_page, list);
 }
 
-static inline void
-rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
+static inline unsigned
+rb_event_index(struct ring_buffer_event *event)
 {
-	cpu_buffer->tail_page->time_stamp = *ts;
-	cpu_buffer->write_stamp = *ts;
+	unsigned long addr = (unsigned long)event;
+
+	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
 }
 
-static void rb_reset_head_page(struct ring_buffer_per_cpu *cpu_buffer)
+static inline int
+rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+	     struct ring_buffer_event *event)
 {
-	cpu_buffer->head_page->read = 0;
+	unsigned long addr = (unsigned long)event;
+	unsigned long index;
+
+	index = rb_event_index(event);
+	addr &= PAGE_MASK;
+
+	return cpu_buffer->commit_page->page == (void *)addr &&
+		rb_commit_index(cpu_buffer) == index;
+}
+
+static inline void
+rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
+		    struct ring_buffer_event *event)
+{
+	unsigned long addr = (unsigned long)event;
+	unsigned long index;
+
+	index = rb_event_index(event);
+	addr &= PAGE_MASK;
+
+	while (cpu_buffer->commit_page->page != (void *)addr) {
+		RB_WARN_ON(cpu_buffer,
+			   cpu_buffer->commit_page == cpu_buffer->tail_page);
+		cpu_buffer->commit_page->commit =
+			cpu_buffer->commit_page->write;
+		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+		cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+	}
+
+	/* Now set the commit to the event's index */
+	local_set(&cpu_buffer->commit_page->commit, index);
+}
+
+static inline void
+rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	/*
+	 * We only race with interrupts and NMIs on this CPU.
+	 * If we own the commit event, then we can commit
+	 * all others that interrupted us, since the interruptions
+	 * are in stack format (they finish before they come
+	 * back to us). This allows us to do a simple loop to
+	 * assign the commit to the tail.
+	 */
+	while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
+		cpu_buffer->commit_page->commit =
+			cpu_buffer->commit_page->write;
+		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+		cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+		/* add barrier to keep gcc from optimizing too much */
+		barrier();
+	}
+	while (rb_commit_index(cpu_buffer) !=
+	       rb_page_write(cpu_buffer->commit_page)) {
+		cpu_buffer->commit_page->commit =
+			cpu_buffer->commit_page->write;
+		barrier();
+	}
 }
 
 static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
@@ -745,61 +839,120 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		  unsigned type, unsigned long length, u64 *ts)
 {
 	struct buffer_page *tail_page, *head_page, *reader_page;
-	unsigned long tail;
+	unsigned long tail, write;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
+	unsigned long flags;
 
 	tail_page = cpu_buffer->tail_page;
-	tail = cpu_buffer->tail_page->write;
+	write = local_add_return(length, &tail_page->write);
+	tail = write - length;
 
-	if (tail + length > BUF_PAGE_SIZE) {
+	/* See if we shot pass the end of this buffer page */
+	if (write > BUF_PAGE_SIZE) {
 		struct buffer_page *next_page = tail_page;
 
-		spin_lock(&cpu_buffer->lock);
+		spin_lock_irqsave(&cpu_buffer->lock, flags);
+
 		rb_inc_page(cpu_buffer, &next_page);
 
 		head_page = cpu_buffer->head_page;
 		reader_page = cpu_buffer->reader_page;
 
 		/* we grabbed the lock before incrementing */
-		WARN_ON(next_page == reader_page);
+		RB_WARN_ON(cpu_buffer, next_page == reader_page);
+
+		/*
+		 * If for some reason, we had an interrupt storm that made
+		 * it all the way around the buffer, bail, and warn
+		 * about it.
+		 */
+		if (unlikely(next_page == cpu_buffer->commit_page)) {
+			WARN_ON_ONCE(1);
+			goto out_unlock;
+		}
 
 		if (next_page == head_page) {
 			if (!(buffer->flags & RB_FL_OVERWRITE)) {
-				spin_unlock(&cpu_buffer->lock);
-				return NULL;
+				/* reset write */
+				if (tail <= BUF_PAGE_SIZE)
+					local_set(&tail_page->write, tail);
+				goto out_unlock;
 			}
 
-			/* count overflows */
-			rb_update_overflow(cpu_buffer);
+			/* tail_page has not moved yet? */
+			if (tail_page == cpu_buffer->tail_page) {
+				/* count overflows */
+				rb_update_overflow(cpu_buffer);
+
+				rb_inc_page(cpu_buffer, &head_page);
+				cpu_buffer->head_page = head_page;
+				cpu_buffer->head_page->read = 0;
+			}
+		}
 
-			rb_inc_page(cpu_buffer, &head_page);
-			cpu_buffer->head_page = head_page;
-			rb_reset_head_page(cpu_buffer);
+		/*
+		 * If the tail page is still the same as what we think
+		 * it is, then it is up to us to update the tail
+		 * pointer.
+		 */
+		if (tail_page == cpu_buffer->tail_page) {
+			local_set(&next_page->write, 0);
+			local_set(&next_page->commit, 0);
+			cpu_buffer->tail_page = next_page;
+
+			/* reread the time stamp */
+			*ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+			cpu_buffer->tail_page->time_stamp = *ts;
 		}
 
-		if (tail != BUF_PAGE_SIZE) {
+		/*
+		 * The actual tail page has moved forward.
+		 */
+		if (tail < BUF_PAGE_SIZE) {
+			/* Mark the rest of the page with padding */
 			event = __rb_page_index(tail_page, tail);
-			/* page padding */
 			event->type = RINGBUF_TYPE_PADDING;
 		}
 
-		tail_page->size = tail;
-		tail_page = next_page;
-		tail_page->size = 0;
-		tail = 0;
-		cpu_buffer->tail_page = tail_page;
-		cpu_buffer->tail_page->write = tail;
-		rb_add_stamp(cpu_buffer, ts);
-		spin_unlock(&cpu_buffer->lock);
+		if (tail <= BUF_PAGE_SIZE)
+			/* Set the write back to the previous setting */
+			local_set(&tail_page->write, tail);
+
+		/*
+		 * If this was a commit entry that failed,
+		 * increment that too
+		 */
+		if (tail_page == cpu_buffer->commit_page &&
+		    tail == rb_commit_index(cpu_buffer)) {
+			rb_set_commit_to_write(cpu_buffer);
+		}
+
+		spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+		/* fail and let the caller try again */
+		return ERR_PTR(-EAGAIN);
 	}
 
-	BUG_ON(tail + length > BUF_PAGE_SIZE);
+	/* We reserved something on the buffer */
+
+	BUG_ON(write > BUF_PAGE_SIZE);
 
 	event = __rb_page_index(tail_page, tail);
 	rb_update_event(event, type, length);
 
+	/*
+	 * If this is a commit and the tail is zero, then update
+	 * this page's time stamp.
+	 */
+	if (!tail && rb_is_commit(cpu_buffer, event))
+		cpu_buffer->commit_page->time_stamp = *ts;
+
 	return event;
+
+ out_unlock:
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+	return NULL;
 }
 
 static int
@@ -808,6 +961,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	struct ring_buffer_event *event;
 	static int once;
+	int ret;
 
 	if (unlikely(*delta > (1ULL << 59) && !once++)) {
 		printk(KERN_WARNING "Delta way too big! %llu"
@@ -825,21 +979,38 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 				  RB_LEN_TIME_EXTEND,
 				  ts);
 	if (!event)
-		return -1;
+		return -EBUSY;
 
-	/* check to see if we went to the next page */
-	if (cpu_buffer->tail_page->write) {
-		/* Still on same page, update timestamp */
-		event->time_delta = *delta & TS_MASK;
-		event->array[0] = *delta >> TS_SHIFT;
-		/* commit the time event */
-		cpu_buffer->tail_page->write +=
-			rb_event_length(event);
+	if (PTR_ERR(event) == -EAGAIN)
+		return -EAGAIN;
+
+	/* Only a commited time event can update the write stamp */
+	if (rb_is_commit(cpu_buffer, event)) {
+		/*
+		 * If this is the first on the page, then we need to
+		 * update the page itself, and just put in a zero.
+		 */
+		if (rb_event_index(event)) {
+			event->time_delta = *delta & TS_MASK;
+			event->array[0] = *delta >> TS_SHIFT;
+		} else {
+			cpu_buffer->commit_page->time_stamp = *ts;
+			event->time_delta = 0;
+			event->array[0] = 0;
+		}
 		cpu_buffer->write_stamp = *ts;
-		*delta = 0;
+		/* let the caller know this was the commit */
+		ret = 1;
+	} else {
+		/* Darn, this is just wasted space */
+		event->time_delta = 0;
+		event->array[0] = 0;
+		ret = 0;
 	}
 
-	return 0;
+	*delta = 0;
+
+	return ret;
 }
 
 static struct ring_buffer_event *
@@ -848,32 +1019,69 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	struct ring_buffer_event *event;
 	u64 ts, delta;
+	int commit = 0;
 
+ again:
 	ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 
-	if (cpu_buffer->tail_page->write) {
+	/*
+	 * Only the first commit can update the timestamp.
+	 * Yes there is a race here. If an interrupt comes in
+	 * just after the conditional and it traces too, then it
+	 * will also check the deltas. More than one timestamp may
+	 * also be made. But only the entry that did the actual
+	 * commit will be something other than zero.
+	 */
+	if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
+	    rb_page_write(cpu_buffer->tail_page) ==
+	    rb_commit_index(cpu_buffer)) {
+
 		delta = ts - cpu_buffer->write_stamp;
 
+		/* make sure this delta is calculated here */
+		barrier();
+
+		/* Did the write stamp get updated already? */
+		if (unlikely(ts < cpu_buffer->write_stamp))
+			goto again;
+
 		if (test_time_stamp(delta)) {
-			int ret;
 
-			ret = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-			if (ret < 0)
+			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
+
+			if (commit == -EBUSY)
 				return NULL;
+
+			if (commit == -EAGAIN)
+				goto again;
+
+			RB_WARN_ON(cpu_buffer, commit < 0);
 		}
-	} else {
-		spin_lock(&cpu_buffer->lock);
-		rb_add_stamp(cpu_buffer, &ts);
-		spin_unlock(&cpu_buffer->lock);
+	} else
+		/* Non commits have zero deltas */
 		delta = 0;
-	}
 
 	event = __rb_reserve_next(cpu_buffer, type, length, &ts);
-	if (!event)
+	if (PTR_ERR(event) == -EAGAIN)
+		goto again;
+
+	if (!event) {
+		if (unlikely(commit))
+			/*
+			 * Ouch! We needed a timestamp and it was commited. But
+			 * we didn't get our event reserved.
+			 */
+			rb_set_commit_to_write(cpu_buffer);
 		return NULL;
+	}
 
-	/* If the reserve went to the next page, our delta is zero */
-	if (!cpu_buffer->tail_page->write)
+	/*
+	 * If the timestamp was commited, make the commit our entry
+	 * now so that we will update it when needed.
+	 */
+	if (commit)
+		rb_set_commit_event(cpu_buffer, event);
+	else if (!rb_is_commit(cpu_buffer, event))
 		delta = 0;
 
 	event->time_delta = delta;
@@ -881,6 +1089,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	return event;
 }
 
+static DEFINE_PER_CPU(int, rb_need_resched);
+
 /**
  * ring_buffer_lock_reserve - reserve a part of the buffer
  * @buffer: the ring buffer to reserve from
@@ -904,12 +1114,15 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
-	int cpu;
+	int cpu, resched;
 
 	if (atomic_read(&buffer->record_disabled))
 		return NULL;
 
-	local_irq_save(*flags);
+	/* If we are tracing schedule, we don't want to recurse */
+	resched = need_resched();
+	preempt_disable_notrace();
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
@@ -922,26 +1135,42 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 
 	length = rb_calculate_event_length(length);
 	if (length > BUF_PAGE_SIZE)
-		return NULL;
+		goto out;
 
 	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
 	if (!event)
 		goto out;
 
+	/*
+	 * Need to store resched state on this cpu.
+	 * Only the first needs to.
+	 */
+
+	if (preempt_count() == 1)
+		per_cpu(rb_need_resched, cpu) = resched;
+
 	return event;
 
  out:
-	local_irq_restore(*flags);
+	if (resched)
+		preempt_enable_notrace();
+	else
+		preempt_enable_notrace();
 	return NULL;
 }
 
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 		      struct ring_buffer_event *event)
 {
-	cpu_buffer->tail_page->write += rb_event_length(event);
-	cpu_buffer->tail_page->size = cpu_buffer->tail_page->write;
-	cpu_buffer->write_stamp += event->time_delta;
 	cpu_buffer->entries++;
+
+	/* Only process further if we own the commit */
+	if (!rb_is_commit(cpu_buffer, event))
+		return;
+
+	cpu_buffer->write_stamp += event->time_delta;
+
+	rb_set_commit_to_write(cpu_buffer);
 }
 
 /**
@@ -965,7 +1194,16 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	rb_commit(cpu_buffer, event);
 
-	local_irq_restore(flags);
+	/*
+	 * Only the last preempt count needs to restore preemption.
+	 */
+	if (preempt_count() == 1) {
+		if (per_cpu(rb_need_resched, cpu))
+			preempt_enable_no_resched_notrace();
+		else
+			preempt_enable_notrace();
+	} else
+		preempt_enable_no_resched_notrace();
 
 	return 0;
 }
@@ -989,15 +1227,17 @@ int ring_buffer_write(struct ring_buffer *buffer,
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
-	unsigned long event_length, flags;
+	unsigned long event_length;
 	void *body;
 	int ret = -EBUSY;
-	int cpu;
+	int cpu, resched;
 
 	if (atomic_read(&buffer->record_disabled))
 		return -EBUSY;
 
-	local_irq_save(flags);
+	resched = need_resched();
+	preempt_disable_notrace();
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
@@ -1022,11 +1262,26 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
 	ret = 0;
  out:
-	local_irq_restore(flags);
+	if (resched)
+		preempt_enable_no_resched_notrace();
+	else
+		preempt_enable_notrace();
 
 	return ret;
 }
 
+static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct buffer_page *reader = cpu_buffer->reader_page;
+	struct buffer_page *head = cpu_buffer->head_page;
+	struct buffer_page *commit = cpu_buffer->commit_page;
+
+	return reader->read == rb_page_commit(reader) &&
+		(commit == reader ||
+		 (commit == head &&
+		  head->read == rb_page_commit(commit)));
+}
+
 /**
  * ring_buffer_record_disable - stop all writes into the buffer
  * @buffer: The ring buffer to stop writes to.
@@ -1204,8 +1459,8 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
 
 	cpu_buffer = iter->cpu_buffer;
 
-	return iter->head_page == cpu_buffer->tail_page &&
-		iter->head == cpu_buffer->tail_page->write;
+	return iter->head_page == cpu_buffer->commit_page &&
+		iter->head == rb_commit_index(cpu_buffer);
 }
 
 static void
@@ -1282,15 +1537,16 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	reader = cpu_buffer->reader_page;
 
 	/* If there's more to read, return this page */
-	if (cpu_buffer->reader_page->read < reader->size)
+	if (cpu_buffer->reader_page->read < rb_page_size(reader))
 		goto out;
 
 	/* Never should we have an index greater than the size */
-	WARN_ON(cpu_buffer->reader_page->read > reader->size);
+	RB_WARN_ON(cpu_buffer,
+		   cpu_buffer->reader_page->read > rb_page_size(reader));
 
 	/* check if we caught up to the tail */
 	reader = NULL;
-	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+	if (cpu_buffer->commit_page == cpu_buffer->reader_page)
 		goto out;
 
 	/*
@@ -1301,7 +1557,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	reader = cpu_buffer->head_page;
 	cpu_buffer->reader_page->list.next = reader->list.next;
 	cpu_buffer->reader_page->list.prev = reader->list.prev;
-	cpu_buffer->reader_page->size = 0;
+
+	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->commit, 0);
 
 	/* Make the reader page now replace the head */
 	reader->list.prev->next = &cpu_buffer->reader_page->list;
@@ -1313,7 +1571,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	 */
 	cpu_buffer->head_page = cpu_buffer->reader_page;
 
-	if (cpu_buffer->tail_page != reader)
+	if (cpu_buffer->commit_page != reader)
 		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
 
 	/* Finally update the reader page to the new head */
@@ -1363,8 +1621,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	/*
 	 * Check if we are at the end of the buffer.
 	 */
-	if (iter->head >= iter->head_page->size) {
-		BUG_ON(iter->head_page == cpu_buffer->tail_page);
+	if (iter->head >= rb_page_size(iter->head_page)) {
+		BUG_ON(iter->head_page == cpu_buffer->commit_page);
 		rb_inc_iter(iter);
 		return;
 	}
@@ -1377,16 +1635,16 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 * This should not be called to advance the header if we are
 	 * at the tail of the buffer.
 	 */
-	BUG_ON((iter->head_page == cpu_buffer->tail_page) &&
-	       (iter->head + length > cpu_buffer->tail_page->write));
+	BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
+	       (iter->head + length > rb_commit_index(cpu_buffer)));
 
 	rb_update_iter_read_stamp(iter, event);
 
 	iter->head += length;
 
 	/* check for end of page padding */
-	if ((iter->head >= iter->head_page->size) &&
-	    (iter->head_page != cpu_buffer->tail_page))
+	if ((iter->head >= rb_page_size(iter->head_page)) &&
+	    (iter->head_page != cpu_buffer->commit_page))
 		rb_advance_iter(iter);
 }
 
@@ -1420,7 +1678,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 
 	switch (event->type) {
 	case RINGBUF_TYPE_PADDING:
-		WARN_ON(1);
+		RB_WARN_ON(cpu_buffer, 1);
 		rb_advance_reader(cpu_buffer);
 		return NULL;
 
@@ -1622,14 +1880,17 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
-	cpu_buffer->head_page->size = 0;
-	cpu_buffer->tail_page = cpu_buffer->head_page;
-	cpu_buffer->tail_page->size = 0;
-	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
-	cpu_buffer->reader_page->size = 0;
+	local_set(&cpu_buffer->head_page->write, 0);
+	local_set(&cpu_buffer->head_page->commit, 0);
 
 	cpu_buffer->head_page->read = 0;
-	cpu_buffer->tail_page->write = 0;
+
+	cpu_buffer->tail_page = cpu_buffer->head_page;
+	cpu_buffer->commit_page = cpu_buffer->head_page;
+
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->commit, 0);
 	cpu_buffer->reader_page->read = 0;
 
 	cpu_buffer->overrun = 0;
-- 
GitLab


From 3ea2e6d71aafe35b8aaf89ed711a283815acfae6 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Sat, 4 Oct 2008 02:01:00 -0400
Subject: [PATCH 334/895] ftrace: make some tracers reentrant

Now that the ring buffer is reentrant, some of the ftrace tracers
(sched_swich, debugging traces) can also be reentrant.

Note: Never make the function tracer reentrant, that can cause
  recursion problems all over the kernel. The function tracer
  must disable reentrancy.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c              | 10 ++--------
 kernel/trace/trace_sched_switch.c | 10 ++--------
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1cd2e8143bb4..caa4051ce778 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -839,7 +839,6 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
-	long disabled;
 	int cpu;
 	int pc;
 
@@ -850,12 +849,10 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
 
-	if (likely(disabled == 1))
+	if (likely(!atomic_read(&data->disabled)))
 		ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
 
-	atomic_dec(&data->disabled);
 	preempt_enable_notrace();
 }
 
@@ -2961,7 +2958,6 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	struct trace_array_cpu *data;
 	struct print_entry *entry;
 	unsigned long flags, irq_flags;
-	long disabled;
 	int cpu, len = 0, size, pc;
 
 	if (!tr->ctrl || tracing_disabled)
@@ -2971,9 +2967,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
 
-	if (unlikely(disabled != 1))
+	if (unlikely(atomic_read(&data->disabled)))
 		goto out;
 
 	spin_lock_irqsave(&trace_buf_lock, flags);
@@ -2999,7 +2994,6 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	spin_unlock_irqrestore(&trace_buf_lock, flags);
 
  out:
-	atomic_dec(&data->disabled);
 	preempt_enable_notrace();
 
 	return len;
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index c7fa08a5b7f4..b8f56beb1a62 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -24,7 +24,6 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 {
 	struct trace_array_cpu *data;
 	unsigned long flags;
-	long disabled;
 	int cpu;
 	int pc;
 
@@ -41,12 +40,10 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = ctx_trace->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
 
-	if (likely(disabled == 1))
+	if (likely(!atomic_read(&data->disabled)))
 		tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
 
-	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 }
 
@@ -55,7 +52,6 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 {
 	struct trace_array_cpu *data;
 	unsigned long flags;
-	long disabled;
 	int cpu, pc;
 
 	if (!likely(tracer_enabled))
@@ -67,13 +63,11 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = ctx_trace->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
 
-	if (likely(disabled == 1))
+	if (likely(!atomic_read(&data->disabled)))
 		tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
 					   flags, pc);
 
-	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 }
 
-- 
GitLab


From c2931e05ec5965597cbfb79ad332d4a29aeceb23 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 4 Oct 2008 22:04:44 +0200
Subject: [PATCH 335/895] ftrace: return an error when setting a nonexistent
 tracer

When one try to set a nonexistent tracer, no error is returned
as if the name of the tracer was correct.
We should return -EINVAL.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index caa4051ce778..78d56614c95b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2381,9 +2381,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 	struct tracer *t;
 	char buf[max_tracer_type_len+1];
 	int i;
+	size_t ret;
 
 	if (cnt > max_tracer_type_len)
 		cnt = max_tracer_type_len;
+	ret = cnt;
 
 	if (copy_from_user(&buf, ubuf, cnt))
 		return -EFAULT;
@@ -2399,7 +2401,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 		if (strcmp(t->name, buf) == 0)
 			break;
 	}
-	if (!t || t == current_trace)
+	if (!t) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (t == current_trace)
 		goto out;
 
 	if (current_trace && current_trace->reset)
@@ -2412,9 +2418,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
  out:
 	mutex_unlock(&trace_types_lock);
 
-	filp->f_pos += cnt;
+	if (ret == cnt)
+		filp->f_pos += cnt;
 
-	return cnt;
+	return ret;
 }
 
 static ssize_t
-- 
GitLab


From 8a5d900cca57115326bc5b9e7f3bac980986c2c8 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Sat, 4 Oct 2008 13:42:27 -0700
Subject: [PATCH 336/895] tracing/fastboot: fix printk format typo in boot
 tracer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When printing nanoseconds, the right printk format string is %09 not %06...

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: FrÃ©dÃ©ric Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_boot.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index a7efe3559654..d0a5e50eeff2 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -62,14 +62,14 @@ static enum print_line_t initcall_print_line(struct trace_iterator *iter)
 	struct timespec rettime = ktime_to_timespec(it->rettime);
 
 	if (entry->type == TRACE_BOOT) {
-		ret = trace_seq_printf(s, "[%5ld.%06ld] calling  %s @ %i\n",
+		ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
 					  calltime.tv_sec,
 					  calltime.tv_nsec,
 					  it->func, it->caller);
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 
-		ret = trace_seq_printf(s, "[%5ld.%06ld] initcall %s "
+		ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
 					  "returned %d after %lld msecs\n",
 					  rettime.tv_sec,
 					  rettime.tv_nsec,
-- 
GitLab


From ddc7a01aad195708fc943d9446411d11e3547784 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 4 Oct 2008 21:35:48 +0200
Subject: [PATCH 337/895] tracing/fastboot: fix initcalls disposition in
 bootgraph.pl

When bootgraph.pl parses a file, it gives one row
for each initcall's pid. But only few of them will
be displayed => the longest.

This patch corrects it by giving only a rows for pids
which have initcalls that will be displayed.

[ mingo@elte.hu: resolved conflicts ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/bootgraph.pl | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index d459b8bdef02..c34b359c34a3 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -37,12 +37,12 @@
 # 	dmesg | perl scripts/bootgraph.pl > output.svg
 #
 
-my @rows;
-my %start, %end, %row;
+my %start, %end;
 my $done = 0;
-my $rowcount = 0;
 my $maxtime = 0;
 my $count = 0;
+my %pids;
+
 while (<>) {
 	my $line = $_;
 	if ($line =~ /([0-9\.]+)\] calling  ([a-zA-Z\_]+)\+/) {
@@ -50,14 +50,8 @@ while (<>) {
 		if ($done == 0) {
 			$start{$func} = $1;
 		}
-		$row{$func} = 1;
 		if ($line =~ /\@ ([0-9]+)/) {
-			my $pid = $1;
-			if (!defined($rows[$pid])) {
-				$rowcount = $rowcount + 1;
-				$rows[$pid] = $rowcount;
-			}
-			$row{$func} = $rows[$pid];
+			$pids{$func} = $1;
 		}
 		$count = $count + 1;
 	}
@@ -102,17 +96,25 @@ $styles[11] = "fill:rgb(128,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(
 my $mult = 950.0 / $maxtime;
 my $threshold = 0.0500 / $maxtime;
 my $stylecounter = 0;
+my %rows;
+my $rowscount = 1;
 while (($key,$value) = each %start) {
 	my $duration = $end{$key} - $start{$key};
 
 	if ($duration >= $threshold) {
 		my $s, $s2, $e, $y;
-		$s = $value * $mult;
+		$pid = $pids{$key};
+
+		if (!defined($rows{$pid})) {
+			$rows{$pid} = $rowscount;
+			$rowscount = $rowscount + 1;
+		}
+		$s = ($value - $firsttime) * $mult;
 		$s2 = $s + 6;
 		$e = $end{$key} * $mult;
 		$w = $e - $s;
 
-		$y = $row{$key} * 150;
+		$y = $rows{$pid} * 150;
 		$y2 = $y + 4;
 
 		$style = $styles[$stylecounter];
-- 
GitLab


From 2fbc474901933c8f0c09b0280dfbb6780cb8bd60 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 8 Oct 2008 16:51:49 -0700
Subject: [PATCH 338/895] ftrace: fix hex output mode of ftrace

Fix the output of ftrace in hex mode as the hi/lo nibbles are output in
reverse order. Without this patch, the output of ftrace is:

raw mode : 6474 0 141531612444 0 140 + 6402 120 S
hex mode : 000091a4 00000000 000000023f1f50c1 00000000 c8 000000b2 00009120 87 ffff00c8 00000035

There is an inversion on ouput hex(6474) is 194a

[based on a patch by Philippe Reynes <tremyfr@yahoo.fr>]
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 78d56614c95b..36cbb873845f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -336,14 +336,12 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
 }
 
 #define HEX_CHARS 17
-static const char hex2asc[] = "0123456789abcdef";
 
 static int
 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
 {
 	unsigned char hex[HEX_CHARS];
 	unsigned char *data = mem;
-	unsigned char byte;
 	int i, j;
 
 	BUG_ON(len >= HEX_CHARS);
@@ -353,10 +351,8 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
 #else
 	for (i = len-1, j = 0; i >= 0; i--) {
 #endif
-		byte = data[i];
-
-		hex[j++] = hex2asc[byte & 0x0f];
-		hex[j++] = hex2asc[byte >> 4];
+		hex[j++] = hex_asc_hi(data[i]);
+		hex[j++] = hex_asc_lo(data[i]);
 	}
 	hex[j++] = ' ';
 
-- 
GitLab


From ad0a3b68114e8f3c25ac0045b45a2838f23e3b3a Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 8 Oct 2008 17:44:55 -0700
Subject: [PATCH 339/895] trace: add build-time check to avoid overrunning hex
 buffer

Remove the runtime BUG_ON and change to a compile-time check in
the macro that calls the hex format routine

[Noticed by Joe Perches]
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 36cbb873845f..d345d649d073 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -335,7 +335,8 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
 	return len;
 }
 
-#define HEX_CHARS 17
+#define MAX_MEMHEX_BYTES	8
+#define HEX_CHARS		(MAX_MEMHEX_BYTES*2 + 1)
 
 static int
 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
@@ -344,8 +345,6 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
 	unsigned char *data = mem;
 	int i, j;
 
-	BUG_ON(len >= HEX_CHARS);
-
 #ifdef __BIG_ENDIAN
 	for (i = 0, j = 0; i < len; i++) {
 #else
@@ -1668,6 +1667,7 @@ do {							\
 
 #define SEQ_PUT_HEX_FIELD_RET(s, x)			\
 do {							\
+	BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES);	\
 	if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))	\
 		return 0;				\
 } while (0)
-- 
GitLab


From ca538f6bbe583406f941f3041d40c41f9a13d1de Mon Sep 17 00:00:00 2001
From: Tim Bird <tim.bird@am.sony.com>
Date: Thu, 9 Oct 2008 15:23:05 -0700
Subject: [PATCH 340/895] tracing/fastboot: add better resolution to initcall
 debug/tracing

Change the time resolution for initcall_debug to microseconds, from
milliseconds.  This is handy to determine which initcalls you want to work
on for faster booting.

One one of my test machines, over 90% of the initcalls are less than a
millisecond and (without this patch) these are all reported as 0 msecs.
Working on the 900 us ones is more important than the 4 us ones.

With 'quiet' on the kernel command line, this adds no significant overhead
to kernel boot time.

Signed-off-by: Tim Bird <tim.bird@am.sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 4 ++--
 init/main.c            | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 5812dba4ee24..a3d46151be19 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -215,9 +215,9 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
 
 struct boot_trace {
 	pid_t			caller;
-	char 			func[KSYM_NAME_LEN];
+	char			func[KSYM_NAME_LEN];
 	int			result;
-	unsigned long long	duration;
+	unsigned long long	duration;		/* usecs */
 	ktime_t			calltime;
 	ktime_t			rettime;
 };
diff --git a/init/main.c b/init/main.c
index e7939de80f3e..b2e7ff4a5349 100644
--- a/init/main.c
+++ b/init/main.c
@@ -721,8 +721,8 @@ int do_one_initcall(initcall_t fn)
 	if (initcall_debug) {
 		it.rettime = ktime_get();
 		delta = ktime_sub(it.rettime, it.calltime);
-		it.duration = (unsigned long long) delta.tv64 >> 20;
-		printk("initcall %pF returned %d after %Ld msecs\n", fn,
+		it.duration = (unsigned long long) delta.tv64 >> 10;
+		printk("initcall %pF returned %d after %Ld usecs\n", fn,
 			it.result, it.duration);
 		trace_boot(&it, fn);
 	}
-- 
GitLab


From bfadadfccc19e36f7d600c5ce7b3e5ba5197fbf0 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 10 Oct 2008 03:48:25 -0400
Subject: [PATCH 341/895] markers: fix synchronize marker unregister static
 inline

Use a #define for synchronize marker unregister to fix include dependencies.

Fixes the slab circular inclusion which triggers when slab.git is combined
with tracing.git, where rcupdate includes slab, which includes markers
which includes rcupdate.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/marker.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 38e32e781ed7..889196c7fbb1 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -13,7 +13,6 @@
  */
 
 #include <linux/types.h>
-#include <linux/rcupdate.h>
 
 struct module;
 struct marker;
@@ -166,9 +165,6 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
  * unregistration and the end of module exit to make sure there is no caller
  * executing a probe when it is freed.
  */
-static inline void marker_synchronize_unregister(void)
-{
-	synchronize_sched();
-}
+#define marker_synchronize_unregister() synchronize_sched()
 
 #endif
-- 
GitLab


From 8b27386a9ce9c7f0f8702cff7565a46802ad57d1 Mon Sep 17 00:00:00 2001
From: Anders Kaseorg <andersk@MIT.EDU>
Date: Thu, 9 Oct 2008 22:19:08 -0400
Subject: [PATCH 342/895] ftrace: make ftrace_test_p6nop disassembler-friendly

Commit 4c3dc21b136f8cb4b72afee16c3ba7e961656c0b in tip introduced the
5-byte NOP ftrace_test_p6nop:

   jmp . + 5
   .byte 0x00, 0x00, 0x00

This is not friendly to disassemblers because an odd number of 0x00s
ends in the middle of an instruction boundary.  This changes the 0x00s
to 1-byte NOPs (0x90).

Signed-off-by: Anders Kaseorg <andersk@mit.edu>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 222507e8157b..d073d981a730 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -132,7 +132,9 @@ int __init ftrace_dyn_arch_init(void *data)
 		".section .text, \"ax\"\n"
 		"ftrace_test_jmp:"
 		"jmp ftrace_test_p6nop\n"
-		".byte 0x00,0x00,0x00\n"  /* 2 byte jmp + 3 bytes */
+		"nop\n"
+		"nop\n"
+		"nop\n"  /* 2 byte jmp + 3 bytes */
 		"ftrace_test_p6nop:"
 		P6_NOP5
 		"jmp 1f\n"
@@ -161,7 +163,7 @@ int __init ftrace_dyn_arch_init(void *data)
 		ftrace_nop = (unsigned long *)ftrace_test_nop5;
 		break;
 	case 2:
-		pr_info("ftrace: converting mcount calls to jmp 1f\n");
+		pr_info("ftrace: converting mcount calls to jmp . + 5\n");
 		ftrace_nop = (unsigned long *)ftrace_test_jmp;
 		break;
 	}
-- 
GitLab


From f2461fc82a083dd60062e05e704c5fcc1c658ba1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Mon, 6 Oct 2008 10:33:00 -0400
Subject: [PATCH 343/895] tracepoints: tracepoint_synchronize_unregister()

Create tracepoint_synchronize_unregister() which must be called before the end
of exit() to make sure every probe callers have exited the non preemptible
section and thus are not executing the probe code anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tracepoint.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index e623a6fca5c3..199f4c207c1e 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -124,4 +124,11 @@ extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
 extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
 	struct tracepoint *begin, struct tracepoint *end);
 
+/*
+ * tracepoint_synchronize_unregister must be called between the last tracepoint
+ * probe unregistration and the end of module exit to make sure there is no
+ * caller executing a probe when it is freed.
+ */
+#define tracepoint_synchronize_unregister() synchronize_sched()
+
 #endif
-- 
GitLab


From 231375cc5cc3549bb413f94a164bdcbd5f9ce943 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 3 Oct 2008 15:01:33 -0400
Subject: [PATCH 344/895] tracepoints: synchronize unregister static inline

Turn tracepoint synchronize unregister into a static inline. There is no
reason to keep it as a macro over a static inline.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tracepoint.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 199f4c207c1e..c5bb39c7a770 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -129,6 +129,9 @@ extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
  * probe unregistration and the end of module exit to make sure there is no
  * caller executing a probe when it is freed.
  */
-#define tracepoint_synchronize_unregister() synchronize_sched()
+static inline void tracepoint_synchronize_unregister(void)
+{
+	synchronize_sched();
+}
 
 #endif
-- 
GitLab


From 80a398a55d3096ff4572b44271d095413749ebb4 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 14 Sep 2008 15:30:52 -0700
Subject: [PATCH 345/895] tracing/fastboot: fix issues and improve output of
 bootgraph.pl

David Sanders reported some issues with bootgraph.pl's display
of his sytems bootup; this commit fixes these by scaling the graph
not from 0 - end time but from the first initcall to the end time;
the minimum display size etc also now need to scale with this, as does
the axis display.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
---
 scripts/bootgraph.pl | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index c34b359c34a3..4baf49985589 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -40,6 +40,7 @@
 my %start, %end;
 my $done = 0;
 my $maxtime = 0;
+my $firsttime = 100;
 my $count = 0;
 my %pids;
 
@@ -49,6 +50,9 @@ while (<>) {
 		my $func = $2;
 		if ($done == 0) {
 			$start{$func} = $1;
+			if ($1 < $firsttime) {
+				$firsttime = $1;
+			}
 		}
 		if ($line =~ /\@ ([0-9]+)/) {
 			$pids{$func} = $1;
@@ -65,6 +69,9 @@ while (<>) {
 	if ($line =~ /Write protecting the/) {
 		$done = 1;
 	}
+	if ($line =~ /Freeing unused kernel memory/) {
+		$done = 1;
+	}
 }
 
 if ($count == 0) {
@@ -93,8 +100,8 @@ $styles[9] = "fill:rgb(255,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0
 $styles[10] = "fill:rgb(255,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
 $styles[11] = "fill:rgb(128,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
 
-my $mult = 950.0 / $maxtime;
-my $threshold = 0.0500 / $maxtime;
+my $mult = 950.0 / ($maxtime - $firsttime);
+my $threshold = ($maxtime - $firsttime) / 60.0;
 my $stylecounter = 0;
 my %rows;
 my $rowscount = 1;
@@ -103,15 +110,9 @@ while (($key,$value) = each %start) {
 
 	if ($duration >= $threshold) {
 		my $s, $s2, $e, $y;
-		$pid = $pids{$key};
-
-		if (!defined($rows{$pid})) {
-			$rows{$pid} = $rowscount;
-			$rowscount = $rowscount + 1;
-		}
 		$s = ($value - $firsttime) * $mult;
 		$s2 = $s + 6;
-		$e = $end{$key} * $mult;
+		$e = ($end{$key} - $firsttime) * $mult;
 		$w = $e - $s;
 
 		$y = $rows{$pid} * 150;
@@ -130,11 +131,13 @@ while (($key,$value) = each %start) {
 
 
 # print the time line on top
-my $time = 0.0;
+my $time = $firsttime;
+my $step = ($maxtime - $firsttime) / 15;
 while ($time < $maxtime) {
-	my $s2 = $time * $mult;
-	print "<text transform=\"translate($s2,89) rotate(90)\">$time</text>\n";
-	$time = $time + 0.1;
+	my $s2 = ($time - $firsttime) * $mult;
+	my $tm = int($time * 100) / 100.0;
+	print "<text transform=\"translate($s2,89) rotate(90)\">$tm</text>\n";
+	$time = $time + $step;
 }
 
 print "</svg>\n";
-- 
GitLab


From 5c542368a3ded88bed98239fb3570dda416203ee Mon Sep 17 00:00:00 2001
From: Arnaud Patard <apatard@mandriva.com>
Date: Fri, 19 Sep 2008 20:16:25 -0700
Subject: [PATCH 346/895] tracing/fastboot: fix bootgraph.pl initcall name
 regexp

The regexp used to match the start and the end of an initcall
are matching only on [a-zA-Z\_]. This rules out initcalls with
a number in them. This patch is fixing that.

Signed-off-by: Arnaud Patard <apatard@mandriva.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/bootgraph.pl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index 4baf49985589..479fb4ea8914 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -46,7 +46,7 @@ my %pids;
 
 while (<>) {
 	my $line = $_;
-	if ($line =~ /([0-9\.]+)\] calling  ([a-zA-Z\_]+)\+/) {
+	if ($line =~ /([0-9\.]+)\] calling  ([a-zA-Z0-9\_]+)\+/) {
 		my $func = $2;
 		if ($done == 0) {
 			$start{$func} = $1;
@@ -60,7 +60,7 @@ while (<>) {
 		$count = $count + 1;
 	}
 
-	if ($line =~ /([0-9\.]+)\] initcall ([a-zA-Z\_]+)\+.*returned/) {
+	if ($line =~ /([0-9\.]+)\] initcall ([a-zA-Z0-9\_]+)\+.*returned/) {
 		if ($done == 0) {
 			$end{$2} = $1;
 			$maxtime = $1;
@@ -75,8 +75,8 @@ while (<>) {
 }
 
 if ($count == 0) {
-	print "No data found in the dmesg. Make sure CONFIG_PRINTK_TIME is enabled and\n";
-	print "that initcall_debug is passed on the kernel command line.\n\n";
+	print "No data found in the dmesg. Make sure that 'printk.time=1' and\n";
+	print "'initcall_debug' are passed on the kernel command line.\n\n";
 	print "Usage: \n";
 	print "      dmesg | perl scripts/bootgraph.pl > output.svg\n\n";
 	exit;
-- 
GitLab


From 07d1890420cce95c577736e4d67f70cbd39845fe Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 4 Oct 2008 21:35:48 +0200
Subject: [PATCH 347/895] tracing/fastboot: fix initcalls disposition in
 bootgraph.pl

When bootgraph.pl parses a file, it gives one row
for each initcall's pid. But only few of them will
be displayed => the longest.

This patch corrects it by giving only a rows for pids
which have initcalls that will be displayed.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/bootgraph.pl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index 479fb4ea8914..5e7316e5aa39 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -110,6 +110,12 @@ while (($key,$value) = each %start) {
 
 	if ($duration >= $threshold) {
 		my $s, $s2, $e, $y;
+		$pid = $pids{$key};
+
+		if (!defined($rows{$pid})) {
+			$rows{$pid} = $rowscount;
+			$rowscount = $rowscount + 1;
+		}
 		$s = ($value - $firsttime) * $mult;
 		$s2 = $s + 6;
 		$e = ($end{$key} - $firsttime) * $mult;
-- 
GitLab


From daa847356a4f2b2722d78b389ec4f172f24fecd5 Mon Sep 17 00:00:00 2001
From: Chen Gong <g.chen@freescale.com>
Date: Tue, 16 Sep 2008 14:14:12 +0800
Subject: [PATCH 348/895] [MTD] m25p80.c extended jedec support (v2)

Include missing parts of previous patch.

Signed-off-by: Chen Gong <g.chen@freescale.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/devices/m25p80.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 4d3ae085b1d2..697a3a217837 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -548,7 +548,7 @@ static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
 {
 	int			tmp;
 	u8			code = OPCODE_RDID;
-	u8			id[3];
+	u8			id[5];
 	u32			jedec;
 	u16                     ext_jedec;
 	struct flash_info	*info;
@@ -557,7 +557,7 @@ static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
 	 * string for after vendor-specific data, after the three bytes
 	 * we use here.  Supporting some chips might require using it.
 	 */
-	tmp = spi_write_then_read(spi, &code, 1, id, 3);
+	tmp = spi_write_then_read(spi, &code, 1, id, 5);
 	if (tmp < 0) {
 		DEBUG(MTD_DEBUG_LEVEL0, "%s: error %d reading JEDEC ID\n",
 			spi->dev.bus_id, tmp);
-- 
GitLab


From 08d790432906b3815a1dc91a826ca85ff2a73b6c Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Tue, 14 Oct 2008 11:00:51 +0100
Subject: [PATCH 349/895] [MTD] [MAPS] Remove unused variable after ROM API
 cleanup.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/pci.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/mtd/maps/pci.c b/drivers/mtd/maps/pci.c
index d978c2e28b1d..48f4cf5cb9d1 100644
--- a/drivers/mtd/maps/pci.c
+++ b/drivers/mtd/maps/pci.c
@@ -225,8 +225,6 @@ intel_dc21285_init(struct pci_dev *dev, struct map_pci_info *map)
 static void
 intel_dc21285_exit(struct pci_dev *dev, struct map_pci_info *map)
 {
-	u32 val;
-
 	if (map->base)
 		iounmap(map->base);
 
-- 
GitLab


From 3fc2389847a84d06263c13a3b6dfe1f1d6eea935 Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Sun, 12 Oct 2008 08:42:28 +0200
Subject: [PATCH 350/895] [MTD] [NAND] Bug on atmel_nand HW ECC : OOB info not
 correctly written

The functions that write the OOB info (on hardware ECC only) use the
HW_SYNDROME method.

This is not correct : the start position is "pos = eccsize + chunk" and
should be eccsize. So, the standard (nand_write_oob_std) function should
be used. This patch corrects this by using NAND_ECC_HW instead of
NAND_ECC_HW_SYNDROME.

This has only been tested on small pages nand flash.
(if anyone can test it on large pages that would be great).

kernel version : 2.6.27-rc2 (current git mtd-2.6)

Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/atmel_nand.c | 58 ++++-------------------------------
 1 file changed, 6 insertions(+), 52 deletions(-)

diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index 3387e0d5076b..c98c1570a40b 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -173,48 +173,6 @@ static void atmel_write_buf16(struct mtd_info *mtd, const u8 *buf, int len)
 	__raw_writesw(nand_chip->IO_ADDR_W, buf, len / 2);
 }
 
-/*
- * write oob for small pages
- */
-static int atmel_nand_write_oob_512(struct mtd_info *mtd,
-		struct nand_chip *chip, int page)
-{
-	int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad;
-	int eccsize = chip->ecc.size, length = mtd->oobsize;
-	int len, pos, status = 0;
-	const uint8_t *bufpoi = chip->oob_poi;
-
-	pos = eccsize + chunk;
-
-	chip->cmdfunc(mtd, NAND_CMD_SEQIN, pos, page);
-	len = min_t(int, length, chunk);
-	chip->write_buf(mtd, bufpoi, len);
-	bufpoi += len;
-	length -= len;
-	if (length > 0)
-		chip->write_buf(mtd, bufpoi, length);
-
-	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
-	status = chip->waitfunc(mtd, chip);
-
-	return status & NAND_STATUS_FAIL ? -EIO : 0;
-
-}
-
-/*
- * read oob for small pages
- */
-static int atmel_nand_read_oob_512(struct mtd_info *mtd,
-		struct nand_chip *chip,	int page, int sndcmd)
-{
-	if (sndcmd) {
-		chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
-		sndcmd = 0;
-	}
-	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
-	return sndcmd;
-}
-
 /*
  * Calculate HW ECC
  *
@@ -235,14 +193,14 @@ static int atmel_nand_calculate(struct mtd_info *mtd,
 	/* get the first 2 ECC bytes */
 	ecc_value = ecc_readl(host->ecc, PR);
 
-	ecc_code[eccpos[0]] = ecc_value & 0xFF;
-	ecc_code[eccpos[1]] = (ecc_value >> 8) & 0xFF;
+	ecc_code[0] = ecc_value & 0xFF;
+	ecc_code[1] = (ecc_value >> 8) & 0xFF;
 
 	/* get the last 2 ECC bytes */
 	ecc_value = ecc_readl(host->ecc, NPR) & ATMEL_ECC_NPARITY;
 
-	ecc_code[eccpos[2]] = ecc_value & 0xFF;
-	ecc_code[eccpos[3]] = (ecc_value >> 8) & 0xFF;
+	ecc_code[2] = ecc_value & 0xFF;
+	ecc_code[3] = (ecc_value >> 8) & 0xFF;
 
 	return 0;
 }
@@ -476,14 +434,12 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 			res = -EIO;
 			goto err_ecc_ioremap;
 		}
-		nand_chip->ecc.mode = NAND_ECC_HW_SYNDROME;
+		nand_chip->ecc.mode = NAND_ECC_HW;
 		nand_chip->ecc.calculate = atmel_nand_calculate;
 		nand_chip->ecc.correct = atmel_nand_correct;
 		nand_chip->ecc.hwctl = atmel_nand_hwctl;
 		nand_chip->ecc.read_page = atmel_nand_read_page;
 		nand_chip->ecc.bytes = 4;
-		nand_chip->ecc.prepad = 0;
-		nand_chip->ecc.postpad = 0;
 	}
 
 	nand_chip->chip_delay = 20;		/* 20us command delay time */
@@ -514,7 +470,7 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 		goto err_scan_ident;
 	}
 
-	if (nand_chip->ecc.mode == NAND_ECC_HW_SYNDROME) {
+	if (nand_chip->ecc.mode == NAND_ECC_HW) {
 		/* ECC is calculated for the whole page (1 step) */
 		nand_chip->ecc.size = mtd->writesize;
 
@@ -522,8 +478,6 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 		switch (mtd->writesize) {
 		case 512:
 			nand_chip->ecc.layout = &atmel_oobinfo_small;
-			nand_chip->ecc.read_oob = atmel_nand_read_oob_512;
-			nand_chip->ecc.write_oob = atmel_nand_write_oob_512;
 			ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_528);
 			break;
 		case 1024:
-- 
GitLab


From 4519d9e54dcd273975ad0adebad2a08c20428029 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 14 Oct 2008 14:15:43 +0200
Subject: [PATCH 351/895] tracing/stacktrace: improve help text

Improve the help text that is displayed for CONFIG_STACK_TRACER.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 396aea11398e..11fd03a429f0 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -141,8 +141,17 @@ config STACK_TRACER
 	select FTRACE
 	select STACKTRACE
 	help
-	  This tracer records the max stack of the kernel, and displays
-	  it in debugfs/tracing/stack_trace
+	  This special tracer records the maximum stack footprint of the
+	  kernel and displays it in debugfs/tracing/stack_trace.
+
+	  This tracer works by hooking into every function call that the
+	  kernel executes, and keeping a maximum stack depth value and
+	  stack-trace saved. Because this logic has to execute in every
+	  kernel function, all the time, this option can slow down the
+	  kernel measurably and is generally intended for kernel
+	  developers only.
+
+	  Say N if unsure.
 
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
-- 
GitLab


From 98d9c66ab07471006fd7910cb16453581c41a3e7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 14 Oct 2008 14:27:20 +0200
Subject: [PATCH 352/895] tracing/fastboot: improve help text

Improve the help text of the boot tracer.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 11fd03a429f0..1cb3e1f616af 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -127,12 +127,17 @@ config BOOT_TRACER
 	select TRACING
 	help
 	  This tracer helps developers to optimize boot times: it records
-	  the timings of the initcalls. Its aim is to be parsed by the
-	  /scripts/bootgraph.pl tool to produce pretty graphics about
-	  boot inefficiencies, giving a visual representation of the
-	  delays during initcalls. Note that tracers self tests can't
-	  be enabled if this tracer is selected since only one tracer
-	  should touch the tracing buffer at a time.
+	  the timings of the initcalls and traces key events and the identity
+	  of tasks that can cause boot delays, such as context-switches.
+
+	  Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+	  produce pretty graphics about boot inefficiencies, giving a visual
+	  representation of the delays during initcalls - but the raw
+	  /debug/tracing/trace text output is readable too.
+
+	  ( Note that tracing self tests can't be enabled if this tracer is
+	    selected, because the self-tests are an initcall as well and that
+	    would invalidate the boot trace. )
 
 config STACK_TRACER
 	bool "Trace max stack"
-- 
GitLab


From 6028aa01f759a1dae11e5d0e495b3dc9d2b0a47b Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
Date: Tue, 14 Oct 2008 21:23:26 +0900
Subject: [PATCH 353/895] [MTD] [NAND] sh_flctl: add support for Renesas SuperH
 FLCTL

Several Renesas SuperH CPU has FLCTL. The FLCTL support NAND Flash.
This driver support SH7723.

Signed-off-by: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/Kconfig     |   7 +
 drivers/mtd/nand/Makefile    |   1 +
 drivers/mtd/nand/sh_flctl.c  | 301 +++++++++++++++++++++++++++++++++++
 include/linux/mtd/sh_flctl.h | 125 +++++++++++++++
 4 files changed, 434 insertions(+)
 create mode 100644 drivers/mtd/nand/sh_flctl.c
 create mode 100644 include/linux/mtd/sh_flctl.h

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 82815dd64bf6..89b4d39386ab 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -407,4 +407,11 @@ config MTD_NAND_MXC
 	  This enables the driver for the NAND flash controller on the
 	  MXC processors.
 
+config MTD_NAND_SH_FLCTL
+	tristate "Support for NAND on Renesas SuperH FLCTL"
+	depends on MTD_NAND && SUPERH && CPU_SUBTYPE_SH7723
+	help
+	  Several Renesas SuperH CPU has FLCTL. This option enables support
+	  for NAND Flash using FLCTL. This driver support SH7723.
+
 endif # MTD_NAND
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index e0fee048c1b4..9bfeca324b32 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_MTD_NAND_PASEMI)		+= pasemi_nand.o
 obj-$(CONFIG_MTD_NAND_ORION)		+= orion_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_ELBC)		+= fsl_elbc_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_UPM)		+= fsl_upm.o
+obj-$(CONFIG_MTD_NAND_SH_FLCTL)		+= sh_flctl.o
 obj-$(CONFIG_MTD_NAND_MXC)		+= mxc_nand.o
 
 nand-objs := nand_base.o nand_bbt.o
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
new file mode 100644
index 000000000000..600a76f5580e
--- /dev/null
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -0,0 +1,301 @@
+/*
+ * SuperH FLCTL nand controller
+ *
+ * Copyright Â© 2008 Renesas Solutions Corp.
+ * Copyright Â© 2008 Atom Create Engineering Co., Ltd.
+ *
+ * Based on fsl_elbc_nand.c, Copyright Â© 2006-2007 Freescale Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/sh_flctl.h>
+
+static struct nand_ecclayout flctl_4secc_oob_16 = {
+	.eccbytes = 10,
+	.eccpos = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+	.oobfree = {
+		{.offset = 12,
+		. length = 4} },
+};
+
+static struct nand_ecclayout flctl_4secc_oob_64 = {
+	.eccbytes = 10,
+	.eccpos = {48, 49, 50, 51, 52, 53, 54, 55, 56, 57},
+	.oobfree = {
+		{.offset = 60,
+		. length = 4} },
+};
+
+static uint8_t scan_ff_pattern[] = { 0xff, 0xff };
+
+static struct nand_bbt_descr flctl_4secc_smallpage = {
+	.options = NAND_BBT_SCAN2NDPAGE,
+	.offs = 11,
+	.len = 1,
+	.pattern = scan_ff_pattern,
+};
+
+static struct nand_bbt_descr flctl_4secc_largepage = {
+	.options = 0,
+	.offs = 58,
+	.len = 2,
+	.pattern = scan_ff_pattern,
+};
+
+static void empty_fifo(struct sh_flctl *flctl)
+{
+	writel(0x000c0000, FLINTDMACR(flctl));	/* FIFO Clear */
+	writel(0x00000000, FLINTDMACR(flctl));	/* Clear Error flags */
+}
+
+static void start_translation(struct sh_flctl *flctl)
+{
+	writeb(TRSTRT, FLTRCR(flctl));
+}
+
+static void wait_completion(struct sh_flctl *flctl)
+{
+	uint32_t timeout = LOOP_TIMEOUT_MAX;
+
+	while (timeout--) {
+		if (readb(FLTRCR(flctl)) & TREND) {
+			writeb(0x0, FLTRCR(flctl));
+			return;
+		}
+		udelay(1);
+	}
+
+	printk(KERN_ERR "wait_completion(): Timeout occured \n");
+	writeb(0x0, FLTRCR(flctl));
+}
+
+static void set_addr(struct mtd_info *mtd, int column, int page_addr)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	uint32_t addr = 0;
+
+	if (column == -1) {
+		addr = page_addr;	/* ERASE1 */
+	} else if (page_addr != -1) {
+		/* SEQIN, READ0, etc.. */
+		if (flctl->page_size) {
+			addr = column & 0x0FFF;
+			addr |= (page_addr & 0xff) << 16;
+			addr |= ((page_addr >> 8) & 0xff) << 24;
+			/* big than 128MB */
+			if (flctl->rw_ADRCNT == ADRCNT2_E) {
+				uint32_t 	addr2;
+				addr2 = (page_addr >> 16) & 0xff;
+				writel(addr2, FLADR2(flctl));
+			}
+		} else {
+			addr = column;
+			addr |= (page_addr & 0xff) << 8;
+			addr |= ((page_addr >> 8) & 0xff) << 16;
+			addr |= ((page_addr >> 16) & 0xff) << 24;
+		}
+	}
+	writel(addr, FLADR(flctl));
+}
+
+static void wait_rfifo_ready(struct sh_flctl *flctl)
+{
+	uint32_t timeout = LOOP_TIMEOUT_MAX;
+
+	while (timeout--) {
+		uint32_t val;
+		/* check FIFO */
+		val = readl(FLDTCNTR(flctl)) >> 16;
+		if (val & 0xFF)
+			return;
+		udelay(1);
+	}
+	printk(KERN_ERR "wait_rfifo_ready(): Timeout occured \n");
+}
+
+static void wait_wfifo_ready(struct sh_flctl *flctl)
+{
+	uint32_t len, timeout = LOOP_TIMEOUT_MAX;
+
+	while (timeout--) {
+		/* check FIFO */
+		len = (readl(FLDTCNTR(flctl)) >> 16) & 0xFF;
+		if (len >= 4)
+			return;
+		udelay(1);
+	}
+	printk(KERN_ERR "wait_wfifo_ready(): Timeout occured \n");
+}
+
+static int wait_recfifo_ready(struct sh_flctl *flctl)
+{
+	uint32_t timeout = LOOP_TIMEOUT_MAX;
+	int checked[4];
+	void __iomem *ecc_reg[4];
+	int i;
+	uint32_t data, size;
+
+	memset(checked, 0, sizeof(checked));
+
+	while (timeout--) {
+		size = readl(FLDTCNTR(flctl)) >> 24;
+		if (size & 0xFF)
+			return 0;	/* success */
+
+		if (readl(FL4ECCCR(flctl)) & _4ECCFA)
+			return 1;	/* can't correct */
+
+		udelay(1);
+		if (!(readl(FL4ECCCR(flctl)) & _4ECCEND))
+			continue;
+
+		/* start error correction */
+		ecc_reg[0] = FL4ECCRESULT0(flctl);
+		ecc_reg[1] = FL4ECCRESULT1(flctl);
+		ecc_reg[2] = FL4ECCRESULT2(flctl);
+		ecc_reg[3] = FL4ECCRESULT3(flctl);
+
+		for (i = 0; i < 3; i++) {
+			data = readl(ecc_reg[i]);
+			if (data != INIT_FL4ECCRESULT_VAL && !checked[i]) {
+				uint8_t org;
+				int index;
+
+				index = data >> 16;
+				org = flctl->done_buff[index];
+				flctl->done_buff[index] = org ^ (data & 0xFF);
+				checked[i] = 1;
+			}
+		}
+
+		writel(0, FL4ECCCR(flctl));
+	}
+
+	printk(KERN_ERR "wait_recfifo_ready(): Timeout occured \n");
+	return 1;	/* timeout */
+}
+
+static void wait_wecfifo_ready(struct sh_flctl *flctl)
+{
+	uint32_t timeout = LOOP_TIMEOUT_MAX;
+	uint32_t len;
+
+	while (timeout--) {
+		/* check FLECFIFO */
+		len = (readl(FLDTCNTR(flctl)) >> 24) & 0xFF;
+		if (len >= 4)
+			return;
+		udelay(1);
+	}
+	printk(KERN_ERR "wait_wecfifo_ready(): Timeout occured \n");
+}
+
+static void read_datareg(struct sh_flctl *flctl, int offset)
+{
+	unsigned long data;
+	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
+
+	wait_completion(flctl);
+
+	data = readl(FLDATAR(flctl));
+	*buf = le32_to_cpu(data);
+}
+
+static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+{
+	int i, len_4align;
+	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
+	void *fifo_addr = (void *)FLDTFIFO(flctl);
+
+	len_4align = (rlen + 3) / 4;
+
+	for (i = 0; i < len_4align; i++) {
+		wait_rfifo_ready(flctl);
+		buf[i] = readl(fifo_addr);
+		buf[i] = be32_to_cpu(buf[i]);
+	}
+}
+
+static int read_ecfiforeg(struct sh_flctl *flctl, uint8_t *buff)
+{
+	int i;
+	unsigned long *ecc_buf = (unsigned long *)buff;
+	void *fifo_addr = (void *)FLECFIFO(flctl);
+
+	for (i = 0; i < 4; i++) {
+		if (wait_recfifo_ready(flctl))
+			return 1;
+		ecc_buf[i] = readl(fifo_addr);
+		ecc_buf[i] = be32_to_cpu(ecc_buf[i]);
+	}
+
+	return 0;
+}
+
+static void write_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+{
+	int i, len_4align;
+	unsigned long *data = (unsigned long *)&flctl->done_buff[offset];
+	void *fifo_addr = (void *)FLDTFIFO(flctl);
+
+	len_4align = (rlen + 3) / 4;
+	for (i = 0; i < len_4align; i++) {
+		wait_wfifo_ready(flctl);
+		writel(cpu_to_be32(data[i]), fifo_addr);
+	}
+}
+
+static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_val)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	uint32_t flcmncr_val = readl(FLCMNCR(flctl));
+	uint32_t flcmdcr_val, addr_len_bytes = 0;
+
+	/* Set SNAND bit if page size is 2048byte */
+	if (flctl->page_size)
+		flcmncr_val |= SNAND_E;
+	else
+		flcmncr_val &= ~SNAND_E;
+
+	/* default FLCMDCR val */
+	flcmdcr_val = DOCMD1_E | DOADR_E;
+
+	/* Set for FLCMDCR */
+	switch (cmd) {
+	case NAND_CMD_ERASE1:
+		addr_len_bytes = flctl->erase_ADRCNT;
+		flcmdcr_val |= DOCMD2_E;
+		break;
+	case NAND_CMD_READ0:
+	case NAND_CMD_READOOB:
+		addr_len_bytes = flctl->rw_ADRCNT;
+		flcmdcr_val |= CDSRC_E;
+		break;
+	case NAND_CMD_SEQIN:
+		/* This case is that cmd is READ0 or READ1 or READ00 */
+		flcmdcr_val &= ~DOADR_E;	/* ONLY execute 1st cmd */
+		break;
+	case NAND_CMD_PAGEPROG:
+		addr_len_bytes = flctl->rw_ADRCNT;
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
new file mode 100644
index 000000000000..e77c1cea404d
--- /dev/null
+++ b/include/linux/mtd/sh_flctl.h
@@ -0,0 +1,125 @@
+/*
+ * SuperH FLCTL nand controller
+ *
+ * Copyright Â© 2008 Renesas Solutions Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef __SH_FLCTL_H__
+#define __SH_FLCTL_H__
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+
+/* FLCTL registers */
+#define FLCMNCR(f)		(f->reg + 0x0)
+#define FLCMDCR(f)		(f->reg + 0x4)
+#define FLCMCDR(f)		(f->reg + 0x8)
+#define FLADR(f)		(f->reg + 0xC)
+#define FLADR2(f)		(f->reg + 0x3C)
+#define FLDATAR(f)		(f->reg + 0x10)
+#define FLDTCNTR(f)		(f->reg + 0x14)
+#define FLINTDMACR(f)		(f->reg + 0x18)
+#define FLBSYTMR(f)		(f->reg + 0x1C)
+#define FLBSYCNT(f)		(f->reg + 0x20)
+#define FLDTFIFO(f)		(f->reg + 0x24)
+#define FLECFIFO(f)		(f->reg + 0x28)
+#define FLTRCR(f)		(f->reg + 0x2C)
+#define	FL4ECCRESULT0(f)	(f->reg + 0x80)
+#define	FL4ECCRESULT1(f)	(f->reg + 0x84)
+#define	FL4ECCRESULT2(f)	(f->reg + 0x88)
+#define	FL4ECCRESULT3(f)	(f->reg + 0x8C)
+#define	FL4ECCCR(f)		(f->reg + 0x90)
+#define	FL4ECCCNT(f)		(f->reg + 0x94)
+#define	FLERRADR(f)		(f->reg + 0x98)
+
+/* FLCMNCR control bits */
+#define ECCPOS2		(0x1 << 25)
+#define _4ECCCNTEN	(0x1 << 24)
+#define _4ECCEN		(0x1 << 23)
+#define _4ECCCORRECT	(0x1 << 22)
+#define SNAND_E		(0x1 << 18)	/* SNAND (0=512 1=2048)*/
+#define QTSEL_E		(0x1 << 17)
+#define ENDIAN		(0x1 << 16)	/* 1 = little endian */
+#define FCKSEL_E	(0x1 << 15)
+#define ECCPOS_00	(0x00 << 12)
+#define ECCPOS_01	(0x01 << 12)
+#define ECCPOS_02	(0x02 << 12)
+#define ACM_SACCES_MODE	(0x01 << 10)
+#define NANWF_E		(0x1 << 9)
+#define SE_D		(0x1 << 8)	/* Spare area disable */
+#define	CE1_ENABLE	(0x1 << 4)	/* Chip Enable 1 */
+#define	CE0_ENABLE	(0x1 << 3)	/* Chip Enable 0 */
+#define	TYPESEL_SET	(0x1 << 0)
+
+/* FLCMDCR control bits */
+#define ADRCNT2_E	(0x1 << 31)	/* 5byte address enable */
+#define ADRMD_E		(0x1 << 26)	/* Sector address access */
+#define CDSRC_E		(0x1 << 25)	/* Data buffer selection */
+#define DOSR_E		(0x1 << 24)	/* Status read check */
+#define SELRW		(0x1 << 21)	/*  0:read 1:write */
+#define DOADR_E		(0x1 << 20)	/* Address stage execute */
+#define ADRCNT_1	(0x00 << 18)	/* Address data bytes: 1byte */
+#define ADRCNT_2	(0x01 << 18)	/* Address data bytes: 2byte */
+#define ADRCNT_3	(0x02 << 18)	/* Address data bytes: 3byte */
+#define ADRCNT_4	(0x03 << 18)	/* Address data bytes: 4byte */
+#define DOCMD2_E	(0x1 << 17)	/* 2nd cmd stage execute */
+#define DOCMD1_E	(0x1 << 16)	/* 1st cmd stage execute */
+
+/* FLTRCR control bits */
+#define TRSTRT		(0x1 << 0)	/* translation start */
+#define TREND		(0x1 << 1)	/* translation end */
+
+/* FL4ECCCR control bits */
+#define	_4ECCFA		(0x1 << 2)	/* 4 symbols correct fault */
+#define	_4ECCEND	(0x1 << 1)	/* 4 symbols end */
+#define	_4ECCEXST	(0x1 << 0)	/* 4 symbols exist */
+
+#define INIT_FL4ECCRESULT_VAL	0x03FF03FF
+#define LOOP_TIMEOUT_MAX	0x00010000
+
+#define mtd_to_flctl(mtd)	container_of(mtd, struct sh_flctl, mtd)
+
+struct sh_flctl {
+	struct mtd_info		mtd;
+	struct nand_chip	chip;
+	void __iomem		*reg;
+
+	uint8_t	done_buff[2048 + 64];	/* max size 2048 + 64 */
+	int	read_bytes;
+	int	index;
+	int	seqin_column;		/* column in SEQIN cmd */
+	int	seqin_page_addr;	/* page_addr in SEQIN cmd */
+	uint32_t seqin_read_cmd;		/* read cmd in SEQIN cmd */
+	int	erase1_page_addr;	/* page_addr in ERASE1 cmd */
+	uint32_t erase_ADRCNT;		/* bits of FLCMDCR in ERASE1 cmd */
+	uint32_t rw_ADRCNT;	/* bits of FLCMDCR in READ WRITE cmd */
+
+	int	hwecc_cant_correct[4];
+
+	unsigned page_size:1;	/* NAND page size (0 = 512, 1 = 2048) */
+	unsigned hwecc:1;	/* Hardware ECC (0 = disabled, 1 = enabled) */
+};
+
+struct sh_flctl_platform_data {
+	struct mtd_partition	*parts;
+	int			nr_parts;
+	unsigned long		flcmncr_val;
+
+	unsigned has_hwecc:1;
+};
+
+#endif	/* __SH_FLCTL_H__ */
-- 
GitLab


From 8dddfe192632361b62eee9c8320bc9feff57898b Mon Sep 17 00:00:00 2001
From: Pierre Ossman <drzeus@drzeus.cx>
Date: Tue, 14 Oct 2008 20:04:46 +0200
Subject: [PATCH 354/895] mmc_block: tell block layer there is no seek penalty

Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
---
 drivers/mmc/card/queue.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 406989e992ba..7a72e75d5c67 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -132,6 +132,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 
 	blk_queue_prep_rq(mq->queue, mmc_prep_request);
 	blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN, NULL);
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
 
 #ifdef CONFIG_MMC_BLOCK_BOUNCE
 	if (host->max_hw_segs == 1) {
-- 
GitLab


From 7c2500f17d65092d93345f3996cf82ebca17e9ff Mon Sep 17 00:00:00 2001
From: Wim Van Sebroeck <wim@iguana.be>
Date: Wed, 15 Oct 2008 08:53:06 +0000
Subject: [PATCH 355/895] [WATCHDOG] ib700wdt.c - fix buffer_underflow bug

This fixes Bug 11399:
if ibwdt_set_heartbeat(int t) is called with value 30 then
the check "if ((t < 0) || (t > 30))" in ibwdt_set_heartbeat
is not going to fail because t == 30, but in the loop, the
check wd_times[i] > t is never going to be true because
none of the wd_times are greater than the value of t (i.e. 30).
So we are exiting the loop with i == -1 and therefore setting
wd_margin to -1 which is wrong.

Reported-by: Zvonimir Rakamaric <zrakamar@cs.ubc.ca>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/ib700wdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c
index 05a28106e8eb..8782ec1f5aa0 100644
--- a/drivers/watchdog/ib700wdt.c
+++ b/drivers/watchdog/ib700wdt.c
@@ -154,7 +154,7 @@ static int ibwdt_set_heartbeat(int t)
 		return -EINVAL;
 
 	for (i = 0x0F; i > -1; i--)
-		if (wd_times[i] > t)
+		if (wd_times[i] >= t)
 			break;
 	wd_margin = i;
 	return 0;
-- 
GitLab


From 9c2e7e40bf85684eebc019e915c39c4c07c734fa Mon Sep 17 00:00:00 2001
From: "ben@fluff.org.uk" <ben@fluff.org.uk>
Date: Wed, 15 Oct 2008 00:17:15 +0100
Subject: [PATCH 356/895] s3cmci: Make general protocol errors less noisy

General errors, such as timeouts during probe do not need to
be sent to the console, so move them down to be included if the
debug is enabled.

Such errors include:
s3c2440-sdi s3c2440-sdi: s3cmci_request: no medium present

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
---
 drivers/mmc/host/s3cmci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index ae16d845d746..2af630639910 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -39,9 +39,9 @@ enum dbg_channels {
 	dbg_conf  = (1 << 8),
 };
 
-static const int dbgmap_err   = dbg_err | dbg_fail;
+static const int dbgmap_err   = dbg_fail;
 static const int dbgmap_info  = dbg_info | dbg_conf;
-static const int dbgmap_debug = dbg_debug;
+static const int dbgmap_debug = dbg_err | dbg_debug;
 
 #define dbg(host, channels, args...)		  \
 	do {					  \
-- 
GitLab


From f87e6d00fbd367f2d61fd600b5f8bd6e39d63f3f Mon Sep 17 00:00:00 2001
From: "ben@fluff.org.uk" <ben@fluff.org.uk>
Date: Wed, 15 Oct 2008 00:17:16 +0100
Subject: [PATCH 357/895] s3cmci: cpufreq support

Support for cpu frequency changing.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
---
 drivers/mmc/host/s3cmci.c | 111 +++++++++++++++++++++++++++++++-------
 drivers/mmc/host/s3cmci.h |   4 ++
 2 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 2af630639910..a73ffb9d7b21 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -13,6 +13,7 @@
 #include <linux/clk.h>
 #include <linux/mmc/host.h>
 #include <linux/platform_device.h>
+#include <linux/cpufreq.h>
 #include <linux/irq.h>
 #include <linux/io.h>
 
@@ -1033,10 +1034,33 @@ static void s3cmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 		s3cmci_send_request(mmc);
 }
 
+static void s3cmci_set_clk(struct s3cmci_host *host, struct mmc_ios *ios)
+{
+	u32 mci_psc;
+
+	/* Set clock */
+	for (mci_psc = 0; mci_psc < 255; mci_psc++) {
+		host->real_rate = host->clk_rate / (host->clk_div*(mci_psc+1));
+
+		if (host->real_rate <= ios->clock)
+			break;
+	}
+
+	if (mci_psc > 255)
+		mci_psc = 255;
+
+	host->prescaler = mci_psc;
+	writel(host->prescaler, host->base + S3C2410_SDIPRE);
+
+	/* If requested clock is 0, real_rate will be 0, too */
+	if (ios->clock == 0)
+		host->real_rate = 0;
+}
+
 static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct s3cmci_host *host = mmc_priv(mmc);
-	u32 mci_psc, mci_con;
+	u32 mci_con;
 
 	/* Set the power state */
 
@@ -1074,23 +1098,7 @@ static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		break;
 	}
 
-	/* Set clock */
-	for (mci_psc = 0; mci_psc < 255; mci_psc++) {
-		host->real_rate = host->clk_rate / (host->clk_div*(mci_psc+1));
-
-		if (host->real_rate <= ios->clock)
-			break;
-	}
-
-	if (mci_psc > 255)
-		mci_psc = 255;
-
-	host->prescaler = mci_psc;
-	writel(host->prescaler, host->base + S3C2410_SDIPRE);
-
-	/* If requested clock is 0, real_rate will be 0, too */
-	if (ios->clock == 0)
-		host->real_rate = 0;
+	s3cmci_set_clk(host, ios);
 
 	/* Set CLOCK_ENABLE */
 	if (ios->clock)
@@ -1148,6 +1156,61 @@ static struct s3c24xx_mci_pdata s3cmci_def_pdata = {
 	 * checks. Any zero fields to ensure reaonable defaults are picked. */
 };
 
+#ifdef CONFIG_CPU_FREQ
+
+static int s3cmci_cpufreq_transition(struct notifier_block *nb,
+				     unsigned long val, void *data)
+{
+	struct s3cmci_host *host;
+	struct mmc_host *mmc;
+	unsigned long newclk;
+	unsigned long flags;
+
+	host = container_of(nb, struct s3cmci_host, freq_transition);
+	newclk = clk_get_rate(host->clk);
+	mmc = host->mmc;
+
+	if ((val == CPUFREQ_PRECHANGE && newclk > host->clk_rate) ||
+	    (val == CPUFREQ_POSTCHANGE && newclk < host->clk_rate)) {
+		spin_lock_irqsave(&mmc->lock, flags);
+
+		host->clk_rate = newclk;
+
+		if (mmc->ios.power_mode != MMC_POWER_OFF &&
+		    mmc->ios.clock != 0)
+			s3cmci_set_clk(host, &mmc->ios);
+
+		spin_unlock_irqrestore(&mmc->lock, flags);
+	}
+
+	return 0;
+}
+
+static inline int s3cmci_cpufreq_register(struct s3cmci_host *host)
+{
+	host->freq_transition.notifier_call = s3cmci_cpufreq_transition;
+
+	return cpufreq_register_notifier(&host->freq_transition,
+					 CPUFREQ_TRANSITION_NOTIFIER);
+}
+
+static inline void s3cmci_cpufreq_deregister(struct s3cmci_host *host)
+{
+	cpufreq_unregister_notifier(&host->freq_transition,
+				    CPUFREQ_TRANSITION_NOTIFIER);
+}
+
+#else
+static inline int s3cmci_cpufreq_register(struct s3cmci_host *host)
+{
+	return 0;
+}
+
+static inline void s3cmci_cpufreq_deregister(struct s3cmci_host *host)
+{
+}
+#endif
+
 static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440)
 {
 	struct s3cmci_host *host;
@@ -1298,10 +1361,16 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440)
 	    (host->is2440?"2440":""),
 	    host->base, host->irq, host->irq_cd, host->dma);
 
+	ret = s3cmci_cpufreq_register(host);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register cpufreq\n");
+		goto free_dmabuf;
+	}
+
 	ret = mmc_add_host(mmc);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to add mmc host.\n");
-		goto free_dmabuf;
+		goto free_cpufreq;
 	}
 
 	platform_set_drvdata(pdev, mmc);
@@ -1309,6 +1378,9 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440)
 
 	return 0;
 
+ free_cpufreq:
+	s3cmci_cpufreq_deregister(host);
+
  free_dmabuf:
 	clk_disable(host->clk);
 
@@ -1342,6 +1414,7 @@ static void s3cmci_shutdown(struct platform_device *pdev)
 	if (host->irq_cd >= 0)
 		free_irq(host->irq_cd, host);
 
+	s3cmci_cpufreq_deregister(host);
 	mmc_remove_host(mmc);
 	clk_disable(host->clk);
 }
diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h
index 37d9c60010c9..7e39109587f9 100644
--- a/drivers/mmc/host/s3cmci.h
+++ b/drivers/mmc/host/s3cmci.h
@@ -67,4 +67,8 @@ struct s3cmci_host {
 
 	unsigned int		ccnt, dcnt;
 	struct tasklet_struct	pio_tasklet;
+
+#ifdef CONFIG_CPU_FREQ
+	struct notifier_block	freq_transition;
+#endif
 };
-- 
GitLab


From 088a78af978d0c8e339071a9b2bca1f4cb368f30 Mon Sep 17 00:00:00 2001
From: Christer Weinigel <christer@weinigel.se>
Date: Wed, 15 Oct 2008 00:17:17 +0100
Subject: [PATCH 358/895] s3cmci: Support transfers which are not multiple of
 32 bits.

To be able to do SDIO the s3cmci driver has to support non-word-sized
transfers.  Change pio_words into pio_bytes and fix up all the places
where it is used.

This variant of the patch will not overrun the buffer when reading an
odd number of bytes.  When writing, this variant will still read past
the end of the buffer, but since the driver can't support non-word-
aligned transfers anyway, this should not be a problem, since a
word-aligned transfer will never cross a page boundary.

This has been tested with a CSR SDIO Bluetooth Type A device on a
Samsung S3C24A0 processor.

Signed-off-by: Christer Weinigel <christer@weinigel.se>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
---
 drivers/mmc/host/s3cmci.c | 80 ++++++++++++++++++++++++++-------------
 drivers/mmc/host/s3cmci.h |  2 +-
 2 files changed, 54 insertions(+), 28 deletions(-)

diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index a73ffb9d7b21..bb412331e3d7 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -190,7 +190,7 @@ static inline void clear_imask(struct s3cmci_host *host)
 }
 
 static inline int get_data_buffer(struct s3cmci_host *host,
-				  u32 *words, u32 **pointer)
+				  u32 *bytes, u32 **pointer)
 {
 	struct scatterlist *sg;
 
@@ -207,7 +207,7 @@ static inline int get_data_buffer(struct s3cmci_host *host,
 	}
 	sg = &host->mrq->data->sg[host->pio_sgptr];
 
-	*words = sg->length >> 2;
+	*bytes = sg->length;
 	*pointer = sg_virt(sg);
 
 	host->pio_sgptr++;
@@ -223,7 +223,7 @@ static inline u32 fifo_count(struct s3cmci_host *host)
 	u32 fifostat = readl(host->base + S3C2410_SDIFSTA);
 
 	fifostat &= S3C2410_SDIFSTA_COUNTMASK;
-	return fifostat >> 2;
+	return fifostat;
 }
 
 static inline u32 fifo_free(struct s3cmci_host *host)
@@ -231,13 +231,14 @@ static inline u32 fifo_free(struct s3cmci_host *host)
 	u32 fifostat = readl(host->base + S3C2410_SDIFSTA);
 
 	fifostat &= S3C2410_SDIFSTA_COUNTMASK;
-	return (63 - fifostat) >> 2;
+	return 63 - fifostat;
 }
 
 static void do_pio_read(struct s3cmci_host *host)
 {
 	int res;
 	u32 fifo;
+	u32 fifo_words;
 	void __iomem *from_ptr;
 
 	/* write real prescaler to host, it might be set slow to fix */
@@ -246,8 +247,8 @@ static void do_pio_read(struct s3cmci_host *host)
 	from_ptr = host->base + host->sdidata;
 
 	while ((fifo = fifo_count(host))) {
-		if (!host->pio_words) {
-			res = get_data_buffer(host, &host->pio_words,
+		if (!host->pio_bytes) {
+			res = get_data_buffer(host, &host->pio_bytes,
 					      &host->pio_ptr);
 			if (res) {
 				host->pio_active = XFER_NONE;
@@ -260,26 +261,45 @@ static void do_pio_read(struct s3cmci_host *host)
 
 			dbg(host, dbg_pio,
 			    "pio_read(): new target: [%i]@[%p]\n",
-			    host->pio_words, host->pio_ptr);
+			    host->pio_bytes, host->pio_ptr);
 		}
 
 		dbg(host, dbg_pio,
 		    "pio_read(): fifo:[%02i] buffer:[%03i] dcnt:[%08X]\n",
-		    fifo, host->pio_words,
+		    fifo, host->pio_bytes,
 		    readl(host->base + S3C2410_SDIDCNT));
 
-		if (fifo > host->pio_words)
-			fifo = host->pio_words;
+		/* If we have reached the end of the block, we can
+		 * read a word and get 1 to 3 bytes.  If we in the
+		 * middle of the block, we have to read full words,
+		 * otherwise we will write garbage, so round down to
+		 * an even multiple of 4. */
+		if (fifo >= host->pio_bytes)
+			fifo = host->pio_bytes;
+		else
+			fifo -= fifo & 3;
 
-		host->pio_words -= fifo;
+		host->pio_bytes -= fifo;
 		host->pio_count += fifo;
 
-		while (fifo--)
+		fifo_words = fifo >> 2;
+		while (fifo_words--)
 			*(host->pio_ptr++) = readl(from_ptr);
+
+		if (fifo & 3) {
+			u32 n = fifo & 3;
+			u32 data = readl(from_ptr);
+			u8 *p = (u8 *)host->pio_ptr;
+
+			while (n--) {
+				*p++ = data;
+				data >>= 8;
+			}
+		}
 	}
 
-	if (!host->pio_words) {
-		res = get_data_buffer(host, &host->pio_words, &host->pio_ptr);
+	if (!host->pio_bytes) {
+		res = get_data_buffer(host, &host->pio_bytes, &host->pio_ptr);
 		if (res) {
 			dbg(host, dbg_pio,
 			    "pio_read(): complete (no more buffers).\n");
@@ -303,8 +323,8 @@ static void do_pio_write(struct s3cmci_host *host)
 	to_ptr = host->base + host->sdidata;
 
 	while ((fifo = fifo_free(host))) {
-		if (!host->pio_words) {
-			res = get_data_buffer(host, &host->pio_words,
+		if (!host->pio_bytes) {
+			res = get_data_buffer(host, &host->pio_bytes,
 							&host->pio_ptr);
 			if (res) {
 				dbg(host, dbg_pio,
@@ -316,16 +336,23 @@ static void do_pio_write(struct s3cmci_host *host)
 
 			dbg(host, dbg_pio,
 			    "pio_write(): new source: [%i]@[%p]\n",
-			    host->pio_words, host->pio_ptr);
+			    host->pio_bytes, host->pio_ptr);
 
 		}
 
-		if (fifo > host->pio_words)
-			fifo = host->pio_words;
+		/* If we have reached the end of the block, we have to
+		 * write exactly the remaining number of bytes.  If we
+		 * in the middle of the block, we have to write full
+		 * words, so round down to an even multiple of 4. */
+		if (fifo >= host->pio_bytes)
+			fifo = host->pio_bytes;
+		else
+			fifo -= fifo & 3;
 
-		host->pio_words -= fifo;
+		host->pio_bytes -= fifo;
 		host->pio_count += fifo;
 
+		fifo = (fifo + 3) >> 2;
 		while (fifo--)
 			writel(*(host->pio_ptr++), to_ptr);
 	}
@@ -350,9 +377,9 @@ static void pio_tasklet(unsigned long data)
 		clear_imask(host);
 		if (host->pio_active != XFER_NONE) {
 			dbg(host, dbg_err, "unfinished %s "
-			    "- pio_count:[%u] pio_words:[%u]\n",
+			    "- pio_count:[%u] pio_bytes:[%u]\n",
 			    (host->pio_active == XFER_READ) ? "read" : "write",
-			    host->pio_count, host->pio_words);
+			    host->pio_count, host->pio_bytes);
 
 			if (host->mrq->data)
 				host->mrq->data->error = -EINVAL;
@@ -813,11 +840,10 @@ static int s3cmci_setup_data(struct s3cmci_host *host, struct mmc_data *data)
 		/* We cannot deal with unaligned blocks with more than
 		 * one block being transfered. */
 
-		if (data->blocks > 1)
+		if (data->blocks > 1) {
+			pr_warning("%s: can't do non-word sized block transfers (blksz %d)\n", __func__, data->blksz);
 			return -EINVAL;
-
-		/* No support yet for non-word block transfers. */
-		return -EINVAL;
+		}
 	}
 
 	while (readl(host->base + S3C2410_SDIDSTA) &
@@ -897,7 +923,7 @@ static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data)
 	BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR);
 
 	host->pio_sgptr = 0;
-	host->pio_words = 0;
+	host->pio_bytes = 0;
 	host->pio_count = 0;
 	host->pio_active = rw ? XFER_WRITE : XFER_READ;
 
diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h
index 7e39109587f9..ca1ba3d58cfd 100644
--- a/drivers/mmc/host/s3cmci.h
+++ b/drivers/mmc/host/s3cmci.h
@@ -51,7 +51,7 @@ struct s3cmci_host {
 	int			dma_complete;
 
 	u32			pio_sgptr;
-	u32			pio_words;
+	u32			pio_bytes;
 	u32			pio_count;
 	u32			*pio_ptr;
 #define XFER_NONE 0
-- 
GitLab


From 18280fff663b8ba57e349a81b999604bc1106926 Mon Sep 17 00:00:00 2001
From: "ben@fluff.org.uk" <ben@fluff.org.uk>
Date: Wed, 15 Oct 2008 00:17:18 +0100
Subject: [PATCH 359/895] s3cmci: fix continual accesses to host->pio_ptr

The s3cmci driver uses the host->pio_ptr field to
point to the current position into the buffer for data
transfer. During the transfers it does the following:

	while (fifo_words--)
		*(host->pio_ptr++) = readl(from_ptr);

This is inefficent, as host->pio_ptr is not used in any
other part of the transfer but the compiler emits code
which does the following:

	while (fifo_words--) {
		u32 *ptr = host->pio_ptr;
		*ptr = readl(from_ptr);
		ptr++;
		host->pio_ptr = ptr;
	}

This is obviously a waste of a load and store each time
around the loop, which could be up to 16 times depending
on how much needs to be transfered.

Move the ptr accesses to outside the while loop so that
we do not end up reloading/re-writing the pointer.

Note, this seems to make the code 16 bytes larger.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
---
 drivers/mmc/host/s3cmci.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index bb412331e3d7..5211d90d34ef 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -238,6 +238,7 @@ static void do_pio_read(struct s3cmci_host *host)
 {
 	int res;
 	u32 fifo;
+	u32 *ptr;
 	u32 fifo_words;
 	void __iomem *from_ptr;
 
@@ -283,8 +284,10 @@ static void do_pio_read(struct s3cmci_host *host)
 		host->pio_count += fifo;
 
 		fifo_words = fifo >> 2;
+		ptr = host->pio_ptr;
 		while (fifo_words--)
-			*(host->pio_ptr++) = readl(from_ptr);
+			*ptr++ = readl(from_ptr);
+		host->pio_ptr = ptr;
 
 		if (fifo & 3) {
 			u32 n = fifo & 3;
@@ -319,6 +322,7 @@ static void do_pio_write(struct s3cmci_host *host)
 	void __iomem *to_ptr;
 	int res;
 	u32 fifo;
+	u32 *ptr;
 
 	to_ptr = host->base + host->sdidata;
 
@@ -353,8 +357,10 @@ static void do_pio_write(struct s3cmci_host *host)
 		host->pio_count += fifo;
 
 		fifo = (fifo + 3) >> 2;
+		ptr = host->pio_ptr;
 		while (fifo--)
-			writel(*(host->pio_ptr++), to_ptr);
+			writel(*ptr++, to_ptr);
+		host->pio_ptr = ptr;
 	}
 
 	enable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF);
-- 
GitLab


From 08c55e22df26ef1ae8cbe53fbca42476f18a8fdb Mon Sep 17 00:00:00 2001
From: "ben@fluff.org.uk" <ben@fluff.org.uk>
Date: Wed, 15 Oct 2008 00:17:19 +0100
Subject: [PATCH 360/895] s3cmci: Add Ben Dooks/Simtec Electronics to header &
 copyright

Since the original authour (Thomas Kleffel) has been too busy to
merge the s3cmci driver and keep it up to date, I (mostly as part
of my role with Simtec Electronics) got the driver to a mergable
state and have been maintaining it since I think that I should
be added to the header. Also add a copyright statement for the
new work.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
---
 drivers/mmc/host/s3cmci.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 5211d90d34ef..3b2085b57769 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -3,6 +3,9 @@
  *
  *  Copyright (C) 2004-2006 maintech GmbH, Thomas Kleffel <tk@maintech.de>
  *
+ * Current driver maintained by Ben Dooks and Simtec Electronics
+ *  Copyright (C) 2008 Simtec Electronics <ben-linux@fluff.org>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -1560,7 +1563,7 @@ module_exit(s3cmci_exit);
 
 MODULE_DESCRIPTION("Samsung S3C MMC/SD Card Interface driver");
 MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Thomas Kleffel <tk@maintech.de>");
+MODULE_AUTHOR("Thomas Kleffel <tk@maintech.de>, Ben Dooks <ben-linux@fluff.org>");
 MODULE_ALIAS("platform:s3c2410-sdi");
 MODULE_ALIAS("platform:s3c2412-sdi");
 MODULE_ALIAS("platform:s3c2440-sdi");
-- 
GitLab


From 73bdf0a60e607f4b8ecc5aec597105976565a84f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 15 Oct 2008 08:35:12 -0700
Subject: [PATCH 361/895] Introduce is_vmalloc_or_module_addr() and use with
 DEBUG_VIRTUAL

Impact: crash on module insertion with CONFIG_DEBUG_VIRTUAL

We would incorrectly BUG due to:

   VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) &&
   	          !is_module_address(addr));

... because, at least on x86-64, is_module_address() doesn't do what
it should.  This patch introduces is_vmalloc_or_module_addr(), which
is what we really want anyway, and uses it instead.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 mm/vmalloc.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index bba06c41fc59..f018d7e0addb 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -168,6 +168,21 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 }
 EXPORT_SYMBOL_GPL(map_vm_area);
 
+static inline int is_vmalloc_or_module_addr(const void *x)
+{
+	/*
+	 * x86-64 and sparc64 put modules in a special place,
+	 * and fall back on vmalloc() if that fails. Others
+	 * just put it in the vmalloc space.
+	 */
+#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+	unsigned long addr = (unsigned long)x;
+	if (addr >= MODULES_VADDR && addr < MODULES_END)
+		return 1;
+#endif
+	return is_vmalloc_addr(x);
+}
+
 /*
  * Map a vmalloc()-space virtual address to the physical page.
  */
@@ -184,8 +199,7 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
 	 * architectures that do not vmalloc module space
 	 */
-	VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) &&
-			!is_module_address(addr));
+	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
 
 	if (!pgd_none(*pgd)) {
 		pud = pud_offset(pgd, addr);
-- 
GitLab


From 463baa8a0947f858d6db1c56d87eeaf1176ba7bb Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 16 Oct 2008 20:29:07 +1100
Subject: [PATCH 362/895] powerpc: fix linux-next build failure

Today's linux-next build (powerpc allyesconfig) failed like this:

In file included from arch/powerpc/include/asm/mmu-hash64.h:17,
                 from arch/powerpc/include/asm/mmu.h:8,
                 from arch/powerpc/include/asm/pgtable.h:8,
                 from arch/powerpc/mm/slb.c:20:
arch/powerpc/include/asm/page.h:76: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'memstart_addr'
arch/powerpc/include/asm/page.h:77: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'kernstart_addr'

Caused by commit 600715dcdf567c86f8b2c6173fcfb4b873e25a19 ("generic: add
phys_addr_t for holding physical addresses") from the tip-core tree.
This only fails if CONFIG_RELOCATABLE is set.

So include that instead of asm/types.h in asm/page.h for
the CONFIG_RELOCATABLE case.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: ppc-dev <linuxppc-dev@ozlabs.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/include/asm/page.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index e088545cb3f5..94fe5138b30f 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -10,9 +10,13 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#else
+#include <asm/types.h>
+#endif
 #include <asm/asm-compat.h>
 #include <asm/kdump.h>
-#include <asm/types.h>
 
 /*
  * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
-- 
GitLab


From 769415c61191bc860f60c6edc3cb7cba24fb3218 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Thu, 16 Oct 2008 16:08:56 +0200
Subject: [PATCH 363/895] fuse: fix SEEK_END incorrectness

Update file size before using it in lseek(..., SEEK_END).

Reported-by: Amnon Shiloh <u3557@miso.sublimeip.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bada6bbc317..98079aa800e8 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1448,6 +1448,9 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
 	mutex_lock(&inode->i_mutex);
 	switch (origin) {
 	case SEEK_END:
+		retval = fuse_update_attributes(inode, NULL, file, NULL);
+		if (retval)
+			return retval;
 		offset += i_size_read(inode);
 		break;
 	case SEEK_CUR:
-- 
GitLab


From 17e18ab6ff6ec44e95514c7346d2cbd0363ef640 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Thu, 16 Oct 2008 16:08:56 +0200
Subject: [PATCH 364/895] fuse: add missing fuse_request_free

The error handling code for the second call to fuse_request_alloc should
include freeing the result of the first one.

This bug was found by the Coccinelle project:

  http://www.emn.fr/x-info/coccinelle/

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6a84388cacff..54b1f0e1ef58 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -865,7 +865,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (is_bdev) {
 		fc->destroy_req = fuse_request_alloc();
 		if (!fc->destroy_req)
-			goto err_put_root;
+			goto err_free_init_req;
 	}
 
 	mutex_lock(&fuse_mutex);
@@ -895,6 +895,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
  err_unlock:
 	mutex_unlock(&fuse_mutex);
+ err_free_init_req:
 	fuse_request_free(init_req);
  err_put_root:
 	dput(root_dentry);
-- 
GitLab


From 37194d0723b9b68b4b299b2564ca99e3d0a094c3 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Thu, 16 Oct 2008 16:08:57 +0200
Subject: [PATCH 365/895] fuse: config description improvement

Make the short description of the FUSE_FS config option clearer.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 9e9d70c02a07..9c43045ebbf9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -665,7 +665,7 @@ config AUTOFS4_FS
 	  N here.
 
 config FUSE_FS
-	tristate "Filesystem in Userspace support"
+	tristate "FUSE (Filesystem in Userspace) support"
 	help
 	  With FUSE it is possible to implement a fully functional filesystem
 	  in a userspace program.
-- 
GitLab


From 29d434b39c807320fbe4bcdce0ab98a0b9fcb285 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 16 Oct 2008 16:08:57 +0200
Subject: [PATCH 366/895] fuse: add include protectors

Add include protectors to include/linux/fuse.h and fs/fuse/fuse_i.h.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/fuse_i.h     | 5 +++++
 include/linux/fuse.h | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 3a876076bdd1..35accfdd747f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -6,6 +6,9 @@
   See the file COPYING.
 */
 
+#ifndef _FS_FUSE_I_H
+#define _FS_FUSE_I_H
+
 #include <linux/fuse.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
@@ -655,3 +658,5 @@ void fuse_set_nowrite(struct inode *inode);
 void fuse_release_nowrite(struct inode *inode);
 
 u64 fuse_get_attr_version(struct fuse_conn *fc);
+
+#endif /* _FS_FUSE_I_H */
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 265635dc9908..8bc1101e9b35 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -19,6 +19,9 @@
  *  - add file flags field to fuse_read_in and fuse_write_in
  */
 
+#ifndef _LINUX_FUSE_H
+#define _LINUX_FUSE_H
+
 #include <asm/types.h>
 #include <linux/major.h>
 
@@ -409,3 +412,5 @@ struct fuse_dirent {
 #define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
 #define FUSE_DIRENT_SIZE(d) \
 	FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+
+#endif /* _LINUX_FUSE_H */
-- 
GitLab


From a7c1b990f71574e077b94ce4582e2cf11cb891fe Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 16 Oct 2008 16:08:57 +0200
Subject: [PATCH 367/895] fuse: implement nonseekable open

Let the client request nonseekable open using FOPEN_NONSEEKABLE and
call nonseekable_open() on the file if requested.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c       | 2 ++
 include/linux/fuse.h | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 98079aa800e8..34930a964b82 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -101,6 +101,8 @@ void fuse_finish_open(struct inode *inode, struct file *file,
 		file->f_op = &fuse_direct_io_file_operations;
 	if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
 		invalidate_inode_pages2(inode->i_mapping);
+	if (outarg->open_flags & FOPEN_NONSEEKABLE)
+		nonseekable_open(inode, file);
 	ff->fh = outarg->fh;
 	file->private_data = fuse_file_get(ff);
 }
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 8bc1101e9b35..350fe9767bbc 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -17,6 +17,9 @@
  *  - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in
  *  - add blksize field to fuse_attr
  *  - add file flags field to fuse_read_in and fuse_write_in
+ *
+ * 7.10
+ *  - add nonseekable open flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -29,7 +32,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 9
+#define FUSE_KERNEL_MINOR_VERSION 10
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -101,9 +104,11 @@ struct fuse_file_lock {
  *
  * FOPEN_DIRECT_IO: bypass page cache for this open file
  * FOPEN_KEEP_CACHE: don't invalidate the data cache on open
+ * FOPEN_NONSEEKABLE: the file is not seekable
  */
 #define FOPEN_DIRECT_IO		(1 << 0)
 #define FOPEN_KEEP_CACHE	(1 << 1)
+#define FOPEN_NONSEEKABLE	(1 << 2)
 
 /**
  * INIT request/reply flags
-- 
GitLab


From 25db8ad5c56700e7716fe23426b16c5e3b1674b4 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Tue, 19 Aug 2008 20:49:40 -0700
Subject: [PATCH 368/895] serial, 8250: remove NR_IRQ usage

Works on my test box with a quick test.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/serial/68328serial.c | 11 ++----
 drivers/serial/8250.c        | 67 +++++++++++++++++++++++++++++-------
 2 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/drivers/serial/68328serial.c b/drivers/serial/68328serial.c
index 381b12ac20e0..d935b2d04f93 100644
--- a/drivers/serial/68328serial.c
+++ b/drivers/serial/68328serial.c
@@ -66,7 +66,6 @@
 #endif
 
 static struct m68k_serial m68k_soft[NR_PORTS];
-struct m68k_serial *IRQ_ports[NR_IRQS];
 
 static unsigned int uart_irqs[NR_PORTS] = UART_IRQ_DEFNS;
 
@@ -375,15 +374,11 @@ clear_and_return:
  */
 irqreturn_t rs_interrupt(int irq, void *dev_id)
 {
-	struct m68k_serial * info;
+	struct m68k_serial *info = dev_id;
 	m68328_uart *uart;
 	unsigned short rx;
 	unsigned short tx;
 
-	info = IRQ_ports[irq];
-	if(!info)
-	    return IRQ_NONE;
-
 	uart = &uart_addr[info->line];
 	rx = uart->urx.w;
 
@@ -1383,8 +1378,6 @@ rs68328_init(void)
 		   info->port, info->irq);
 	    printk(" is a builtin MC68328 UART\n");
 	    
-	    IRQ_ports[info->irq] = info;	/* waste of space */
-
 #ifdef CONFIG_M68VZ328
 		if (i > 0 )
 			PJSEL &= 0xCF;  /* PSW enable second port output */
@@ -1393,7 +1386,7 @@ rs68328_init(void)
 	    if (request_irq(uart_irqs[i],
 			    rs_interrupt,
 			    IRQF_DISABLED,
-			    "M68328_UART", NULL))
+			    "M68328_UART", info))
                 panic("Unable to attach 68328 serial interrupt\n");
 	}
 	local_irq_restore(flags);
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index d3ca7d32abe0..c66bb44c4747 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -156,11 +156,15 @@ struct uart_8250_port {
 };
 
 struct irq_info {
-	spinlock_t		lock;
+	struct			hlist_node node;
+	int			irq;
+	spinlock_t		lock;	/* Protects list not the hash */
 	struct list_head	*head;
 };
 
-static struct irq_info irq_lists[NR_IRQS];
+#define NR_IRQ_HASH		32	/* Can be adjusted later */
+static struct hlist_head irq_lists[NR_IRQ_HASH];
+static DEFINE_MUTEX(hash_mutex);	/* Used to walk the hash */
 
 /*
  * Here we define the default xmit fifo size used for each type of UART.
@@ -1545,15 +1549,43 @@ static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up)
 		BUG_ON(i->head != &up->list);
 		i->head = NULL;
 	}
-
 	spin_unlock_irq(&i->lock);
+	/* List empty so throw away the hash node */
+	if (i->head == NULL) {
+		hlist_del(&i->node);
+		kfree(i);
+	}
 }
 
 static int serial_link_irq_chain(struct uart_8250_port *up)
 {
-	struct irq_info *i = irq_lists + up->port.irq;
+	struct hlist_head *h;
+	struct hlist_node *n;
+	struct irq_info *i;
 	int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0;
 
+	mutex_lock(&hash_mutex);
+
+	h = &irq_lists[up->port.irq % NR_IRQ_HASH];
+
+	hlist_for_each(n, h) {
+		i = hlist_entry(n, struct irq_info, node);
+		if (i->irq == up->port.irq)
+			break;
+	}
+
+	if (n == NULL) {
+		i = kzalloc(sizeof(struct irq_info), GFP_KERNEL);
+		if (i == NULL) {
+			mutex_unlock(&hash_mutex);
+			return -ENOMEM;
+		}
+		spin_lock_init(&i->lock);
+		i->irq = up->port.irq;
+		hlist_add_head(&i->node, h);
+	}
+	mutex_unlock(&hash_mutex);
+
 	spin_lock_irq(&i->lock);
 
 	if (i->head) {
@@ -1577,14 +1609,28 @@ static int serial_link_irq_chain(struct uart_8250_port *up)
 
 static void serial_unlink_irq_chain(struct uart_8250_port *up)
 {
-	struct irq_info *i = irq_lists + up->port.irq;
+	struct irq_info *i;
+	struct hlist_node *n;
+	struct hlist_head *h;
 
+	mutex_lock(&hash_mutex);
+
+	h = &irq_lists[up->port.irq % NR_IRQ_HASH];
+
+	hlist_for_each(n, h) {
+		i = hlist_entry(n, struct irq_info, node);
+		if (i->irq == up->port.irq)
+			break;
+	}
+
+	BUG_ON(n == NULL);
 	BUG_ON(i->head == NULL);
 
 	if (list_empty(i->head))
 		free_irq(up->port.irq, i);
 
 	serial_do_unlink(i, up);
+	mutex_unlock(&hash_mutex);
 }
 
 /* Base timer interval for polling */
@@ -2960,7 +3006,7 @@ EXPORT_SYMBOL(serial8250_unregister_port);
 
 static int __init serial8250_init(void)
 {
-	int ret, i;
+	int ret;
 
 	if (nr_uarts > UART_NR)
 		nr_uarts = UART_NR;
@@ -2969,9 +3015,6 @@ static int __init serial8250_init(void)
 		"%d ports, IRQ sharing %sabled\n", nr_uarts,
 		share_irqs ? "en" : "dis");
 
-	for (i = 0; i < NR_IRQS; i++)
-		spin_lock_init(&irq_lists[i].lock);
-
 #ifdef CONFIG_SPARC
 	ret = sunserial_register_minors(&serial8250_reg, UART_NR);
 #else
@@ -2999,15 +3042,15 @@ static int __init serial8250_init(void)
 		goto out;
 
 	platform_device_del(serial8250_isa_devs);
- put_dev:
+put_dev:
 	platform_device_put(serial8250_isa_devs);
- unreg_uart_drv:
+unreg_uart_drv:
 #ifdef CONFIG_SPARC
 	sunserial_unregister_minors(&serial8250_reg, UART_NR);
 #else
 	uart_unregister_driver(&serial8250_reg);
 #endif
- out:
+out:
 	return ret;
 }
 
-- 
GitLab


From fe648be019119722ec0ac54e3a4b2e5bf5168589 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:41 -0700
Subject: [PATCH 369/895] x86: add after_bootmem flag for 32bit

to prepare to use dyn_array support etc.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_32.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8396868e82c5..91343c2694b4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -66,6 +66,7 @@ static unsigned long __meminitdata table_end;
 static unsigned long __meminitdata table_top;
 
 static int __initdata after_init_bootmem;
+int after_bootmem;
 
 static __init void *alloc_low_page(unsigned long *phys)
 {
@@ -988,6 +989,8 @@ void __init mem_init(void)
 
 	set_highmem_pages_init();
 
+	after_bootmem = 1;
+
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-- 
GitLab


From 38395738f581ce9417402f28774a8c4650264a00 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:42 -0700
Subject: [PATCH 370/895] x86: remove irq_vectors_limits

there's no user left.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-generic/irq_vectors_limits.h | 14 --------------
 include/asm-x86/summit/irq_vectors_limits.h       | 14 --------------
 2 files changed, 28 deletions(-)
 delete mode 100644 include/asm-x86/mach-generic/irq_vectors_limits.h
 delete mode 100644 include/asm-x86/summit/irq_vectors_limits.h

diff --git a/include/asm-x86/mach-generic/irq_vectors_limits.h b/include/asm-x86/mach-generic/irq_vectors_limits.h
deleted file mode 100644
index f7870e1a220d..000000000000
--- a/include/asm-x86/mach-generic/irq_vectors_limits.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
-#define ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
-
-/*
- * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
- * even with uni-proc kernels, so use a big array.
- *
- * This value should be the same in both the generic and summit subarches.
- * Change one, change 'em both.
- */
-#define NR_IRQS	224
-#define NR_IRQ_VECTORS	1024
-
-#endif /* ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/summit/irq_vectors_limits.h b/include/asm-x86/summit/irq_vectors_limits.h
deleted file mode 100644
index 890ce3f5e09a..000000000000
--- a/include/asm-x86/summit/irq_vectors_limits.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
-
-/*
- * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
- * even with uni-proc kernels, so use a big array.
- *
- * This value should be the same in both the generic and summit subarches.
- * Change one, change 'em both.
- */
-#define NR_IRQS	224
-#define NR_IRQ_VECTORS	1024
-
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
-- 
GitLab


From 3ddfda11861d305b02ed810b522dcf48b74ca808 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:43 -0700
Subject: [PATCH 371/895] generic: add dyn_array support

Allow crazy big arrays via bootmem at init stage.
Architectures use CONFIG_HAVE_DYN_ARRAY to enable it.

usage:

| static struct irq_desc irq_desc_init __initdata = {
|        .status = IRQ_DISABLED,
|        .chip = &no_irq_chip,
|        .handle_irq = handle_bad_irq,
|        .depth = 1,
|        .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
| #ifdef CONFIG_SMP
|        .affinity = CPU_MASK_ALL
| #endif
| };
|
| static void __init init_work(void *data)
| {
|        struct dyn_array *da = data;
|        struct  irq_desc *desc;
|        int i;
|
|        desc = *da->name;
|
|        for (i = 0; i < *da->nr; i++)
|                memcpy(&desc[i], &irq_desc_init, sizeof(struct irq_desc));
| }
|
| struct irq_desc *irq_desc;
| DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);

after pre_alloc_dyn_array() after setup_arch(), the array is ready to be
used.

Via this facility we can replace irq_desc[NR_IRQS] array with dyn_array
irq_desc[nr_irqs].

v2: remove _nopanic in pre_alloc_dyn_array()

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/vmlinux.lds.h |  7 +++++++
 include/linux/init.h              | 23 +++++++++++++++++++++++
 init/main.c                       | 24 ++++++++++++++++++++++++
 3 files changed, 54 insertions(+)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7440a0dceddb..7881406c03ec 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -210,6 +210,13 @@
  * All archs are supposed to use RO_DATA() */
 #define RODATA RO_DATA(4096)
 
+#define DYN_ARRAY_INIT(align)							\
+	. = ALIGN((align));						\
+	.dyn_array.init : AT(ADDR(.dyn_array.init) - LOAD_OFFSET) {	\
+		VMLINUX_SYMBOL(__dyn_array_start) = .;			\
+		*(.dyn_array.init)					\
+		VMLINUX_SYMBOL(__dyn_array_end) = .;			\
+	}
 #define SECURITY_INIT							\
 	.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
 		VMLINUX_SYMBOL(__security_initcall_start) = .;		\
diff --git a/include/linux/init.h b/include/linux/init.h
index 93538b696e3d..cf9fa7f174af 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -246,6 +246,29 @@ struct obs_kernel_param {
 
 /* Relies on boot_command_line being set */
 void __init parse_early_param(void);
+
+struct dyn_array {
+	void **name;
+	unsigned long size;
+	unsigned int *nr;
+	unsigned long align;
+	void (*init_work)(void *);
+};
+extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[];
+
+#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
+		static struct dyn_array __dyn_array_##nameX __initdata = \
+		{	.name = (void **)&nameX,\
+			.size = sizeX,\
+			.nr   = &nrX,\
+			.align = alignX,\
+			.init_work = init_workX,\
+		}; \
+		static struct dyn_array *__dyn_array_ptr_##nameX __used \
+		__attribute__((__section__(".dyn_array.init"))) = \
+			&__dyn_array_##nameX
+
+extern void pre_alloc_dyn_array(void);
 #endif /* __ASSEMBLY__ */
 
 /**
diff --git a/init/main.c b/init/main.c
index 27f6bf6108e9..638d3a786412 100644
--- a/init/main.c
+++ b/init/main.c
@@ -536,6 +536,29 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
+void pre_alloc_dyn_array(void)
+{
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long size, phys = 0;
+	struct dyn_array **daa;
+
+	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("dyna_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx",
+			da->size, *da->nr, da->align);
+		*da->name = __alloc_bootmem(size, da->align, phys);
+		phys = virt_to_phys(*da->name);
+		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
+
+		if (da->init_work)
+			da->init_work(da);
+	}
+#endif
+}
+
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
@@ -567,6 +590,7 @@ asmlinkage void __init start_kernel(void)
 	printk(KERN_NOTICE);
 	printk(linux_banner);
 	setup_arch(&command_line);
+	pre_alloc_dyn_array();
 	mm_init_owner(&init_mm, &init_task);
 	setup_command_line(command_line);
 	unwind_setup();
-- 
GitLab


From 1f3fcd4b1adc972d5c6a34cfed98931c46575b49 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:44 -0700
Subject: [PATCH 372/895] add per_cpu_dyn_array support

allow dyn-array in per_cpu area, allocated dynamically.

usage:

|  /* in .h */
| struct kernel_stat {
|        struct cpu_usage_stat   cpustat;
|        unsigned int *irqs;
| };
|
|  /* in .c */
| DEFINE_PER_CPU(struct kernel_stat, kstat);
|
| DEFINE_PER_CPU_DYN_ARRAY_ADDR(per_cpu__kstat_irqs, per_cpu__kstat.irqs, sizeof(unsigned int), nr_irqs, sizeof(unsigned long), NULL);

after setup_percpu()/per_cpu_alloc_dyn_array(), the dyn_array in
per_cpu area is ready to use.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_percpu.c    |  7 +++-
 include/asm-generic/vmlinux.lds.h |  6 +++
 include/linux/init.h              | 27 +++++++++++--
 init/main.c                       | 63 ++++++++++++++++++++++++++++++-
 4 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0e67f72d9316..13ba7a83808d 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -140,7 +140,7 @@ static void __init setup_cpu_pda_map(void)
  */
 void __init setup_per_cpu_areas(void)
 {
-	ssize_t size = PERCPU_ENOUGH_ROOM;
+	ssize_t size, old_size;
 	char *ptr;
 	int cpu;
 
@@ -148,7 +148,8 @@ void __init setup_per_cpu_areas(void)
 	setup_cpu_pda_map();
 
 	/* Copy section for each CPU (we discard the original) */
-	size = PERCPU_ENOUGH_ROOM;
+	old_size = PERCPU_ENOUGH_ROOM;
+	size = old_size + per_cpu_dyn_array_size();
 	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
@@ -176,6 +177,8 @@ void __init setup_per_cpu_areas(void)
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 
+		per_cpu_alloc_dyn_array(cpu, ptr + old_size);
+
 	}
 
 	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7881406c03ec..c68eda9d9a90 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -216,6 +216,12 @@
 		VMLINUX_SYMBOL(__dyn_array_start) = .;			\
 		*(.dyn_array.init)					\
 		VMLINUX_SYMBOL(__dyn_array_end) = .;			\
+	}								\
+	. = ALIGN((align));						\
+	.per_cpu_dyn_array.init : AT(ADDR(.per_cpu_dyn_array.init) - LOAD_OFFSET) {	\
+		VMLINUX_SYMBOL(__per_cpu_dyn_array_start) = .;		\
+		*(.per_cpu_dyn_array.init)				\
+		VMLINUX_SYMBOL(__per_cpu_dyn_array_end) = .;		\
 	}
 #define SECURITY_INIT							\
 	.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
diff --git a/include/linux/init.h b/include/linux/init.h
index cf9fa7f174af..332806826b8e 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -255,12 +255,13 @@ struct dyn_array {
 	void (*init_work)(void *);
 };
 extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[];
+extern struct dyn_array *__per_cpu_dyn_array_start[], *__per_cpu_dyn_array_end[];
 
-#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
+#define DEFINE_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \
 		static struct dyn_array __dyn_array_##nameX __initdata = \
-		{	.name = (void **)&nameX,\
+		{	.name = (void **)&(nameX),\
 			.size = sizeX,\
-			.nr   = &nrX,\
+			.nr   = &(nrX),\
 			.align = alignX,\
 			.init_work = init_workX,\
 		}; \
@@ -268,7 +269,27 @@ extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[];
 		__attribute__((__section__(".dyn_array.init"))) = \
 			&__dyn_array_##nameX
 
+#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
+	DEFINE_DYN_ARRAY_ADDR(nameX, nameX, sizeX, nrX, alignX, init_workX)
+
+#define DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \
+		static struct dyn_array __per_cpu_dyn_array_##nameX __initdata = \
+		{	.name = (void **)&(addrX),\
+			.size = sizeX,\
+			.nr   = &(nrX),\
+			.align = alignX,\
+			.init_work = init_workX,\
+		}; \
+		static struct dyn_array *__per_cpu_dyn_array_ptr_##nameX __used \
+		__attribute__((__section__(".per_cpu_dyn_array.init"))) = \
+			&__per_cpu_dyn_array_##nameX
+
+#define DEFINE_PER_CPU_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
+	DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, nameX, nrX, alignX, init_workX)
+
 extern void pre_alloc_dyn_array(void);
+extern unsigned long per_cpu_dyn_array_size(void);
+extern void per_cpu_alloc_dyn_array(int cpu, char *ptr);
 #endif /* __ASSEMBLY__ */
 
 /**
diff --git a/init/main.c b/init/main.c
index 638d3a786412..416bca4f734f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -391,17 +391,19 @@ EXPORT_SYMBOL(__per_cpu_offset);
 
 static void __init setup_per_cpu_areas(void)
 {
-	unsigned long size, i;
+	unsigned long size, i, old_size;
 	char *ptr;
 	unsigned long nr_possible_cpus = num_possible_cpus();
 
 	/* Copy section for each CPU (we discard the original) */
-	size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
+	old_size = PERCPU_ENOUGH_ROOM;
+	size = ALIGN(old_size + per_cpu_dyn_array_size(), PAGE_SIZE);
 	ptr = alloc_bootmem_pages(size * nr_possible_cpus);
 
 	for_each_possible_cpu(i) {
 		__per_cpu_offset[i] = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+		per_cpu_alloc_dyn_array(i, ptr + old_size);
 		ptr += size;
 	}
 }
@@ -559,6 +561,63 @@ void pre_alloc_dyn_array(void)
 #endif
 }
 
+unsigned long per_cpu_dyn_array_size(void)
+{
+	unsigned long total_size = 0;
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long size;
+	struct dyn_array **daa;
+
+	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("per_cpu_dyna_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
+			da->size, *da->nr, da->align);
+		total_size += roundup(size, da->align);
+	}
+	if (total_size)
+		printk(KERN_DEBUG "per_cpu_dyna_array total_size: %#lx\n",
+			 total_size);
+#endif
+	return total_size;
+}
+
+void per_cpu_alloc_dyn_array(int cpu, char *ptr)
+{
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long size, phys;
+	struct dyn_array **daa;
+	unsigned long addr;
+	void **array;
+
+	phys = virt_to_phys(ptr);
+
+	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("per_cpu_dyna_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx",
+			da->size, *da->nr, da->align);
+
+		phys = roundup(phys, da->align);
+		addr = (unsigned long)da->name;
+		addr += per_cpu_offset(cpu);
+		array = (void **)addr;
+		*array = phys_to_virt(phys);
+		*da->name = *array; /* so init_work could use it directly */
+		printk(KERN_CONT " %p ==> [%#lx - %#lx]\n", array, phys, phys + size);
+		phys += size;
+
+		if (da->init_work) {
+			da->init_work(da);
+		}
+	}
+#endif
+}
+
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
-- 
GitLab


From 1f8ff037a871690c762d267d8a052529d3102fc9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:45 -0700
Subject: [PATCH 373/895] x86: alloc dyn_array all together

so could spare some memory with small alignment in bootmem

also tighten the alignment checking, and make print out less debug info.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_percpu.c | 16 ++++++---
 include/linux/init.h           |  2 +-
 init/main.c                    | 65 ++++++++++++++++++++++++++--------
 3 files changed, 62 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 13ba7a83808d..2b7dab699e83 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -140,26 +140,31 @@ static void __init setup_cpu_pda_map(void)
  */
 void __init setup_per_cpu_areas(void)
 {
-	ssize_t size, old_size;
+	ssize_t size, old_size, da_size;
 	char *ptr;
 	int cpu;
+	unsigned long align = 1;
 
 	/* Setup cpu_pda map */
 	setup_cpu_pda_map();
 
 	/* Copy section for each CPU (we discard the original) */
 	old_size = PERCPU_ENOUGH_ROOM;
-	size = old_size + per_cpu_dyn_array_size();
+	da_size = per_cpu_dyn_array_size(&align);
+	align = max_t(unsigned long, PAGE_SIZE, align);
+	size = roundup(old_size + da_size, align);
 	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
 	for_each_possible_cpu(cpu) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-		ptr = alloc_bootmem_pages(size);
+		ptr = __alloc_bootmem(size, align,
+				 __pa(MAX_DMA_ADDRESS));
 #else
 		int node = early_cpu_to_node(cpu);
 		if (!node_online(node) || !NODE_DATA(node)) {
-			ptr = alloc_bootmem_pages(size);
+			ptr = __alloc_bootmem(size, align,
+					 __pa(MAX_DMA_ADDRESS));
 			printk(KERN_INFO
 			       "cpu %d has no node %d or node-local memory\n",
 				cpu, node);
@@ -168,7 +173,8 @@ void __init setup_per_cpu_areas(void)
 					 cpu, __pa(ptr));
 		}
 		else {
-			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
+							__pa(MAX_DMA_ADDRESS));
 			if (ptr)
 				printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
 					 cpu, node, __pa(ptr));
diff --git a/include/linux/init.h b/include/linux/init.h
index 332806826b8e..59fbb4aaba6a 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -288,7 +288,7 @@ extern struct dyn_array *__per_cpu_dyn_array_start[], *__per_cpu_dyn_array_end[]
 	DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, nameX, nrX, alignX, init_workX)
 
 extern void pre_alloc_dyn_array(void);
-extern unsigned long per_cpu_dyn_array_size(void);
+extern unsigned long per_cpu_dyn_array_size(unsigned long *align);
 extern void per_cpu_alloc_dyn_array(int cpu, char *ptr);
 #endif /* __ASSEMBLY__ */
 
diff --git a/init/main.c b/init/main.c
index 416bca4f734f..ab97d0877acc 100644
--- a/init/main.c
+++ b/init/main.c
@@ -394,10 +394,14 @@ static void __init setup_per_cpu_areas(void)
 	unsigned long size, i, old_size;
 	char *ptr;
 	unsigned long nr_possible_cpus = num_possible_cpus();
+	unsigned long align = 1;
+	unsigned da_size;
 
 	/* Copy section for each CPU (we discard the original) */
 	old_size = PERCPU_ENOUGH_ROOM;
-	size = ALIGN(old_size + per_cpu_dyn_array_size(), PAGE_SIZE);
+	da_size = per_cpu_dyn_array_size(&align);
+	align = max_t(unsigned long, PAGE_SIZE, align);
+	size = ALIGN(old_size + da_size, align);
 	ptr = alloc_bootmem_pages(size * nr_possible_cpus);
 
 	for_each_possible_cpu(i) {
@@ -541,45 +545,78 @@ void __init __weak thread_info_cache_init(void)
 void pre_alloc_dyn_array(void)
 {
 #ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long size, phys = 0;
+	unsigned long total_size = 0, size, phys;
+	unsigned long max_align = 1;
 	struct dyn_array **daa;
+	char *ptr;
 
+	/* get the total size at first */
 	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("dyna_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx",
+		print_fn_descriptor_symbol("dyn_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
 			da->size, *da->nr, da->align);
-		*da->name = __alloc_bootmem(size, da->align, phys);
-		phys = virt_to_phys(*da->name);
+		total_size += roundup(size, da->align);
+		if (da->align > max_align)
+			max_align = da->align;
+	}
+	if (total_size)
+		printk(KERN_DEBUG "dyn_array total_size: %#lx\n",
+			 total_size);
+	else
+		return;
+
+	/* allocate them all together */
+	max_align = max_t(unsigned long, max_align, PAGE_SIZE);
+	ptr = __alloc_bootmem_nopanic(total_size, max_align, 0);
+	if (!ptr)
+		panic("Can not alloc dyn_alloc\n");
+
+	phys = virt_to_phys(ptr);
+	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("dyn_array %s ", da->name);
+
+		phys = roundup(phys, da->align);
+		*da->name = phys_to_virt(phys);
 		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
 
+		phys += size;
+
 		if (da->init_work)
 			da->init_work(da);
 	}
 #endif
 }
 
-unsigned long per_cpu_dyn_array_size(void)
+unsigned long per_cpu_dyn_array_size(unsigned long *align)
 {
 	unsigned long total_size = 0;
 #ifdef CONFIG_HAVE_DYN_ARRAY
 	unsigned long size;
 	struct dyn_array **daa;
+	unsigned max_align = 1;
 
 	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyna_array %s ", da->name);
+		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
 		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
 			da->size, *da->nr, da->align);
 		total_size += roundup(size, da->align);
+		if (da->align > max_align)
+			max_align = da->align;
 	}
-	if (total_size)
-		printk(KERN_DEBUG "per_cpu_dyna_array total_size: %#lx\n",
+	if (total_size) {
+		printk(KERN_DEBUG "per_cpu_dyn_array total_size: %#lx\n",
 			 total_size);
+		*align = max_align;
+	}
 #endif
 	return total_size;
 }
@@ -593,14 +630,11 @@ void per_cpu_alloc_dyn_array(int cpu, char *ptr)
 	void **array;
 
 	phys = virt_to_phys(ptr);
-
 	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyna_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx",
-			da->size, *da->nr, da->align);
+		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
 
 		phys = roundup(phys, da->align);
 		addr = (unsigned long)da->name;
@@ -608,7 +642,8 @@ void per_cpu_alloc_dyn_array(int cpu, char *ptr)
 		array = (void **)addr;
 		*array = phys_to_virt(phys);
 		*da->name = *array; /* so init_work could use it directly */
-		printk(KERN_CONT " %p ==> [%#lx - %#lx]\n", array, phys, phys + size);
+		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
+
 		phys += size;
 
 		if (da->init_work) {
-- 
GitLab


From 6da55c3e8da88e8a7cb6452160776ad6706798ad Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:46 -0700
Subject: [PATCH 374/895] x86: enable dyn_array support

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/Kconfig                     | 2 ++
 arch/x86/Kconfig                 | 1 +
 arch/x86/kernel/vmlinux_32.lds.S | 1 +
 arch/x86/kernel/vmlinux_64.lds.S | 3 +++
 4 files changed, 7 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 0267babe5eb9..c1f9febb404f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -103,3 +103,5 @@ config HAVE_CLK
 	  The <linux/clk.h> calls support software clock gating and
 	  thus are a key power management tool on many systems.
 
+config HAVE_DYN_ARRAY
+	def_bool n
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f65c2744d573..42f98009d752 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -33,6 +33,7 @@ config X86
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select HAVE_DYN_ARRAY
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index a9b8560adbc2..c36007ab3940 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -145,6 +145,7 @@ SECTIONS
 	*(.x86_cpu_dev.init)
 	__x86_cpu_dev_end = .;
   }
+  DYN_ARRAY_INIT(8)
   SECURITY_INIT
   . = ALIGN(4);
   .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 46e05447405b..30973dbac8c2 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -173,6 +173,9 @@ SECTIONS
 	*(.x86_cpu_dev.init)
   }
   __x86_cpu_dev_end = .;
+
+  DYN_ARRAY_INIT(8)
+
   SECURITY_INIT
 
   . = ALIGN(8);
-- 
GitLab


From 85c0f90978bf50596dbd23854648020f1f9b5bfd Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:47 -0700
Subject: [PATCH 375/895] irq: introduce nr_irqs

at this point nr_irqs is equal NR_IRQS

convert a few easy users from NR_IRQS to dynamic nr_irqs.

v2: according to Eric, we need to take care of arch without generic_hardirqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/m68k/kernel/ints.c   |  2 ++
 arch/s390/kernel/irq.c    |  2 ++
 arch/sparc/kernel/irq.c   |  3 +++
 include/linux/interrupt.h |  2 ++
 kernel/irq/autoprobe.c    | 10 +++++-----
 kernel/irq/chip.c         | 20 ++++++++++----------
 kernel/irq/handle.c       |  3 ++-
 kernel/irq/manage.c       | 16 ++++++++--------
 kernel/irq/proc.c         |  2 +-
 kernel/irq/resend.c       |  4 ++--
 kernel/irq/spurious.c     |  4 ++--
 11 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c
index 7e8a0d394e61..74453d15692e 100644
--- a/arch/m68k/kernel/ints.c
+++ b/arch/m68k/kernel/ints.c
@@ -46,6 +46,8 @@
 #include <asm/q40ints.h>
 #endif
 
+int nr_irqs = NR_IRQS;
+
 extern u32 auto_irqhandler_fixup[];
 extern u32 user_irqhandler_fixup[];
 extern u16 user_irqvec_fixup[];
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index e7c5bfb7c755..14eb5496c8a8 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -17,6 +17,8 @@
 #include <linux/proc_fs.h>
 #include <linux/profile.h>
 
+int nr_irqs = NR_IRQS;
+
 /*
  * show_interrupts is needed by /proc/interrupts.
  */
diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c
index 93e1d1c65290..059598b7e0f0 100644
--- a/arch/sparc/kernel/irq.c
+++ b/arch/sparc/kernel/irq.c
@@ -55,6 +55,9 @@
 #define SMP_NOP2
 #define SMP_NOP3
 #endif /* SMP */
+
+int nr_irqs = NR_IRQS;
+
 unsigned long __raw_local_irq_save(void)
 {
 	unsigned long retval;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 58ff4e74b2f3..511803853a5b 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -15,6 +15,8 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
+extern int nr_irqs;
+
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
  * linux/ioport.h to select the interrupt line behaviour.  When
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 533068cfb607..c689e9851a80 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -38,7 +38,7 @@ unsigned long probe_irq_on(void)
 	 * something may have generated an irq long ago and we want to
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
-	for (i = NR_IRQS-1; i > 0; i--) {
+	for (i = nr_irqs-1; i > 0; i--) {
 		desc = irq_desc + i;
 
 		spin_lock_irq(&desc->lock);
@@ -68,7 +68,7 @@ unsigned long probe_irq_on(void)
 	 * (we must startup again here because if a longstanding irq
 	 * happened in the previous stage, it may have masked itself)
 	 */
-	for (i = NR_IRQS-1; i > 0; i--) {
+	for (i = nr_irqs-1; i > 0; i--) {
 		desc = irq_desc + i;
 
 		spin_lock_irq(&desc->lock);
@@ -89,7 +89,7 @@ unsigned long probe_irq_on(void)
 	 * Now filter out any obviously spurious interrupts
 	 */
 	mask = 0;
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		unsigned int status;
 
 		desc = irq_desc + i;
@@ -130,7 +130,7 @@ unsigned int probe_irq_mask(unsigned long val)
 	int i;
 
 	mask = 0;
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		struct irq_desc *desc = irq_desc + i;
 		unsigned int status;
 
@@ -173,7 +173,7 @@ int probe_irq_off(unsigned long val)
 {
 	int i, irq_found = 0, nr_irqs = 0;
 
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		struct irq_desc *desc = irq_desc + i;
 		unsigned int status;
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 5203a599d211..bba66e098703 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -27,7 +27,7 @@ void dynamic_irq_init(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
 	}
@@ -60,7 +60,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
 		return;
 	}
@@ -92,7 +92,7 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
 		return -EINVAL;
 	}
@@ -121,7 +121,7 @@ int set_irq_type(unsigned int irq, unsigned int type)
 	unsigned long flags;
 	int ret = -ENXIO;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
 		return -ENODEV;
 	}
@@ -149,7 +149,7 @@ int set_irq_data(unsigned int irq, void *data)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR
 		       "Trying to install controller data for IRQ%d\n", irq);
 		return -EINVAL;
@@ -175,7 +175,7 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR
 		       "Trying to install msi data for IRQ%d\n", irq);
 		return -EINVAL;
@@ -201,7 +201,7 @@ int set_irq_chip_data(unsigned int irq, void *data)
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS || !desc->chip) {
+	if (irq >= nr_irqs || !desc->chip) {
 		printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
 		return -EINVAL;
 	}
@@ -545,7 +545,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR
 		       "Trying to install type control for IRQ%d\n", irq);
 		return;
@@ -610,7 +610,7 @@ void __init set_irq_noprobe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
 
 		return;
@@ -628,7 +628,7 @@ void __init set_irq_probe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
 
 		return;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index f4c8a03a9fbb..e9d022cf593e 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -47,6 +47,7 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc)
  *
  * Controller mappings for all interrupt sources:
  */
+int nr_irqs = NR_IRQS;
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
 		.status = IRQ_DISABLED,
@@ -265,7 +266,7 @@ void early_init_irq_lock_class(void)
 {
 	int i;
 
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
 }
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d363f32dba7d..d5a4333d8f1f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -34,7 +34,7 @@ void synchronize_irq(unsigned int irq)
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned int status;
 
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return;
 
 	do {
@@ -143,7 +143,7 @@ void disable_irq_nosync(unsigned int irq)
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
@@ -171,7 +171,7 @@ void disable_irq(unsigned int irq)
 {
 	struct irq_desc *desc = irq_desc + irq;
 
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return;
 
 	disable_irq_nosync(irq);
@@ -214,7 +214,7 @@ void enable_irq(unsigned int irq)
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
@@ -290,7 +290,7 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
 {
 	struct irqaction *action;
 
-	if (irq >= NR_IRQS || irq_desc[irq].status & IRQ_NOREQUEST)
+	if (irq >= nr_irqs || irq_desc[irq].status & IRQ_NOREQUEST)
 		return 0;
 
 	action = irq_desc[irq].action;
@@ -356,7 +356,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 	int shared = 0;
 	int ret;
 
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return -EINVAL;
 
 	if (desc->chip == &no_irq_chip)
@@ -515,7 +515,7 @@ void free_irq(unsigned int irq, void *dev_id)
 	unsigned long flags;
 
 	WARN_ON(in_interrupt());
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return;
 
 	desc = irq_desc + irq;
@@ -630,7 +630,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	 */
 	if ((irqflags & IRQF_SHARED) && !dev_id)
 		return -EINVAL;
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return -EINVAL;
 	if (irq_desc[irq].status & IRQ_NOREQUEST)
 		return -EINVAL;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index a09dd29c2fd7..e5225a65a4f9 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -240,7 +240,7 @@ void init_irq_proc(void)
 	/*
 	 * Create entries for all existing IRQs.
 	 */
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		register_irq_proc(i);
 }
 
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index a8046791ba2d..cba8aa5bc7f4 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -33,8 +33,8 @@ static void resend_irqs(unsigned long arg)
 	struct irq_desc *desc;
 	int irq;
 
-	while (!bitmap_empty(irqs_resend, NR_IRQS)) {
-		irq = find_first_bit(irqs_resend, NR_IRQS);
+	while (!bitmap_empty(irqs_resend, nr_irqs)) {
+		irq = find_first_bit(irqs_resend, nr_irqs);
 		clear_bit(irq, irqs_resend);
 		desc = irq_desc + irq;
 		local_irq_disable();
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 19fe9d6ebfe8..e26ca1e90c08 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -91,7 +91,7 @@ static int misrouted_irq(int irq)
 	int i;
 	int ok = 0;
 
-	for (i = 1; i < NR_IRQS; i++) {
+	for (i = 1; i < nr_irqs; i++) {
 		struct irq_desc *desc = irq_desc + i;
 
 		if (i == irq)	/* Already tried */
@@ -107,7 +107,7 @@ static int misrouted_irq(int irq)
 static void poll_spurious_irqs(unsigned long dummy)
 {
 	int i;
-	for (i = 1; i < NR_IRQS; i++) {
+	for (i = 1; i < nr_irqs; i++) {
 		struct irq_desc *desc = irq_desc + i;
 		unsigned int status;
 
-- 
GitLab


From 0799e432acfda879eaeef9622426bfa1434f3786 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:48 -0700
Subject: [PATCH 376/895] x86: use nr_irqs

also add first_free_entry and pin_map_size, which were NR_IRQS derived
constants.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 26 ++++++++++++++------------
 arch/x86/kernel/io_apic_64.c | 33 +++++++++++++++++----------------
 arch/x86/kernel/irq_32.c     |  8 ++++----
 arch/x86/kernel/irq_64.c     |  8 ++++----
 arch/x86/kernel/irqinit_32.c |  2 +-
 arch/x86/kernel/irqinit_64.c |  2 +-
 include/asm-x86/irq.h        |  3 +++
 7 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index e710289f673e..d382990244f0 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -70,6 +70,7 @@ int timer_through_8259 __initdata;
  */
 int sis_apic_bug = -1;
 
+int first_free_entry = NR_IRQS;
 /*
  * # of IRQ routing registers
  */
@@ -100,6 +101,8 @@ static int disable_timer_pin_1 __initdata;
 #define MAX_PLUS_SHARED_IRQS NR_IRQS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
+int pin_map_size = PIN_MAP_SIZE;
+
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -213,7 +216,6 @@ static void ioapic_mask_entry(int apic, int pin)
  */
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-	static int first_free_entry = NR_IRQS;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
 	while (entry->next)
@@ -222,7 +224,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	if (entry->pin != -1) {
 		entry->next = first_free_entry;
 		entry = irq_2_pin + entry->next;
-		if (++first_free_entry >= PIN_MAP_SIZE)
+		if (++first_free_entry >= pin_map_size)
 			panic("io_apic.c: whoops");
 	}
 	entry->apic = apic;
@@ -457,7 +459,7 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
 	int i, j;
 
 	for_each_online_cpu(i) {
-		for (j = 0; j < NR_IRQS; j++) {
+		for (j = 0; j < nr_irqs; j++) {
 			if (!irq_desc[j].action)
 				continue;
 			/* Is it a significant load ?  */
@@ -492,7 +494,7 @@ static void do_irq_balance(void)
 		if (!cpu_online(i))
 			continue;
 		package_index = CPU_TO_PACKAGEINDEX(i);
-		for (j = 0; j < NR_IRQS; j++) {
+		for (j = 0; j < nr_irqs; j++) {
 			unsigned long value_now, delta;
 			/* Is this an active IRQ or balancing disabled ? */
 			if (!irq_desc[j].action || irq_balancing_disabled(j))
@@ -587,7 +589,7 @@ tryanotherirq:
 	 */
 	move_this_load = 0;
 	selected_irq = -1;
-	for (j = 0; j < NR_IRQS; j++) {
+	for (j = 0; j < nr_irqs; j++) {
 		/* Is this an active IRQ? */
 		if (!irq_desc[j].action)
 			continue;
@@ -664,7 +666,7 @@ static int balanced_irq(void *unused)
 	long time_remaining = balanced_irq_interval;
 
 	/* push everything to CPU 0 to give us a starting point.  */
-	for (i = 0 ; i < NR_IRQS ; i++) {
+	for (i = 0 ; i < nr_irqs ; i++) {
 		irq_desc[i].pending_mask = cpumask_of_cpu(0);
 		set_pending_irq(i, cpumask_of_cpu(0));
 	}
@@ -712,8 +714,8 @@ static int __init balanced_irq_init(void)
 		physical_balance = 1;
 
 	for_each_online_cpu(i) {
-		irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
-		irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+		irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
+		irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
 		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
 			printk(KERN_ERR "balanced_irq_init: out of memory");
 			goto failed;
@@ -1441,7 +1443,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		struct irq_pin_list *entry = irq_2_pin + i;
 		if (entry->pin < 0)
 			continue;
@@ -1621,7 +1623,7 @@ static void __init enable_IO_APIC(void)
 	int i, apic;
 	unsigned long flags;
 
-	for (i = 0; i < PIN_MAP_SIZE; i++) {
+	for (i = 0; i < pin_map_size; i++) {
 		irq_2_pin[i].pin = -1;
 		irq_2_pin[i].next = 0;
 	}
@@ -2005,7 +2007,7 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for (irq = 0; irq < NR_IRQS ; irq++) {
+	for (irq = 0; irq < nr_irqs ; irq++) {
 		if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
 			/*
 			 * Hmm.. We don't have an entry for this,
@@ -2449,7 +2451,7 @@ int create_irq(void)
 
 	irq = -ENOSPC;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (NR_IRQS - 1); new >= 0; new--) {
+	for (new = (nr_irqs - 1); new >= 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
 		if (irq_vector[new] != 0)
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 02063ae042f7..448384c7c1e8 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -132,6 +132,7 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 #define MAX_PLUS_SHARED_IRQS NR_IRQS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
+int pin_map_size = PIN_MAP_SIZE;
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -224,7 +225,7 @@ static inline void io_apic_sync(unsigned int apic)
 	int pin;							\
 	struct irq_pin_list *entry = irq_2_pin + irq;			\
 									\
-	BUG_ON(irq >= NR_IRQS);						\
+	BUG_ON(irq >= nr_irqs);						\
 	for (;;) {							\
 		unsigned int reg;					\
 		pin = entry->pin;					\
@@ -301,7 +302,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 	int apic, pin;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
-	BUG_ON(irq >= NR_IRQS);
+	BUG_ON(irq >= nr_irqs);
 	for (;;) {
 		unsigned int reg;
 		apic = entry->apic;
@@ -358,19 +359,19 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
+int first_free_entry = NR_IRQS;
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-	static int first_free_entry = NR_IRQS;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
-	BUG_ON(irq >= NR_IRQS);
+	BUG_ON(irq >= nr_irqs);
 	while (entry->next)
 		entry = irq_2_pin + entry->next;
 
 	if (entry->pin != -1) {
 		entry->next = first_free_entry;
 		entry = irq_2_pin + entry->next;
-		if (++first_free_entry >= PIN_MAP_SIZE)
+		if (++first_free_entry >= pin_map_size)
 			panic("io_apic.c: ran out of irq_2_pin entries!");
 	}
 	entry->apic = apic;
@@ -634,7 +635,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 				best_guess = irq;
 		}
 	}
-	BUG_ON(best_guess >= NR_IRQS);
+	BUG_ON(best_guess >= nr_irqs);
 	return best_guess;
 }
 
@@ -766,7 +767,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			irq += nr_ioapic_registers[i++];
 		irq += pin;
 	}
-	BUG_ON(irq >= NR_IRQS);
+	BUG_ON(irq >= nr_irqs);
 	return irq;
 }
 
@@ -801,7 +802,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	int cpu;
 	struct irq_cfg *cfg;
 
-	BUG_ON((unsigned)irq >= NR_IRQS);
+	BUG_ON((unsigned)irq >= nr_irqs);
 	cfg = &irq_cfg[irq];
 
 	/* Only try and allocate irqs on cpus that are present */
@@ -875,7 +876,7 @@ static void __clear_irq_vector(int irq)
 	cpumask_t mask;
 	int cpu, vector;
 
-	BUG_ON((unsigned)irq >= NR_IRQS);
+	BUG_ON((unsigned)irq >= nr_irqs);
 	cfg = &irq_cfg[irq];
 	BUG_ON(!cfg->vector);
 
@@ -895,7 +896,7 @@ void __setup_vector_irq(int cpu)
 	int irq, vector;
 
 	/* Mark the inuse vectors */
-	for (irq = 0; irq < NR_IRQS; ++irq) {
+	for (irq = 0; irq < nr_irqs; ++irq) {
 		if (!cpu_isset(cpu, irq_cfg[irq].domain))
 			continue;
 		vector = irq_cfg[irq].vector;
@@ -1193,7 +1194,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		struct irq_pin_list *entry = irq_2_pin + i;
 		if (entry->pin < 0)
 			continue;
@@ -1366,7 +1367,7 @@ void __init enable_IO_APIC(void)
 	int i, apic;
 	unsigned long flags;
 
-	for (i = 0; i < PIN_MAP_SIZE; i++) {
+	for (i = 0; i < pin_map_size; i++) {
 		irq_2_pin[i].pin = -1;
 		irq_2_pin[i].next = 0;
 	}
@@ -1658,7 +1659,7 @@ static void ir_irq_migration(struct work_struct *work)
 {
 	int irq;
 
-	for (irq = 0; irq < NR_IRQS; irq++) {
+	for (irq = 0; irq < nr_irqs; irq++) {
 		struct irq_desc *desc = irq_desc + irq;
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
@@ -1707,7 +1708,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		struct irq_desc *desc;
 		struct irq_cfg *cfg;
 		irq = __get_cpu_var(vector_irq)[vector];
-		if (irq >= NR_IRQS)
+		if (irq >= nr_irqs)
 			continue;
 
 		desc = irq_desc + irq;
@@ -1865,7 +1866,7 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for (irq = 0; irq < NR_IRQS ; irq++) {
+	for (irq = 0; irq < nr_irqs ; irq++) {
 		if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
@@ -2279,7 +2280,7 @@ int create_irq(void)
 
 	irq = -ENOSPC;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (NR_IRQS - 1); new >= 0; new--) {
+	for (new = (nr_irqs - 1); new >= 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
 		if (irq_cfg[new].vector != 0)
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b71e02d42f4f..4c7ffb32854c 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -226,7 +226,7 @@ unsigned int do_IRQ(struct pt_regs *regs)
 	int overflow, irq = ~regs->orig_ax;
 	struct irq_desc *desc = irq_desc + irq;
 
-	if (unlikely((unsigned)irq >= NR_IRQS)) {
+	if (unlikely((unsigned)irq >= nr_irqs)) {
 		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
 					__func__, irq);
 		BUG();
@@ -271,7 +271,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i < NR_IRQS) {
+	if (i < nr_irqs) {
 		unsigned any_count = 0;
 
 		spin_lock_irqsave(&irq_desc[i].lock, flags);
@@ -303,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 skip:
 		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS) {
+	} else if (i == nr_irqs) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", nmi_count(j));
@@ -396,7 +396,7 @@ void fixup_irqs(cpumask_t map)
 	unsigned int irq;
 	static int warned;
 
-	for (irq = 0; irq < NR_IRQS; irq++) {
+	for (irq = 0; irq < nr_irqs; irq++) {
 		cpumask_t mask;
 		if (irq == 2)
 			continue;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index f065fe9071b9..e1f0839430d2 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -81,7 +81,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i < NR_IRQS) {
+	if (i < nr_irqs) {
 		unsigned any_count = 0;
 
 		spin_lock_irqsave(&irq_desc[i].lock, flags);
@@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 skip:
 		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS) {
+	} else if (i == nr_irqs) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
@@ -201,7 +201,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 	stack_overflow_check(regs);
 #endif
 
-	if (likely(irq < NR_IRQS))
+	if (likely(irq < nr_irqs))
 		generic_handle_irq(irq);
 	else {
 		if (!disable_apic)
@@ -224,7 +224,7 @@ void fixup_irqs(cpumask_t map)
 	unsigned int irq;
 	static int warned;
 
-	for (irq = 0; irq < NR_IRQS; irq++) {
+	for (irq = 0; irq < nr_irqs; irq++) {
 		cpumask_t mask;
 		int break_affinity = 0;
 		int set_affinity = 1;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 9200a1e2752d..65c1c9507707 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -100,7 +100,7 @@ void __init native_init_IRQ(void)
 	 */
 	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
 		int vector = FIRST_EXTERNAL_VECTOR + i;
-		if (i >= NR_IRQS)
+		if (i >= nr_irqs)
 			break;
 		/* SYSCALL_VECTOR was reserved in trap_init. */
 		if (!test_bit(vector, used_vectors))
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 5b5be9d43c2a..165c5d9b0d1a 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,7 +142,7 @@ void __init init_ISA_irqs(void)
 	init_bsp_APIC();
 	init_8259A(0);
 
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		irq_desc[i].status = IRQ_DISABLED;
 		irq_desc[i].action = NULL;
 		irq_desc[i].depth = 1;
diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h
index 1e5f2909c1db..2a130c44f5de 100644
--- a/include/asm-x86/irq.h
+++ b/include/asm-x86/irq.h
@@ -10,6 +10,9 @@
 #include <asm/apicdef.h>
 #include <asm/irq_vectors.h>
 
+extern int pin_map_size;
+extern int first_free_entry;
+
 static inline int irq_canonicalize(int irq)
 {
 	return ((irq == 2) ? 9 : irq);
-- 
GitLab


From 1f45f5621df82033cb4964d03530ade2f9a25e7b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:49 -0700
Subject: [PATCH 377/895] drivers/char: use nr_irqs

convert them to nr_irqs.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/char/hpet.c       | 2 +-
 drivers/char/random.c     | 4 ++--
 drivers/char/vr41xx_giu.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index f3cfb4c76125..408f5f92cb4e 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -219,7 +219,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp)
 	for (irq = find_first_bit(&v, HPET_MAX_IRQ); irq < HPET_MAX_IRQ;
 		irq = find_next_bit(&v, HPET_MAX_IRQ, 1 + irq)) {
 
-		if (irq >= NR_IRQS) {
+		if (irq >= nr_irqs) {
 			irq = HPET_MAX_IRQ;
 			break;
 		}
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 6af435b89867..31456472829e 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -648,7 +648,7 @@ EXPORT_SYMBOL_GPL(add_input_randomness);
 
 void add_interrupt_randomness(int irq)
 {
-	if (irq >= NR_IRQS || irq_timer_state[irq] == NULL)
+	if (irq >= nr_irqs || irq_timer_state[irq] == NULL)
 		return;
 
 	DEBUG_ENT("irq event %d\n", irq);
@@ -912,7 +912,7 @@ void rand_initialize_irq(int irq)
 {
 	struct timer_rand_state *state;
 
-	if (irq >= NR_IRQS || irq_timer_state[irq])
+	if (irq >= nr_irqs || irq_timer_state[irq])
 		return;
 
 	/*
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c
index ffe9b4e3072e..54c837288d19 100644
--- a/drivers/char/vr41xx_giu.c
+++ b/drivers/char/vr41xx_giu.c
@@ -641,7 +641,7 @@ static int __devinit giu_probe(struct platform_device *dev)
 	}
 
 	irq = platform_get_irq(dev, 0);
-	if (irq < 0 || irq >= NR_IRQS)
+	if (irq < 0 || irq >= nr_irqs)
 		return -EBUSY;
 
 	return cascade_irq(irq, giu_get_irq);
-- 
GitLab


From 60e4ad7a72fd7ce562cdf8dd850289e1e76bc1b1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:50 -0700
Subject: [PATCH 378/895] drivers/net: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/net/3c59x.c                   | 4 ++--
 drivers/net/hamradio/baycom_ser_fdx.c | 4 ++--
 drivers/net/hamradio/scc.c            | 6 +++---
 drivers/net/wan/sbni.c                | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 491ee16da5c1..9ba295d9dd97 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -90,7 +90,7 @@ static int vortex_debug = 1;
 #include <linux/eisa.h>
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
-#include <asm/irq.h>			/* For NR_IRQS only. */
+#include <asm/irq.h>			/* For nr_irqs only. */
 #include <asm/io.h>
 #include <asm/uaccess.h>
 
@@ -1221,7 +1221,7 @@ static int __devinit vortex_probe1(struct device *gendev,
 	if (print_info)
 		printk(", IRQ %d\n", dev->irq);
 	/* Tell them about an invalid IRQ. */
-	if (dev->irq <= 0 || dev->irq >= NR_IRQS)
+	if (dev->irq <= 0 || dev->irq >= nr_irqs)
 		printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! ***\n",
 			   dev->irq);
 
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
index 17ac6975d70d..b6a816e60c0f 100644
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -416,10 +416,10 @@ static int ser12_open(struct net_device *dev)
 	if (!dev || !bc)
 		return -ENXIO;
 	if (!dev->base_addr || dev->base_addr > 0xffff-SER12_EXTENT ||
-	    dev->irq < 2 || dev->irq > NR_IRQS) {
+	    dev->irq < 2 || dev->irq > nr_irqs) {
 		printk(KERN_INFO "baycom_ser_fdx: invalid portnumber (max %u) "
 				"or irq (2 <= irq <= %d)\n",
-				0xffff-SER12_EXTENT, NR_IRQS);
+				0xffff-SER12_EXTENT, nr_irqs);
 		return -ENXIO;
 	}
 	if (bc->baud < 300 || bc->baud > 4800) {
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 45ae9d1191d7..c17e39bc5460 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -1465,7 +1465,7 @@ static void z8530_init(void)
 	printk(KERN_INFO "Init Z8530 driver: %u channels, IRQ", Nchips*2);
 	
 	flag=" ";
-	for (k = 0; k < NR_IRQS; k++)
+	for (k = 0; k < nr_irqs; k++)
 		if (Ivec[k].used) 
 		{
 			printk("%s%d", flag, k);
@@ -1728,7 +1728,7 @@ static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 			if (hwcfg.irq == 2) hwcfg.irq = 9;
 
-			if (hwcfg.irq < 0 || hwcfg.irq >= NR_IRQS)
+			if (hwcfg.irq < 0 || hwcfg.irq >= nr_irqs)
 				return -EINVAL;
 				
 			if (!Ivec[hwcfg.irq].used && hwcfg.irq)
@@ -2148,7 +2148,7 @@ static void __exit scc_cleanup_driver(void)
 		}
 		
 	/* To unload the port must be closed so no real IRQ pending */
-	for (k=0; k < NR_IRQS ; k++)
+	for (k = 0; k < nr_irqs ; k++)
 		if (Ivec[k].used) free_irq(k, NULL);
 		
 	local_irq_enable();
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index f972fef87c98..ee51b6a5e605 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -318,7 +318,7 @@ sbni_pci_probe( struct net_device  *dev )
 				continue;
 		}
 
-		if( pci_irq_line <= 0  ||  pci_irq_line >= NR_IRQS )
+		if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs)
 			printk( KERN_WARNING "  WARNING: The PCI BIOS assigned "
 				"this PCI card to IRQ %d, which is unlikely "
 				"to work!.\n"
-- 
GitLab


From a06148c36d3163aee4d5d2cad4e87032d74eaa6d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:51 -0700
Subject: [PATCH 379/895] drivers/pci/ intr remapping: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/intr_remapping.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index bb642cc5e18c..980566e3352b 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -22,7 +22,7 @@ static DEFINE_SPINLOCK(irq_2_ir_lock);
 
 int irq_remapped(int irq)
 {
-	if (irq > NR_IRQS)
+	if (irq > nr_irqs)
 		return 0;
 
 	if (!irq_2_iommu[irq].iommu)
@@ -35,7 +35,7 @@ int get_irte(int irq, struct irte *entry)
 {
 	int index;
 
-	if (!entry || irq > NR_IRQS)
+	if (!entry || irq > nr_irqs)
 		return -1;
 
 	spin_lock(&irq_2_ir_lock);
@@ -126,7 +126,7 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 	int index;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
@@ -140,7 +140,7 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
@@ -158,7 +158,7 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
 {
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
@@ -180,7 +180,7 @@ int modify_irte(int irq, struct irte *irte_modified)
 	struct intel_iommu *iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
@@ -205,7 +205,7 @@ int flush_irte(int irq)
 	struct intel_iommu *iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
@@ -248,7 +248,7 @@ int free_irte(int irq)
 	struct intel_iommu *iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
-- 
GitLab


From 9130addad2fb2bbe1847f13c838438ff10a66d3a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:52 -0700
Subject: [PATCH 380/895] drivers/pcmcia: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pcmcia/at91_cf.c      | 2 +-
 drivers/pcmcia/vrc4171_card.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c
index a0ffb8ebfe00..9e1140f085fd 100644
--- a/drivers/pcmcia/at91_cf.c
+++ b/drivers/pcmcia/at91_cf.c
@@ -273,7 +273,7 @@ static int __init at91_cf_probe(struct platform_device *pdev)
 			goto fail0d;
 		cf->socket.pci_irq = board->irq_pin;
 	} else
-		cf->socket.pci_irq = NR_IRQS + 1;
+		cf->socket.pci_irq = nr_irqs + 1;
 
 	/* pcmcia layer only remaps "real" memory not iospace */
 	cf->socket.io_offset = (unsigned long)
diff --git a/drivers/pcmcia/vrc4171_card.c b/drivers/pcmcia/vrc4171_card.c
index eee2f1cb213c..b2c412419059 100644
--- a/drivers/pcmcia/vrc4171_card.c
+++ b/drivers/pcmcia/vrc4171_card.c
@@ -639,7 +639,7 @@ static int __devinit vrc4171_card_setup(char *options)
 		int irq;
 		options += 4;
 		irq = simple_strtoul(options, &options, 0);
-		if (irq >= 0 && irq < NR_IRQS)
+		if (irq >= 0 && irq < nr_irqs)
 			vrc4171_irq = irq;
 
 		if (*options != ',')
-- 
GitLab


From c7576b5b339f08c7346591c71a330b08f8b9943f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:53 -0700
Subject: [PATCH 381/895] drivers/rtc: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/rtc/rtc-vr41xx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index 884b635f028b..834dcc6d785f 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -360,7 +360,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
 	spin_unlock_irq(&rtc_lock);
 
 	aie_irq = platform_get_irq(pdev, 0);
-	if (aie_irq < 0 || aie_irq >= NR_IRQS) {
+	if (aie_irq < 0 || aie_irq >= nr_irqs) {
 		retval = -EBUSY;
 		goto err_device_unregister;
 	}
@@ -371,7 +371,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
 		goto err_device_unregister;
 
 	pie_irq = platform_get_irq(pdev, 1);
-	if (pie_irq < 0 || pie_irq >= NR_IRQS)
+	if (pie_irq < 0 || pie_irq >= nr_irqs)
 		goto err_free_irq;
 
 	retval = request_irq(pie_irq, rtclong1_interrupt, IRQF_DISABLED,
-- 
GitLab


From 171ac6ae94e31d0fcb5ae922efd4a77a7e48b4e5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:54 -0700
Subject: [PATCH 382/895] drivers/scsi: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/scsi/aha152x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index b5a868d85eb4..1e5478abd90e 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -337,7 +337,7 @@ CMD_INC_RESID(struct scsi_cmnd *cmd, int inc)
 #else
 #define IRQ_MIN 9
 #if defined(__PPC)
-#define IRQ_MAX (NR_IRQS-1)
+#define IRQ_MAX (nr_irqs-1)
 #else
 #define IRQ_MAX 12
 #endif
-- 
GitLab


From a62c41337356989387d15020dc0f0288aaacfa44 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:55 -0700
Subject: [PATCH 383/895] drivers/serial: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/serial/8250.c                   | 2 +-
 drivers/serial/amba-pl010.c             | 2 +-
 drivers/serial/amba-pl011.c             | 2 +-
 drivers/serial/cpm_uart/cpm_uart_core.c | 2 +-
 drivers/serial/m32r_sio.c               | 4 ++--
 drivers/serial/serial_core.c            | 2 +-
 drivers/serial/serial_lh7a40x.c         | 2 +-
 drivers/serial/sh-sci.c                 | 2 +-
 drivers/serial/ucc_uart.c               | 2 +-
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index c66bb44c4747..006617e2211d 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -2486,7 +2486,7 @@ static void serial8250_config_port(struct uart_port *port, int flags)
 static int
 serial8250_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-	if (ser->irq >= NR_IRQS || ser->irq < 0 ||
+	if (ser->irq >= nr_irqs || ser->irq < 0 ||
 	    ser->baud_base < 9600 || ser->type < PORT_UNKNOWN ||
 	    ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS ||
 	    ser->type == PORT_STARTECH)
diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c
index 90b56c2c31e2..71562689116f 100644
--- a/drivers/serial/amba-pl010.c
+++ b/drivers/serial/amba-pl010.c
@@ -512,7 +512,7 @@ static int pl010_verify_port(struct uart_port *port, struct serial_struct *ser)
 	int ret = 0;
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600)
 		ret = -EINVAL;
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 9d08f27208a1..b7180046f8db 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -572,7 +572,7 @@ static int pl010_verify_port(struct uart_port *port, struct serial_struct *ser)
 	int ret = 0;
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600)
 		ret = -EINVAL;
diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
index 25efca5a7a1f..e54574c49a38 100644
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -623,7 +623,7 @@ static int cpm_uart_verify_port(struct uart_port *port,
 
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600)
 		ret = -EINVAL;
diff --git a/drivers/serial/m32r_sio.c b/drivers/serial/m32r_sio.c
index 23d030511019..611c97a15654 100644
--- a/drivers/serial/m32r_sio.c
+++ b/drivers/serial/m32r_sio.c
@@ -922,7 +922,7 @@ static void m32r_sio_config_port(struct uart_port *port, int flags)
 static int
 m32r_sio_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-	if (ser->irq >= NR_IRQS || ser->irq < 0 ||
+	if (ser->irq >= nr_irqs || ser->irq < 0 ||
 	    ser->baud_base < 9600 || ser->type < PORT_UNKNOWN ||
 	    ser->type >= ARRAY_SIZE(uart_config))
 		return -EINVAL;
@@ -1162,7 +1162,7 @@ static int __init m32r_sio_init(void)
 
 	printk(KERN_INFO "Serial: M32R SIO driver\n");
 
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		spin_lock_init(&irq_lists[i].lock);
 
 	ret = uart_register_driver(&m32r_sio_reg);
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 6bdf3362e3b1..874786a11fe9 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -741,7 +741,7 @@ static int uart_set_info(struct uart_state *state,
 	if (port->ops->verify_port)
 		retval = port->ops->verify_port(port, &new_serial);
 
-	if ((new_serial.irq >= NR_IRQS) || (new_serial.irq < 0) ||
+	if ((new_serial.irq >= nr_irqs) || (new_serial.irq < 0) ||
 	    (new_serial.baud_base < 9600))
 		retval = -EINVAL;
 
diff --git a/drivers/serial/serial_lh7a40x.c b/drivers/serial/serial_lh7a40x.c
index cb49a5ac022f..61dc8b3daa26 100644
--- a/drivers/serial/serial_lh7a40x.c
+++ b/drivers/serial/serial_lh7a40x.c
@@ -460,7 +460,7 @@ static int lh7a40xuart_verify_port (struct uart_port* port,
 
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_LH7A40X)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600) /* *** FIXME: is this true? */
 		ret = -EINVAL;
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 3df2aaec829f..667b4b8fd074 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -1157,7 +1157,7 @@ static int sci_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
 	struct sci_port *s = &sci_ports[port->line];
 
-	if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq > NR_IRQS)
+	if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq > nr_irqs)
 		return -EINVAL;
 	if (ser->baud_base < 2400)
 		/* No paper tape reader for Mitch.. */
diff --git a/drivers/serial/ucc_uart.c b/drivers/serial/ucc_uart.c
index 5c5d18dcb6ac..503f0b99a3b8 100644
--- a/drivers/serial/ucc_uart.c
+++ b/drivers/serial/ucc_uart.c
@@ -1066,7 +1066,7 @@ static int qe_uart_verify_port(struct uart_port *port,
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM)
 		return -EINVAL;
 
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		return -EINVAL;
 
 	if (ser->baud_base < 9600)
-- 
GitLab


From da27c118eb4f87f27ae8da2635b916daedf7264f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:56 -0700
Subject: [PATCH 384/895] fs/proc: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/proc_misc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index b675a49c1823..a2173a2a5625 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -509,7 +509,7 @@ static int show_stat(struct seq_file *p, void *v)
 	struct timespec boottime;
 	unsigned int *per_irq_sum;
 
-	per_irq_sum = kzalloc(sizeof(unsigned int)*NR_IRQS, GFP_KERNEL);
+	per_irq_sum = kzalloc(sizeof(unsigned int)*nr_irqs, GFP_KERNEL);
 	if (!per_irq_sum)
 		return -ENOMEM;
 
@@ -531,7 +531,7 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-		for (j = 0; j < NR_IRQS; j++) {
+		for (j = 0; j < nr_irqs; j++) {
 			unsigned int temp = kstat_cpu(i).irqs[j];
 			sum += temp;
 			per_irq_sum[j] += temp;
@@ -577,7 +577,7 @@ static int show_stat(struct seq_file *p, void *v)
 	}
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		seq_printf(p, " %u", per_irq_sum[i]);
 
 	seq_printf(p,
@@ -631,13 +631,13 @@ static const struct file_operations proc_stat_operations = {
  */
 static void *int_seq_start(struct seq_file *f, loff_t *pos)
 {
-	return (*pos <= NR_IRQS) ? pos : NULL;
+	return (*pos <= nr_irqs) ? pos : NULL;
 }
 
 static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
 	(*pos)++;
-	if (*pos > NR_IRQS)
+	if (*pos > nr_irqs)
 		return NULL;
 	return pos;
 }
-- 
GitLab


From 5a15d7e85582fa84cbd01c6dcc5d927b43fddff4 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:57 -0700
Subject: [PATCH 385/895] drivers/xen: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/xen/events.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index c3290bc186a0..ed8235187dc0 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -139,7 +139,7 @@ static void init_evtchn_cpu_bindings(void)
 #ifdef CONFIG_SMP
 	int i;
 	/* By default all event channels notify CPU#0. */
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		irq_desc[i].affinity = cpumask_of_cpu(0);
 #endif
 
@@ -229,12 +229,12 @@ static int find_unbound_irq(void)
 	int irq;
 
 	/* Only allocate from dynirq range */
-	for (irq = 0; irq < NR_IRQS; irq++)
+	for (irq = 0; irq < nr_irqs; irq++)
 		if (irq_bindcount[irq] == 0)
 			break;
 
-	if (irq == NR_IRQS)
-		panic("No available IRQ to bind to: increase NR_IRQS!\n");
+	if (irq == nr_irqs)
+		panic("No available IRQ to bind to: increase nr_irqs!\n");
 
 	return irq;
 }
@@ -790,7 +790,7 @@ void xen_irq_resume(void)
 		mask_evtchn(evtchn);
 
 	/* No IRQ <-> event-channel mappings. */
-	for (irq = 0; irq < NR_IRQS; irq++)
+	for (irq = 0; irq < nr_irqs; irq++)
 		irq_info[irq].evtchn = 0; /* zap event-channel binding */
 
 	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
@@ -822,7 +822,7 @@ void __init xen_init_IRQ(void)
 		mask_evtchn(i);
 
 	/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		irq_bindcount[i] = 0;
 
 	irq_ctx_init(smp_processor_id());
-- 
GitLab


From eef1de76da54a2ab6c6659c9a3722fd54a0e3459 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:58 -0700
Subject: [PATCH 386/895] irqs: make irq_timer_state to use dyn_array

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/char/random.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 31456472829e..1610aa64c7cf 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -559,7 +559,13 @@ struct timer_rand_state {
 };
 
 static struct timer_rand_state input_timer_state;
+
+#ifdef CONFIG_HAVE_DYN_ARRAY
+static struct timer_rand_state **irq_timer_state;
+DEFINE_DYN_ARRAY(irq_timer_state, sizeof(struct timer_rand_state *), nr_irqs, PAGE_SIZE, NULL);
+#else
 static struct timer_rand_state *irq_timer_state[NR_IRQS];
+#endif
 
 /*
  * This function adds entropy to the entropy "pool" by using timing
-- 
GitLab


From 5aeecaf4908499b1fd006d313ccbacde6a6bac43 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:49:59 -0700
Subject: [PATCH 387/895] irq: make irq2_iommu to use dyn_array

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/intr_remapping.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 980566e3352b..6961be807684 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -1,3 +1,4 @@
+#include <linux/interrupt.h>
 #include <linux/dmar.h>
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
@@ -11,12 +12,19 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static int ir_ioapic_num;
 int intr_remapping_enabled;
 
-static struct {
+struct irq_2_iommu {
 	struct intel_iommu *iommu;
 	u16 irte_index;
 	u16 sub_handle;
 	u8  irte_mask;
-} irq_2_iommu[NR_IRQS];
+};
+
+#ifdef CONFIG_HAVE_DYNA_ARRAY
+static struct irq_2_iommu *irq_2_iommu;
+DEFINE_DYN_ARRAY(irq_2_iommu, sizeof(struct irq_2_iommu), nr_irqs, PAGE_SIZE, NULL);
+#else
+static struct irq_2_iommu irq_2_iommu[NR_IRQS];
+#endif
 
 static DEFINE_SPINLOCK(irq_2_ir_lock);
 
-- 
GitLab


From d60458b224d6b997a582a05cb8c4b9bed9e17a1d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:00 -0700
Subject: [PATCH 388/895] irq: make irq_desc to use dyn_array

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h |  4 ++++
 kernel/irq/handle.c | 31 +++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 1d73d1abb834..5f4b013624dc 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -179,7 +179,11 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
+extern struct irq_desc *irq_desc;
+#else
 extern struct irq_desc irq_desc[NR_IRQS];
+#endif
 
 /*
  * Migration helpers for obsolete names, they will go away:
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e9d022cf593e..e94eeca09ea9 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -48,6 +48,36 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc)
  * Controller mappings for all interrupt sources:
  */
 int nr_irqs = NR_IRQS;
+
+#ifdef CONFIG_HAVE_DYN_ARRAY
+static struct irq_desc irq_desc_init __initdata = {
+	.status = IRQ_DISABLED,
+	.chip = &no_irq_chip,
+	.handle_irq = handle_bad_irq,
+	.depth = 1,
+	.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+#ifdef CONFIG_SMP
+	.affinity = CPU_MASK_ALL
+#endif
+};
+
+static void __init init_work(void *data)
+{
+	struct dyn_array *da = data;
+	int i;
+	struct  irq_desc *desc;
+
+	desc = *da->name;
+
+	for (i = 0; i < *da->nr; i++)
+		memcpy(&desc[i], &irq_desc_init, sizeof(struct irq_desc));
+}
+
+struct irq_desc *irq_desc;
+DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
+
+#else
+
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
 		.status = IRQ_DISABLED,
@@ -60,6 +90,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 #endif
 	}
 };
+#endif
 
 /*
  * What should we do if we get a hw irq event on an illegal vector?
-- 
GitLab


From fa42d10dd5e1ff373061c0526f272106512301f9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 19 Aug 2008 20:50:30 -0700
Subject: [PATCH 389/895] irq: sparse irqs, export nr_irqs

fix:

  Building modules, stage 2.
  MODPOST 458 modules
  ERROR: "nr_irqs" [drivers/serial/8250.ko] undefined!

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/handle.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e94eeca09ea9..6ce3bcc2b8f7 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -48,6 +48,7 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc)
  * Controller mappings for all interrupt sources:
  */
 int nr_irqs = NR_IRQS;
+EXPORT_SYMBOL_GPL(nr_irqs);
 
 #ifdef CONFIG_HAVE_DYN_ARRAY
 static struct irq_desc irq_desc_init __initdata = {
-- 
GitLab


From d17a55ded3393ad3878010bb3a8243a15a8d8df5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:01 -0700
Subject: [PATCH 390/895] irq: make irqs in kernel stat use per_cpu_dyn_array

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel_stat.h | 4 ++++
 kernel/sched.c              | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index cf9f40a91c9c..fe1f7fe534b4 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,7 +28,11 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned int *irqs;
+#else
 	unsigned int irqs[NR_IRQS];
+#endif
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
diff --git a/kernel/sched.c b/kernel/sched.c
index 6f230596bd0c..b9d713781b5b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4048,9 +4048,12 @@ static inline void idle_balance(int cpu, struct rq *rq)
 #endif
 
 DEFINE_PER_CPU(struct kernel_stat, kstat);
-
 EXPORT_PER_CPU_SYMBOL(kstat);
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
+DEFINE_PER_CPU_DYN_ARRAY_ADDR(per_cpu__kstat_irqs, per_cpu__kstat.irqs, sizeof(unsigned int), nr_irqs, sizeof(unsigned long), NULL);
+#endif
+
 /*
  * Return p->sum_exec_runtime plus any more ns on the sched_clock
  * that have not yet been banked in case the task is currently running.
-- 
GitLab


From 301e619020dd67bde7e7e64bb9ffb7f30d26c979 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:02 -0700
Subject: [PATCH 391/895] x86: use dyn_array in io_apic_xx.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 54 ++++++++++++++++++++++++++++--------
 arch/x86/kernel/io_apic_64.c | 28 ++++++++++++++-----
 arch/x86/kernel/setup.c      |  6 ++++
 3 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index d382990244f0..7f2bcc3dad82 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -70,7 +70,7 @@ int timer_through_8259 __initdata;
  */
 int sis_apic_bug = -1;
 
-int first_free_entry = NR_IRQS;
+int first_free_entry;
 /*
  * # of IRQ routing registers
  */
@@ -98,10 +98,7 @@ static int disable_timer_pin_1 __initdata;
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
-
-int pin_map_size = PIN_MAP_SIZE;
+int pin_map_size;
 
 /*
  * This is performance-critical, we want to do it O(1)
@@ -112,7 +109,9 @@ int pin_map_size = PIN_MAP_SIZE;
 
 static struct irq_pin_list {
 	int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+} *irq_2_pin;
+
+DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, 16, NULL);
 
 struct io_apic {
 	unsigned int index;
@@ -403,9 +402,28 @@ static struct irq_cpu_info {
 
 #define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
 
-static cpumask_t balance_irq_affinity[NR_IRQS] = {
-	[0 ... NR_IRQS-1] = CPU_MASK_ALL
-};
+static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL;
+
+static cpumask_t *balance_irq_affinity;
+
+
+static void __init irq_affinity_init_work(void *data)
+{
+	struct dyn_array *da = data;
+
+	int i;
+	struct  balance_irq_affinity *affinity;
+
+	affinity = *da->name;
+
+	for (i = 0; i < *da->nr; i++)
+		memcpy(&affinity[i], &balance_irq_affinity_init,
+			 sizeof(struct balance_irq_affinity));
+
+}
+
+DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work);
+
 
 void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
 {
@@ -1170,14 +1188,28 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
+static u8 irq_vector_init_first __initdata = FIRST_DEVICE_VECTOR;
+static u8 *irq_vector;
+
+static void __init irq_vector_init_work(void *data)
+{
+	struct dyn_array *da = data;
+
+	u8 *irq_vec;
+
+	irq_vec = *da->name;
+
+	irq_vec[0] = irq_vector_init_first;
+}
+
+DEFINE_DYN_ARRAY(irq_vector, sizeof(u8), nr_irqs, PAGE_SIZE, irq_vector_init_work);
 
 static int __assign_irq_vector(int irq)
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
 	int vector, offset;
 
-	BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
+	BUG_ON((unsigned)irq >= nr_irqs);
 
 	if (irq_vector[irq] > 0)
 		return irq_vector[irq];
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 448384c7c1e8..93a3ffabfe6a 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -66,7 +66,7 @@ struct irq_cfg {
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
+static struct irq_cfg irq_cfg_legacy[] __initdata = {
 	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
 	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
 	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
@@ -85,6 +85,17 @@ static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
 	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
+static struct irq_cfg *irq_cfg;
+
+static void __init init_work(void *data)
+{
+	struct dyn_array *da = data;
+
+	memcpy(*da->name, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+}
+
+DEFINE_DYN_ARRAY(irq_cfg, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
+
 static int assign_irq_vector(int irq, cpumask_t mask);
 
 int first_system_vector = 0xfe;
@@ -129,10 +140,9 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
-int pin_map_size = PIN_MAP_SIZE;
+int pin_map_size;
+
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -141,8 +151,12 @@ int pin_map_size = PIN_MAP_SIZE;
  */
 
 static struct irq_pin_list {
-	short apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+	short apic, pin;
+	int next;
+} *irq_2_pin;
+
+DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, sizeof(struct irq_pin_list), NULL);
+
 
 struct io_apic {
 	unsigned int index;
@@ -359,7 +373,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-int first_free_entry = NR_IRQS;
+int first_free_entry;
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
 	struct irq_pin_list *entry = irq_2_pin + irq;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2255782e8d4b..558ec26b08e2 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1067,9 +1067,15 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	prefill_possible_map();
+
 #ifdef CONFIG_X86_64
+	/* need to wait for nr_cpu_ids settle down */
+	if (nr_irqs == NR_IRQS)
+		nr_irqs = 32 * nr_cpu_ids + 224;
 	init_cpu_to_node();
 #endif
+	pin_map_size = nr_irqs * 2;
+	first_free_entry = nr_irqs;
 
 	init_apic_mappings();
 	ioapic_init_mappings();
-- 
GitLab


From a84488c213a8cfc29200344a6fb6357d48c8ed85 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 19 Aug 2008 20:50:31 -0700
Subject: [PATCH 392/895] irq: sparse irqs, fix #3

fix non-APIC UP build:

 arch/x86/kernel/built-in.o: In function `setup_arch':
 : undefined reference to `pin_map_size'
 arch/x86/kernel/built-in.o: In function `setup_arch':
 : undefined reference to `first_free_entry'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 558ec26b08e2..02e3a6697977 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1074,8 +1074,10 @@ void __init setup_arch(char **cmdline_p)
 		nr_irqs = 32 * nr_cpu_ids + 224;
 	init_cpu_to_node();
 #endif
+#ifdef CONFIG_X86_IO_APIC
 	pin_map_size = nr_irqs * 2;
 	first_free_entry = nr_irqs;
+#endif
 
 	init_apic_mappings();
 	ioapic_init_mappings();
-- 
GitLab


From 71f521bbaf375b685aeea20c6b0ed8600cd6edfe Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:03 -0700
Subject: [PATCH 393/895] x86, irq: get nr_irqs from madt

Until now, NR_IRQS was derived from black magic defines that had to
be "large enough" to both accomodate NR_CPUS and MAX_NR_IO_APICs.

This resulted in a way too large irq_desc[] array on most x86 systems.
Especially with larger CPU masks, the size of irq_desc can spiral out
of control quickly.

So be smarter about it and use precise allocation instead: determine the
default maximum possible IRQ number from the ACPI MADT. Use a minimum limit
of at least 32 IRQs for broken BIOSes.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 30 ++++++++++++++++++++++++++++--
 include/asm-x86/mpspec.h    |  1 +
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index eb875cdc7367..3e9d163fd92f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -957,6 +957,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	nr_ioapics++;
 }
 
+int get_nr_irqs_via_madt(void)
+{
+	int idx;
+	int nr = 0;
+
+	for (idx = 0; idx < nr_ioapics; idx++) {
+		if (mp_ioapic_routing[idx].gsi_end > nr)
+			nr = mp_ioapic_routing[idx].gsi_end;
+	}
+
+	nr++;
+
+	/* double it for hotplug and msi and nmi */
+	nr <<= 1;
+
+	/* something wrong ? */
+	if (nr < 32)
+		nr = 32;
+
+	return nr;
+
+}
+
 static void assign_to_mp_irq(struct mp_config_intsrc *m,
 				    struct mp_config_intsrc *mp_irq)
 {
@@ -1254,9 +1277,12 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 		return count;
 	}
 
+
+	nr_irqs = get_nr_irqs_via_madt();
+
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
-				  NR_IRQ_VECTORS);
+				  nr_irqs);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX
 		       "Error parsing interrupt source overrides entry\n");
@@ -1276,7 +1302,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
-				  NR_IRQ_VECTORS);
+				  nr_irqs);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
 		/* TBD: Cleanup to allow fallback to MPS */
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index be2241a818f1..a0748021250b 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -60,6 +60,7 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 				   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
 extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+extern int get_nr_irqs_via_madt(void);
 #ifdef CONFIG_X86_IO_APIC
 extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
 				u32 gsi, int triggering, int polarity);
-- 
GitLab


From bfea1238beac9d306eeac081c67de5ca6aec4c7a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:04 -0700
Subject: [PATCH 394/895] x86: remove nr_irq_vectors

remove unused defines derived from the (now obsolete) NR_IRQS define.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/irq_vectors.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index c5d2d767a1f3..cb09802ce651 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -116,7 +116,6 @@
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
-# define NR_IRQ_VECTORS NR_IRQS
 
 #elif !defined(CONFIG_X86_VOYAGER)
 
@@ -124,23 +123,15 @@
 
 #  define NR_IRQS		224
 
-#  if (224 >= 32 * NR_CPUS)
-#   define NR_IRQ_VECTORS	NR_IRQS
-#  else
-#   define NR_IRQ_VECTORS	(32 * NR_CPUS)
-#  endif
-
 # else /* IO_APIC || PARAVIRT */
 
 #  define NR_IRQS		16
-#  define NR_IRQ_VECTORS	NR_IRQS
 
 # endif
 
 #else /* !VISWS && !VOYAGER */
 
 # define NR_IRQS		224
-# define NR_IRQ_VECTORS		NR_IRQS
 
 #endif /* VISWS */
 
-- 
GitLab


From 08678b0841267c1d00d771fe01548d86043d065e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:05 -0700
Subject: [PATCH 395/895] generic: sparse irqs: use irq_desc() together with
 dyn_array, instead of irq_desc[]

add CONFIG_HAVE_SPARSE_IRQ to for use condensed array.
Get rid of irq_desc[] array assumptions.

Preallocate 32 irq_desc, and irq_desc() will try to get more.

( No change in functionality is expected anywhere, except the odd build
  failure where we missed a code site or where a crossing commit itroduces
  new irq_desc[] usage. )

v2: according to Eric, change get_irq_desc() to irq_desc()

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/Kconfig                        |   4 +
 arch/x86/Kconfig                    |   1 +
 arch/x86/kernel/io_apic_32.c        |  46 +++++++---
 arch/x86/kernel/io_apic_64.c        |  75 +++++++++------
 arch/x86/kernel/irq_32.c            |  24 +++--
 arch/x86/kernel/irq_64.c            |  35 +++----
 arch/x86/kernel/irqinit_64.c        |  10 +-
 arch/x86/kernel/visws_quirks.c      |  30 +++---
 arch/x86/mach-voyager/voyager_smp.c |   4 +-
 drivers/gpio/gpiolib.c              |   2 +-
 drivers/mfd/asic3.c                 |   4 +-
 drivers/mfd/htc-egpio.c             |   2 +-
 drivers/parisc/dino.c               |   6 +-
 drivers/parisc/eisa.c               |   4 +-
 drivers/parisc/gsc.c                |  12 ++-
 drivers/parisc/iosapic.c            |   4 +-
 drivers/parisc/superio.c            |   4 +-
 drivers/pcmcia/hd64465_ss.c         |  12 ++-
 drivers/xen/events.c                |   8 +-
 include/linux/irq.h                 |  32 ++++---
 kernel/irq/autoprobe.c              |  10 +-
 kernel/irq/chip.c                   |  32 ++++---
 kernel/irq/handle.c                 | 138 +++++++++++++++++++++++++---
 kernel/irq/manage.c                 |  35 ++++---
 kernel/irq/migration.c              |  14 +--
 kernel/irq/proc.c                   |  36 ++++----
 kernel/irq/resend.c                 |   2 +-
 kernel/irq/spurious.c               |   5 +-
 28 files changed, 404 insertions(+), 187 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index c1f9febb404f..b36762246265 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -105,3 +105,7 @@ config HAVE_CLK
 
 config HAVE_DYN_ARRAY
 	def_bool n
+
+config HAVE_SPARSE_IRQ
+	def_bool n
+
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 42f98009d752..1004888e9b13 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_DYN_ARRAY
+	select HAVE_SPARSE_IRQ if X86_64
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 7f2bcc3dad82..c2160cfdec9b 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -345,6 +345,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 	struct irq_pin_list *entry = irq_2_pin + irq;
 	unsigned int apicid_value;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, cpumask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -365,7 +366,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 			break;
 		entry = irq_2_pin + entry->next;
 	}
-	irq_desc[irq].affinity = cpumask;
+	desc = irq_to_desc(irq);
+	desc->affinity = cpumask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -475,10 +477,12 @@ static inline void balance_irq(int cpu, int irq)
 static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
 {
 	int i, j;
+	struct irq_desc *desc;
 
 	for_each_online_cpu(i) {
 		for (j = 0; j < nr_irqs; j++) {
-			if (!irq_desc[j].action)
+			desc = irq_to_desc(j);
+			if (!desc->action)
 				continue;
 			/* Is it a significant load ?  */
 			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
@@ -505,6 +509,7 @@ static void do_irq_balance(void)
 	unsigned long tmp_cpu_irq;
 	unsigned long imbalance = 0;
 	cpumask_t allowed_mask, target_cpu_mask, tmp;
+	struct irq_desc *desc;
 
 	for_each_possible_cpu(i) {
 		int package_index;
@@ -515,7 +520,8 @@ static void do_irq_balance(void)
 		for (j = 0; j < nr_irqs; j++) {
 			unsigned long value_now, delta;
 			/* Is this an active IRQ or balancing disabled ? */
-			if (!irq_desc[j].action || irq_balancing_disabled(j))
+			desc = irq_to_desc(j);
+			if (!desc->action || irq_balancing_disabled(j))
 				continue;
 			if (package_index == i)
 				IRQ_DELTA(package_index, j) = 0;
@@ -609,7 +615,8 @@ tryanotherirq:
 	selected_irq = -1;
 	for (j = 0; j < nr_irqs; j++) {
 		/* Is this an active IRQ? */
-		if (!irq_desc[j].action)
+		desc = irq_to_desc(j);
+		if (!desc->action)
 			continue;
 		if (imbalance <= IRQ_DELTA(max_loaded, j))
 			continue;
@@ -682,10 +689,12 @@ static int balanced_irq(void *unused)
 	int i;
 	unsigned long prev_balance_time = jiffies;
 	long time_remaining = balanced_irq_interval;
+	struct irq_desc *desc;
 
 	/* push everything to CPU 0 to give us a starting point.  */
 	for (i = 0 ; i < nr_irqs ; i++) {
-		irq_desc[i].pending_mask = cpumask_of_cpu(0);
+		desc = irq_to_desc(i);
+		desc->pending_mask = cpumask_of_cpu(0);
 		set_pending_irq(i, cpumask_of_cpu(0));
 	}
 
@@ -1254,13 +1263,16 @@ static struct irq_chip ioapic_chip;
 
 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL) {
-		irq_desc[irq].status |= IRQ_LEVEL;
+		desc->status |= IRQ_LEVEL;
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					 handle_fasteoi_irq, "fasteoi");
 	} else {
-		irq_desc[irq].status &= ~IRQ_LEVEL;
+		desc->status &= ~IRQ_LEVEL;
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					 handle_edge_irq, "edge");
 	}
@@ -2027,6 +2039,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
 static inline void init_IO_APIC_traps(void)
 {
 	int irq;
+	struct irq_desc *desc;
 
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -2048,9 +2061,11 @@ static inline void init_IO_APIC_traps(void)
 			 */
 			if (irq < 16)
 				make_8259A_irq(irq);
-			else
+			else {
+				desc = irq_to_desc(irq);
 				/* Strange. Oh, well.. */
-				irq_desc[irq].chip = &no_irq_chip;
+				desc->chip = &no_irq_chip;
+			}
 		}
 	}
 }
@@ -2089,7 +2104,10 @@ static struct irq_chip lapic_chip __read_mostly = {
 
 static void lapic_register_intr(int irq, int vector)
 {
-	irq_desc[irq].status &= ~IRQ_LEVEL;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->status &= ~IRQ_LEVEL;
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
 	set_intr_gate(vector, interrupt[irq]);
@@ -2556,6 +2574,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp;
 	int vector;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2575,7 +2594,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg(irq, &msg);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2649,6 +2669,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2659,7 +2680,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	dest = cpu_mask_to_apicid(mask);
 
 	target_ht_irq(irq, dest);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif
 
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 93a3ffabfe6a..cab5a25d81b1 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -345,6 +345,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -361,9 +362,10 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	 */
 	dest = SET_APIC_LOGICAL_ID(dest);
 
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	irq_desc[irq].affinity = mask;
+	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 #endif
@@ -933,14 +935,17 @@ static struct irq_chip ir_ioapic_chip;
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
 	if (trigger)
-		irq_desc[irq].status |= IRQ_LEVEL;
+		desc->status |= IRQ_LEVEL;
 	else
-		irq_desc[irq].status &= ~IRQ_LEVEL;
+		desc->status &= ~IRQ_LEVEL;
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
-		irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+		desc->status |= IRQ_MOVE_PCNTXT;
 		if (trigger)
 			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
 						      handle_fasteoi_irq,
@@ -1596,10 +1601,10 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
 static void migrate_ioapic_irq(int irq, cpumask_t mask)
 {
 	struct irq_cfg *cfg = irq_cfg + irq;
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
-	int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
 
@@ -1616,6 +1621,8 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
+	desc = irq_to_desc(irq);
+	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
 		spin_lock_irqsave(&ioapic_lock, flags);
 		__target_IO_APIC_irq(irq, dest, cfg->vector);
@@ -1637,12 +1644,13 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 		cfg->move_in_progress = 0;
 	}
 
-	irq_desc[irq].affinity = mask;
+	desc->affinity = mask;
 }
 
 static int migrate_irq_remapped_level(int irq)
 {
 	int ret = -1;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	mask_IO_APIC_irq(irq);
 
@@ -1658,11 +1666,11 @@ static int migrate_irq_remapped_level(int irq)
 	}
 
 	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+	migrate_ioapic_irq(irq, desc->pending_mask);
 
 	ret = 0;
-	irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
-	cpus_clear(irq_desc[irq].pending_mask);
+	desc->status &= ~IRQ_MOVE_PENDING;
+	cpus_clear(desc->pending_mask);
 
 unmask:
 	unmask_IO_APIC_irq(irq);
@@ -1674,7 +1682,7 @@ static void ir_irq_migration(struct work_struct *work)
 	int irq;
 
 	for (irq = 0; irq < nr_irqs; irq++) {
-		struct irq_desc *desc = irq_desc + irq;
+		struct irq_desc *desc = irq_to_desc(irq);
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
@@ -1686,8 +1694,7 @@ static void ir_irq_migration(struct work_struct *work)
 				continue;
 			}
 
-			desc->chip->set_affinity(irq,
-					         irq_desc[irq].pending_mask);
+			desc->chip->set_affinity(irq, desc->pending_mask);
 			spin_unlock_irqrestore(&desc->lock, flags);
 		}
 	}
@@ -1698,9 +1705,11 @@ static void ir_irq_migration(struct work_struct *work)
  */
 static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-	if (irq_desc[irq].status & IRQ_LEVEL) {
-		irq_desc[irq].status |= IRQ_MOVE_PENDING;
-		irq_desc[irq].pending_mask = mask;
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	if (desc->status & IRQ_LEVEL) {
+		desc->status |= IRQ_MOVE_PENDING;
+		desc->pending_mask = mask;
 		migrate_irq_remapped_level(irq);
 		return;
 	}
@@ -1725,7 +1734,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		if (irq >= nr_irqs)
 			continue;
 
-		desc = irq_desc + irq;
+		desc = irq_to_desc(irq);
 		cfg = irq_cfg + irq;
 		spin_lock(&desc->lock);
 		if (!cfg->move_cleanup_count)
@@ -1791,7 +1800,7 @@ static void ack_apic_level(unsigned int irq)
 	irq_complete_move(irq);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
 		mask_IO_APIC_irq(irq);
 	}
@@ -1868,6 +1877,7 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
 static inline void init_IO_APIC_traps(void)
 {
 	int irq;
+	struct irq_desc *desc;
 
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1889,9 +1899,11 @@ static inline void init_IO_APIC_traps(void)
 			 */
 			if (irq < 16)
 				make_8259A_irq(irq);
-			else
+			else {
+				desc = irq_to_desc(irq);
 				/* Strange. Oh, well.. */
-				irq_desc[irq].chip = &no_irq_chip;
+				desc->chip = &no_irq_chip;
+			}
 		}
 	}
 }
@@ -1926,7 +1938,10 @@ static struct irq_chip lapic_chip __read_mostly = {
 
 static void lapic_register_intr(int irq)
 {
-	irq_desc[irq].status &= ~IRQ_LEVEL;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->status &= ~IRQ_LEVEL;
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
 }
@@ -2402,6 +2417,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2421,7 +2437,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg(irq, &msg);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 
 #ifdef CONFIG_INTR_REMAP
@@ -2435,6 +2452,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2469,7 +2487,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 		cfg->move_in_progress = 0;
 	}
 
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif
 #endif /* CONFIG_SMP */
@@ -2543,7 +2562,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
-		struct irq_desc *desc = irq_desc + irq;
+		struct irq_desc *desc = irq_to_desc(irq);
 		/*
 		 * irq migration in process context
 		 */
@@ -2655,6 +2674,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2674,7 +2694,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2731,6 +2752,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	struct irq_cfg *cfg = irq_cfg + irq;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2743,7 +2765,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif
 
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 4c7ffb32854c..ede513be517d 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -224,7 +224,7 @@ unsigned int do_IRQ(struct pt_regs *regs)
 	struct pt_regs *old_regs;
 	/* high bit used in ret_from_ code */
 	int overflow, irq = ~regs->orig_ax;
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (unlikely((unsigned)irq >= nr_irqs)) {
 		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
@@ -273,15 +273,16 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i < nr_irqs) {
 		unsigned any_count = 0;
+		struct irq_desc *desc = irq_to_desc(i);
 
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
 		any_count = kstat_irqs(i);
 #else
 		for_each_online_cpu(j)
 			any_count |= kstat_cpu(j).irqs[i];
 #endif
-		action = irq_desc[i].action;
+		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
 		seq_printf(p, "%3d: ",i);
@@ -291,8 +292,8 @@ int show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
-		seq_printf(p, " %8s", irq_desc[i].chip->name);
-		seq_printf(p, "-%-8s", irq_desc[i].name);
+		seq_printf(p, " %8s", desc->chip->name);
+		seq_printf(p, "-%-8s", desc->name);
 
 		if (action) {
 			seq_printf(p, "  %s", action->name);
@@ -302,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		spin_unlock_irqrestore(&desc->lock, flags);
 	} else if (i == nr_irqs) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
@@ -398,17 +399,20 @@ void fixup_irqs(cpumask_t map)
 
 	for (irq = 0; irq < nr_irqs; irq++) {
 		cpumask_t mask;
+		struct irq_desc *desc;
+
 		if (irq == 2)
 			continue;
 
-		cpus_and(mask, irq_desc[irq].affinity, map);
+		desc = irq_to_desc(irq);
+		cpus_and(mask, desc->affinity, map);
 		if (any_online_cpu(mask) == NR_CPUS) {
 			printk("Breaking affinity for irq %i\n", irq);
 			mask = map;
 		}
-		if (irq_desc[irq].chip->set_affinity)
-			irq_desc[irq].chip->set_affinity(irq, mask);
-		else if (irq_desc[irq].action && !(warned++))
+		if (desc->chip->set_affinity)
+			desc->chip->set_affinity(irq, mask);
+		else if (desc->action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index e1f0839430d2..738eb65a924e 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -83,15 +83,16 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i < nr_irqs) {
 		unsigned any_count = 0;
+		struct irq_desc *desc = irq_to_desc(i);
 
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
 		any_count = kstat_irqs(i);
 #else
 		for_each_online_cpu(j)
 			any_count |= kstat_cpu(j).irqs[i];
 #endif
-		action = irq_desc[i].action;
+		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
 		seq_printf(p, "%3d: ",i);
@@ -101,8 +102,8 @@ int show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
-		seq_printf(p, " %8s", irq_desc[i].chip->name);
-		seq_printf(p, "-%-8s", irq_desc[i].name);
+		seq_printf(p, " %8s", desc->chip->name);
+		seq_printf(p, "-%-8s", desc->name);
 
 		if (action) {
 			seq_printf(p, "  %s", action->name);
@@ -111,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		}
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		spin_unlock_irqrestore(&desc->lock, flags);
 	} else if (i == nr_irqs) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
@@ -228,37 +229,39 @@ void fixup_irqs(cpumask_t map)
 		cpumask_t mask;
 		int break_affinity = 0;
 		int set_affinity = 1;
+		struct irq_desc *desc;
 
 		if (irq == 2)
 			continue;
 
+		desc = irq_to_desc(irq);
 		/* interrupt's are disabled at this point */
-		spin_lock(&irq_desc[irq].lock);
+		spin_lock(&desc->lock);
 
 		if (!irq_has_action(irq) ||
-		    cpus_equal(irq_desc[irq].affinity, map)) {
-			spin_unlock(&irq_desc[irq].lock);
+		    cpus_equal(desc->affinity, map)) {
+			spin_unlock(&desc->lock);
 			continue;
 		}
 
-		cpus_and(mask, irq_desc[irq].affinity, map);
+		cpus_and(mask, desc->affinity, map);
 		if (cpus_empty(mask)) {
 			break_affinity = 1;
 			mask = map;
 		}
 
-		if (irq_desc[irq].chip->mask)
-			irq_desc[irq].chip->mask(irq);
+		if (desc->chip->mask)
+			desc->chip->mask(irq);
 
-		if (irq_desc[irq].chip->set_affinity)
-			irq_desc[irq].chip->set_affinity(irq, mask);
+		if (desc->chip->set_affinity)
+			desc->chip->set_affinity(irq, mask);
 		else if (!(warned++))
 			set_affinity = 0;
 
-		if (irq_desc[irq].chip->unmask)
-			irq_desc[irq].chip->unmask(irq);
+		if (desc->chip->unmask)
+			desc->chip->unmask(irq);
 
-		spin_unlock(&irq_desc[irq].lock);
+		spin_unlock(&desc->lock);
 
 		if (break_affinity && set_affinity)
 			printk("Broke affinity for irq %i\n", irq);
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 165c5d9b0d1a..0744b49b4d12 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -143,9 +143,11 @@ void __init init_ISA_irqs(void)
 	init_8259A(0);
 
 	for (i = 0; i < nr_irqs; i++) {
-		irq_desc[i].status = IRQ_DISABLED;
-		irq_desc[i].action = NULL;
-		irq_desc[i].depth = 1;
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = NULL;
+		desc->depth = 1;
 
 		if (i < 16) {
 			/*
@@ -157,7 +159,7 @@ void __init init_ISA_irqs(void)
 			/*
 			 * 'high' PCI IRQs filled in on demand
 			 */
-			irq_desc[i].chip = &no_irq_chip;
+			desc->chip = &no_irq_chip;
 		}
 	}
 }
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 61a97e616f70..9d85ab384435 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -484,10 +484,11 @@ static void disable_cobalt_irq(unsigned int irq)
 static unsigned int startup_cobalt_irq(unsigned int irq)
 {
 	unsigned long flags;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	spin_lock_irqsave(&cobalt_lock, flags);
-	if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
-		irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
+	if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
+		desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
 	enable_cobalt_irq(irq);
 	spin_unlock_irqrestore(&cobalt_lock, flags);
 	return 0;
@@ -506,9 +507,10 @@ static void ack_cobalt_irq(unsigned int irq)
 static void end_cobalt_irq(unsigned int irq)
 {
 	unsigned long flags;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	spin_lock_irqsave(&cobalt_lock, flags);
-	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
+	if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)))
 		enable_cobalt_irq(irq);
 	spin_unlock_irqrestore(&cobalt_lock, flags);
 }
@@ -626,7 +628,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 
-	desc = irq_desc + realirq;
+	desc = irq_to_desc(realirq);
 
 	/*
 	 * handle this 'virtual interrupt' as a Cobalt one now.
@@ -662,27 +664,29 @@ void init_VISWS_APIC_irqs(void)
 	int i;
 
 	for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
-		irq_desc[i].status = IRQ_DISABLED;
-		irq_desc[i].action = 0;
-		irq_desc[i].depth = 1;
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = 0;
+		desc->depth = 1;
 
 		if (i == 0) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 		else if (i == CO_IRQ_IDE0) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 		else if (i == CO_IRQ_IDE1) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 		else if (i == CO_IRQ_8259) {
-			irq_desc[i].chip = &piix4_master_irq_type;
+			desc->chip = &piix4_master_irq_type;
 		}
 		else if (i < CO_IRQ_APIC0) {
-			irq_desc[i].chip = &piix4_virtual_irq_type;
+			desc->chip = &piix4_virtual_irq_type;
 		}
 		else if (IS_CO_APIC(i)) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 	}
 
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 199a5f4a873c..0f6e8a6523ae 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -1483,7 +1483,7 @@ static void disable_local_vic_irq(unsigned int irq)
  * the interrupt off to another CPU */
 static void before_handle_vic_irq(unsigned int irq)
 {
-	irq_desc_t *desc = irq_desc + irq;
+	irq_desc_t *desc = irq_to_desc(irq);
 	__u8 cpu = smp_processor_id();
 
 	_raw_spin_lock(&vic_irq_lock);
@@ -1518,7 +1518,7 @@ static void before_handle_vic_irq(unsigned int irq)
 /* Finish the VIC interrupt: basically mask */
 static void after_handle_vic_irq(unsigned int irq)
 {
-	irq_desc_t *desc = irq_desc + irq;
+	irq_desc_t *desc = irq_to_desc(irq);
 
 	_raw_spin_lock(&vic_irq_lock);
 	{
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 8d2940517c99..572d372899d3 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1058,7 +1058,7 @@ static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 
 		if (!is_out) {
 			int		irq = gpio_to_irq(gpio);
-			struct irq_desc	*desc = irq_desc + irq;
+			struct irq_desc	*desc = irq_to_desc(irq);
 
 			/* This races with request_irq(), set_irq_type(),
 			 * and set_irq_wake() ... but those are "rare".
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index ba5aa2008273..e4c0db4dc7b1 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -123,7 +123,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
 					irqnr = asic->irq_base +
 						(ASIC3_GPIOS_PER_BANK * bank)
 						+ i;
-					desc = irq_desc + irqnr;
+					desc = irq_to_desc(irqnr);
 					desc->handle_irq(irqnr, desc);
 					if (asic->irq_bothedge[bank] & bit)
 						asic3_irq_flip_edge(asic, base,
@@ -136,7 +136,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
 		for (i = ASIC3_NUM_GPIOS; i < ASIC3_NR_IRQS; i++) {
 			/* They start at bit 4 and go up */
 			if (status & (1 << (i - ASIC3_NUM_GPIOS + 4))) {
-				desc = irq_desc + asic->irq_base + i;
+				desc = irq_to_desc(asic->irq_base + i);
 				desc->handle_irq(asic->irq_base + i,
 						 desc);
 			}
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index 6be43172dc65..ad3379fcd194 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -112,7 +112,7 @@ static void egpio_handler(unsigned int irq, struct irq_desc *desc)
 		/* Run irq handler */
 		pr_debug("got IRQ %d\n", irqpin);
 		irq = ei->irq_start + irqpin;
-		desc = &irq_desc[irq];
+		desc = irq_to_desc(irq);
 		desc->handle_irq(irq, desc);
 	}
 }
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index fd56128525d1..3bc54b30c3a1 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -298,7 +298,8 @@ struct pci_port_ops dino_port_ops = {
 
 static void dino_disable_irq(unsigned int irq)
 {
-	struct dino_device *dino_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct dino_device *dino_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 
 	DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, irq);
@@ -310,7 +311,8 @@ static void dino_disable_irq(unsigned int irq)
 
 static void dino_enable_irq(unsigned int irq)
 {
-	struct dino_device *dino_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct dino_device *dino_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 	u32 tmp;
 
diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c
index 771cef592542..7891db50c483 100644
--- a/drivers/parisc/eisa.c
+++ b/drivers/parisc/eisa.c
@@ -346,10 +346,10 @@ static int __init eisa_probe(struct parisc_device *dev)
 	}
 	
 	/* Reserve IRQ2 */
-	irq_desc[2].action = &irq2_action;
+	irq_to_desc(2)->action = &irq2_action;
 	
 	for (i = 0; i < 16; i++) {
-		irq_desc[i].chip = &eisa_interrupt_type;
+		irq_to_desc(i)->chip = &eisa_interrupt_type;
 	}
 	
 	EISA_bus = 1;
diff --git a/drivers/parisc/gsc.c b/drivers/parisc/gsc.c
index f7d088b897ee..e76db9e4d504 100644
--- a/drivers/parisc/gsc.c
+++ b/drivers/parisc/gsc.c
@@ -108,7 +108,8 @@ int gsc_find_local_irq(unsigned int irq, int *global_irqs, int limit)
 
 static void gsc_asic_disable_irq(unsigned int irq)
 {
-	struct gsc_asic *irq_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct gsc_asic *irq_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32);
 	u32 imr;
 
@@ -123,7 +124,8 @@ static void gsc_asic_disable_irq(unsigned int irq)
 
 static void gsc_asic_enable_irq(unsigned int irq)
 {
-	struct gsc_asic *irq_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct gsc_asic *irq_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32);
 	u32 imr;
 
@@ -159,12 +161,14 @@ static struct hw_interrupt_type gsc_asic_interrupt_type = {
 int gsc_assign_irq(struct hw_interrupt_type *type, void *data)
 {
 	static int irq = GSC_IRQ_BASE;
+	struct irq_desc *desc;
 
 	if (irq > GSC_IRQ_MAX)
 		return NO_IRQ;
 
-	irq_desc[irq].chip = type;
-	irq_desc[irq].chip_data = data;
+	desc = irq_to_desc(irq);
+	desc->chip = type;
+	desc->chip_data = data;
 	return irq++;
 }
 
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 6fb3f7979f21..7beffcab2745 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -619,7 +619,9 @@ iosapic_set_irt_data( struct vector_info *vi, u32 *dp0, u32 *dp1)
 
 static struct vector_info *iosapic_get_vector(unsigned int irq)
 {
-	return irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	return desc->chip_data;
 }
 
 static void iosapic_disable_irq(unsigned int irq)
diff --git a/drivers/parisc/superio.c b/drivers/parisc/superio.c
index 1e8d2d17f04c..1e93c837514f 100644
--- a/drivers/parisc/superio.c
+++ b/drivers/parisc/superio.c
@@ -363,7 +363,9 @@ int superio_fixup_irq(struct pci_dev *pcidev)
 #endif
 
 	for (i = 0; i < 16; i++) {
-		irq_desc[i].chip = &superio_interrupt_type;
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->chip = &superio_interrupt_type;
 	}
 
 	/*
diff --git a/drivers/pcmcia/hd64465_ss.c b/drivers/pcmcia/hd64465_ss.c
index 117dc12ab438..9ef69cdb3183 100644
--- a/drivers/pcmcia/hd64465_ss.c
+++ b/drivers/pcmcia/hd64465_ss.c
@@ -233,15 +233,18 @@ static struct hw_interrupt_type hd64465_ss_irq_type = {
  */
 static void hs_map_irq(hs_socket_t *sp, unsigned int irq)
 {
+	struct irq_desc *desc;
+
     	DPRINTK("hs_map_irq(sock=%d irq=%d)\n", sp->number, irq);
 	
 	if (irq >= HS_NUM_MAPPED_IRQS)
 	    return;
 
+	desc = irq_to_desc(irq);
     	hs_mapped_irq[irq].sock = sp;
 	/* insert ourselves as the irq controller */
-	hs_mapped_irq[irq].old_handler = irq_desc[irq].chip;
-	irq_desc[irq].chip = &hd64465_ss_irq_type;
+	hs_mapped_irq[irq].old_handler = desc->chip;
+	desc->chip = &hd64465_ss_irq_type;
 }
 
 
@@ -250,13 +253,16 @@ static void hs_map_irq(hs_socket_t *sp, unsigned int irq)
  */
 static void hs_unmap_irq(hs_socket_t *sp, unsigned int irq)
 {
+	struct irq_desc *desc;
+
     	DPRINTK("hs_unmap_irq(sock=%d irq=%d)\n", sp->number, irq);
 	
 	if (irq >= HS_NUM_MAPPED_IRQS)
 	    return;
 		
+	desc = irq_to_desc(irq);
 	/* restore the original irq controller */
-	irq_desc[irq].chip = hs_mapped_irq[irq].old_handler;
+	desc->chip = hs_mapped_irq[irq].old_handler;
 }
 
 /*============================================================*/
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ed8235187dc0..56ace47f24d6 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -125,7 +125,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 
 	BUG_ON(irq == -1);
 #ifdef CONFIG_SMP
-	irq_desc[irq].affinity = cpumask_of_cpu(cpu);
+	irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu);
 #endif
 
 	__clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
@@ -139,8 +139,10 @@ static void init_evtchn_cpu_bindings(void)
 #ifdef CONFIG_SMP
 	int i;
 	/* By default all event channels notify CPU#0. */
-	for (i = 0; i < nr_irqs; i++)
-		irq_desc[i].affinity = cpumask_of_cpu(0);
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_desc *desc = irq_to_desc(i);
+		desc->affinity = cpumask_of_cpu(0);
+	}
 #endif
 
 	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 5f4b013624dc..80b8200f2adb 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -152,6 +152,10 @@ struct irq_chip {
  * @name:		flow handler name for /proc/interrupts output
  */
 struct irq_desc {
+	unsigned int		irq;
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	struct irq_desc		*next;
+#endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
 	struct msi_desc		*msi_desc;
@@ -179,9 +183,9 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-extern struct irq_desc *irq_desc;
-#else
+extern struct irq_desc *irq_to_desc(unsigned int irq);
+#ifndef CONFIG_HAVE_DYN_ARRAY
+/* could be removed if we get rid of all irq_desc reference */
 extern struct irq_desc irq_desc[NR_IRQS];
 #endif
 
@@ -249,7 +253,10 @@ extern int no_irq_affinity;
 
 static inline int irq_balancing_disabled(unsigned int irq)
 {
-	return irq_desc[irq].status & IRQ_NO_BALANCING_MASK;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	return desc->status & IRQ_NO_BALANCING_MASK;
 }
 
 /* Handle irq action chains: */
@@ -281,7 +288,7 @@ extern unsigned int __do_IRQ(unsigned int irq);
  */
 static inline void generic_handle_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 #ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 	desc->handle_irq(irq, desc);
@@ -325,7 +332,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 static inline void __set_irq_handler_unlocked(int irq,
 					      irq_flow_handler_t handler)
 {
-	irq_desc[irq].handle_irq = handler;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->handle_irq = handler;
 }
 
 /*
@@ -359,7 +369,7 @@ extern void destroy_irq(unsigned int irq);
 /* Test to see if a driver has successfully requested an irq */
 static inline int irq_has_action(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	return desc->action != NULL;
 }
 
@@ -374,10 +384,10 @@ extern int set_irq_chip_data(unsigned int irq, void *data);
 extern int set_irq_type(unsigned int irq, unsigned int type);
 extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 
-#define get_irq_chip(irq)	(irq_desc[irq].chip)
-#define get_irq_chip_data(irq)	(irq_desc[irq].chip_data)
-#define get_irq_data(irq)	(irq_desc[irq].handler_data)
-#define get_irq_msi(irq)	(irq_desc[irq].msi_desc)
+#define get_irq_chip(irq)	(irq_to_desc(irq)->chip)
+#define get_irq_chip_data(irq)	(irq_to_desc(irq)->chip_data)
+#define get_irq_data(irq)	(irq_to_desc(irq)->handler_data)
+#define get_irq_msi(irq)	(irq_to_desc(irq)->msi_desc)
 
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index c689e9851a80..c45ab718cf07 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -39,7 +39,7 @@ unsigned long probe_irq_on(void)
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
 	for (i = nr_irqs-1; i > 0; i--) {
-		desc = irq_desc + i;
+		desc = irq_to_desc(i);
 
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
@@ -69,7 +69,7 @@ unsigned long probe_irq_on(void)
 	 * happened in the previous stage, it may have masked itself)
 	 */
 	for (i = nr_irqs-1; i > 0; i--) {
-		desc = irq_desc + i;
+		desc = irq_to_desc(i);
 
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
@@ -92,7 +92,7 @@ unsigned long probe_irq_on(void)
 	for (i = 0; i < nr_irqs; i++) {
 		unsigned int status;
 
-		desc = irq_desc + i;
+		desc = irq_to_desc(i);
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -131,7 +131,7 @@ unsigned int probe_irq_mask(unsigned long val)
 
 	mask = 0;
 	for (i = 0; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_desc + i;
+		struct irq_desc *desc = irq_to_desc(i);
 		unsigned int status;
 
 		spin_lock_irq(&desc->lock);
@@ -174,7 +174,7 @@ int probe_irq_off(unsigned long val)
 	int i, irq_found = 0, nr_irqs = 0;
 
 	for (i = 0; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_desc + i;
+		struct irq_desc *desc = irq_to_desc(i);
 		unsigned int status;
 
 		spin_lock_irq(&desc->lock);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index bba66e098703..76c225cf4b26 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -33,7 +33,7 @@ void dynamic_irq_init(unsigned int irq)
 	}
 
 	/* Ensure we don't have left over values from a previous use of this irq */
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status = IRQ_DISABLED;
 	desc->chip = &no_irq_chip;
@@ -65,7 +65,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 		return;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	if (desc->action) {
 		spin_unlock_irqrestore(&desc->lock, flags);
@@ -100,7 +100,7 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	if (!chip)
 		chip = &no_irq_chip;
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	irq_chip_set_defaults(chip);
 	desc->chip = chip;
@@ -126,7 +126,7 @@ int set_irq_type(unsigned int irq, unsigned int type)
 		return -ENODEV;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	if (type == IRQ_TYPE_NONE)
 		return 0;
 
@@ -155,7 +155,7 @@ int set_irq_data(unsigned int irq, void *data)
 		return -EINVAL;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->handler_data = data;
 	spin_unlock_irqrestore(&desc->lock, flags);
@@ -180,7 +180,7 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 		       "Trying to install msi data for IRQ%d\n", irq);
 		return -EINVAL;
 	}
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->msi_desc = entry;
 	if (entry)
@@ -198,9 +198,10 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
  */
 int set_irq_chip_data(unsigned int irq, void *data)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc;
 	unsigned long flags;
 
+	desc = irq_to_desc(irq);
 	if (irq >= nr_irqs || !desc->chip) {
 		printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
 		return -EINVAL;
@@ -219,8 +220,9 @@ EXPORT_SYMBOL(set_irq_chip_data);
  */
 static void default_enable(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc;
 
+	desc = irq_to_desc(irq);
 	desc->chip->unmask(irq);
 	desc->status &= ~IRQ_MASKED;
 }
@@ -237,7 +239,10 @@ static void default_disable(unsigned int irq)
  */
 static unsigned int default_startup(unsigned int irq)
 {
-	irq_desc[irq].chip->enable(irq);
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->chip->enable(irq);
 
 	return 0;
 }
@@ -247,8 +252,9 @@ static unsigned int default_startup(unsigned int irq)
  */
 static void default_shutdown(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc;
 
+	desc = irq_to_desc(irq);
 	desc->chip->mask(irq);
 	desc->status |= IRQ_MASKED;
 }
@@ -551,7 +557,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		return;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 
 	if (!handle)
 		handle = handle_bad_irq;
@@ -616,7 +622,7 @@ void __init set_irq_noprobe(unsigned int irq)
 		return;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status |= IRQ_NOPROBE;
@@ -634,7 +640,7 @@ void __init set_irq_probe(unsigned int irq)
 		return;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status &= ~IRQ_NOPROBE;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6ce3bcc2b8f7..9fc33b3378e6 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,6 +18,14 @@
 
 #include "internals.h"
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+/*
+ * lockdep: we want to handle all irq_desc locks as a single lock-class:
+ */
+static struct lock_class_key irq_desc_lock_class;
+#endif
+
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
  * @irq:       the interrupt number
@@ -51,7 +59,8 @@ int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
 #ifdef CONFIG_HAVE_DYN_ARRAY
-static struct irq_desc irq_desc_init __initdata = {
+static struct irq_desc irq_desc_init = {
+	.irq = -1U,
 	.status = IRQ_DISABLED,
 	.chip = &no_irq_chip,
 	.handle_irq = handle_bad_irq,
@@ -62,6 +71,27 @@ static struct irq_desc irq_desc_init __initdata = {
 #endif
 };
 
+
+static void init_one_irq_desc(struct irq_desc *desc)
+{
+	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
+#ifdef CONFIG_TRACE_IRQFLAGS
+	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+#endif
+}
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+static int nr_irq_desc = 32;
+
+static int __init parse_nr_irq_desc(char *arg)
+{
+	if (arg)
+		nr_irq_desc = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+
+early_param("nr_irq_desc", parse_nr_irq_desc);
+
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -71,12 +101,83 @@ static void __init init_work(void *data)
 	desc = *da->name;
 
 	for (i = 0; i < *da->nr; i++)
-		memcpy(&desc[i], &irq_desc_init, sizeof(struct irq_desc));
+		init_one_irq_desc(&desc[i]);
+
+	for (i = 1; i < *da->nr; i++)
+		desc[i-1].next = &desc[i];
 }
 
-struct irq_desc *irq_desc;
+static struct irq_desc *sparse_irqs;
+DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
+
+extern int after_bootmem;
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+			     unsigned long align,
+			     unsigned long goal);
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	struct irq_desc *desc, *desc_pri;
+	int i;
+	int count = 0;
+
+	BUG_ON(irq == -1U);
+
+	desc_pri = desc = &sparse_irqs[0];
+	while (desc) {
+		if (desc->irq == irq)
+			return desc;
+
+		if (desc->irq == -1U) {
+			desc->irq = irq;
+			return desc;
+		}
+		desc_pri = desc;
+		desc = desc->next;
+		count++;
+	}
+
+	/*
+	 *  we run out of pre-allocate ones, allocate more
+	 */
+	printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
+
+	if (after_bootmem)
+		desc = kzalloc(sizeof(struct irq_desc)*nr_irq_desc, GFP_ATOMIC);
+	else
+		desc = __alloc_bootmem_nopanic(sizeof(struct irq_desc)*nr_irq_desc, PAGE_SIZE, 0);
+
+	if (!desc)
+		panic("please boot with nr_irq_desc= %d\n", count * 2);
+
+	for (i = 0; i < nr_irq_desc; i++)
+		init_one_irq_desc(&desc[i]);
+
+	for (i = 1; i < nr_irq_desc; i++)
+		desc[i-1].next = &desc[i];
+
+	desc->irq = irq;
+	desc_pri->next = desc;
+
+	return desc;
+}
+#else
+static void __init init_work(void *data)
+{
+	struct dyn_array *da = data;
+	int i;
+	struct  irq_desc *desc;
+
+	desc = *da->name;
+
+	for (i = 0; i < *da->nr; i++)
+		init_one_irq_desc(&desc[i]);
+
+}
+static struct irq_desc *irq_desc;
 DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
 
+#endif
+
 #else
 
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
@@ -85,12 +186,23 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 		.chip = &no_irq_chip,
 		.handle_irq = handle_bad_irq,
 		.depth = 1,
-		.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
+		.lock = __SPIN_LOCK_UNLOCKED(sparse_irqs->lock),
 #ifdef CONFIG_SMP
 		.affinity = CPU_MASK_ALL
 #endif
 	}
 };
+
+#endif
+
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	if (irq < nr_irqs)
+		return &irq_desc[irq];
+
+	return NULL;
+}
 #endif
 
 /*
@@ -99,7 +211,10 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
  */
 static void ack_bad(unsigned int irq)
 {
-	print_irq_desc(irq, irq_desc + irq);
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	print_irq_desc(irq, desc);
 	ack_bad_irq(irq);
 }
 
@@ -196,7 +311,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
  */
 unsigned int __do_IRQ(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 	unsigned int status;
 
@@ -287,19 +402,16 @@ out:
 }
 #endif
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-
-/*
- * lockdep: we want to handle all irq_desc locks as a single lock-class:
- */
-static struct lock_class_key irq_desc_lock_class;
 
+#ifdef CONFIG_TRACE_IRQFLAGS
 void early_init_irq_lock_class(void)
 {
+#ifndef CONFIG_HAVE_DYN_ARRAY
 	int i;
 
 	for (i = 0; i < nr_irqs; i++)
 		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
+#endif
 }
-
 #endif
+
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d5a4333d8f1f..b5943e9f95aa 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -31,7 +31,7 @@ cpumask_t irq_default_affinity = CPU_MASK_ALL;
  */
 void synchronize_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned int status;
 
 	if (irq >= nr_irqs)
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(synchronize_irq);
  */
 int irq_can_set_affinity(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip ||
 	    !desc->chip->set_affinity)
@@ -81,7 +81,7 @@ int irq_can_set_affinity(unsigned int irq)
  */
 int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (!desc->chip->set_affinity)
 		return -EINVAL;
@@ -111,14 +111,16 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 int irq_select_affinity(unsigned int irq)
 {
 	cpumask_t mask;
+	struct irq_desc *desc;
 
 	if (!irq_can_set_affinity(irq))
 		return 0;
 
 	cpus_and(mask, cpu_online_map, irq_default_affinity);
 
-	irq_desc[irq].affinity = mask;
-	irq_desc[irq].chip->set_affinity(irq, mask);
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+	desc->chip->set_affinity(irq, mask);
 
 	set_balance_irq_affinity(irq, mask);
 	return 0;
@@ -140,7 +142,7 @@ int irq_select_affinity(unsigned int irq)
  */
 void disable_irq_nosync(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
 	if (irq >= nr_irqs)
@@ -169,7 +171,7 @@ EXPORT_SYMBOL(disable_irq_nosync);
  */
 void disable_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (irq >= nr_irqs)
 		return;
@@ -211,7 +213,7 @@ static void __enable_irq(struct irq_desc *desc, unsigned int irq)
  */
 void enable_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
 	if (irq >= nr_irqs)
@@ -225,7 +227,7 @@ EXPORT_SYMBOL(enable_irq);
 
 static int set_irq_wake_real(unsigned int irq, unsigned int on)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	int ret = -ENXIO;
 
 	if (desc->chip->set_wake)
@@ -248,7 +250,7 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
  */
 int set_irq_wake(unsigned int irq, unsigned int on)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 	int ret = 0;
 
@@ -288,12 +290,13 @@ EXPORT_SYMBOL(set_irq_wake);
  */
 int can_request_irq(unsigned int irq, unsigned long irqflags)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 
-	if (irq >= nr_irqs || irq_desc[irq].status & IRQ_NOREQUEST)
+	if (irq >= nr_irqs || desc->status & IRQ_NOREQUEST)
 		return 0;
 
-	action = irq_desc[irq].action;
+	action = desc->action;
 	if (action)
 		if (irqflags & action->flags & IRQF_SHARED)
 			action = NULL;
@@ -349,7 +352,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
  */
 int setup_irq(unsigned int irq, struct irqaction *new)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *old, **p;
 	const char *old_name = NULL;
 	unsigned long flags;
@@ -518,7 +521,7 @@ void free_irq(unsigned int irq, void *dev_id)
 	if (irq >= nr_irqs)
 		return;
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	p = &desc->action;
 	for (;;) {
@@ -615,6 +618,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 {
 	struct irqaction *action;
 	int retval;
+	struct irq_desc *desc;
 
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -632,7 +636,8 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 		return -EINVAL;
 	if (irq >= nr_irqs)
 		return -EINVAL;
-	if (irq_desc[irq].status & IRQ_NOREQUEST)
+	desc = irq_to_desc(irq);
+	if (desc->status & IRQ_NOREQUEST)
 		return -EINVAL;
 	if (!handler)
 		return -EINVAL;
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 77b7acc875c5..90b920d3f52b 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -3,18 +3,18 @@
 
 void set_pending_irq(unsigned int irq, cpumask_t mask)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status |= IRQ_MOVE_PENDING;
-	irq_desc[irq].pending_mask = mask;
+	desc->pending_mask = mask;
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 void move_masked_irq(int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	cpumask_t tmp;
 
 	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
@@ -30,7 +30,7 @@ void move_masked_irq(int irq)
 
 	desc->status &= ~IRQ_MOVE_PENDING;
 
-	if (unlikely(cpus_empty(irq_desc[irq].pending_mask)))
+	if (unlikely(cpus_empty(desc->pending_mask)))
 		return;
 
 	if (!desc->chip->set_affinity)
@@ -38,7 +38,7 @@ void move_masked_irq(int irq)
 
 	assert_spin_locked(&desc->lock);
 
-	cpus_and(tmp, irq_desc[irq].pending_mask, cpu_online_map);
+	cpus_and(tmp, desc->pending_mask, cpu_online_map);
 
 	/*
 	 * If there was a valid mask to work with, please
@@ -55,12 +55,12 @@ void move_masked_irq(int irq)
 	if (likely(!cpus_empty(tmp))) {
 		desc->chip->set_affinity(irq,tmp);
 	}
-	cpus_clear(irq_desc[irq].pending_mask);
+	cpus_clear(desc->pending_mask);
 }
 
 void move_native_irq(int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
 		return;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index e5225a65a4f9..c2f356c808f6 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir;
 
 static int irq_affinity_proc_show(struct seq_file *m, void *v)
 {
-	struct irq_desc *desc = irq_desc + (long)m->private;
+	struct irq_desc *desc = irq_to_desc((long)m->private);
 	cpumask_t *mask = &desc->affinity;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -43,7 +43,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
 	cpumask_t new_value;
 	int err;
 
-	if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
+	if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
 	    irq_balancing_disabled(irq))
 		return -EIO;
 
@@ -132,20 +132,20 @@ static const struct file_operations default_affinity_proc_fops = {
 static int irq_spurious_read(char *page, char **start, off_t off,
 				  int count, int *eof, void *data)
 {
-	struct irq_desc *d = &irq_desc[(long) data];
+	struct irq_desc *desc = irq_to_desc((long) data);
 	return sprintf(page, "count %u\n"
 			     "unhandled %u\n"
 			     "last_unhandled %u ms\n",
-			d->irq_count,
-			d->irqs_unhandled,
-			jiffies_to_msecs(d->last_unhandled));
+			desc->irq_count,
+			desc->irqs_unhandled,
+			jiffies_to_msecs(desc->last_unhandled));
 }
 
 #define MAX_NAMELEN 128
 
 static int name_unique(unsigned int irq, struct irqaction *new_action)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 	unsigned long flags;
 	int ret = 1;
@@ -165,8 +165,9 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
 void register_handler_proc(unsigned int irq, struct irqaction *action)
 {
 	char name [MAX_NAMELEN];
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	if (!irq_desc[irq].dir || action->dir || !action->name ||
+	if (!desc->dir || action->dir || !action->name ||
 					!name_unique(irq, action))
 		return;
 
@@ -174,7 +175,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
 	snprintf(name, MAX_NAMELEN, "%s", action->name);
 
 	/* create /proc/irq/1234/handler/ */
-	action->dir = proc_mkdir(name, irq_desc[irq].dir);
+	action->dir = proc_mkdir(name, desc->dir);
 }
 
 #undef MAX_NAMELEN
@@ -185,25 +186,24 @@ void register_irq_proc(unsigned int irq)
 {
 	char name [MAX_NAMELEN];
 	struct proc_dir_entry *entry;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	if (!root_irq_dir ||
-		(irq_desc[irq].chip == &no_irq_chip) ||
-			irq_desc[irq].dir)
+	if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir)
 		return;
 
 	memset(name, 0, MAX_NAMELEN);
 	sprintf(name, "%d", irq);
 
 	/* create /proc/irq/1234 */
-	irq_desc[irq].dir = proc_mkdir(name, root_irq_dir);
+	desc->dir = proc_mkdir(name, root_irq_dir);
 
 #ifdef CONFIG_SMP
 	/* create /proc/irq/<irq>/smp_affinity */
-	proc_create_data("smp_affinity", 0600, irq_desc[irq].dir,
+	proc_create_data("smp_affinity", 0600, desc->dir,
 			 &irq_affinity_proc_fops, (void *)(long)irq);
 #endif
 
-	entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir);
+	entry = create_proc_entry("spurious", 0444, desc->dir);
 	if (entry) {
 		entry->data = (void *)(long)irq;
 		entry->read_proc = irq_spurious_read;
@@ -214,8 +214,10 @@ void register_irq_proc(unsigned int irq)
 
 void unregister_handler_proc(unsigned int irq, struct irqaction *action)
 {
-	if (action->dir)
-		remove_proc_entry(action->dir->name, irq_desc[irq].dir);
+	if (action->dir) {
+		struct irq_desc *desc = irq_to_desc(irq);
+		remove_proc_entry(action->dir->name, desc->dir);
+	}
 }
 
 void register_default_affinity_proc(void)
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index cba8aa5bc7f4..89c7117acf2b 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -36,7 +36,7 @@ static void resend_irqs(unsigned long arg)
 	while (!bitmap_empty(irqs_resend, nr_irqs)) {
 		irq = find_first_bit(irqs_resend, nr_irqs);
 		clear_bit(irq, irqs_resend);
-		desc = irq_desc + irq;
+		desc = irq_to_desc(irq);
 		local_irq_disable();
 		desc->handle_irq(irq, desc);
 		local_irq_enable();
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index e26ca1e90c08..b5d906002e1d 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -92,11 +92,12 @@ static int misrouted_irq(int irq)
 	int ok = 0;
 
 	for (i = 1; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_desc + i;
+		struct irq_desc *desc;
 
 		if (i == irq)	/* Already tried */
 			continue;
 
+		desc = irq_to_desc(i);
 		if (try_one_irq(i, desc))
 			ok = 1;
 	}
@@ -108,7 +109,7 @@ static void poll_spurious_irqs(unsigned long dummy)
 {
 	int i;
 	for (i = 1; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_desc + i;
+		struct irq_desc *desc = irq_to_desc(i);
 		unsigned int status;
 
 		/* Racy but it doesn't matter */
-- 
GitLab


From 3bf52a4df3ccd25d4154797977c556a2a8b3bc1e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 19 Aug 2008 20:50:29 -0700
Subject: [PATCH 396/895] irq: sparse irqs, fix IRQ auto-probe crash

fix:

[   10.631533] calling  yenta_socket_init+0x0/0x20
[   10.631533] Yenta: CardBus bridge found at 0000:15:00.0 [17aa:2012]
[   10.631533] Yenta: Using INTVAL to route CSC interrupts to PCI
[   10.631533] Yenta: Routing CardBus interrupts to PCI
[   10.631533] Yenta TI: socket 0000:15:00.0, mfunc 0x01d01002, devctl 0x64
[   10.731599] BUG: unable to handle kernel NULL pointer dereference at 00000040
[   10.731838] IP: [<c0c95b5f>] _spin_lock_irq+0xf/0x20
[   10.732221] *pde = 00000000
[   10.732741] Oops: 0002 [#1] SMP
[   10.733453]
[   10.734253] Pid: 1, comm: swapper Tainted: G        W (2.6.27-rc3-tip-00173-gd7eaa4f-dirty #1)
[   10.735188] EIP: 0060:[<c0c95b5f>] EFLAGS: 00010002 CPU: 0
[   10.735523] EIP is at _spin_lock_irq+0xf/0x20
[   10.735523] EAX: 00000040 EBX: 00000000 ECX: f6e04c90 EDX: 00000100
[   10.735523] ESI: 000000df EDI: f6e04c90 EBP: f7867df0 ESP: f7867df0
[   10.735523]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
[   10.735523] Process swapper (pid: 1, ti=f7867000 task=f7870000 task.ti=f7867000)
[   10.735523] Stack: f7867e04 c0155fbd 00000000 00000000 f6e04c90 f7867e5c c0c6e319 c0f6a074
[   10.735523]        f6e04c90 000017aa 00002012 c112b648 f791f240 c112b5e0 f7867e44 c010440b
[   10.735523]        f791f240 f791f29c c112b8ec f791f240 00000000 f7867e5c c048f893 03c0b648
[   10.735523] Call Trace:
[   10.735523]  [<c0155fbd>] ? probe_irq_on+0x3d/0x140
[   10.735523]  [<c0c6e319>] ? yenta_probe+0x529/0x640
[   10.735523]  [<c010440b>] ? mcount_call+0x5/0xa
[   10.735523]  [<c048f893>] ? pci_match_device+0xa3/0xb0
[   10.735523]  [<c048fc1e>] ? pci_device_probe+0x5e/0x80
[   10.735523]  [<c0515423>] ? driver_probe_device+0x83/0x180
[   10.735523]  [<c0515594>] ? __driver_attach+0x74/0x80
[   10.735523]  [<c0514b69>] ? bus_for_each_dev+0x49/0x70
[   10.735523]  [<c051528e>] ? driver_attach+0x1e/0x20
[   10.735523]  [<c0515520>] ? __driver_attach+0x0/0x80
[   10.735523]  [<c05150d3>] ? bus_add_driver+0x1a3/0x220
[   10.735523]  [<c048fb60>] ? pci_device_remove+0x0/0x40
[   10.735523]  [<c05157f4>] ? driver_register+0x54/0x130
[   10.735523]  [<c048fe2f>] ? __pci_register_driver+0x4f/0x90
[   10.735523]  [<c11e9419>] ? yenta_socket_init+0x19/0x20
[   10.735523]  [<c0101125>] ? do_one_initcall+0x35/0x160
[   10.735523]  [<c11e9400>] ? yenta_socket_init+0x0/0x20
[   10.735523]  [<c01391a6>] ? __queue_work+0x36/0x50
[   10.735523]  [<c013922d>] ? queue_work_on+0x3d/0x50
[   10.735523]  [<c11a2758>] ? kernel_init+0x148/0x210
[   10.735523]  [<c11a2610>] ? kernel_init+0x0/0x210
[   10.735523]  [<c01043f3>] ? kernel_thread_helper+0x7/0x10
[   10.735523]  =======================
[   10.735523] Code: 10 38 f2 74 06 f3 90 8a 10 eb f6 5d 89 c8 c3 8d b6 00 00 00 00 8d bc 27 00 00 00 00 55 89 e5 e8 a4 e8 46 ff fa ba 00 01 00 00 90 <66> 0f c1 10 38 f2 74 06 f3 90 8a 10 eb f6 5d c3 90 55 89 e5 53

as auto-probing wants to iterate over existing irqs.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/autoprobe.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index c45ab718cf07..b3a5549ea81e 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -40,6 +40,8 @@ unsigned long probe_irq_on(void)
 	 */
 	for (i = nr_irqs-1; i > 0; i--) {
 		desc = irq_to_desc(i);
+		if (!desc)
+			continue;
 
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
@@ -70,6 +72,8 @@ unsigned long probe_irq_on(void)
 	 */
 	for (i = nr_irqs-1; i > 0; i--) {
 		desc = irq_to_desc(i);
+		if (!desc)
+			continue;
 
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
@@ -93,6 +97,8 @@ unsigned long probe_irq_on(void)
 		unsigned int status;
 
 		desc = irq_to_desc(i);
+		if (!desc)
+			continue;
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -134,6 +140,8 @@ unsigned int probe_irq_mask(unsigned long val)
 		struct irq_desc *desc = irq_to_desc(i);
 		unsigned int status;
 
+		if (!desc)
+			continue;
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -177,6 +185,8 @@ int probe_irq_off(unsigned long val)
 		struct irq_desc *desc = irq_to_desc(i);
 		unsigned int status;
 
+		if (!desc)
+			continue;
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
-- 
GitLab


From 3ac2de48ed3c998df7f366e039c97eedb27e7c3d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:06 -0700
Subject: [PATCH 397/895] x86: add irq_cfg in io_apic_64.c

preallocate size is 32, and if it is not enough, irq_cfg will more
via alloc_bootmem() or kzalloc(). (depending on how early we are in
system setup)

v2: fix typo about size of init_one_irq_cfg ... should use sizeof(struct irq_cfg)
v3: according to Eric, change get_irq_cfg() to irq_cfg()
v4: squash add irq_cfg_alloc in

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 209 ++++++++++++++++++++++++++++-------
 1 file changed, 169 insertions(+), 40 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index cab5a25d81b1..858c37a31a2f 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -57,7 +57,11 @@
 
 #define __apicdebuginit(type) static type __init
 
+struct irq_cfg;
+
 struct irq_cfg {
+	unsigned int irq;
+	struct irq_cfg *next;
 	cpumask_t domain;
 	cpumask_t old_domain;
 	unsigned move_cleanup_count;
@@ -67,34 +71,132 @@ struct irq_cfg {
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 static struct irq_cfg irq_cfg_legacy[] __initdata = {
-	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-static struct irq_cfg *irq_cfg;
+static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
+/* need to be biger than size of irq_cfg_legacy */
+static int nr_irq_cfg = 32;
+
+static int __init parse_nr_irq_cfg(char *arg)
+{
+	if (arg) {
+		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
+		if (nr_irq_cfg < 32)
+			nr_irq_cfg = 32;
+	}
+	return 0;
+}
+
+early_param("nr_irq_cfg", parse_nr_irq_cfg);
+
+static void init_one_irq_cfg(struct irq_cfg *cfg)
+{
+	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
+}
 
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
+	struct irq_cfg *cfg;
+	int i;
 
-	memcpy(*da->name, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+	cfg = *da->name;
+
+	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+
+	i = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+	for (; i < *da->nr; i++)
+		init_one_irq_cfg(&cfg[i]);
+
+	for (i = 1; i < *da->nr; i++)
+		cfg[i-1].next = &cfg[i];
 }
 
-DEFINE_DYN_ARRAY(irq_cfg, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
+static struct irq_cfg *irq_cfgx;
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	struct irq_cfg *cfg;
+
+	BUG_ON(irq == -1U);
+
+	cfg = &irq_cfgx[0];
+	while (cfg) {
+		if (cfg->irq == irq)
+			return cfg;
+
+		if (cfg->irq == -1U)
+			return NULL;
+
+		cfg = cfg->next;
+	}
+
+	return NULL;
+}
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+	struct irq_cfg *cfg, *cfg_pri;
+	int i;
+	int count = 0;
+
+	BUG_ON(irq == -1U);
+
+	cfg_pri = cfg = &irq_cfgx[0];
+	while (cfg) {
+		if (cfg->irq == irq)
+			return cfg;
+
+		if (cfg->irq == -1U) {
+			cfg->irq = irq;
+			return cfg;
+		}
+		cfg_pri = cfg;
+		cfg = cfg->next;
+		count++;
+	}
+
+	/*
+	 *  we run out of pre-allocate ones, allocate more
+	 */
+	printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
+
+	if (after_bootmem)
+		cfg = kzalloc(sizeof(struct irq_cfg)*nr_irq_cfg, GFP_ATOMIC);
+	else
+		cfg = __alloc_bootmem_nopanic(sizeof(struct irq_cfg)*nr_irq_cfg, PAGE_SIZE, 0);
+
+	if (!cfg)
+		panic("please boot with nr_irq_cfg= %d\n", count * 2);
+
+	for (i = 0; i < nr_irq_cfg; i++)
+		init_one_irq_cfg(&cfg[i]);
+
+	for (i = 1; i < nr_irq_cfg; i++)
+		cfg[i-1].next = &cfg[i];
+
+	cfg->irq = irq;
+	cfg_pri->next = cfg;
+
+	return cfg;
+}
 
 static int assign_irq_vector(int irq, cpumask_t mask);
 
@@ -341,7 +443,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
@@ -381,6 +483,8 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
 	BUG_ON(irq >= nr_irqs);
+	irq_cfg_alloc(irq);
+
 	while (entry->next)
 		entry = irq_2_pin + entry->next;
 
@@ -819,7 +923,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	struct irq_cfg *cfg;
 
 	BUG_ON((unsigned)irq >= nr_irqs);
-	cfg = &irq_cfg[irq];
+	cfg = irq_cfg(irq);
 
 	/* Only try and allocate irqs on cpus that are present */
 	cpus_and(mask, mask, cpu_online_map);
@@ -893,7 +997,7 @@ static void __clear_irq_vector(int irq)
 	int cpu, vector;
 
 	BUG_ON((unsigned)irq >= nr_irqs);
-	cfg = &irq_cfg[irq];
+	cfg = irq_cfg(irq);
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
@@ -913,17 +1017,23 @@ void __setup_vector_irq(int cpu)
 
 	/* Mark the inuse vectors */
 	for (irq = 0; irq < nr_irqs; ++irq) {
-		if (!cpu_isset(cpu, irq_cfg[irq].domain))
+		struct irq_cfg *cfg = irq_cfg(irq);
+
+		if (!cpu_isset(cpu, cfg->domain))
 			continue;
-		vector = irq_cfg[irq].vector;
+		vector = cfg->vector;
 		per_cpu(vector_irq, cpu)[vector] = irq;
 	}
 	/* Mark the free vectors */
 	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		struct irq_cfg *cfg;
+
 		irq = per_cpu(vector_irq, cpu)[vector];
 		if (irq < 0)
 			continue;
-		if (!cpu_isset(cpu, irq_cfg[irq].domain))
+
+		cfg = irq_cfg(irq);
+		if (!cpu_isset(cpu, cfg->domain))
 			per_cpu(vector_irq, cpu)[vector] = -1;
 	}
 }
@@ -1029,13 +1139,15 @@ static int setup_ioapic_entry(int apic, int irq,
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 			      int trigger, int polarity)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
 	cpumask_t mask;
 
 	if (!IO_APIC_IRQ(irq))
 		return;
 
+	cfg = irq_cfg(irq);
+
 	mask = TARGET_CPUS;
 	if (assign_irq_vector(irq, mask))
 		return;
@@ -1553,7 +1665,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-	struct irq_cfg *cfg = &irq_cfg[irq];
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
@@ -1600,7 +1712,7 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
  */
 static void migrate_ioapic_irq(int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	struct irq_desc *desc;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
@@ -1618,6 +1730,7 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -1735,7 +1848,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 			continue;
 
 		desc = irq_to_desc(irq);
-		cfg = irq_cfg + irq;
+		cfg = irq_cfg(irq);
 		spin_lock(&desc->lock);
 		if (!cfg->move_cleanup_count)
 			goto unlock;
@@ -1754,7 +1867,7 @@ unlock:
 
 static void irq_complete_move(unsigned int irq)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned vector, me;
 
 	if (likely(!cfg->move_in_progress))
@@ -1891,7 +2004,10 @@ static inline void init_IO_APIC_traps(void)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	for (irq = 0; irq < nr_irqs ; irq++) {
-		if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
+		struct irq_cfg *cfg;
+
+		cfg = irq_cfg(irq);
+		if (IO_APIC_IRQ(irq) && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -2028,7 +2144,7 @@ static inline void __init unlock_ExtINT_logic(void)
  */
 static inline void __init check_timer(void)
 {
-	struct irq_cfg *cfg = irq_cfg + 0;
+	struct irq_cfg *cfg = irq_cfg(0);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	int no_pin1 = 0;
@@ -2306,14 +2422,19 @@ int create_irq(void)
 	int irq;
 	int new;
 	unsigned long flags;
+	struct irq_cfg *cfg_new;
 
 	irq = -ENOSPC;
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = (nr_irqs - 1); new >= 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
-		if (irq_cfg[new].vector != 0)
+		cfg_new = irq_cfg(new);
+		if (cfg_new && cfg_new->vector != 0)
 			continue;
+		/* check if need to create one */
+		if (!cfg_new)
+			cfg_new = irq_cfg_alloc(new);
 		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
 			irq = new;
 		break;
@@ -2346,7 +2467,7 @@ void destroy_irq(unsigned int irq)
 #ifdef CONFIG_PCI_MSI
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	int err;
 	unsigned dest;
 	cpumask_t tmp;
@@ -2356,6 +2477,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	if (err)
 		return err;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, tmp);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2413,7 +2535,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
@@ -2426,6 +2548,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2448,7 +2571,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  */
 static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
@@ -2464,6 +2587,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2670,7 +2794,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_SMP
 static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
@@ -2683,6 +2807,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2749,7 +2874,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
 	struct irq_desc *desc;
@@ -2761,6 +2886,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2783,7 +2909,7 @@ static struct irq_chip ht_irq_chip = {
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	int err;
 	cpumask_t tmp;
 
@@ -2793,6 +2919,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 		struct ht_irq_msg msg;
 		unsigned dest;
 
+		cfg = irq_cfg(irq);
 		cpus_and(tmp, cfg->domain, tmp);
 		dest = cpu_mask_to_apicid(tmp);
 
@@ -2891,6 +3018,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
+	struct irq_cfg *cfg;
 
 	if (skip_ioapic_setup == 1)
 		return;
@@ -2906,7 +3034,8 @@ void __init setup_ioapic_dest(void)
 			 * when you have too many devices, because at that time only boot
 			 * cpu is online.
 			 */
-			if (!irq_cfg[irq].vector)
+			cfg = irq_cfg(irq);
+			if (!cfg->vector)
 				setup_IO_APIC_irq(ioapic, pin, irq,
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
-- 
GitLab


From e5a53714acfc7b5f868d07d27c5f02cb00b118db Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:07 -0700
Subject: [PATCH 398/895] x86: put irq_2_pin pointer into irq_cfg

preallocate 32 irq_2_pin entries, and use get_one_free_irq_2_pin() to get
one more and link to irq_cfg if needed.

so don't waste one where no irq is enabled.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 161 ++++++++++++++++++++++++++---------
 1 file changed, 120 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 858c37a31a2f..51ef7eb75f2e 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -58,10 +58,11 @@
 #define __apicdebuginit(type) static type __init
 
 struct irq_cfg;
-
+struct irq_pin_list;
 struct irq_cfg {
 	unsigned int irq;
 	struct irq_cfg *next;
+	struct irq_pin_list *irq_2_pin;
 	cpumask_t domain;
 	cpumask_t old_domain;
 	unsigned move_cleanup_count;
@@ -252,13 +253,66 @@ int pin_map_size;
  * between pins and IRQs.
  */
 
-static struct irq_pin_list {
-	short apic, pin;
-	int next;
-} *irq_2_pin;
+struct irq_pin_list {
+	int apic, pin;
+	struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *irq_2_pin_head;
+/* fill one page ? */
+static int nr_irq_2_pin = 0x100;
+static struct irq_pin_list *irq_2_pin_ptr;
+static void __init irq_2_pin_init_work(void *data)
+{
+	struct dyn_array *da = data;
+	struct irq_pin_list *pin;
+	int i;
+
+	pin = *da->name;
+
+	for (i = 1; i < *da->nr; i++)
+		pin[i-1].next = &pin[i];
+
+	irq_2_pin_ptr = &pin[0];
+}
+DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
+
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+	struct irq_pin_list *pin;
+	int i;
+
+	pin = irq_2_pin_ptr;
+
+	if (pin) {
+		irq_2_pin_ptr = pin->next;
+		pin->next = NULL;
+		return pin;
+	}
+
+	/*
+	 *  we run out of pre-allocate ones, allocate more
+	 */
+	printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
+
+	if (after_bootmem)
+		pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
+				 GFP_ATOMIC);
+	else
+		pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
+				nr_irq_2_pin, PAGE_SIZE, 0);
+
+	if (!pin)
+		panic("can not get more irq_2_pin\n");
 
-DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, sizeof(struct irq_pin_list), NULL);
+	for (i = 1; i < nr_irq_2_pin; i++)
+		pin[i-1].next = &pin[i];
 
+	irq_2_pin_ptr = pin->next;
+	pin->next = NULL;
+
+	return pin;
+}
 
 struct io_apic {
 	unsigned int index;
@@ -300,16 +354,17 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 {
 	struct irq_pin_list *entry;
 	unsigned long flags;
+	struct irq_cfg *cfg = irq_cfg(irq);
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	entry = irq_2_pin + irq;
+	entry = cfg->irq_2_pin;
 	for (;;) {
 		unsigned int reg;
 		int pin;
 
-		pin = entry->pin;
-		if (pin == -1)
+		if (!entry)
 			break;
+		pin = entry->pin;
 		reg = io_apic_read(entry->apic, 0x10 + pin*2);
 		/* Is the remote IRR bit set? */
 		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
@@ -318,7 +373,7 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 		}
 		if (!entry->next)
 			break;
-		entry = irq_2_pin + entry->next;
+		entry = entry->next;
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
@@ -339,21 +394,24 @@ static inline void io_apic_sync(unsigned int apic)
 									\
 {									\
 	int pin;							\
-	struct irq_pin_list *entry = irq_2_pin + irq;			\
+	struct irq_cfg *cfg;						\
+	struct irq_pin_list *entry;					\
 									\
 	BUG_ON(irq >= nr_irqs);						\
+	cfg = irq_cfg(irq);						\
+	entry = cfg->irq_2_pin;						\
 	for (;;) {							\
 		unsigned int reg;					\
-		pin = entry->pin;					\
-		if (pin == -1)						\
+		if (!entry)						\
 			break;						\
+		pin = entry->pin;					\
 		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
 		reg ACTION;						\
 		io_apic_modify(entry->apic, reg);			\
 		FINAL;							\
 		if (!entry->next)					\
 			break;						\
-		entry = irq_2_pin + entry->next;			\
+		entry = entry->next;					\
 	}								\
 }
 
@@ -416,15 +474,20 @@ static void ioapic_mask_entry(int apic, int pin)
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
 	int apic, pin;
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
 
 	BUG_ON(irq >= nr_irqs);
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
 	for (;;) {
 		unsigned int reg;
+
+		if (!entry)
+			break;
+
 		apic = entry->apic;
 		pin = entry->pin;
-		if (pin == -1)
-			break;
 		/*
 		 * With interrupt-remapping, destination information comes
 		 * from interrupt-remapping table entry.
@@ -437,7 +500,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 		io_apic_modify(apic, reg);
 		if (!entry->next)
 			break;
-		entry = irq_2_pin + entry->next;
+		entry = entry->next;
 	}
 }
 
@@ -480,22 +543,35 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 int first_free_entry;
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
 
 	BUG_ON(irq >= nr_irqs);
-	irq_cfg_alloc(irq);
+	/* first time to refer irq_cfg, so with new */
+	cfg = irq_cfg_alloc(irq);
+	entry = cfg->irq_2_pin;
+	if (!entry) {
+		entry = get_one_free_irq_2_pin();
+		cfg->irq_2_pin = entry;
+		entry->apic = apic;
+		entry->pin = pin;
+		printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
+		return;
+	}
 
-	while (entry->next)
-		entry = irq_2_pin + entry->next;
+	while (entry->next) {
+		/* not again, please */
+		if (entry->apic == apic && entry->pin == pin)
+			return;
 
-	if (entry->pin != -1) {
-		entry->next = first_free_entry;
-		entry = irq_2_pin + entry->next;
-		if (++first_free_entry >= pin_map_size)
-			panic("io_apic.c: ran out of irq_2_pin entries!");
+		entry = entry->next;
 	}
+
+	entry->next = get_one_free_irq_2_pin();
+	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
+	printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
 }
 
 /*
@@ -505,17 +581,24 @@ static void __init replace_pin_at_irq(unsigned int irq,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_pin_list *entry = cfg->irq_2_pin;
+	int replaced = 0;
 
-	while (1) {
+	while (entry) {
 		if (entry->apic == oldapic && entry->pin == oldpin) {
 			entry->apic = newapic;
 			entry->pin = newpin;
-		}
-		if (!entry->next)
+			replaced = 1;
+			/* every one is different, right? */
 			break;
-		entry = irq_2_pin + entry->next;
+		}
+		entry = entry->next;
 	}
+
+	/* why? call replace before add? */
+	if (!replaced)
+		add_pin_to_irq(irq, newapic, newpin);
 }
 
 
@@ -1326,15 +1409,16 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
 	for (i = 0; i < nr_irqs; i++) {
-		struct irq_pin_list *entry = irq_2_pin + i;
-		if (entry->pin < 0)
+		struct irq_cfg *cfg = irq_cfg(i);
+		struct irq_pin_list *entry = cfg->irq_2_pin;
+		if (!entry)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", i);
 		for (;;) {
 			printk("-> %d:%d", entry->apic, entry->pin);
 			if (!entry->next)
 				break;
-			entry = irq_2_pin + entry->next;
+			entry = entry->next;
 		}
 		printk("\n");
 	}
@@ -1495,14 +1579,9 @@ void __init enable_IO_APIC(void)
 {
 	union IO_APIC_reg_01 reg_01;
 	int i8259_apic, i8259_pin;
-	int i, apic;
+	int apic;
 	unsigned long flags;
 
-	for (i = 0; i < pin_map_size; i++) {
-		irq_2_pin[i].pin = -1;
-		irq_2_pin[i].next = 0;
-	}
-
 	/*
 	 * The number of IO-APIC IRQ registers (== #pins):
 	 */
-- 
GitLab


From 3060d6fe28570640c2d7d66d38b9eaa848c3b9e3 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:08 -0700
Subject: [PATCH 399/895] x86: put timer_rand_state pointer into irq_desc

irq_timer_state[] is a NR_IRQS sized array that is a side-by array to
the real irq_desc[] array.

Integrate that field into the (now dynamic) irq_desc dynamic array and
save some RAM.

v2: keep the old way to support arch not support irq_desc

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/char/random.c | 66 +++++++++++++++++++++++++++++++++++++++----
 include/linux/irq.h   |  2 ++
 2 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 1610aa64c7cf..60c9c7ee6b2c 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -558,7 +558,7 @@ struct timer_rand_state {
 	unsigned dont_count_entropy:1;
 };
 
-static struct timer_rand_state input_timer_state;
+#ifndef CONFIG_HAVE_SPARSE_IRQ
 
 #ifdef CONFIG_HAVE_DYN_ARRAY
 static struct timer_rand_state **irq_timer_state;
@@ -567,6 +567,51 @@ DEFINE_DYN_ARRAY(irq_timer_state, sizeof(struct timer_rand_state *), nr_irqs, PA
 static struct timer_rand_state *irq_timer_state[NR_IRQS];
 #endif
 
+static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	if (irq >= nr_irqs)
+		return NULL;
+
+	return irq_timer_state[irq];
+}
+
+static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
+{
+	if (irq >= nr_irqs)
+		return;
+
+	irq_timer_state[irq] = state;
+}
+
+#else
+
+static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc)
+		return NULL;
+
+	return desc->timer_rand_state;
+}
+
+static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc)
+		return;
+
+	desc->timer_rand_state = state;
+}
+#endif
+
+static struct timer_rand_state input_timer_state;
+
 /*
  * This function adds entropy to the entropy "pool" by using timing
  * delays.  It uses the timer_rand_state structure to make an estimate
@@ -654,11 +699,15 @@ EXPORT_SYMBOL_GPL(add_input_randomness);
 
 void add_interrupt_randomness(int irq)
 {
-	if (irq >= nr_irqs || irq_timer_state[irq] == NULL)
+	struct timer_rand_state *state;
+
+	state = get_timer_rand_state(irq);
+
+	if (state == NULL)
 		return;
 
 	DEBUG_ENT("irq event %d\n", irq);
-	add_timer_randomness(irq_timer_state[irq], 0x100 + irq);
+	add_timer_randomness(state, 0x100 + irq);
 }
 
 #ifdef CONFIG_BLOCK
@@ -918,7 +967,14 @@ void rand_initialize_irq(int irq)
 {
 	struct timer_rand_state *state;
 
-	if (irq >= nr_irqs || irq_timer_state[irq])
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+	if (irq >= nr_irqs)
+		return;
+#endif
+
+	state = get_timer_rand_state(irq);
+
+	if (state)
 		return;
 
 	/*
@@ -927,7 +983,7 @@ void rand_initialize_irq(int irq)
 	 */
 	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
 	if (state)
-		irq_timer_state[irq] = state;
+		set_timer_rand_state(irq, state);
 }
 
 #ifdef CONFIG_BLOCK
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 80b8200f2adb..60c856aaac0f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -127,6 +127,7 @@ struct irq_chip {
 	const char	*typename;
 };
 
+struct timer_rand_state;
 /**
  * struct irq_desc - interrupt descriptor
  *
@@ -155,6 +156,7 @@ struct irq_desc {
 	unsigned int		irq;
 #ifdef CONFIG_HAVE_SPARSE_IRQ
 	struct irq_desc		*next;
+	struct timer_rand_state *timer_rand_state;
 #endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
-- 
GitLab


From 7f95ec9e4c12fd067febfd57532da1166d75d858 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:09 -0700
Subject: [PATCH 400/895] x86: move kstat_irqs from kstat to irq_desc

based on Eric's patch ...

together mold it with dyn_array for irq_desc, will allcate kstat_irqs for
nr_irq_desc alltogether if needed. -- at that point nr_cpus is known already.

v2: make sure system without generic_hardirqs works they don't have irq_desc
v3: fix merging
v4: [mingo@elte.hu] fix typo

[ mingo@elte.hu ] irq: build fix

fix:

 arch/x86/xen/spinlock.c: In function 'xen_spin_lock_slow':
 arch/x86/xen/spinlock.c:90: error: 'struct kernel_stat' has no member named 'irqs'

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c   |  2 +-
 arch/x86/kernel/irq_32.c       |  4 +-
 arch/x86/kernel/irq_64.c       |  4 +-
 arch/x86/kernel/visws_quirks.c |  2 +-
 arch/x86/xen/spinlock.c        |  2 +-
 fs/proc/proc_misc.c            |  2 +-
 include/linux/irq.h            |  7 +++
 include/linux/kernel_stat.h    | 22 +++++---
 kernel/irq/chip.c              | 15 ++----
 kernel/irq/handle.c            | 97 ++++++++++++++++++++++++----------
 kernel/sched.c                 |  5 +-
 11 files changed, 106 insertions(+), 56 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index c2160cfdec9b..204884b1415a 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -526,7 +526,7 @@ static void do_irq_balance(void)
 			if (package_index == i)
 				IRQ_DELTA(package_index, j) = 0;
 			/* Determine the total count per processor per IRQ */
-			value_now = (unsigned long) kstat_cpu(i).irqs[j];
+			value_now = (unsigned long) kstat_irqs_cpu(j, i);
 
 			/* Determine the activity per processor per IRQ */
 			delta = value_now - LAST_CPU_IRQ(i, j);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index ede513be517d..576c5df6cad8 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -280,7 +280,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		any_count = kstat_irqs(i);
 #else
 		for_each_online_cpu(j)
-			any_count |= kstat_cpu(j).irqs[i];
+			any_count |= kstat_irqs_cpu(i, j);
 #endif
 		action = desc->action;
 		if (!action && !any_count)
@@ -290,7 +290,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
 		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
 		seq_printf(p, " %8s", desc->chip->name);
 		seq_printf(p, "-%-8s", desc->name);
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 738eb65a924e..4a0a4eb44dcb 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -90,7 +90,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		any_count = kstat_irqs(i);
 #else
 		for_each_online_cpu(j)
-			any_count |= kstat_cpu(j).irqs[i];
+			any_count |= kstat_irqs_cpu(i, j);
 #endif
 		action = desc->action;
 		if (!action && !any_count)
@@ -100,7 +100,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
 		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
 		seq_printf(p, " %8s", desc->chip->name);
 		seq_printf(p, "-%-8s", desc->name);
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 9d85ab384435..817aa55a1209 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -633,7 +633,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 	/*
 	 * handle this 'virtual interrupt' as a Cobalt one now.
 	 */
-	kstat_cpu(smp_processor_id()).irqs[realirq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	if (likely(desc->action != NULL))
 		handle_IRQ_event(realirq, desc->action);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index dd71e3a021cd..bb6bc721b13d 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
 		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
 	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
 
-	kstat_this_cpu.irqs[irq]++;
+	kstat_irqs_this_cpu(irq_to_desc(irq))++;
 
 out:
 	raw_local_irq_restore(flags);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index a2173a2a5625..aa069acf61a0 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -532,7 +532,7 @@ static int show_stat(struct seq_file *p, void *v)
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
 		for (j = 0; j < nr_irqs; j++) {
-			unsigned int temp = kstat_cpu(i).irqs[j];
+			unsigned int temp = kstat_irqs_cpu(j, i);
 			sum += temp;
 			per_irq_sum[j] += temp;
 		}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 60c856aaac0f..cbf471aee1ce 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -157,6 +157,11 @@ struct irq_desc {
 #ifdef CONFIG_HAVE_SPARSE_IRQ
 	struct irq_desc		*next;
 	struct timer_rand_state *timer_rand_state;
+#endif
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned int            *kstat_irqs;
+#else
+	unsigned int            kstat_irqs[NR_CPUS];
 #endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
@@ -190,6 +195,8 @@ extern struct irq_desc *irq_to_desc(unsigned int irq);
 /* could be removed if we get rid of all irq_desc reference */
 extern struct irq_desc irq_desc[NR_IRQS];
 #endif
+#define kstat_irqs_this_cpu(DESC) \
+	((DESC)->kstat_irqs[smp_processor_id()])
 
 /*
  * Migration helpers for obsolete names, they will go away:
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index fe1f7fe534b4..f10616712de5 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,10 +28,8 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned int *irqs;
-#else
-	unsigned int irqs[NR_IRQS];
+#ifndef CONFIG_GENERIC_HARDIRQS
+       unsigned int irqs[NR_IRQS];
 #endif
 };
 
@@ -43,15 +41,25 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 
 extern unsigned long long nr_context_switches(void);
 
+#ifndef CONFIG_GENERIC_HARDIRQS
+static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
+{
+       return kstat_cpu(cpu).irqs[irq];
+}
+#else
+extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
+#endif
+
 /*
  * Number of interrupts per specific IRQ source, since bootup
  */
-static inline int kstat_irqs(int irq)
+static inline unsigned int kstat_irqs(unsigned int irq)
 {
-	int cpu, sum = 0;
+	unsigned int sum = 0;
+	int cpu;
 
 	for_each_possible_cpu(cpu)
-		sum += kstat_cpu(cpu).irqs[irq];
+		sum += kstat_irqs_cpu(irq, cpu);
 
 	return sum;
 }
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 76c225cf4b26..2aa3d4b2fce8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -312,14 +312,13 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 {
 	struct irqaction *action;
 	irqreturn_t action_ret;
-	const unsigned int cpu = smp_processor_id();
 
 	spin_lock(&desc->lock);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	action = desc->action;
 	if (unlikely(!action || (desc->status & IRQ_DISABLED)))
@@ -351,7 +350,6 @@ out_unlock:
 void
 handle_level_irq(unsigned int irq, struct irq_desc *desc)
 {
-	unsigned int cpu = smp_processor_id();
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
@@ -361,7 +359,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	/*
 	 * If its disabled or no action available
@@ -399,7 +397,6 @@ out_unlock:
 void
 handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 {
-	unsigned int cpu = smp_processor_id();
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
@@ -409,7 +406,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 		goto out;
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	/*
 	 * If its disabled or no action available
@@ -458,8 +455,6 @@ out:
 void
 handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 {
-	const unsigned int cpu = smp_processor_id();
-
 	spin_lock(&desc->lock);
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
@@ -476,7 +471,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		goto out_unlock;
 	}
 
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	/* Start handling the irq */
 	desc->chip->ack(irq);
@@ -531,7 +526,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 {
 	irqreturn_t action_ret;
 
-	kstat_this_cpu.irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 9fc33b3378e6..1f346990f3f8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -37,7 +37,7 @@ void
 handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 {
 	print_irq_desc(irq, desc);
-	kstat_this_cpu.irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 	ack_bad_irq(irq);
 }
 
@@ -80,17 +80,38 @@ static void init_one_irq_desc(struct irq_desc *desc)
 #endif
 }
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-static int nr_irq_desc = 32;
+extern int after_bootmem;
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+			     unsigned long align,
+			     unsigned long goal);
 
-static int __init parse_nr_irq_desc(char *arg)
+static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 {
-	if (arg)
-		nr_irq_desc = simple_strtoul(arg, NULL, 0);
-	return 0;
+	unsigned long bytes, total_bytes;
+	char *ptr;
+	int i;
+	unsigned long phys;
+
+	/* Compute how many bytes we need per irq and allocate them */
+	bytes = nr * sizeof(unsigned int);
+	total_bytes = bytes * nr_desc;
+	if (after_bootmem)
+		ptr = kzalloc(total_bytes, GFP_ATOMIC);
+	else
+		ptr = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+
+	if (!ptr)
+		panic(" can not allocate kstat_irqs\n");
+
+	phys = __pa(ptr);
+	printk(KERN_DEBUG "kstat_irqs ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+
+	for (i = 0; i < nr_desc; i++) {
+		desc[i].kstat_irqs = (unsigned int *)ptr;
+		ptr += bytes;
+	}
 }
 
-early_param("nr_irq_desc", parse_nr_irq_desc);
 
 static void __init init_work(void *data)
 {
@@ -100,25 +121,44 @@ static void __init init_work(void *data)
 
 	desc = *da->name;
 
-	for (i = 0; i < *da->nr; i++)
+	for (i = 0; i < *da->nr; i++) {
 		init_one_irq_desc(&desc[i]);
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+		desc[i].irq = i;
+#endif
+	}
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 	for (i = 1; i < *da->nr; i++)
 		desc[i-1].next = &desc[i];
+#endif
+
+	/* init kstat_irqs, nr_cpu_ids is ready already */
+	init_kstat_irqs(desc, *da->nr, nr_cpu_ids);
 }
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+static int nr_irq_desc = 32;
+
+static int __init parse_nr_irq_desc(char *arg)
+{
+	if (arg)
+		nr_irq_desc = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+
+early_param("nr_irq_desc", parse_nr_irq_desc);
+
 static struct irq_desc *sparse_irqs;
 DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
 
-extern int after_bootmem;
-extern void *__alloc_bootmem_nopanic(unsigned long size,
-			     unsigned long align,
-			     unsigned long goal);
 struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	struct irq_desc *desc, *desc_pri;
 	int i;
 	int count = 0;
+	unsigned long phys;
+	unsigned long total_bytes;
 
 	BUG_ON(irq == -1U);
 
@@ -141,38 +181,34 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	 */
 	printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
 
+	total_bytes = sizeof(struct irq_desc) * nr_irq_desc;
 	if (after_bootmem)
-		desc = kzalloc(sizeof(struct irq_desc)*nr_irq_desc, GFP_ATOMIC);
+		desc = kzalloc(total_bytes, GFP_ATOMIC);
 	else
-		desc = __alloc_bootmem_nopanic(sizeof(struct irq_desc)*nr_irq_desc, PAGE_SIZE, 0);
+		desc = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
 
 	if (!desc)
 		panic("please boot with nr_irq_desc= %d\n", count * 2);
 
+	phys = __pa(desc);
+	printk(KERN_DEBUG "irq_desc ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+
 	for (i = 0; i < nr_irq_desc; i++)
 		init_one_irq_desc(&desc[i]);
 
 	for (i = 1; i < nr_irq_desc; i++)
 		desc[i-1].next = &desc[i];
 
+	/* init kstat_irqs, nr_cpu_ids is ready already */
+	init_kstat_irqs(desc, nr_irq_desc, nr_cpu_ids);
+
 	desc->irq = irq;
 	desc_pri->next = desc;
 
 	return desc;
 }
 #else
-static void __init init_work(void *data)
-{
-	struct dyn_array *da = data;
-	int i;
-	struct  irq_desc *desc;
-
-	desc = *da->name;
 
-	for (i = 0; i < *da->nr; i++)
-		init_one_irq_desc(&desc[i]);
-
-}
 static struct irq_desc *irq_desc;
 DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
 
@@ -315,7 +351,7 @@ unsigned int __do_IRQ(unsigned int irq)
 	struct irqaction *action;
 	unsigned int status;
 
-	kstat_this_cpu.irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 	if (CHECK_IRQ_PER_CPU(desc->status)) {
 		irqreturn_t action_ret;
 
@@ -415,3 +451,10 @@ void early_init_irq_lock_class(void)
 }
 #endif
 
+unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	return desc->kstat_irqs[cpu];
+}
+EXPORT_SYMBOL(kstat_irqs_cpu);
+
diff --git a/kernel/sched.c b/kernel/sched.c
index b9d713781b5b..6f230596bd0c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4048,11 +4048,8 @@ static inline void idle_balance(int cpu, struct rq *rq)
 #endif
 
 DEFINE_PER_CPU(struct kernel_stat, kstat);
-EXPORT_PER_CPU_SYMBOL(kstat);
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-DEFINE_PER_CPU_DYN_ARRAY_ADDR(per_cpu__kstat_irqs, per_cpu__kstat.irqs, sizeof(unsigned int), nr_irqs, sizeof(unsigned long), NULL);
-#endif
+EXPORT_PER_CPU_SYMBOL(kstat);
 
 /*
  * Return p->sum_exec_runtime plus any more ns on the sched_clock
-- 
GitLab


From 9059d8fa4a3a9153da53da890039f7f956cc9d19 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:10 -0700
Subject: [PATCH 401/895] irq: add irq_desc_without_new

add an irq_desc accessor that will not allocate any sparse entry
but returns failure if there's no entry present.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h | 13 +++++++++++++
 kernel/irq/handle.c | 28 ++++++++++++++++++++++++----
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index cbf471aee1ce..c9ffef7c3b44 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -191,10 +191,23 @@ struct irq_desc {
 } ____cacheline_internodealigned_in_smp;
 
 extern struct irq_desc *irq_to_desc(unsigned int irq);
+extern struct irq_desc *__irq_to_desc(unsigned int irq);
+
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+
 #ifndef CONFIG_HAVE_DYN_ARRAY
 /* could be removed if we get rid of all irq_desc reference */
 extern struct irq_desc irq_desc[NR_IRQS];
+#else
+extern struct irq_desc *irq_desc;
 #endif
+
+#else
+
+extern struct irq_desc *sparse_irqs;
+
+#endif
+
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 1f346990f3f8..8e55dbe50afc 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -112,7 +112,6 @@ static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 	}
 }
 
-
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -149,9 +148,27 @@ static int __init parse_nr_irq_desc(char *arg)
 
 early_param("nr_irq_desc", parse_nr_irq_desc);
 
-static struct irq_desc *sparse_irqs;
+struct irq_desc *sparse_irqs;
 DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
 
+struct irq_desc *__irq_to_desc(unsigned int irq)
+{
+	struct irq_desc *desc;
+
+	BUG_ON(irq == -1U);
+
+	desc = &sparse_irqs[0];
+	while (desc) {
+		if (desc->irq == irq)
+			return desc;
+
+		if (desc->irq == -1U)
+			return NULL;
+
+		desc = desc->next;
+	}
+	return NULL;
+}
 struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	struct irq_desc *desc, *desc_pri;
@@ -208,8 +225,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	return desc;
 }
 #else
-
-static struct irq_desc *irq_desc;
+struct irq_desc *irq_desc;
 DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
 
 #endif
@@ -239,6 +255,10 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 
 	return NULL;
 }
+struct irq_desc *__irq_to_desc(unsigned int irq)
+{
+	return irq_to_desc(irq);
+}
 #endif
 
 /*
-- 
GitLab


From 2c6927a38f65b53b62f86158fba29a068c4e8b6a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:11 -0700
Subject: [PATCH 402/895] irq: replace loop with nr_irqs with for_each_irq_desc

There are a handful of loops that go from 0 to nr_irqs and use
get_irq_desc() on them. These would allocate all the irq_desc
entries, regardless of the need for them.

Use the smarter for_each_irq_desc() iterator that will only iterate
over the present ones.

v2: make sure arch without GENERIC_HARDIRQS work too

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c |  6 +++---
 arch/x86/kernel/irq_64.c     |  5 ++---
 arch/x86/kernel/irqinit_64.c | 17 +++++------------
 include/linux/irq.h          |  7 +++++++
 kernel/irq/internals.h       |  4 ++--
 kernel/irq/manage.c          |  2 +-
 kernel/irq/proc.c            | 10 +++++-----
 7 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 51ef7eb75f2e..708be9724daf 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1871,10 +1871,10 @@ unmask:
 
 static void ir_irq_migration(struct work_struct *work)
 {
-	int irq;
+	unsigned int irq;
+	struct irq_desc *desc;
 
-	for (irq = 0; irq < nr_irqs; irq++) {
-		struct irq_desc *desc = irq_to_desc(irq);
+	for_each_irq_desc(irq, desc) {
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 4a0a4eb44dcb..b3cf55e325f5 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -224,17 +224,16 @@ void fixup_irqs(cpumask_t map)
 {
 	unsigned int irq;
 	static int warned;
+	struct irq_desc *desc;
 
-	for (irq = 0; irq < nr_irqs; irq++) {
+	for_each_irq_desc(irq, desc) {
 		cpumask_t mask;
 		int break_affinity = 0;
 		int set_affinity = 1;
-		struct irq_desc *desc;
 
 		if (irq == 2)
 			continue;
 
-		desc = irq_to_desc(irq);
 		/* interrupt's are disabled at this point */
 		spin_lock(&desc->lock);
 
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 0744b49b4d12..cd9f42d028d9 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,25 +142,18 @@ void __init init_ISA_irqs(void)
 	init_bsp_APIC();
 	init_8259A(0);
 
-	for (i = 0; i < nr_irqs; i++) {
+	for (i = 0; i < 16; i++) {
 		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
 		desc->action = NULL;
 		desc->depth = 1;
 
-		if (i < 16) {
-			/*
-			 * 16 old-style INTA-cycle interrupts:
-			 */
-			set_irq_chip_and_handler_name(i, &i8259A_chip,
+		/*
+		 * 16 old-style INTA-cycle interrupts:
+		 */
+		set_irq_chip_and_handler_name(i, &i8259A_chip,
 						      handle_level_irq, "XT");
-		} else {
-			/*
-			 * 'high' PCI IRQs filled in on demand
-			 */
-			desc->chip = &no_irq_chip;
-		}
 	}
 }
 
diff --git a/include/linux/irq.h b/include/linux/irq.h
index c9ffef7c3b44..9de16ca8b8e5 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -202,9 +202,16 @@ extern struct irq_desc irq_desc[NR_IRQS];
 extern struct irq_desc *irq_desc;
 #endif
 
+#ifdef CONFIG_GENERIC_HARDIRQS
+#define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc = &irq_desc[irq])
+#endif
+
 #else
 
 extern struct irq_desc *sparse_irqs;
+#define for_each_irq_desc(irqX, desc)		\
+	for (desc = sparse_irqs, irqX = desc->irq; desc && irqX != -1U; desc = desc->next, irqX = desc ? desc->irq : -1U)
 
 #endif
 
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 422dd00c8bd3..c9767e641980 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -14,11 +14,11 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags);
 
 #ifdef CONFIG_PROC_FS
-extern void register_irq_proc(unsigned int irq);
+extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
 extern void register_handler_proc(unsigned int irq, struct irqaction *action);
 extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
 #else
-static inline void register_irq_proc(unsigned int irq) { }
+static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { }
 static inline void register_handler_proc(unsigned int irq,
 					 struct irqaction *action) { }
 static inline void unregister_handler_proc(unsigned int irq,
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index b5943e9f95aa..5070f55fdc16 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -478,7 +478,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 	spin_unlock_irqrestore(&desc->lock, flags);
 
 	new->irq = irq;
-	register_irq_proc(irq);
+	register_irq_proc(irq, desc);
 	new->dir = NULL;
 	register_handler_proc(irq, new);
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index c2f356c808f6..bc0993d86c8b 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -182,11 +182,10 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
 
 #define MAX_NAMELEN 10
 
-void register_irq_proc(unsigned int irq)
+void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 {
 	char name [MAX_NAMELEN];
 	struct proc_dir_entry *entry;
-	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir)
 		return;
@@ -230,7 +229,8 @@ void register_default_affinity_proc(void)
 
 void init_irq_proc(void)
 {
-	int i;
+	unsigned int irq;
+	struct irq_desc *desc;
 
 	/* create /proc/irq */
 	root_irq_dir = proc_mkdir("irq", NULL);
@@ -242,7 +242,7 @@ void init_irq_proc(void)
 	/*
 	 * Create entries for all existing IRQs.
 	 */
-	for (i = 0; i < nr_irqs; i++)
-		register_irq_proc(i);
+	for_each_irq_desc(irq, desc)
+		register_irq_proc(irq, desc);
 }
 
-- 
GitLab


From c7fb03a475bd80c642c1345d85c7c550f63514b8 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:12 -0700
Subject: [PATCH 403/895] irq, fs/proc: replace loop with nr_irqs for proc/stat

Replace another nr_irqs loop to avoid the allocation of all sparse
irq entries - use for_each_irq_desc instead.

v2: make sure arch without GENERIC_HARDIRQS works too

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/proc_misc.c       | 42 ++++++++++++++++++++++++++-------------
 include/linux/interrupt.h |  5 +++++
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index aa069acf61a0..c3cbabe8b38e 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -30,6 +30,7 @@
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/pagemap.h>
+#include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
@@ -501,17 +502,16 @@ static const struct file_operations proc_vmalloc_operations = {
 
 static int show_stat(struct seq_file *p, void *v)
 {
-	int i;
+	int i, j;
 	unsigned long jif;
 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
 	cputime64_t guest;
 	u64 sum = 0;
 	struct timespec boottime;
-	unsigned int *per_irq_sum;
-
-	per_irq_sum = kzalloc(sizeof(unsigned int)*nr_irqs, GFP_KERNEL);
-	if (!per_irq_sum)
-		return -ENOMEM;
+	unsigned int per_irq_sum;
+#ifdef CONFIG_GENERIC_HARDIRQS
+	struct irq_desc *desc;
+#endif
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
@@ -520,8 +520,6 @@ static int show_stat(struct seq_file *p, void *v)
 	jif = boottime.tv_sec;
 
 	for_each_possible_cpu(i) {
-		int j;
-
 		user = cputime64_add(user, kstat_cpu(i).cpustat.user);
 		nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
 		system = cputime64_add(system, kstat_cpu(i).cpustat.system);
@@ -531,10 +529,12 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-		for (j = 0; j < nr_irqs; j++) {
-			unsigned int temp = kstat_irqs_cpu(j, i);
+		for_each_irq_desc(j, desc)
+		{
+			unsigned int temp;
+
+			temp = kstat_irqs_cpu(j, i);
 			sum += temp;
-			per_irq_sum[j] += temp;
 		}
 		sum += arch_irq_stat_cpu(i);
 	}
@@ -577,8 +577,23 @@ static int show_stat(struct seq_file *p, void *v)
 	}
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
-	for (i = 0; i < nr_irqs; i++)
-		seq_printf(p, " %u", per_irq_sum[i]);
+	/* sum again ? it could be updated? */
+	for_each_irq_desc(j, desc)
+	{
+		per_irq_sum = 0;
+		for_each_possible_cpu(i) {
+			unsigned int temp;
+
+			temp = kstat_irqs_cpu(j, i);
+			per_irq_sum += temp;
+		}
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+		seq_printf(p, " %u:%u", j, per_irq_sum);
+#else
+		seq_printf(p, " %u", per_irq_sum);
+#endif
+	}
 
 	seq_printf(p,
 		"\nctxt %llu\n"
@@ -592,7 +607,6 @@ static int show_stat(struct seq_file *p, void *v)
 		nr_running(),
 		nr_iowait());
 
-	kfree(per_irq_sum);
 	return 0;
 }
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 511803853a5b..d4039a0b23f4 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -17,6 +17,11 @@
 
 extern int nr_irqs;
 
+#ifndef CONFIG_GENERIC_HARDIRQS
+#define for_each_irq_desc(irq, desc)		\
+	for (irq = 0; irq < nr_irqs; irq++)
+#endif
+
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
  * linux/ioport.h to select the interrupt line behaviour.  When
-- 
GitLab


From 46b8214d12c274bd4265aae482ab7ffe69d94818 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:13 -0700
Subject: [PATCH 404/895] x86, ioapic: replace loop with nr_irqs with
 for_each_irq_icfg

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 708be9724daf..60d60061659c 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -129,6 +129,9 @@ static void __init init_work(void *data)
 		cfg[i-1].next = &cfg[i];
 }
 
+#define for_each_irq_cfg(cfg)		\
+	for (cfg = irq_cfgx; cfg && cfg->irq != -1U; cfg = cfg->next)
+
 static struct irq_cfg *irq_cfgx;
 DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
 
@@ -1097,20 +1100,18 @@ void __setup_vector_irq(int cpu)
 	/* Initialize vector_irq on a new cpu */
 	/* This function must be called with vector_lock held */
 	int irq, vector;
+	struct irq_cfg *cfg;
 
 	/* Mark the inuse vectors */
-	for (irq = 0; irq < nr_irqs; ++irq) {
-		struct irq_cfg *cfg = irq_cfg(irq);
-
+	for_each_irq_cfg(cfg) {
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
+		irq = cfg->irq;
 		per_cpu(vector_irq, cpu)[vector] = irq;
 	}
 	/* Mark the free vectors */
 	for (vector = 0; vector < NR_VECTORS; ++vector) {
-		struct irq_cfg *cfg;
-
 		irq = per_cpu(vector_irq, cpu)[vector];
 		if (irq < 0)
 			continue;
@@ -1340,6 +1341,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_01 reg_01;
 	union IO_APIC_reg_02 reg_02;
 	unsigned long flags;
+	struct irq_cfg *cfg;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1408,12 +1410,11 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_cfg *cfg = irq_cfg(i);
+	for_each_irq_cfg(cfg) {
 		struct irq_pin_list *entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
-		printk(KERN_DEBUG "IRQ%d ", i);
+		printk(KERN_DEBUG "IRQ%d ", cfg->irq);
 		for (;;) {
 			printk("-> %d:%d", entry->apic, entry->pin);
 			if (!entry->next)
@@ -2070,6 +2071,7 @@ static inline void init_IO_APIC_traps(void)
 {
 	int irq;
 	struct irq_desc *desc;
+	struct irq_cfg *cfg;
 
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -2082,10 +2084,8 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for (irq = 0; irq < nr_irqs ; irq++) {
-		struct irq_cfg *cfg;
-
-		cfg = irq_cfg(irq);
+	for_each_irq_cfg(cfg) {
+		irq = cfg->irq;
 		if (IO_APIC_IRQ(irq) && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
-- 
GitLab


From 7d94f7ca401dd7f445fda9a971a48aa5427b3e55 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:14 -0700
Subject: [PATCH 405/895] irq: remove >= nr_irqs checking with
 config_have_sparse_irq

remove irq limit checks - nr_irqs is dynamic and we expand anytime.

v2: fix checking about result irq_cfg_without_new, so could use msi again
v3: use irq_desc_without_new to check irq is valid

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c |  9 -------
 arch/x86/kernel/irq_64.c     |  2 +-
 kernel/irq/chip.c            | 49 ++++++++++++++++++++----------------
 kernel/irq/manage.c          | 43 +++++++++++++++++++------------
 4 files changed, 55 insertions(+), 48 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 60d60061659c..1b8cccb5ba25 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -400,7 +400,6 @@ static inline void io_apic_sync(unsigned int apic)
 	struct irq_cfg *cfg;						\
 	struct irq_pin_list *entry;					\
 									\
-	BUG_ON(irq >= nr_irqs);						\
 	cfg = irq_cfg(irq);						\
 	entry = cfg->irq_2_pin;						\
 	for (;;) {							\
@@ -480,7 +479,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
 
-	BUG_ON(irq >= nr_irqs);
 	cfg = irq_cfg(irq);
 	entry = cfg->irq_2_pin;
 	for (;;) {
@@ -549,7 +547,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
 
-	BUG_ON(irq >= nr_irqs);
 	/* first time to refer irq_cfg, so with new */
 	cfg = irq_cfg_alloc(irq);
 	entry = cfg->irq_2_pin;
@@ -841,7 +838,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 				best_guess = irq;
 		}
 	}
-	BUG_ON(best_guess >= nr_irqs);
 	return best_guess;
 }
 
@@ -973,7 +969,6 @@ static int pin_2_irq(int idx, int apic, int pin)
 			irq += nr_ioapic_registers[i++];
 		irq += pin;
 	}
-	BUG_ON(irq >= nr_irqs);
 	return irq;
 }
 
@@ -1008,7 +1003,6 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	int cpu;
 	struct irq_cfg *cfg;
 
-	BUG_ON((unsigned)irq >= nr_irqs);
 	cfg = irq_cfg(irq);
 
 	/* Only try and allocate irqs on cpus that are present */
@@ -1082,7 +1076,6 @@ static void __clear_irq_vector(int irq)
 	cpumask_t mask;
 	int cpu, vector;
 
-	BUG_ON((unsigned)irq >= nr_irqs);
 	cfg = irq_cfg(irq);
 	BUG_ON(!cfg->vector);
 
@@ -1924,8 +1917,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		struct irq_desc *desc;
 		struct irq_cfg *cfg;
 		irq = __get_cpu_var(vector_irq)[vector];
-		if (irq >= nr_irqs)
-			continue;
 
 		desc = irq_to_desc(irq);
 		cfg = irq_cfg(irq);
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index b3cf55e325f5..a3e36336d914 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -202,7 +202,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 	stack_overflow_check(regs);
 #endif
 
-	if (likely(irq < nr_irqs))
+	if (likely(__irq_to_desc(irq)))
 		generic_handle_irq(irq);
 	else {
 		if (!disable_apic)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 2aa3d4b2fce8..a4bb0da9c88c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -27,13 +27,13 @@ void dynamic_irq_init(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = irq_to_desc(irq);
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
 	}
 
 	/* Ensure we don't have left over values from a previous use of this irq */
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status = IRQ_DISABLED;
 	desc->chip = &no_irq_chip;
@@ -60,12 +60,12 @@ void dynamic_irq_cleanup(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
 		return;
 	}
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	if (desc->action) {
 		spin_unlock_irqrestore(&desc->lock, flags);
@@ -92,7 +92,8 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
 		return -EINVAL;
 	}
@@ -121,12 +122,12 @@ int set_irq_type(unsigned int irq, unsigned int type)
 	unsigned long flags;
 	int ret = -ENXIO;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
 		return -ENODEV;
 	}
 
-	desc = irq_to_desc(irq);
 	if (type == IRQ_TYPE_NONE)
 		return 0;
 
@@ -149,13 +150,13 @@ int set_irq_data(unsigned int irq, void *data)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install controller data for IRQ%d\n", irq);
 		return -EINVAL;
 	}
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->handler_data = data;
 	spin_unlock_irqrestore(&desc->lock, flags);
@@ -175,12 +176,13 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install msi data for IRQ%d\n", irq);
 		return -EINVAL;
 	}
-	desc = irq_to_desc(irq);
+
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->msi_desc = entry;
 	if (entry)
@@ -201,8 +203,14 @@ int set_irq_chip_data(unsigned int irq, void *data)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
-	if (irq >= nr_irqs || !desc->chip) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
+		printk(KERN_ERR
+		       "Trying to install chip data for IRQ%d\n", irq);
+		return -EINVAL;
+	}
+
+	if (!desc->chip) {
 		printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
 		return -EINVAL;
 	}
@@ -546,14 +554,13 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install type control for IRQ%d\n", irq);
 		return;
 	}
 
-	desc = irq_to_desc(irq);
-
 	if (!handle)
 		handle = handle_bad_irq;
 	else if (desc->chip == &no_irq_chip) {
@@ -611,14 +618,13 @@ void __init set_irq_noprobe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
 
 		return;
 	}
 
-	desc = irq_to_desc(irq);
-
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status |= IRQ_NOPROBE;
 	spin_unlock_irqrestore(&desc->lock, flags);
@@ -629,14 +635,13 @@ void __init set_irq_probe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
 
 		return;
 	}
 
-	desc = irq_to_desc(irq);
-
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status &= ~IRQ_NOPROBE;
 	spin_unlock_irqrestore(&desc->lock, flags);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5070f55fdc16..c0b4d4df6de2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -31,10 +31,10 @@ cpumask_t irq_default_affinity = CPU_MASK_ALL;
  */
 void synchronize_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc = __irq_to_desc(irq);
 	unsigned int status;
 
-	if (irq >= nr_irqs)
+	if (!desc)
 		return;
 
 	do {
@@ -142,10 +142,11 @@ int irq_select_affinity(unsigned int irq)
  */
 void disable_irq_nosync(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs)
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
@@ -171,9 +172,10 @@ EXPORT_SYMBOL(disable_irq_nosync);
  */
 void disable_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 
-	if (irq >= nr_irqs)
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return;
 
 	disable_irq_nosync(irq);
@@ -213,10 +215,11 @@ static void __enable_irq(struct irq_desc *desc, unsigned int irq)
  */
 void enable_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs)
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
@@ -290,10 +293,14 @@ EXPORT_SYMBOL(set_irq_wake);
  */
 int can_request_irq(unsigned int irq, unsigned long irqflags)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 	struct irqaction *action;
 
-	if (irq >= nr_irqs || desc->status & IRQ_NOREQUEST)
+	desc = __irq_to_desc(irq);
+	if (!desc)
+		return 0;
+
+	if (desc->status & IRQ_NOREQUEST)
 		return 0;
 
 	action = desc->action;
@@ -352,14 +359,15 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
  */
 int setup_irq(unsigned int irq, struct irqaction *new)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 	struct irqaction *old, **p;
 	const char *old_name = NULL;
 	unsigned long flags;
 	int shared = 0;
 	int ret;
 
-	if (irq >= nr_irqs)
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return -EINVAL;
 
 	if (desc->chip == &no_irq_chip)
@@ -518,10 +526,11 @@ void free_irq(unsigned int irq, void *dev_id)
 	unsigned long flags;
 
 	WARN_ON(in_interrupt());
-	if (irq >= nr_irqs)
+
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return;
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	p = &desc->action;
 	for (;;) {
@@ -634,9 +643,11 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	 */
 	if ((irqflags & IRQF_SHARED) && !dev_id)
 		return -EINVAL;
-	if (irq >= nr_irqs)
+
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return -EINVAL;
-	desc = irq_to_desc(irq);
+
 	if (desc->status & IRQ_NOREQUEST)
 		return -EINVAL;
 	if (!handler)
-- 
GitLab


From 46926b67fc663d357a1a8174328998a9e49da0b8 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:15 -0700
Subject: [PATCH 406/895] generic: add irq_desc in function in parameter

So we could remove some duplicated calling to irq_desc

v2: make sure irq_desc in  init/main.c is not used without generic_hardirqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_64.c |  6 ++++--
 include/linux/irq.h      |  9 ++++++---
 init/main.c              |  7 +++++++
 kernel/irq/handle.c      | 30 ++++++++++++++++++++++++++++++
 4 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index a3e36336d914..f58b995b30ee 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -189,6 +189,7 @@ u64 arch_irq_stat(void)
 asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct irq_desc *desc;
 
 	/* high bit used in ret_from_ code  */
 	unsigned vector = ~regs->orig_ax;
@@ -202,8 +203,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 	stack_overflow_check(regs);
 #endif
 
-	if (likely(__irq_to_desc(irq)))
-		generic_handle_irq(irq);
+	desc = __irq_to_desc(irq);
+	if (likely(desc))
+		generic_handle_irq_desc(irq, desc);
 	else {
 		if (!disable_apic)
 			ack_APIC_irq();
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 9de16ca8b8e5..7b59e193a119 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -315,10 +315,8 @@ extern unsigned int __do_IRQ(unsigned int irq);
  * irqchip-style controller then we call the ->handle_irq() handler,
  * and it calls __do_IRQ() if it's attached to an irqtype-style controller.
  */
-static inline void generic_handle_irq(unsigned int irq)
+static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-
 #ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 	desc->handle_irq(irq, desc);
 #else
@@ -329,6 +327,11 @@ static inline void generic_handle_irq(unsigned int irq)
 #endif
 }
 
+static inline void generic_handle_irq(unsigned int irq)
+{
+	generic_handle_irq_desc(irq, irq_to_desc(irq));
+}
+
 /* Handling of unhandled and spurious interrupts: */
 extern void note_interrupt(unsigned int irq, struct irq_desc *desc,
 			   int action_ret);
diff --git a/init/main.c b/init/main.c
index ab97d0877acc..0d2e60144f83 100644
--- a/init/main.c
+++ b/init/main.c
@@ -590,6 +590,13 @@ void pre_alloc_dyn_array(void)
 		if (da->init_work)
 			da->init_work(da);
 	}
+#else
+#ifdef CONFIF_GENERIC_HARDIRQS
+	unsigned int i;
+
+	for (i = 0; i < NR_IRQS; i++)
+		irq_desc[i].irq = i;
+#endif
 #endif
 }
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 8e55dbe50afc..e1d787e9169b 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -197,6 +197,21 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	 *  we run out of pre-allocate ones, allocate more
 	 */
 	printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
+	{
+		/* double check if some one mess up the list */
+		struct irq_desc *desc;
+		int count = 0;
+
+		desc = &sparse_irqs[0];
+		while (desc) {
+			printk(KERN_DEBUG "found irq_desc for irq %d\n", desc->irq);
+			if (desc->next)
+				printk(KERN_DEBUG "found irq_desc for irq %d and next will be irq %d\n", desc->irq, desc->next->irq);
+			desc = desc->next;
+			count++;
+		}
+		printk(KERN_DEBUG "all preallocted %d\n", count);
+	}
 
 	total_bytes = sizeof(struct irq_desc) * nr_irq_desc;
 	if (after_bootmem)
@@ -221,6 +236,21 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 
 	desc->irq = irq;
 	desc_pri->next = desc;
+	{
+		/* double check if some one mess up the list */
+		struct irq_desc *desc;
+		int count = 0;
+
+		desc = &sparse_irqs[0];
+		while (desc) {
+			printk(KERN_DEBUG "1 found irq_desc for irq %d\n", desc->irq);
+			if (desc->next)
+				printk(KERN_DEBUG "1 found irq_desc for irq %d and next will be irq %d\n", desc->irq, desc->next->irq);
+			desc = desc->next;
+			count++;
+		}
+		printk(KERN_DEBUG "1 all preallocted %d\n", count);
+	}
 
 	return desc;
 }
-- 
GitLab


From 1d5f6b36c4736af1dac396d6267eb53dcc8c0021 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:16 -0700
Subject: [PATCH 407/895] x86: check with without_new in show_interrupts

so we don't get new one that we don't need it.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_64.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index f58b995b30ee..f337f87c1e16 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -83,7 +83,10 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i < nr_irqs) {
 		unsigned any_count = 0;
-		struct irq_desc *desc = irq_to_desc(i);
+		struct irq_desc *desc = __irq_to_desc(i);
+
+		if (!desc)
+			return 0;
 
 		spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
-- 
GitLab


From cb5bc83225a86ca53bbb889ed8439e4fd6cf44ac Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:17 -0700
Subject: [PATCH 408/895] x86_64: rename irq_desc/irq_desc_alloc

change names:

          irq_desc() ==> irq_desc_alloc
	__irq_desc() ==> irq_desc

Also split a few of the uses in lowlevel x86 code.

v2: need to check if desc is null in smp_irq_move_cleanup

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 10 +++++++++-
 arch/x86/kernel/irq_64.c     |  4 ++--
 arch/x86/kernel/irqinit_64.c |  3 ++-
 include/linux/irq.h          |  2 +-
 kernel/irq/chip.c            | 21 +++++++++++----------
 kernel/irq/handle.c          | 23 +++++------------------
 kernel/irq/manage.c          | 16 ++++++++--------
 7 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 1b8cccb5ba25..a054db9ef190 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1124,7 +1124,12 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 {
 	struct irq_desc *desc;
 
-	desc = irq_to_desc(irq);
+	/* first time to use this irq_desc */
+	if (irq < 16)
+		desc = irq_to_desc(irq);
+	else
+		desc = irq_to_desc_alloc(irq);
+
 	if (trigger)
 		desc->status |= IRQ_LEVEL;
 	else
@@ -1919,6 +1924,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		irq = __get_cpu_var(vector_irq)[vector];
 
 		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
 		cfg = irq_cfg(irq);
 		spin_lock(&desc->lock);
 		if (!cfg->move_cleanup_count)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index f337f87c1e16..5d5976e0311a 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -83,7 +83,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i < nr_irqs) {
 		unsigned any_count = 0;
-		struct irq_desc *desc = __irq_to_desc(i);
+		struct irq_desc *desc = irq_to_desc(i);
 
 		if (!desc)
 			return 0;
@@ -206,7 +206,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 	stack_overflow_check(regs);
 #endif
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (likely(desc))
 		generic_handle_irq_desc(irq, desc);
 	else {
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index cd9f42d028d9..d17fbc26d96f 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -143,7 +143,8 @@ void __init init_ISA_irqs(void)
 	init_8259A(0);
 
 	for (i = 0; i < 16; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
+		/* first time call this irq_desc */
+		struct irq_desc *desc = irq_to_desc_alloc(i);
 
 		desc->status = IRQ_DISABLED;
 		desc->action = NULL;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 7b59e193a119..5fe1b01c11fe 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -191,7 +191,7 @@ struct irq_desc {
 } ____cacheline_internodealigned_in_smp;
 
 extern struct irq_desc *irq_to_desc(unsigned int irq);
-extern struct irq_desc *__irq_to_desc(unsigned int irq);
+extern struct irq_desc *irq_to_desc_alloc(unsigned int irq);
 
 #ifndef CONFIG_HAVE_SPARSE_IRQ
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index a4bb0da9c88c..9fc5e69213de 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -27,7 +27,8 @@ void dynamic_irq_init(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
+	/* first time to use this irq_desc */
+	desc = irq_to_desc_alloc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
@@ -60,7 +61,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
 		return;
@@ -92,7 +93,7 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
 		return -EINVAL;
@@ -122,7 +123,7 @@ int set_irq_type(unsigned int irq, unsigned int type)
 	unsigned long flags;
 	int ret = -ENXIO;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
 		return -ENODEV;
@@ -150,7 +151,7 @@ int set_irq_data(unsigned int irq, void *data)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install controller data for IRQ%d\n", irq);
@@ -176,7 +177,7 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install msi data for IRQ%d\n", irq);
@@ -203,7 +204,7 @@ int set_irq_chip_data(unsigned int irq, void *data)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install chip data for IRQ%d\n", irq);
@@ -554,7 +555,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install type control for IRQ%d\n", irq);
@@ -618,7 +619,7 @@ void __init set_irq_noprobe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
 
@@ -635,7 +636,7 @@ void __init set_irq_probe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e1d787e9169b..d44e3515eae1 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -151,7 +151,7 @@ early_param("nr_irq_desc", parse_nr_irq_desc);
 struct irq_desc *sparse_irqs;
 DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
 
-struct irq_desc *__irq_to_desc(unsigned int irq)
+struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	struct irq_desc *desc;
 
@@ -169,7 +169,7 @@ struct irq_desc *__irq_to_desc(unsigned int irq)
 	}
 	return NULL;
 }
-struct irq_desc *irq_to_desc(unsigned int irq)
+struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 {
 	struct irq_desc *desc, *desc_pri;
 	int i;
@@ -186,6 +186,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 
 		if (desc->irq == -1U) {
 			desc->irq = irq;
+			printk(KERN_DEBUG "found new irq_desc for irq %d\n", desc->irq);
 			return desc;
 		}
 		desc_pri = desc;
@@ -236,21 +237,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 
 	desc->irq = irq;
 	desc_pri->next = desc;
-	{
-		/* double check if some one mess up the list */
-		struct irq_desc *desc;
-		int count = 0;
-
-		desc = &sparse_irqs[0];
-		while (desc) {
-			printk(KERN_DEBUG "1 found irq_desc for irq %d\n", desc->irq);
-			if (desc->next)
-				printk(KERN_DEBUG "1 found irq_desc for irq %d and next will be irq %d\n", desc->irq, desc->next->irq);
-			desc = desc->next;
-			count++;
-		}
-		printk(KERN_DEBUG "1 all preallocted %d\n", count);
-	}
+	printk(KERN_DEBUG "1 found new irq_desc for irq %d and pri will be irq %d\n", desc->irq, desc_pri->irq);
 
 	return desc;
 }
@@ -285,7 +272,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 
 	return NULL;
 }
-struct irq_desc *__irq_to_desc(unsigned int irq)
+struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 {
 	return irq_to_desc(irq);
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c0b4d4df6de2..6df49218632a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -31,7 +31,7 @@ cpumask_t irq_default_affinity = CPU_MASK_ALL;
  */
 void synchronize_irq(unsigned int irq)
 {
-	struct irq_desc *desc = __irq_to_desc(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned int status;
 
 	if (!desc)
@@ -145,7 +145,7 @@ void disable_irq_nosync(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -174,7 +174,7 @@ void disable_irq(unsigned int irq)
 {
 	struct irq_desc *desc;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -218,7 +218,7 @@ void enable_irq(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -296,7 +296,7 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
 	struct irq_desc *desc;
 	struct irqaction *action;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return 0;
 
@@ -366,7 +366,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 	int shared = 0;
 	int ret;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return -EINVAL;
 
@@ -527,7 +527,7 @@ void free_irq(unsigned int irq, void *dev_id)
 
 	WARN_ON(in_interrupt());
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -644,7 +644,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	if ((irqflags & IRQF_SHARED) && !dev_id)
 		return -EINVAL;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return -EINVAL;
 
-- 
GitLab


From 67fb283e148e9bd761f73691d3173b6eab9ba8db Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:18 -0700
Subject: [PATCH 409/895] irq: separate sparse_irqs from sparse_irqs_free

so later don't need compare with -1U

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h |   2 +-
 kernel/irq/handle.c | 115 +++++++++++++++++++++++---------------------
 2 files changed, 62 insertions(+), 55 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 5fe1b01c11fe..d5749852ee69 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -211,7 +211,7 @@ extern struct irq_desc *irq_desc;
 
 extern struct irq_desc *sparse_irqs;
 #define for_each_irq_desc(irqX, desc)		\
-	for (desc = sparse_irqs, irqX = desc->irq; desc && irqX != -1U; desc = desc->next, irqX = desc ? desc->irq : -1U)
+	for (desc = sparse_irqs, irqX = desc->irq; desc; desc = desc->next, irqX = desc ? desc->irq : -1U)
 
 #endif
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d44e3515eae1..6d174390f3a0 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -112,6 +112,11 @@ static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 	}
 }
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+static struct irq_desc *sparse_irqs_free;
+struct irq_desc *sparse_irqs;
+#endif
+
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -127,13 +132,16 @@ static void __init init_work(void *data)
 #endif
 	}
 
+	/* init kstat_irqs, nr_cpu_ids is ready already */
+	init_kstat_irqs(desc, *da->nr, nr_cpu_ids);
+
 #ifdef CONFIG_HAVE_SPARSE_IRQ
 	for (i = 1; i < *da->nr; i++)
 		desc[i-1].next = &desc[i];
-#endif
 
-	/* init kstat_irqs, nr_cpu_ids is ready already */
-	init_kstat_irqs(desc, *da->nr, nr_cpu_ids);
+	sparse_irqs_free = sparse_irqs;
+	sparse_irqs = NULL;
+#endif
 }
 
 #ifdef CONFIG_HAVE_SPARSE_IRQ
@@ -148,23 +156,17 @@ static int __init parse_nr_irq_desc(char *arg)
 
 early_param("nr_irq_desc", parse_nr_irq_desc);
 
-struct irq_desc *sparse_irqs;
 DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
 
 struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	struct irq_desc *desc;
 
-	BUG_ON(irq == -1U);
-
-	desc = &sparse_irqs[0];
+	desc = sparse_irqs;
 	while (desc) {
 		if (desc->irq == irq)
 			return desc;
 
-		if (desc->irq == -1U)
-			return NULL;
-
 		desc = desc->next;
 	}
 	return NULL;
@@ -174,21 +176,12 @@ struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 	struct irq_desc *desc, *desc_pri;
 	int i;
 	int count = 0;
-	unsigned long phys;
-	unsigned long total_bytes;
 
-	BUG_ON(irq == -1U);
-
-	desc_pri = desc = &sparse_irqs[0];
+	desc_pri = desc = sparse_irqs;
 	while (desc) {
 		if (desc->irq == irq)
 			return desc;
 
-		if (desc->irq == -1U) {
-			desc->irq = irq;
-			printk(KERN_DEBUG "found new irq_desc for irq %d\n", desc->irq);
-			return desc;
-		}
 		desc_pri = desc;
 		desc = desc->next;
 		count++;
@@ -197,48 +190,62 @@ struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 	/*
 	 *  we run out of pre-allocate ones, allocate more
 	 */
-	printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
-	{
-		/* double check if some one mess up the list */
-		struct irq_desc *desc;
-		int count = 0;
-
-		desc = &sparse_irqs[0];
-		while (desc) {
-			printk(KERN_DEBUG "found irq_desc for irq %d\n", desc->irq);
-			if (desc->next)
-				printk(KERN_DEBUG "found irq_desc for irq %d and next will be irq %d\n", desc->irq, desc->next->irq);
-			desc = desc->next;
-			count++;
-		}
-		printk(KERN_DEBUG "all preallocted %d\n", count);
-	}
+	if (!sparse_irqs_free) {
+		unsigned long phys;
+		unsigned long total_bytes;
 
-	total_bytes = sizeof(struct irq_desc) * nr_irq_desc;
-	if (after_bootmem)
-		desc = kzalloc(total_bytes, GFP_ATOMIC);
-	else
-		desc = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+		printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
 
-	if (!desc)
-		panic("please boot with nr_irq_desc= %d\n", count * 2);
+		total_bytes = sizeof(struct irq_desc) * nr_irq_desc;
+		if (after_bootmem)
+			desc = kzalloc(total_bytes, GFP_ATOMIC);
+		else
+			desc = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
 
-	phys = __pa(desc);
-	printk(KERN_DEBUG "irq_desc ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+		if (!desc)
+			panic("please boot with nr_irq_desc= %d\n", count * 2);
 
-	for (i = 0; i < nr_irq_desc; i++)
-		init_one_irq_desc(&desc[i]);
+		phys = __pa(desc);
+		printk(KERN_DEBUG "irq_desc ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
 
-	for (i = 1; i < nr_irq_desc; i++)
-		desc[i-1].next = &desc[i];
+		for (i = 0; i < nr_irq_desc; i++)
+			init_one_irq_desc(&desc[i]);
 
-	/* init kstat_irqs, nr_cpu_ids is ready already */
-	init_kstat_irqs(desc, nr_irq_desc, nr_cpu_ids);
+		for (i = 1; i < nr_irq_desc; i++)
+			desc[i-1].next = &desc[i];
 
-	desc->irq = irq;
-	desc_pri->next = desc;
-	printk(KERN_DEBUG "1 found new irq_desc for irq %d and pri will be irq %d\n", desc->irq, desc_pri->irq);
+		/* init kstat_irqs, nr_cpu_ids is ready already */
+		init_kstat_irqs(desc, nr_irq_desc, nr_cpu_ids);
 
+		sparse_irqs_free = desc;
+	}
+
+	desc = sparse_irqs_free;
+	sparse_irqs_free = sparse_irqs_free->next;
+	desc->next = NULL;
+	if (desc_pri)
+		desc_pri->next = desc;
+	else
+		sparse_irqs = desc;
+	desc->irq = irq;
+	printk(KERN_DEBUG "found new irq_desc for irq %d\n", desc->irq);
+#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
+	{
+		/* dump the results */
+		struct irq_desc *desc;
+		unsigned long phys;
+		unsigned long bytes = sizeof(struct irq_desc);
+		unsigned int irqx;
+
+		printk(KERN_DEBUG "=========================== %d\n", irq);
+		printk(KERN_DEBUG "irq_desc dump after get that for %d\n", irq);
+		for_each_irq_desc(irqx, desc) {
+			phys = __pa(desc);
+			printk(KERN_DEBUG "irq_desc %d ==> [%#lx - %#lx]\n", irqx, phys, phys + bytes);
+		}
+		printk(KERN_DEBUG "===========================\n");
+	}
+#endif
 	return desc;
 }
 #else
-- 
GitLab


From a2f9f43858db64cb8b45c4f6746d7a52b80d4dcb Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:19 -0700
Subject: [PATCH 410/895] x86_64: separate irq_cfgx from irq_cfgx_free

so later don't need to compare with -1U

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 92 +++++++++++++++++++++++-------------
 1 file changed, 59 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index a054db9ef190..8ab7ae01773f 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -111,44 +111,44 @@ static void init_one_irq_cfg(struct irq_cfg *cfg)
 	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
 }
 
+static struct irq_cfg *irq_cfgx;
+static struct irq_cfg *irq_cfgx_free;
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
 	struct irq_cfg *cfg;
+	int legacy_count;
 	int i;
 
 	cfg = *da->name;
 
 	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
 
-	i = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
-	for (; i < *da->nr; i++)
+	legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+	for (i = legacy_count; i < *da->nr; i++)
 		init_one_irq_cfg(&cfg[i]);
 
 	for (i = 1; i < *da->nr; i++)
 		cfg[i-1].next = &cfg[i];
+
+	irq_cfgx_free = &irq_cfgx[legacy_count];
+	irq_cfgx[legacy_count - 1].next = NULL;
 }
 
 #define for_each_irq_cfg(cfg)		\
-	for (cfg = irq_cfgx; cfg && cfg->irq != -1U; cfg = cfg->next)
+	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
 
-static struct irq_cfg *irq_cfgx;
 DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
 
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	struct irq_cfg *cfg;
 
-	BUG_ON(irq == -1U);
-
-	cfg = &irq_cfgx[0];
+	cfg = irq_cfgx;
 	while (cfg) {
 		if (cfg->irq == irq)
 			return cfg;
 
-		if (cfg->irq == -1U)
-			return NULL;
-
 		cfg = cfg->next;
 	}
 
@@ -161,44 +161,70 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 	int i;
 	int count = 0;
 
-	BUG_ON(irq == -1U);
-
-	cfg_pri = cfg = &irq_cfgx[0];
+	cfg_pri = cfg = irq_cfgx;
 	while (cfg) {
 		if (cfg->irq == irq)
 			return cfg;
 
-		if (cfg->irq == -1U) {
-			cfg->irq = irq;
-			return cfg;
-		}
 		cfg_pri = cfg;
 		cfg = cfg->next;
 		count++;
 	}
 
-	/*
-	 *  we run out of pre-allocate ones, allocate more
-	 */
-	printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
+	if (!irq_cfgx_free) {
+		unsigned long phys;
+		unsigned long total_bytes;
+		/*
+		 *  we run out of pre-allocate ones, allocate more
+		 */
+		printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
 
-	if (after_bootmem)
-		cfg = kzalloc(sizeof(struct irq_cfg)*nr_irq_cfg, GFP_ATOMIC);
-	else
-		cfg = __alloc_bootmem_nopanic(sizeof(struct irq_cfg)*nr_irq_cfg, PAGE_SIZE, 0);
+		total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
+		if (after_bootmem)
+			cfg = kzalloc(total_bytes, GFP_ATOMIC);
+		else
+			cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
 
-	if (!cfg)
-		panic("please boot with nr_irq_cfg= %d\n", count * 2);
+		if (!cfg)
+			panic("please boot with nr_irq_cfg= %d\n", count * 2);
 
-	for (i = 0; i < nr_irq_cfg; i++)
-		init_one_irq_cfg(&cfg[i]);
+		phys = __pa(cfg);
+		printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
 
-	for (i = 1; i < nr_irq_cfg; i++)
-		cfg[i-1].next = &cfg[i];
+		for (i = 0; i < nr_irq_cfg; i++)
+			init_one_irq_cfg(&cfg[i]);
 
-	cfg->irq = irq;
-	cfg_pri->next = cfg;
+		for (i = 1; i < nr_irq_cfg; i++)
+			cfg[i-1].next = &cfg[i];
+
+		irq_cfgx_free = cfg;
+	}
 
+	cfg = irq_cfgx_free;
+	irq_cfgx_free = irq_cfgx_free->next;
+	cfg->next = NULL;
+	if (cfg_pri)
+		cfg_pri->next = cfg;
+	else
+		irq_cfgx = cfg;
+	cfg->irq = irq;
+	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
+#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
+	{
+		/* dump the results */
+		struct irq_cfg *cfg;
+		unsigned long phys;
+		unsigned long bytes = sizeof(struct irq_cfg);
+
+		printk(KERN_DEBUG "=========================== %d\n", irq);
+		printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
+		for_each_irq_cfg(cfg) {
+			phys = __pa(cfg);
+			printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
+		}
+		printk(KERN_DEBUG "===========================\n");
+	}
+#endif
 	return cfg;
 }
 
-- 
GitLab


From 52b17329d6d0a4824b89206803a430915031ff23 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:20 -0700
Subject: [PATCH 411/895] x86_64: make /proc/interrupts work with dyn irq_desc

loop with irq_desc list

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_64.c | 31 ++++++++++++++++++++++++-------
 fs/proc/proc_misc.c      | 28 ++++++++++++++++++++++++----
 2 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 5d5976e0311a..7bd841a9c640 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -70,9 +70,27 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 
 int show_interrupts(struct seq_file *p, void *v)
 {
-	int i = *(loff_t *) v, j;
+	int i, j;
 	struct irqaction * action;
 	unsigned long flags;
+	unsigned int entries;
+	struct irq_desc *desc;
+	int tail = 0;
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	desc = (struct irq_desc *)v;
+	entries = -1U;
+	i = desc->irq;
+	if (!desc->next)
+		tail = 1;
+#else
+	entries = nr_irqs - 1;
+	i = *(loff_t *) v;
+	if (i == nr_irqs)
+		tail = 1;
+	else
+		desc = irq_to_desc(i);
+#endif
 
 	if (i == 0) {
 		seq_printf(p, "           ");
@@ -81,12 +99,8 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i < nr_irqs) {
+	if (i <= entries) {
 		unsigned any_count = 0;
-		struct irq_desc *desc = irq_to_desc(i);
-
-		if (!desc)
-			return 0;
 
 		spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
@@ -116,7 +130,9 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 skip:
 		spin_unlock_irqrestore(&desc->lock, flags);
-	} else if (i == nr_irqs) {
+	}
+
+	if (tail) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
@@ -155,6 +171,7 @@ skip:
 		seq_printf(p, "  Spurious interrupts\n");
 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
 	}
+
 	return 0;
 }
 
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c3cbabe8b38e..72dd739a7f8a 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -645,15 +645,36 @@ static const struct file_operations proc_stat_operations = {
  */
 static void *int_seq_start(struct seq_file *f, loff_t *pos)
 {
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	struct irq_desc *desc;
+	int irq;
+	int count = *pos;
+
+	for_each_irq_desc(irq, desc) {
+		if (count-- == 0)
+			return desc;
+	}
+
+	return NULL;
+#else
 	return (*pos <= nr_irqs) ? pos : NULL;
+#endif
 }
 
+
 static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	struct irq_desc *desc;
+
+	desc = ((struct irq_desc *)v)->next;
 	(*pos)++;
-	if (*pos > nr_irqs)
-		return NULL;
-	return pos;
+
+	return desc;
+#else
+	(*pos)++;
+	return (*pos <= nr_irqs) ? pos : NULL;
+#endif
 }
 
 static void int_seq_stop(struct seq_file *f, void *v)
@@ -661,7 +682,6 @@ static void int_seq_stop(struct seq_file *f, void *v)
 	/* Nothing to do */
 }
 
-
 static const struct seq_operations int_seq_ops = {
 	.start = int_seq_start,
 	.next  = int_seq_next,
-- 
GitLab


From e420dfb40c453a9760b86c7f338052bdb4dfa755 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:21 -0700
Subject: [PATCH 412/895] x86: put irq_2_iommu pointer into irq_desc

when CONFIG_HAVE_SPARSE_IRQ
preallocate some irq_2_iommu entries, and use get_one_free_irq_2_iomm to
get new one and link to irq_desc if needed.

else will use dyn_array or static array.

v2: <= nr_irqs fix

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/intr_remapping.c | 213 +++++++++++++++++++++++++++--------
 include/linux/irq.h          |   4 +
 2 files changed, 169 insertions(+), 48 deletions(-)

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 6961be807684..23372c811159 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -19,41 +19,136 @@ struct irq_2_iommu {
 	u8  irte_mask;
 };
 
-#ifdef CONFIG_HAVE_DYNA_ARRAY
-static struct irq_2_iommu *irq_2_iommu;
-DEFINE_DYN_ARRAY(irq_2_iommu, sizeof(struct irq_2_iommu), nr_irqs, PAGE_SIZE, NULL);
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+static struct irq_2_iommu *irq_2_iommuX;
+/* fill one page ? */
+static int nr_irq_2_iommu = 0x100;
+static int irq_2_iommu_index;
+DEFINE_DYN_ARRAY(irq_2_iommuX, sizeof(struct irq_2_iommu), nr_irq_2_iommu, PAGE_SIZE, NULL);
+
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+				     unsigned long align,
+				     unsigned long goal);
+
+static struct irq_2_iommu *get_one_free_irq_2_iommu(int not_used)
+{
+	struct irq_2_iommu *iommu;
+	unsigned long total_bytes;
+
+	if (irq_2_iommu_index >= nr_irq_2_iommu) {
+		/*
+		 *  we run out of pre-allocate ones, allocate more
+		 */
+		printk(KERN_DEBUG "try to get more irq_2_iommu %d\n", nr_irq_2_iommu);
+
+		total_bytes = sizeof(struct irq_2_iommu)*nr_irq_2_iommu;
+
+		if (after_bootmem)
+			iommu = kzalloc(total_bytes, GFP_ATOMIC);
+		else
+			iommu = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+
+		if (!iommu)
+			panic("can not get more irq_2_iommu\n");
+
+		irq_2_iommuX = iommu;
+		irq_2_iommu_index = 0;
+	}
+
+	iommu = &irq_2_iommuX[irq_2_iommu_index];
+	irq_2_iommu_index++;
+	return iommu;
+}
+
+static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	BUG_ON(!desc);
+
+	return desc->irq_2_iommu;
+}
+
+static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
+{
+	struct irq_desc *desc;
+	struct irq_2_iommu *irq_iommu;
+
+	desc = irq_to_desc(irq);
+
+	BUG_ON(!desc);
+
+	irq_iommu = desc->irq_2_iommu;
+
+	if (!irq_iommu)
+		desc->irq_2_iommu = get_one_free_irq_2_iommu(irq);
+
+	return desc->irq_2_iommu;
+}
+
+#else /* !CONFIG_HAVE_SPARSE_IRQ */
+
+#ifdef CONFIG_HAVE_DYN_ARRAY
+static struct irq_2_iommu *irq_2_iommuX;
+DEFINE_DYN_ARRAY(irq_2_iommuX, sizeof(struct irq_2_iommu), nr_irqs, PAGE_SIZE, NULL);
 #else
-static struct irq_2_iommu irq_2_iommu[NR_IRQS];
+static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
+#endif
+
+static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
+{
+	if (irq < nr_irqs)
+		return &irq_2_iommuX[irq];
+
+	return NULL;
+}
+static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
+{
+	return irq_2_iommu(irq);
+}
 #endif
 
 static DEFINE_SPINLOCK(irq_2_ir_lock);
 
-int irq_remapped(int irq)
+static struct irq_2_iommu *valid_irq_2_iommu(unsigned int irq)
 {
-	if (irq > nr_irqs)
-		return 0;
+	struct irq_2_iommu *irq_iommu;
+
+	irq_iommu = irq_2_iommu(irq);
 
-	if (!irq_2_iommu[irq].iommu)
-		return 0;
+	if (!irq_iommu)
+		return NULL;
 
-	return 1;
+	if (!irq_iommu->iommu)
+		return NULL;
+
+	return irq_iommu;
+}
+
+int irq_remapped(int irq)
+{
+	return valid_irq_2_iommu(irq) != NULL;
 }
 
 int get_irte(int irq, struct irte *entry)
 {
 	int index;
+	struct irq_2_iommu *irq_iommu;
 
-	if (!entry || irq > nr_irqs)
+	if (!entry)
 		return -1;
 
 	spin_lock(&irq_2_ir_lock);
-	if (!irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
-	*entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
+	*entry = *(irq_iommu->iommu->ir_table->base + index);
 
 	spin_unlock(&irq_2_ir_lock);
 	return 0;
@@ -62,6 +157,7 @@ int get_irte(int irq, struct irte *entry)
 int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 {
 	struct ir_table *table = iommu->ir_table;
+	struct irq_2_iommu *irq_iommu;
 	u16 index, start_index;
 	unsigned int mask = 0;
 	int i;
@@ -69,6 +165,12 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	if (!count)
 		return -1;
 
+#ifndef	CONFIG_HAVE_SPARSE_IRQ
+	/* protect irq_2_iommu_alloc later */
+	if (irq >= nr_irqs)
+		return -1;
+#endif
+
 	/*
 	 * start the IRTE search from index 0.
 	 */
@@ -108,10 +210,11 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	for (i = index; i < index + count; i++)
 		table->base[i].present = 1;
 
-	irq_2_iommu[irq].iommu = iommu;
-	irq_2_iommu[irq].irte_index =  index;
-	irq_2_iommu[irq].sub_handle = 0;
-	irq_2_iommu[irq].irte_mask = mask;
+	irq_iommu = irq_2_iommu_alloc(irq);
+	irq_iommu->iommu = iommu;
+	irq_iommu->irte_index =  index;
+	irq_iommu->sub_handle = 0;
+	irq_iommu->irte_mask = mask;
 
 	spin_unlock(&irq_2_ir_lock);
 
@@ -132,31 +235,36 @@ static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
 int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 {
 	int index;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	*sub_handle = irq_2_iommu[irq].sub_handle;
-	index = irq_2_iommu[irq].irte_index;
+	*sub_handle = irq_iommu->sub_handle;
+	index = irq_iommu->irte_index;
 	spin_unlock(&irq_2_ir_lock);
 	return index;
 }
 
 int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
+	struct irq_2_iommu *irq_iommu;
+
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	irq_2_iommu[irq].iommu = iommu;
-	irq_2_iommu[irq].irte_index = index;
-	irq_2_iommu[irq].sub_handle = subhandle;
-	irq_2_iommu[irq].irte_mask = 0;
+	irq_iommu->iommu = iommu;
+	irq_iommu->irte_index = index;
+	irq_iommu->sub_handle = subhandle;
+	irq_iommu->irte_mask = 0;
 
 	spin_unlock(&irq_2_ir_lock);
 
@@ -165,16 +273,19 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 
 int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
 {
+	struct irq_2_iommu *irq_iommu;
+
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	irq_2_iommu[irq].iommu = NULL;
-	irq_2_iommu[irq].irte_index = 0;
-	irq_2_iommu[irq].sub_handle = 0;
-	irq_2_iommu[irq].irte_mask = 0;
+	irq_iommu->iommu = NULL;
+	irq_iommu->irte_index = 0;
+	irq_iommu->sub_handle = 0;
+	irq_2_iommu(irq)->irte_mask = 0;
 
 	spin_unlock(&irq_2_ir_lock);
 
@@ -186,16 +297,18 @@ int modify_irte(int irq, struct irte *irte_modified)
 	int index;
 	struct irte *irte;
 	struct intel_iommu *iommu;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	iommu = irq_2_iommu[irq].iommu;
+	iommu = irq_iommu->iommu;
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	irte = &iommu->ir_table->base[index];
 
 	set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
@@ -211,18 +324,20 @@ int flush_irte(int irq)
 {
 	int index;
 	struct intel_iommu *iommu;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	iommu = irq_2_iommu[irq].iommu;
+	iommu = irq_iommu->iommu;
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 
-	qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+	qi_flush_iec(iommu, index, irq_iommu->irte_mask);
 	spin_unlock(&irq_2_ir_lock);
 
 	return 0;
@@ -254,28 +369,30 @@ int free_irte(int irq)
 	int index, i;
 	struct irte *irte;
 	struct intel_iommu *iommu;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	iommu = irq_2_iommu[irq].iommu;
+	iommu = irq_iommu->iommu;
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	irte = &iommu->ir_table->base[index];
 
-	if (!irq_2_iommu[irq].sub_handle) {
-		for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
+	if (!irq_iommu->sub_handle) {
+		for (i = 0; i < (1 << irq_iommu->irte_mask); i++)
 			set_64bit((unsigned long *)irte, 0);
-		qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+		qi_flush_iec(iommu, index, irq_iommu->irte_mask);
 	}
 
-	irq_2_iommu[irq].iommu = NULL;
-	irq_2_iommu[irq].irte_index = 0;
-	irq_2_iommu[irq].sub_handle = 0;
-	irq_2_iommu[irq].irte_mask = 0;
+	irq_iommu->iommu = NULL;
+	irq_iommu->irte_index = 0;
+	irq_iommu->sub_handle = 0;
+	irq_iommu->irte_mask = 0;
 
 	spin_unlock(&irq_2_ir_lock);
 
diff --git a/include/linux/irq.h b/include/linux/irq.h
index d5749852ee69..788d5a35a580 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -128,6 +128,7 @@ struct irq_chip {
 };
 
 struct timer_rand_state;
+struct irq_2_iommu;
 /**
  * struct irq_desc - interrupt descriptor
  *
@@ -162,6 +163,9 @@ struct irq_desc {
 	unsigned int            *kstat_irqs;
 #else
 	unsigned int            kstat_irqs[NR_CPUS];
+#endif
+#if defined(CONFIG_INTR_REMAP) && defined(CONFIG_HAVE_SPARSE_IRQ)
+       struct irq_2_iommu      *irq_2_iommu;
 #endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
-- 
GitLab


From 6d50bc26836e16a9589e0b128d527c29e30d722a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:22 -0700
Subject: [PATCH 413/895] x86: use 28 bits irq NR for pci msi/msix and ht

also print out irq no in /proc/interrups and /proc/stat in hex, so could
tell bus/dev/func.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 64 ++++++++++++++++++++++++++++--------
 arch/x86/kernel/irq_64.c     |  2 +-
 drivers/pci/htirq.c          | 22 +++++++++++--
 fs/proc/proc_misc.c          |  2 +-
 include/linux/irq.h          |  1 +
 5 files changed, 73 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 8ab7ae01773f..b0d4abc55a11 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -2520,17 +2520,21 @@ device_initcall(ioapic_init_sysfs);
 /*
  * Dynamic irq allocate and deallocation
  */
-int create_irq(void)
+unsigned int create_irq_nr(unsigned int irq_want)
 {
 	/* Allocate an unused irq */
-	int irq;
-	int new;
+	unsigned int irq;
+	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new;
 
-	irq = -ENOSPC;
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+	irq_want = nr_irqs - 1;
+#endif
+
+	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (nr_irqs - 1); new >= 0; new--) {
+	for (new = irq_want; new > 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
 		cfg_new = irq_cfg(new);
@@ -2545,12 +2549,24 @@ int create_irq(void)
 	}
 	spin_unlock_irqrestore(&vector_lock, flags);
 
-	if (irq >= 0) {
+	if (irq > 0) {
 		dynamic_irq_init(irq);
 	}
 	return irq;
 }
 
+int create_irq(void)
+{
+	int irq;
+
+	irq = create_irq_nr(nr_irqs - 1);
+
+	if (irq == 0)
+		irq = -1;
+
+	return irq;
+}
+
 void destroy_irq(unsigned int irq)
 {
 	unsigned long flags;
@@ -2803,13 +2819,29 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 	return 0;
 }
 
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+	unsigned int irq;
+
+	irq = dev->bus->number;
+	irq <<= 8;
+	irq |= dev->devfn;
+	irq <<= 12;
+
+	return irq;
+}
+
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
-	int irq, ret;
+	unsigned int irq;
+	int ret;
+	unsigned int irq_want;
 
-	irq = create_irq();
-	if (irq < 0)
-		return irq;
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+
+	irq = create_irq_nr(irq_want);
+	if (irq == 0)
+		return -1;
 
 #ifdef CONFIG_INTR_REMAP
 	if (!intr_remapping_enabled)
@@ -2836,18 +2868,22 @@ error:
 
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
-	int irq, ret, sub_handle;
+	unsigned int irq;
+	int ret, sub_handle;
 	struct msi_desc *desc;
+	unsigned int irq_want;
+
 #ifdef CONFIG_INTR_REMAP
 	struct intel_iommu *iommu = 0;
 	int index = 0;
 #endif
 
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
 	sub_handle = 0;
 	list_for_each_entry(desc, &dev->msi_list, list) {
-		irq = create_irq();
-		if (irq < 0)
-			return irq;
+		irq = create_irq_nr(irq_want--);
+		if (irq == 0)
+			return -1;
 #ifdef CONFIG_INTR_REMAP
 		if (!intr_remapping_enabled)
 			goto no_ir;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 7bd841a9c640..348a11168c2b 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
-		seq_printf(p, "%3d: ",i);
+		seq_printf(p, "%#x: ",i);
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 279c940a0039..7c5aef13fcdb 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -82,6 +82,18 @@ void unmask_ht_irq(unsigned int irq)
 	write_ht_irq_msg(irq, &msg);
 }
 
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+	unsigned int irq;
+
+	irq = dev->bus->number;
+	irq <<= 8;
+	irq |= dev->devfn;
+	irq <<= 12;
+
+	return irq;
+}
+
 /**
  * __ht_create_irq - create an irq and attach it to a device.
  * @dev: The hypertransport device to find the irq capability on.
@@ -97,7 +109,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	u32 data;
 	int max_irq;
 	int pos;
-	int irq;
+	unsigned int irq;
+	unsigned int irq_want;
 
 	pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
 	if (!pos)
@@ -125,8 +138,13 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	cfg->msg.address_lo = 0xffffffff;
 	cfg->msg.address_hi = 0xffffffff;
 
+	irq_want= build_irq_for_pci_dev(dev);
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	irq = create_irq_nr(irq_want + idx);
+#else
 	irq = create_irq();
-	if (irq < 0) {
+#endif
+	if (irq == 0) {
 		kfree(cfg);
 		return -EBUSY;
 	}
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 72dd739a7f8a..d68c3592fe4a 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -589,7 +589,7 @@ static int show_stat(struct seq_file *p, void *v)
 		}
 
 #ifdef CONFIG_HAVE_SPARSE_IRQ
-		seq_printf(p, " %u:%u", j, per_irq_sum);
+		seq_printf(p, " %#x:%u", j, per_irq_sum);
 #else
 		seq_printf(p, " %u", per_irq_sum);
 #endif
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 788d5a35a580..704136138dc7 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -399,6 +399,7 @@ extern void set_irq_noprobe(unsigned int irq);
 extern void set_irq_probe(unsigned int irq);
 
 /* Handle dynamic irq creation and destruction */
+extern unsigned int create_irq_nr(unsigned int irq_want);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
 
-- 
GitLab


From 8b8e8c1bf7275eca859fe551dfa484134eaf013b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:23 -0700
Subject: [PATCH 414/895] x86: remove irqbalance in kernel for 32 bit

This has been deprecated for years, the user space irqbalanced utility
works better with numa, has configurable policies, etc...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmai.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                |   8 -
 arch/x86/configs/i386_defconfig |   1 -
 arch/x86/kernel/io_apic_32.c    | 402 --------------------------------
 arch/x86/kernel/quirks.c        |   3 -
 include/linux/irq.h             |  14 +-
 kernel/irq/manage.c             |   3 -
 6 files changed, 3 insertions(+), 428 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1004888e9b13..3e0eaaa1a339 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1254,14 +1254,6 @@ config EFI
   	resultant kernel should continue to boot on existing non-EFI
   	platforms.
 
-config IRQBALANCE
-	def_bool y
-	prompt "Enable kernel irq balancing"
-	depends on X86_32 && SMP && X86_IO_APIC
-	help
-	  The default yes will allow the kernel to do irq load balancing.
-	  Saying no will keep the kernel from doing irq load balancing.
-
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 52d0359719d7..13b8c86ae985 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -287,7 +287,6 @@ CONFIG_MTRR=y
 # CONFIG_MTRR_SANITIZER is not set
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
-# CONFIG_IRQBALANCE is not set
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 204884b1415a..668edf226067 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -371,408 +371,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#if defined(CONFIG_IRQBALANCE)
-# include <asm/processor.h>	/* kernel_thread() */
-# include <linux/kernel_stat.h>	/* kstat */
-# include <linux/slab.h>		/* kmalloc() */
-# include <linux/timer.h>
-
-#define IRQBALANCE_CHECK_ARCH -999
-#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
-#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
-#define BALANCED_IRQ_LESS_DELTA		(HZ)
-
-static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
-static int physical_balance __read_mostly;
-static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
-
-static struct irq_cpu_info {
-	unsigned long *last_irq;
-	unsigned long *irq_delta;
-	unsigned long irq;
-} irq_cpu_data[NR_CPUS];
-
-#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
-#define LAST_CPU_IRQ(cpu, irq)   (irq_cpu_data[cpu].last_irq[irq])
-#define IRQ_DELTA(cpu, irq) 	(irq_cpu_data[cpu].irq_delta[irq])
-
-#define IDLE_ENOUGH(cpu,now) \
-	(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
-
-#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
-
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
-
-static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL;
-
-static cpumask_t *balance_irq_affinity;
-
-
-static void __init irq_affinity_init_work(void *data)
-{
-	struct dyn_array *da = data;
-
-	int i;
-	struct  balance_irq_affinity *affinity;
-
-	affinity = *da->name;
-
-	for (i = 0; i < *da->nr; i++)
-		memcpy(&affinity[i], &balance_irq_affinity_init,
-			 sizeof(struct balance_irq_affinity));
-
-}
-
-DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work);
-
-
-void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	balance_irq_affinity[irq] = mask;
-}
-
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
-			unsigned long now, int direction)
-{
-	int search_idle = 1;
-	int cpu = curr_cpu;
-
-	goto inside;
-
-	do {
-		if (unlikely(cpu == curr_cpu))
-			search_idle = 0;
-inside:
-		if (direction == 1) {
-			cpu++;
-			if (cpu >= NR_CPUS)
-				cpu = 0;
-		} else {
-			cpu--;
-			if (cpu == -1)
-				cpu = NR_CPUS-1;
-		}
-	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
-			(search_idle && !IDLE_ENOUGH(cpu, now)));
-
-	return cpu;
-}
-
-static inline void balance_irq(int cpu, int irq)
-{
-	unsigned long now = jiffies;
-	cpumask_t allowed_mask;
-	unsigned int new_cpu;
-
-	if (irqbalance_disabled)
-		return;
-
-	cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
-	new_cpu = move(cpu, allowed_mask, now, 1);
-	if (cpu != new_cpu)
-		set_pending_irq(irq, cpumask_of_cpu(new_cpu));
-}
-
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
-{
-	int i, j;
-	struct irq_desc *desc;
-
-	for_each_online_cpu(i) {
-		for (j = 0; j < nr_irqs; j++) {
-			desc = irq_to_desc(j);
-			if (!desc->action)
-				continue;
-			/* Is it a significant load ?  */
-			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
-						useful_load_threshold)
-				continue;
-			balance_irq(i, j);
-		}
-	}
-	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-	return;
-}
-
-static void do_irq_balance(void)
-{
-	int i, j;
-	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
-	unsigned long move_this_load = 0;
-	int max_loaded = 0, min_loaded = 0;
-	int load;
-	unsigned long useful_load_threshold = balanced_irq_interval + 10;
-	int selected_irq;
-	int tmp_loaded, first_attempt = 1;
-	unsigned long tmp_cpu_irq;
-	unsigned long imbalance = 0;
-	cpumask_t allowed_mask, target_cpu_mask, tmp;
-	struct irq_desc *desc;
-
-	for_each_possible_cpu(i) {
-		int package_index;
-		CPU_IRQ(i) = 0;
-		if (!cpu_online(i))
-			continue;
-		package_index = CPU_TO_PACKAGEINDEX(i);
-		for (j = 0; j < nr_irqs; j++) {
-			unsigned long value_now, delta;
-			/* Is this an active IRQ or balancing disabled ? */
-			desc = irq_to_desc(j);
-			if (!desc->action || irq_balancing_disabled(j))
-				continue;
-			if (package_index == i)
-				IRQ_DELTA(package_index, j) = 0;
-			/* Determine the total count per processor per IRQ */
-			value_now = (unsigned long) kstat_irqs_cpu(j, i);
-
-			/* Determine the activity per processor per IRQ */
-			delta = value_now - LAST_CPU_IRQ(i, j);
-
-			/* Update last_cpu_irq[][] for the next time */
-			LAST_CPU_IRQ(i, j) = value_now;
-
-			/* Ignore IRQs whose rate is less than the clock */
-			if (delta < useful_load_threshold)
-				continue;
-			/* update the load for the processor or package total */
-			IRQ_DELTA(package_index, j) += delta;
-
-			/* Keep track of the higher numbered sibling as well */
-			if (i != package_index)
-				CPU_IRQ(i) += delta;
-			/*
-			 * We have sibling A and sibling B in the package
-			 *
-			 * cpu_irq[A] = load for cpu A + load for cpu B
-			 * cpu_irq[B] = load for cpu B
-			 */
-			CPU_IRQ(package_index) += delta;
-		}
-	}
-	/* Find the least loaded processor package */
-	for_each_online_cpu(i) {
-		if (i != CPU_TO_PACKAGEINDEX(i))
-			continue;
-		if (min_cpu_irq > CPU_IRQ(i)) {
-			min_cpu_irq = CPU_IRQ(i);
-			min_loaded = i;
-		}
-	}
-	max_cpu_irq = ULONG_MAX;
-
-tryanothercpu:
-	/*
-	 * Look for heaviest loaded processor.
-	 * We may come back to get the next heaviest loaded processor.
-	 * Skip processors with trivial loads.
-	 */
-	tmp_cpu_irq = 0;
-	tmp_loaded = -1;
-	for_each_online_cpu(i) {
-		if (i != CPU_TO_PACKAGEINDEX(i))
-			continue;
-		if (max_cpu_irq <= CPU_IRQ(i))
-			continue;
-		if (tmp_cpu_irq < CPU_IRQ(i)) {
-			tmp_cpu_irq = CPU_IRQ(i);
-			tmp_loaded = i;
-		}
-	}
-
-	if (tmp_loaded == -1) {
-	 /*
-	  * In the case of small number of heavy interrupt sources,
-	  * loading some of the cpus too much. We use Ingo's original
-	  * approach to rotate them around.
-	  */
-		if (!first_attempt && imbalance >= useful_load_threshold) {
-			rotate_irqs_among_cpus(useful_load_threshold);
-			return;
-		}
-		goto not_worth_the_effort;
-	}
-
-	first_attempt = 0;		/* heaviest search */
-	max_cpu_irq = tmp_cpu_irq;	/* load */
-	max_loaded = tmp_loaded;	/* processor */
-	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
-
-	/*
-	 * if imbalance is less than approx 10% of max load, then
-	 * observe diminishing returns action. - quit
-	 */
-	if (imbalance < (max_cpu_irq >> 3))
-		goto not_worth_the_effort;
-
-tryanotherirq:
-	/* if we select an IRQ to move that can't go where we want, then
-	 * see if there is another one to try.
-	 */
-	move_this_load = 0;
-	selected_irq = -1;
-	for (j = 0; j < nr_irqs; j++) {
-		/* Is this an active IRQ? */
-		desc = irq_to_desc(j);
-		if (!desc->action)
-			continue;
-		if (imbalance <= IRQ_DELTA(max_loaded, j))
-			continue;
-		/* Try to find the IRQ that is closest to the imbalance
-		 * without going over.
-		 */
-		if (move_this_load < IRQ_DELTA(max_loaded, j)) {
-			move_this_load = IRQ_DELTA(max_loaded, j);
-			selected_irq = j;
-		}
-	}
-	if (selected_irq == -1)
-		goto tryanothercpu;
-
-	imbalance = move_this_load;
-
-	/* For physical_balance case, we accumulated both load
-	 * values in the one of the siblings cpu_irq[],
-	 * to use the same code for physical and logical processors
-	 * as much as possible.
-	 *
-	 * NOTE: the cpu_irq[] array holds the sum of the load for
-	 * sibling A and sibling B in the slot for the lowest numbered
-	 * sibling (A), _AND_ the load for sibling B in the slot for
-	 * the higher numbered sibling.
-	 *
-	 * We seek the least loaded sibling by making the comparison
-	 * (A+B)/2 vs B
-	 */
-	load = CPU_IRQ(min_loaded) >> 1;
-	for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
-		if (load > CPU_IRQ(j)) {
-			/* This won't change cpu_sibling_map[min_loaded] */
-			load = CPU_IRQ(j);
-			min_loaded = j;
-		}
-	}
-
-	cpus_and(allowed_mask,
-		cpu_online_map,
-		balance_irq_affinity[selected_irq]);
-	target_cpu_mask = cpumask_of_cpu(min_loaded);
-	cpus_and(tmp, target_cpu_mask, allowed_mask);
-
-	if (!cpus_empty(tmp)) {
-		/* mark for change destination */
-		set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
-
-		/* Since we made a change, come back sooner to
-		 * check for more variation.
-		 */
-		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-		return;
-	}
-	goto tryanotherirq;
-
-not_worth_the_effort:
-	/*
-	 * if we did not find an IRQ to move, then adjust the time interval
-	 * upward
-	 */
-	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
-		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
-	return;
-}
-
-static int balanced_irq(void *unused)
-{
-	int i;
-	unsigned long prev_balance_time = jiffies;
-	long time_remaining = balanced_irq_interval;
-	struct irq_desc *desc;
-
-	/* push everything to CPU 0 to give us a starting point.  */
-	for (i = 0 ; i < nr_irqs ; i++) {
-		desc = irq_to_desc(i);
-		desc->pending_mask = cpumask_of_cpu(0);
-		set_pending_irq(i, cpumask_of_cpu(0));
-	}
-
-	set_freezable();
-	for ( ; ; ) {
-		time_remaining = schedule_timeout_interruptible(time_remaining);
-		try_to_freeze();
-		if (time_after(jiffies,
-				prev_balance_time+balanced_irq_interval)) {
-			preempt_disable();
-			do_irq_balance();
-			prev_balance_time = jiffies;
-			time_remaining = balanced_irq_interval;
-			preempt_enable();
-		}
-	}
-	return 0;
-}
-
-static int __init balanced_irq_init(void)
-{
-	int i;
-	struct cpuinfo_x86 *c;
-	cpumask_t tmp;
-
-	cpus_shift_right(tmp, cpu_online_map, 2);
-	c = &boot_cpu_data;
-	/* When not overwritten by the command line ask subarchitecture. */
-	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
-		irqbalance_disabled = NO_BALANCE_IRQ;
-	if (irqbalance_disabled)
-		return 0;
-
-	 /* disable irqbalance completely if there is only one processor online */
-	if (num_online_cpus() < 2) {
-		irqbalance_disabled = 1;
-		return 0;
-	}
-	/*
-	 * Enable physical balance only if more than 1 physical processor
-	 * is present
-	 */
-	if (smp_num_siblings > 1 && !cpus_empty(tmp))
-		physical_balance = 1;
-
-	for_each_online_cpu(i) {
-		irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
-		irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
-		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
-			printk(KERN_ERR "balanced_irq_init: out of memory");
-			goto failed;
-		}
-	}
-
-	printk(KERN_INFO "Starting balanced_irq\n");
-	if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
-		return 0;
-	printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
-failed:
-	for_each_possible_cpu(i) {
-		kfree(irq_cpu_data[i].irq_delta);
-		irq_cpu_data[i].irq_delta = NULL;
-		kfree(irq_cpu_data[i].last_irq);
-		irq_cpu_data[i].last_irq = NULL;
-	}
-	return 0;
-}
-
-int __devinit irqbalance_disable(char *str)
-{
-	irqbalance_disabled = 1;
-	return 1;
-}
-
-__setup("noirqbalance", irqbalance_disable);
-
-late_initcall(balanced_irq_init);
-#endif /* CONFIG_IRQBALANCE */
 #endif /* CONFIG_SMP */
 
 #ifndef CONFIG_SMP
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index f6a11b9b1f98..67465ed89310 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
 	if (!(word & (1 << 13))) {
 		dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
 			"disabling irq balancing and affinity\n");
-#ifdef CONFIG_IRQBALANCE
-		irqbalance_disable("");
-#endif
 		noirqdebug_setup("");
 #ifdef CONFIG_PROC_FS
 		no_irq_affinity = 1;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 704136138dc7..2445d2b3d5dc 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -185,7 +185,7 @@ struct irq_desc {
 	cpumask_t		affinity;
 	unsigned int		cpu;
 #endif
-#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_t		pending_mask;
 #endif
 #ifdef CONFIG_PROC_FS
@@ -241,13 +241,13 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);
 
 #ifdef CONFIG_SMP
 
-#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
 
 void set_pending_irq(unsigned int irq, cpumask_t mask);
 void move_native_irq(int irq);
 void move_masked_irq(int irq);
 
-#else /* CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE */
+#else /* CONFIG_GENERIC_PENDING_IRQ */
 
 static inline void move_irq(int irq)
 {
@@ -274,14 +274,6 @@ static inline void set_pending_irq(unsigned int irq, cpumask_t mask)
 
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_IRQBALANCE
-extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask);
-#else
-static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-}
-#endif
-
 extern int no_irq_affinity;
 
 static inline int irq_balancing_disabled(unsigned int irq)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 6df49218632a..ddc956861a58 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -86,8 +86,6 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 	if (!desc->chip->set_affinity)
 		return -EINVAL;
 
-	set_balance_irq_affinity(irq, cpumask);
-
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (desc->status & IRQ_MOVE_PCNTXT) {
 		unsigned long flags;
@@ -122,7 +120,6 @@ int irq_select_affinity(unsigned int irq)
 	desc->affinity = mask;
 	desc->chip->set_affinity(irq, mask);
 
-	set_balance_irq_affinity(irq, mask);
 	return 0;
 }
 #endif
-- 
GitLab


From a1420f395d7721895c05ba3dedf150294d2c0e4d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:24 -0700
Subject: [PATCH 415/895] x86: add irq_cfg for 32bit

it only contains vector ...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 71 +++++++++++++++++++++++-------------
 1 file changed, 46 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 668edf226067..033ad953bee3 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -94,6 +94,43 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 static int disable_timer_pin_1 __initdata;
 
+struct irq_cfg {
+	u8 vector;
+};
+
+
+/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+static struct irq_cfg irq_cfg_legacy[] __initdata = {
+	[0] = { .vector = FIRST_DEVICE_VECTOR,  },
+};
+
+static void __init init_work(void *data)
+{
+        struct dyn_array *da = data;
+        struct irq_cfg *cfg;
+        int legacy_count;
+        int i;
+
+        cfg = *da->name;
+
+        legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+
+	BUG_ON(legacy_count > nr_irqs);
+
+        memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+}
+
+static struct irq_cfg *irq_cfgx;
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	if (irq >= nr_irqs)
+		return NULL;
+
+	return &irq_cfgx[irq];
+}
+
 /*
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
@@ -794,22 +831,6 @@ static inline int IO_APIC_irq_trigger(int irq)
 	return 0;
 }
 
-/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-static u8 irq_vector_init_first __initdata = FIRST_DEVICE_VECTOR;
-static u8 *irq_vector;
-
-static void __init irq_vector_init_work(void *data)
-{
-	struct dyn_array *da = data;
-
-	u8 *irq_vec;
-
-	irq_vec = *da->name;
-
-	irq_vec[0] = irq_vector_init_first;
-}
-
-DEFINE_DYN_ARRAY(irq_vector, sizeof(u8), nr_irqs, PAGE_SIZE, irq_vector_init_work);
 
 static int __assign_irq_vector(int irq)
 {
@@ -818,8 +839,8 @@ static int __assign_irq_vector(int irq)
 
 	BUG_ON((unsigned)irq >= nr_irqs);
 
-	if (irq_vector[irq] > 0)
-		return irq_vector[irq];
+	if (irq_cfg(irq)->vector > 0)
+		return irq_cfg(irq)->vector;
 
 	vector = current_vector;
 	offset = current_offset;
@@ -836,7 +857,7 @@ next:
 
 	current_vector = vector;
 	current_offset = offset;
-	irq_vector[irq] = vector;
+	irq_cfg(irq)->vector = vector;
 
 	return vector;
 }
@@ -1598,7 +1619,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq)
  * operation to prevent an edge-triggered interrupt escaping meanwhile.
  * The idea is from Manfred Spraul.  --macro
  */
-	i = irq_vector[irq];
+	i = irq_cfg(irq)->vector;
 
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 
@@ -1615,7 +1636,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq)
 
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-	send_IPI_self(irq_vector[irq]);
+	send_IPI_self(irq_cfg(irq)->vector);
 
 	return 1;
 }
@@ -1651,7 +1672,7 @@ static inline void init_IO_APIC_traps(void)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	for (irq = 0; irq < nr_irqs ; irq++) {
-		if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
+		if (IO_APIC_IRQ(irq) && !irq_cfg(irq)->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -2102,7 +2123,7 @@ int create_irq(void)
 	for (new = (nr_irqs - 1); new >= 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
-		if (irq_vector[new] != 0)
+		if (irq_cfg(new)->vector != 0)
 			continue;
 		vector = __assign_irq_vector(new);
 		if (likely(vector > 0))
@@ -2125,8 +2146,8 @@ void destroy_irq(unsigned int irq)
 	dynamic_irq_cleanup(irq);
 
 	spin_lock_irqsave(&vector_lock, flags);
-	clear_bit(irq_vector[irq], used_vectors);
-	irq_vector[irq] = 0;
+	clear_bit(irq_cfg(irq)->vector, used_vectors);
+	irq_cfg(irq)->vector = 0;
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
 
-- 
GitLab


From da51a821314dd0ec7126f2bf9a62117146fbb6b9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:25 -0700
Subject: [PATCH 416/895] x86: make 32bit use irq_cfg_alloc, etc

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 180 +++++++++++++++++++++++++++++++----
 1 file changed, 160 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 033ad953bee3..5a83d7f5b147 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -94,41 +94,172 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 static int disable_timer_pin_1 __initdata;
 
+struct irq_cfg;
+
 struct irq_cfg {
+	unsigned int irq;
+	struct irq_cfg *next;
 	u8 vector;
 };
 
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 static struct irq_cfg irq_cfg_legacy[] __initdata = {
-	[0] = { .vector = FIRST_DEVICE_VECTOR,  },
+	[0]  = { .irq =  0, .vector = IRQ0_VECTOR,  },
+	[1]  = { .irq =  1, .vector = IRQ1_VECTOR,  },
+	[2]  = { .irq =  2, .vector = IRQ2_VECTOR,  },
+	[3]  = { .irq =  3, .vector = IRQ3_VECTOR,  },
+	[4]  = { .irq =  4, .vector = IRQ4_VECTOR,  },
+	[5]  = { .irq =  5, .vector = IRQ5_VECTOR,  },
+	[6]  = { .irq =  6, .vector = IRQ6_VECTOR,  },
+	[7]  = { .irq =  7, .vector = IRQ7_VECTOR,  },
+	[8]  = { .irq =  8, .vector = IRQ8_VECTOR,  },
+	[9]  = { .irq =  9, .vector = IRQ9_VECTOR,  },
+	[10] = { .irq = 10, .vector = IRQ10_VECTOR, },
+	[11] = { .irq = 11, .vector = IRQ11_VECTOR, },
+	[12] = { .irq = 12, .vector = IRQ12_VECTOR, },
+	[13] = { .irq = 13, .vector = IRQ13_VECTOR, },
+	[14] = { .irq = 14, .vector = IRQ14_VECTOR, },
+	[15] = { .irq = 15, .vector = IRQ15_VECTOR, },
 };
 
+static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
+/* need to be biger than size of irq_cfg_legacy */
+static int nr_irq_cfg = 32;
+
+static int __init parse_nr_irq_cfg(char *arg)
+{
+	if (arg) {
+		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
+		if (nr_irq_cfg < 32)
+			nr_irq_cfg = 32;
+	}
+	return 0;
+}
+
+early_param("nr_irq_cfg", parse_nr_irq_cfg);
+
+static void init_one_irq_cfg(struct irq_cfg *cfg)
+{
+	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
+}
+
+static struct irq_cfg *irq_cfgx;
+static struct irq_cfg *irq_cfgx_free;
 static void __init init_work(void *data)
 {
-        struct dyn_array *da = data;
-        struct irq_cfg *cfg;
-        int legacy_count;
-        int i;
+	struct dyn_array *da = data;
+	struct irq_cfg *cfg;
+	int legacy_count;
+	int i;
+
+	cfg = *da->name;
 
-        cfg = *da->name;
+	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
 
-        legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+	legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+	for (i = legacy_count; i < *da->nr; i++)
+		init_one_irq_cfg(&cfg[i]);
 
-	BUG_ON(legacy_count > nr_irqs);
+	for (i = 1; i < *da->nr; i++)
+		cfg[i-1].next = &cfg[i];
 
-        memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+	irq_cfgx_free = &irq_cfgx[legacy_count];
+	irq_cfgx[legacy_count - 1].next = NULL;
 }
 
-static struct irq_cfg *irq_cfgx;
-DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
+#define for_each_irq_cfg(cfg)           \
+	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
+
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
 
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
-	if (irq >= nr_irqs)
-		return NULL;
+	struct irq_cfg *cfg;
+
+	cfg = irq_cfgx;
+	while (cfg) {
+		if (cfg->irq == irq)
+			return cfg;
 
-	return &irq_cfgx[irq];
+		cfg = cfg->next;
+	}
+
+	return NULL;
+}
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+	struct irq_cfg *cfg, *cfg_pri;
+	int i;
+	int count = 0;
+
+	cfg_pri = cfg = irq_cfgx;
+	while (cfg) {
+		if (cfg->irq == irq)
+			return cfg;
+
+		cfg_pri = cfg;
+		cfg = cfg->next;
+		count++;
+	}
+
+	if (!irq_cfgx_free) {
+		unsigned long phys;
+		unsigned long total_bytes;
+		/*
+		 *  we run out of pre-allocate ones, allocate more
+		 */
+		printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
+
+		total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
+		if (after_bootmem)
+			cfg = kzalloc(total_bytes, GFP_ATOMIC);
+		else
+			cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+
+		if (!cfg)
+			panic("please boot with nr_irq_cfg= %d\n", count * 2);
+
+		phys = __pa(cfg);
+		printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+
+		for (i = 0; i < nr_irq_cfg; i++)
+			init_one_irq_cfg(&cfg[i]);
+
+		for (i = 1; i < nr_irq_cfg; i++)
+			cfg[i-1].next = &cfg[i];
+
+		irq_cfgx_free = cfg;
+	}
+
+	cfg = irq_cfgx_free;
+	irq_cfgx_free = irq_cfgx_free->next;
+	cfg->next = NULL;
+	if (cfg_pri)
+		cfg_pri->next = cfg;
+	else
+		irq_cfgx = cfg;
+	cfg->irq = irq;
+	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
+	{
+		/* dump the results */
+		struct irq_cfg *cfg;
+		unsigned long phys;
+		unsigned long bytes = sizeof(struct irq_cfg);
+
+		printk(KERN_DEBUG "=========================== %d\n", irq);
+		printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
+		for_each_irq_cfg(cfg) {
+			phys = __pa(cfg);
+			printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
+		}
+		printk(KERN_DEBUG "===========================\n");
+	}
+#endif
+	return cfg;
 }
 
 /*
@@ -254,6 +385,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
+	irq_cfg_alloc(irq);
 	while (entry->next)
 		entry = irq_2_pin + entry->next;
 
@@ -836,11 +968,13 @@ static int __assign_irq_vector(int irq)
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
 	int vector, offset;
+	struct irq_cfg *cfg;
 
 	BUG_ON((unsigned)irq >= nr_irqs);
 
-	if (irq_cfg(irq)->vector > 0)
-		return irq_cfg(irq)->vector;
+	cfg = irq_cfg(irq);
+	if (cfg->vector > 0)
+		return cfg->vector;
 
 	vector = current_vector;
 	offset = current_offset;
@@ -857,7 +991,7 @@ next:
 
 	current_vector = vector;
 	current_offset = offset;
-	irq_cfg(irq)->vector = vector;
+	cfg->vector = vector;
 
 	return vector;
 }
@@ -1659,6 +1793,7 @@ static inline void init_IO_APIC_traps(void)
 {
 	int irq;
 	struct irq_desc *desc;
+	struct irq_cfg *cfg;
 
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1671,8 +1806,9 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for (irq = 0; irq < nr_irqs ; irq++) {
-		if (IO_APIC_IRQ(irq) && !irq_cfg(irq)->vector) {
+	for_each_irq_cfg(cfg) {
+		irq = cfg->irq;
+		if (IO_APIC_IRQ(irq) && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -2117,14 +2253,18 @@ int create_irq(void)
 	/* Allocate an unused irq */
 	int irq, new, vector = 0;
 	unsigned long flags;
+	struct irq_cfg *cfg_new;
 
 	irq = -ENOSPC;
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = (nr_irqs - 1); new >= 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
-		if (irq_cfg(new)->vector != 0)
+		cfg_new = irq_cfg(new);
+		if (cfg_new && cfg_new->vector != 0)
 			continue;
+		if (!cfg_new)
+			cfg_new = irq_cfg_alloc(new);
 		vector = __assign_irq_vector(new);
 		if (likely(vector > 0))
 			irq = new;
-- 
GitLab


From 0f978f4505e96227a89b3c9447552aca983c6b57 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:26 -0700
Subject: [PATCH 417/895] x86: make 32bit to use irq_2_pin in irq_cfg

so it is more like 64 bit.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 152 +++++++++++++++++++++++++++--------
 1 file changed, 117 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 5a83d7f5b147..59d2e8a273e3 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -95,10 +95,11 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 static int disable_timer_pin_1 __initdata;
 
 struct irq_cfg;
-
+struct irq_pin_list;
 struct irq_cfg {
 	unsigned int irq;
 	struct irq_cfg *next;
+	struct irq_pin_list *irq_2_pin;
 	u8 vector;
 };
 
@@ -275,11 +276,66 @@ int pin_map_size;
  * between pins and IRQs.
  */
 
-static struct irq_pin_list {
-	int apic, pin, next;
-} *irq_2_pin;
+struct irq_pin_list {
+	int apic, pin;
+	struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *irq_2_pin_head;
+/* fill one page ? */
+static int nr_irq_2_pin = 0x100;
+static struct irq_pin_list *irq_2_pin_ptr;
+static void __init irq_2_pin_init_work(void *data)
+{
+	struct dyn_array *da = data;
+	struct irq_pin_list *pin;
+	int i;
+
+	pin = *da->name;
+
+	for (i = 1; i < *da->nr; i++)
+		pin[i-1].next = &pin[i];
+
+	irq_2_pin_ptr = &pin[0];
+}
+DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
+
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+	struct irq_pin_list *pin;
+	int i;
+
+	pin = irq_2_pin_ptr;
+
+	if (pin) {
+		irq_2_pin_ptr = pin->next;
+		pin->next = NULL;
+		return pin;
+	}
+
+	/*
+	 *  we run out of pre-allocate ones, allocate more
+	 */
+	printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
+
+	if (after_bootmem)
+		pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
+				 GFP_ATOMIC);
+	else
+		pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
+				nr_irq_2_pin, PAGE_SIZE, 0);
+
+	if (!pin)
+		panic("can not get more irq_2_pin\n");
 
-DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, 16, NULL);
+	for (i = 1; i < nr_irq_2_pin; i++)
+		pin[i-1].next = &pin[i];
+
+	irq_2_pin_ptr = pin->next;
+	pin->next = NULL;
+
+	return pin;
+}
 
 struct io_apic {
 	unsigned int index;
@@ -383,20 +439,34 @@ static void ioapic_mask_entry(int apic, int pin)
  */
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+
+	/* first time to refer irq_cfg, so with new */
+	cfg = irq_cfg_alloc(irq);
+	entry = cfg->irq_2_pin;
+	if (!entry) {
+		entry = get_one_free_irq_2_pin();
+		cfg->irq_2_pin = entry;
+		entry->apic = apic;
+		entry->pin = pin;
+		printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
+		return;
+	}
 
-	irq_cfg_alloc(irq);
-	while (entry->next)
-		entry = irq_2_pin + entry->next;
+	while (entry->next) {
+		/* not again, please */
+		if (entry->apic == apic && entry->pin == pin)
+			return;
 
-	if (entry->pin != -1) {
-		entry->next = first_free_entry;
-		entry = irq_2_pin + entry->next;
-		if (++first_free_entry >= pin_map_size)
-			panic("io_apic.c: whoops");
+		entry = entry->next;
 	}
+
+	entry->next = get_one_free_irq_2_pin();
+	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
+	printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
 }
 
 /*
@@ -406,35 +476,45 @@ static void __init replace_pin_at_irq(unsigned int irq,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_pin_list *entry = cfg->irq_2_pin;
+	int replaced = 0;
 
-	while (1) {
+	while (entry) {
 		if (entry->apic == oldapic && entry->pin == oldpin) {
 			entry->apic = newapic;
 			entry->pin = newpin;
-		}
-		if (!entry->next)
+			replaced = 1;
+			/* every one is different, right? */
 			break;
-		entry = irq_2_pin + entry->next;
+		}
+		entry = entry->next;
 	}
+
+	/* why? call replace before add? */
+	if (!replaced)
+		add_pin_to_irq(irq, newapic, newpin);
 }
 
 static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
 {
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
 	unsigned int pin, reg;
 
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
 	for (;;) {
-		pin = entry->pin;
-		if (pin == -1)
+		if (!entry)
 			break;
+		pin = entry->pin;
 		reg = io_apic_read(entry->apic, 0x10 + pin*2);
 		reg &= ~disable;
 		reg |= enable;
 		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
 		if (!entry->next)
 			break;
-		entry = irq_2_pin + entry->next;
+		entry = entry->next;
 	}
 }
 
@@ -509,13 +589,18 @@ static void clear_IO_APIC(void)
 #ifdef CONFIG_SMP
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 {
+	struct irq_cfg *cfg;
 	unsigned long flags;
 	int pin;
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_pin_list *entry;
 	unsigned int apicid_value;
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
+
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
+
 	cpus_and(tmp, cpumask, cpu_online_map);
 	if (cpus_empty(tmp))
 		tmp = TARGET_CPUS;
@@ -527,13 +612,13 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 	apicid_value = apicid_value << 24;
 	spin_lock_irqsave(&ioapic_lock, flags);
 	for (;;) {
-		pin = entry->pin;
-		if (pin == -1)
+		if (!entry)
 			break;
+		pin = entry->pin;
 		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
 		if (!entry->next)
 			break;
-		entry = irq_2_pin + entry->next;
+		entry = entry->next;
 	}
 	desc = irq_to_desc(irq);
 	desc->affinity = cpumask;
@@ -1152,6 +1237,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_02 reg_02;
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
+	struct irq_cfg *cfg;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1240,16 +1326,16 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_pin_list *entry = irq_2_pin + i;
-		if (entry->pin < 0)
+	for_each_irq_cfg(cfg) {
+		struct irq_pin_list *entry = cfg->irq_2_pin;
+		if (!entry)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", i);
 		for (;;) {
 			printk("-> %d:%d", entry->apic, entry->pin);
 			if (!entry->next)
 				break;
-			entry = irq_2_pin + entry->next;
+			entry = entry->next;
 		}
 		printk("\n");
 	}
@@ -1420,10 +1506,6 @@ static void __init enable_IO_APIC(void)
 	int i, apic;
 	unsigned long flags;
 
-	for (i = 0; i < pin_map_size; i++) {
-		irq_2_pin[i].pin = -1;
-		irq_2_pin[i].next = 0;
-	}
 	if (!pirqs_enabled)
 		for (i = 0; i < MAX_PIRQS; i++)
 			pirq_entries[i] = -1;
-- 
GitLab


From 199751d715bba5b469ea22adadc68a4166bfa4f5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:27 -0700
Subject: [PATCH 418/895] x86: make 32 bit to use sparse_irq

but actually irq still needs to be less than NR_IRQS, because
interrupt[NR_IRQS] in entry.S.

need to enable per_cpu vector...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig             |  2 +-
 arch/x86/kernel/io_apic_32.c | 63 ++++++++++++++++++++++++------------
 arch/x86/kernel/irq_32.c     | 37 +++++++++++++++------
 arch/x86/kernel/irqinit_32.c |  7 ++++
 4 files changed, 79 insertions(+), 30 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3e0eaaa1a339..b157e94637bf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,7 +34,7 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_DYN_ARRAY
-	select HAVE_SPARSE_IRQ if X86_64
+	select HAVE_SPARSE_IRQ
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 59d2e8a273e3..66c0a91362a7 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -595,7 +595,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 	struct irq_pin_list *entry;
 	unsigned int apicid_value;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 
 	cfg = irq_cfg(irq);
@@ -620,8 +619,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 			break;
 		entry = entry->next;
 	}
-	desc = irq_to_desc(irq);
-	desc->affinity = cpumask;
+	irq_to_desc(irq)->affinity = cpumask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -1055,8 +1053,6 @@ static int __assign_irq_vector(int irq)
 	int vector, offset;
 	struct irq_cfg *cfg;
 
-	BUG_ON((unsigned)irq >= nr_irqs);
-
 	cfg = irq_cfg(irq);
 	if (cfg->vector > 0)
 		return cfg->vector;
@@ -1103,7 +1099,12 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
 	struct irq_desc *desc;
 
-	desc = irq_to_desc(irq);
+	/* first time to use this irq_desc */
+	if (irq < 16)
+		desc = irq_to_desc(irq);
+	else
+		desc = irq_to_desc_alloc(irq);
+
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL) {
 		desc->status |= IRQ_LEVEL;
@@ -2330,16 +2331,19 @@ device_initcall(ioapic_init_sysfs);
 /*
  * Dynamic irq allocate and deallocation
  */
-int create_irq(void)
+unsigned int create_irq_nr(unsigned int irq_want)
 {
 	/* Allocate an unused irq */
-	int irq, new, vector = 0;
+	unsigned int irq, new, vector = 0;
 	unsigned long flags;
 	struct irq_cfg *cfg_new;
 
-	irq = -ENOSPC;
+	/* only can use bus/dev/fn.. when per_cpu vector is used */
+	irq_want = nr_irqs - 1;
+
+	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (nr_irqs - 1); new >= 0; new--) {
+	for (new = (nr_irqs - 1); new > 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
 		cfg_new = irq_cfg(new);
@@ -2354,13 +2358,18 @@ int create_irq(void)
 	}
 	spin_unlock_irqrestore(&vector_lock, flags);
 
-	if (irq >= 0) {
+	if (irq > 0) {
 		set_intr_gate(vector, interrupt[irq]);
 		dynamic_irq_init(irq);
 	}
 	return irq;
 }
 
+int create_irq(void)
+{
+	return create_irq_nr(nr_irqs - 1);
+}
+
 void destroy_irq(unsigned int irq)
 {
 	unsigned long flags;
@@ -2415,7 +2424,6 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp;
 	int vector;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2435,8 +2443,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg(irq, &msg);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	irq_to_desc(irq)->affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2455,13 +2462,31 @@ static struct irq_chip msi_chip = {
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+	unsigned int irq;
+
+	irq = dev->bus->number;
+	irq <<= 8;
+	irq |= dev->devfn;
+	irq <<= 12;
+
+	return irq;
+}
+
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
 	struct msi_msg msg;
 	int irq, ret;
-	irq = create_irq();
-	if (irq < 0)
-		return irq;
+
+	unsigned int irq_want;
+
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+
+	irq = create_irq_nr(irq_want);
+
+	if (irq == 0)
+		return -1;
 
 	ret = msi_compose_msg(dev, irq, &msg);
 	if (ret < 0) {
@@ -2510,7 +2535,6 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2521,8 +2545,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	dest = cpu_mask_to_apicid(mask);
 
 	target_ht_irq(irq, dest);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	irq_to_desc(irq)->affinity = mask;
 }
 #endif
 
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 576c5df6cad8..0a57e39159a8 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -224,9 +224,10 @@ unsigned int do_IRQ(struct pt_regs *regs)
 	struct pt_regs *old_regs;
 	/* high bit used in ret_from_ code */
 	int overflow, irq = ~regs->orig_ax;
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 
-	if (unlikely((unsigned)irq >= nr_irqs)) {
+	desc = irq_to_desc(irq);
+	if (unlikely(!desc)) {
 		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
 					__func__, irq);
 		BUG();
@@ -263,6 +264,24 @@ int show_interrupts(struct seq_file *p, void *v)
 	int i = *(loff_t *) v, j;
 	struct irqaction * action;
 	unsigned long flags;
+	unsigned int entries;
+	struct irq_desc *desc;
+	int tail = 0;
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	desc = (struct irq_desc *)v;
+	entries = -1U;
+	i = desc->irq;
+	if (!desc->next)
+		tail = 1;
+#else
+	entries = nr_irqs - 1;
+	i = *(loff_t *) v;
+	if (i == nr_irqs)
+		tail = 1;
+	else
+		desc = irq_to_desc(i);
+#endif
 
 	if (i == 0) {
 		seq_printf(p, "           ");
@@ -271,9 +290,8 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i < nr_irqs) {
+	if (i <= entries) {
 		unsigned any_count = 0;
-		struct irq_desc *desc = irq_to_desc(i);
 
 		spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
@@ -285,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
-		seq_printf(p, "%3d: ",i);
+		seq_printf(p, "%#x: ",i);
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
@@ -304,7 +322,9 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 skip:
 		spin_unlock_irqrestore(&desc->lock, flags);
-	} else if (i == nr_irqs) {
+	}
+
+	if (tail) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", nmi_count(j));
@@ -396,15 +416,14 @@ void fixup_irqs(cpumask_t map)
 {
 	unsigned int irq;
 	static int warned;
+	struct irq_desc *desc;
 
-	for (irq = 0; irq < nr_irqs; irq++) {
+	for_each_irq_desc(irq, desc) {
 		cpumask_t mask;
-		struct irq_desc *desc;
 
 		if (irq == 2)
 			continue;
 
-		desc = irq_to_desc(irq);
 		cpus_and(mask, desc->affinity, map);
 		if (any_online_cpu(mask) == NR_CPUS) {
 			printk("Breaking affinity for irq %i\n", irq);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 65c1c9507707..ded09ac2642e 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -69,6 +69,13 @@ void __init init_ISA_irqs (void)
 	 * 16 old-style INTA-cycle interrupts:
 	 */
 	for (i = 0; i < 16; i++) {
+		/* first time call this irq_desc */
+		struct irq_desc *desc = irq_to_desc_alloc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = NULL;
+		desc->depth = 1;
+
 		set_irq_chip_and_handler_name(i, &i8259A_chip,
 					      handle_level_irq, "XT");
 	}
-- 
GitLab


From 497c9a195db918d3f035e8cb3021e5d4d035516e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:28 -0700
Subject: [PATCH 419/895] x86: make 32bit support per_cpu vector

so we can merge io_apic_32.c and io_apic_64.c

v2: Use cpu_online_map as target cpus for bigsmp, just like 64-bit is doing.

Also remove some unused TARGET_CPUS macro.

v3: need to check if desc is null in smp_irq_move_cleanup

also migration needs to reset vector too, so copy __target_IO_APIC_irq
from 64bit.

(the duplication will go away once the two files are unified.)

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S                |   2 +-
 arch/x86/kernel/io_apic_32.c              | 719 ++++++++++++++--------
 arch/x86/kernel/irq_32.c                  |  18 +-
 arch/x86/kernel/irqinit_32.c              |  41 +-
 arch/x86/lguest/boot.c                    |   2 +-
 arch/x86/mach-generic/bigsmp.c            |   4 +
 arch/x86/mach-generic/es7000.c            |  14 +
 arch/x86/mach-generic/numaq.c             |  14 +
 arch/x86/mach-generic/summit.c            |  14 +
 include/asm-x86/bigsmp/apic.h             |  15 +-
 include/asm-x86/es7000/apic.h             |   3 +-
 include/asm-x86/genapic_32.h              |   2 +
 include/asm-x86/hw_irq.h                  |   6 +-
 include/asm-x86/irq_vectors.h             |  15 +-
 include/asm-x86/mach-default/entry_arch.h |   1 +
 include/asm-x86/mach-default/mach_apic.h  |  15 +-
 include/asm-x86/mach-generic/mach_apic.h  |   1 +
 include/asm-x86/numaq/apic.h              |   2 -
 include/asm-x86/summit/apic.h             |   1 -
 19 files changed, 577 insertions(+), 312 deletions(-)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b21fbfaffe39..4d82171d0f9c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -629,7 +629,7 @@ ENTRY(interrupt)
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
 vector=0
-.rept NR_IRQS
+.rept NR_VECTORS
 	ALIGN
  .if vector
 	CFI_ADJUST_CFA_OFFSET -4
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 66c0a91362a7..ea33d3c74970 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -48,6 +48,7 @@
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
 
+#include <mach_ipi.h>
 #include <mach_apic.h>
 #include <mach_apicdef.h>
 
@@ -60,7 +61,7 @@ atomic_t irq_mis_count;
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
+static DEFINE_SPINLOCK(vector_lock);
 
 int timer_through_8259 __initdata;
 
@@ -100,28 +101,32 @@ struct irq_cfg {
 	unsigned int irq;
 	struct irq_cfg *next;
 	struct irq_pin_list *irq_2_pin;
+	cpumask_t domain;
+	cpumask_t old_domain;
+	unsigned move_cleanup_count;
 	u8 vector;
+	u8 move_in_progress : 1;
 };
 
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 static struct irq_cfg irq_cfg_legacy[] __initdata = {
-	[0]  = { .irq =  0, .vector = IRQ0_VECTOR,  },
-	[1]  = { .irq =  1, .vector = IRQ1_VECTOR,  },
-	[2]  = { .irq =  2, .vector = IRQ2_VECTOR,  },
-	[3]  = { .irq =  3, .vector = IRQ3_VECTOR,  },
-	[4]  = { .irq =  4, .vector = IRQ4_VECTOR,  },
-	[5]  = { .irq =  5, .vector = IRQ5_VECTOR,  },
-	[6]  = { .irq =  6, .vector = IRQ6_VECTOR,  },
-	[7]  = { .irq =  7, .vector = IRQ7_VECTOR,  },
-	[8]  = { .irq =  8, .vector = IRQ8_VECTOR,  },
-	[9]  = { .irq =  9, .vector = IRQ9_VECTOR,  },
-	[10] = { .irq = 10, .vector = IRQ10_VECTOR, },
-	[11] = { .irq = 11, .vector = IRQ11_VECTOR, },
-	[12] = { .irq = 12, .vector = IRQ12_VECTOR, },
-	[13] = { .irq = 13, .vector = IRQ13_VECTOR, },
-	[14] = { .irq = 14, .vector = IRQ14_VECTOR, },
-	[15] = { .irq = 15, .vector = IRQ15_VECTOR, },
+	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
 static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
@@ -263,6 +268,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 	return cfg;
 }
 
+static int assign_irq_vector(int irq, cpumask_t mask);
 /*
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
@@ -432,6 +438,65 @@ static void ioapic_mask_entry(int apic, int pin)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
+#ifdef CONFIG_SMP
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+	int apic, pin;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		unsigned int reg;
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		io_apic_write(apic, 0x11 + pin*2, dest);
+		reg = io_apic_read(apic, 0x10 + pin*2);
+		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+		reg |= vector;
+		io_apic_modify(apic, 0x10 + pin *2, reg);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+}
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int dest;
+	cpumask_t tmp;
+
+	cfg = irq_cfg(irq);
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+
+	dest = cpu_mask_to_apicid(tmp);
+	/*
+	 * Only the high 8 bits are valid.
+	 */
+	dest = SET_APIC_LOGICAL_ID(dest);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__target_IO_APIC_irq(irq, dest, cfg->vector);
+	irq_to_desc(irq)->affinity = mask;
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#endif /* CONFIG_SMP */
+
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -586,45 +651,6 @@ static void clear_IO_APIC(void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	int pin;
-	struct irq_pin_list *entry;
-	unsigned int apicid_value;
-	cpumask_t tmp;
-
-
-	cfg = irq_cfg(irq);
-	entry = cfg->irq_2_pin;
-
-	cpus_and(tmp, cpumask, cpu_online_map);
-	if (cpus_empty(tmp))
-		tmp = TARGET_CPUS;
-
-	cpus_and(cpumask, tmp, CPU_MASK_ALL);
-
-	apicid_value = cpu_mask_to_apicid(cpumask);
-	/* Prepare to do the io_apic_write */
-	apicid_value = apicid_value << 24;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	for (;;) {
-		if (!entry)
-			break;
-		pin = entry->pin;
-		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-	irq_to_desc(irq)->affinity = cpumask;
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#endif /* CONFIG_SMP */
-
 #ifndef CONFIG_SMP
 void send_IPI_self(int vector)
 {
@@ -789,32 +815,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 }
 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
-/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
-	int pin, ioapic, irq, irq_entry;
-
-	if (skip_ioapic_setup == 1)
-		return;
-
-	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-			if (irq_entry == -1)
-				continue;
-			irq = pin_2_irq(irq_entry, ioapic, pin);
-			set_ioapic_affinity_irq(irq, TARGET_CPUS);
-		}
-
-	}
-}
-#endif
-
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 /*
  * EISA Edge/Level control register, ELCR
@@ -1046,47 +1046,138 @@ static inline int IO_APIC_irq_trigger(int irq)
 	return 0;
 }
 
+void lock_vector_lock(void)
+{
+	/* Used to the online set of cpus does not change
+	 * during assign_irq_vector.
+	 */
+	spin_lock(&vector_lock);
+}
 
-static int __assign_irq_vector(int irq)
+void unlock_vector_lock(void)
 {
-	static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
-	int vector, offset;
-	struct irq_cfg *cfg;
+	spin_unlock(&vector_lock);
+}
 
-	cfg = irq_cfg(irq);
-	if (cfg->vector > 0)
-		return cfg->vector;
+static int __assign_irq_vector(int irq, cpumask_t mask)
+{
+        static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+        unsigned int old_vector;
+        int cpu;
+        struct irq_cfg *cfg;
 
-	vector = current_vector;
-	offset = current_offset;
-next:
-	vector += 8;
-	if (vector >= first_system_vector) {
-		offset = (offset + 1) % 8;
-		vector = FIRST_DEVICE_VECTOR + offset;
-	}
-	if (vector == current_vector)
-		return -ENOSPC;
-	if (test_and_set_bit(vector, used_vectors))
-		goto next;
+        cfg = irq_cfg(irq);
 
-	current_vector = vector;
-	current_offset = offset;
-	cfg->vector = vector;
+        /* Only try and allocate irqs on cpus that are present */
+        cpus_and(mask, mask, cpu_online_map);
 
-	return vector;
-}
+        if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+                return -EBUSY;
 
-static int assign_irq_vector(int irq)
-{
+        old_vector = cfg->vector;
+        if (old_vector) {
+                cpumask_t tmp;
+                cpus_and(tmp, cfg->domain, mask);
+                if (!cpus_empty(tmp))
+                        return 0;
+        }
+
+        for_each_cpu_mask_nr(cpu, mask) {
+                cpumask_t domain, new_mask;
+                int new_cpu;
+                int vector, offset;
+
+                domain = vector_allocation_domain(cpu);
+                cpus_and(new_mask, domain, cpu_online_map);
+
+                vector = current_vector;
+                offset = current_offset;
+next:
+                vector += 8;
+                if (vector >= first_system_vector) {
+                        /* If we run out of vectors on large boxen, must share them. */
+                        offset = (offset + 1) % 8;
+                        vector = FIRST_DEVICE_VECTOR + offset;
+                }
+                if (unlikely(current_vector == vector))
+                        continue;
+		if (vector == SYSCALL_VECTOR)
+                        goto next;
+
+                for_each_cpu_mask_nr(new_cpu, new_mask)
+                        if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+                                goto next;
+                /* Found one! */
+                current_vector = vector;
+                current_offset = offset;
+                if (old_vector) {
+                        cfg->move_in_progress = 1;
+                        cfg->old_domain = cfg->domain;
+                }
+                for_each_cpu_mask_nr(new_cpu, new_mask)
+                        per_cpu(vector_irq, new_cpu)[vector] = irq;
+                cfg->vector = vector;
+                cfg->domain = domain;
+                return 0;
+        }
+        return -ENOSPC;
+}
+
+static int assign_irq_vector(int irq, cpumask_t mask)
+{
+	int err;
 	unsigned long flags;
-	int vector;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	vector = __assign_irq_vector(irq);
+	err = __assign_irq_vector(irq, mask);
 	spin_unlock_irqrestore(&vector_lock, flags);
 
-	return vector;
+	return err;
+}
+
+static void __clear_irq_vector(int irq)
+{
+	struct irq_cfg *cfg;
+	cpumask_t mask;
+	int cpu, vector;
+
+	cfg = irq_cfg(irq);
+	BUG_ON(!cfg->vector);
+
+	vector = cfg->vector;
+	cpus_and(mask, cfg->domain, cpu_online_map);
+	for_each_cpu_mask_nr(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+
+	cfg->vector = 0;
+	cpus_clear(cfg->domain);
+}
+
+void __setup_vector_irq(int cpu)
+{
+	/* Initialize vector_irq on a new cpu */
+	/* This function must be called with vector_lock held */
+	int irq, vector;
+	struct irq_cfg *cfg;
+
+	/* Mark the inuse vectors */
+	for_each_irq_cfg(cfg) {
+		if (!cpu_isset(cpu, cfg->domain))
+			continue;
+		vector = cfg->vector;
+		irq = cfg->irq;
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+	/* Mark the free vectors */
+	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		irq = per_cpu(vector_irq, cpu)[vector];
+		if (irq < 0)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cpu_isset(cpu, cfg->domain))
+			per_cpu(vector_irq, cpu)[vector] = -1;
+        }
 }
 
 static struct irq_chip ioapic_chip;
@@ -1095,7 +1186,7 @@ static struct irq_chip ioapic_chip;
 #define IOAPIC_EDGE	0
 #define IOAPIC_LEVEL	1
 
-static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+static void ioapic_register_intr(int irq, unsigned long trigger)
 {
 	struct irq_desc *desc;
 
@@ -1115,79 +1206,109 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					 handle_edge_irq, "edge");
 	}
-	set_intr_gate(vector, interrupt[irq]);
 }
 
-static void __init setup_IO_APIC_irqs(void)
+static int setup_ioapic_entry(int apic, int irq,
+			      struct IO_APIC_route_entry *entry,
+			      unsigned int destination, int trigger,
+			      int polarity, int vector)
 {
+	/*
+	 * add it to the IO-APIC irq-routing table:
+	 */
+	memset(entry,0,sizeof(*entry));
+
+	entry->delivery_mode = INT_DELIVERY_MODE;
+	entry->dest_mode = INT_DEST_MODE;
+	entry->dest.logical.logical_dest = destination;
+
+	entry->mask = 0;                                /* enable IRQ */
+	entry->trigger = trigger;
+	entry->polarity = polarity;
+	entry->vector = vector;
+
+	/* Mask level triggered irqs.
+	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+	 */
+	if (trigger)
+		entry->mask = 1;
+
+	return 0;
+}
+
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+                              int trigger, int polarity)
+{
+	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
-	int apic, pin, idx, irq, first_notcon = 1, vector;
+	cpumask_t mask;
+
+	if (!IO_APIC_IRQ(irq))
+		return;
+
+	cfg = irq_cfg(irq);
+
+	mask = TARGET_CPUS;
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(mask, cfg->domain, mask);
+
+	apic_printk(APIC_VERBOSE,KERN_DEBUG
+		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
+		    "IRQ %d Mode:%i Active:%i)\n",
+		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
+		    irq, trigger, polarity);
+
+
+	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+			       cpu_mask_to_apicid(mask), trigger, polarity,
+			       cfg->vector)) {
+		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+		       mp_ioapics[apic].mp_apicid, pin);
+		__clear_irq_vector(irq);
+		return;
+	}
+
+	ioapic_register_intr(irq, trigger);
+	if (irq < 16)
+		disable_8259A_irq(irq);
+
+	ioapic_write_entry(apic, pin, entry);
+}
+
+static void __init setup_IO_APIC_irqs(void)
+{
+	int apic, pin, idx, irq, first_notcon = 1;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
 	for (apic = 0; apic < nr_ioapics; apic++) {
 	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 
-		/*
-		 * add it to the IO-APIC irq-routing table:
-		 */
-		memset(&entry, 0, sizeof(entry));
-
-		entry.delivery_mode = INT_DELIVERY_MODE;
-		entry.dest_mode = INT_DEST_MODE;
-		entry.mask = 0;				/* enable IRQ */
-		entry.dest.logical.logical_dest =
-					cpu_mask_to_apicid(TARGET_CPUS);
-
-		idx = find_irq_entry(apic, pin, mp_INT);
+		idx = find_irq_entry(apic,pin,mp_INT);
 		if (idx == -1) {
 			if (first_notcon) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						" IO-APIC (apicid-pin) %d-%d",
-						mp_ioapics[apic].mp_apicid,
-						pin);
+				apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
 				first_notcon = 0;
 			} else
-				apic_printk(APIC_VERBOSE, ", %d-%d",
-					mp_ioapics[apic].mp_apicid, pin);
+				apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
 			continue;
 		}
-
 		if (!first_notcon) {
 			apic_printk(APIC_VERBOSE, " not connected.\n");
 			first_notcon = 1;
 		}
 
-		entry.trigger = irq_trigger(idx);
-		entry.polarity = irq_polarity(idx);
-
-		if (irq_trigger(idx)) {
-			entry.trigger = 1;
-			entry.mask = 1;
-		}
-
 		irq = pin_2_irq(idx, apic, pin);
-		/*
-		 * skip adding the timer int on secondary nodes, which causes
-		 * a small but painful rift in the time-space continuum
-		 */
-		if (multi_timer_check(apic, irq))
-			continue;
-		else
-			add_pin_to_irq(irq, apic, pin);
 
-		if (!apic && !IO_APIC_IRQ(irq))
-			continue;
+                if (multi_timer_check(apic, irq))
+                        continue;
 
-		if (IO_APIC_IRQ(irq)) {
-			vector = assign_irq_vector(irq);
-			entry.vector = vector;
-			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+		add_pin_to_irq(irq, apic, pin);
 
-			if (!apic && (irq < 16))
-				disable_8259A_irq(irq);
-		}
-		ioapic_write_entry(apic, pin, entry);
+		setup_IO_APIC_irq(apic, pin, irq,
+				  irq_trigger(idx), irq_polarity(idx));
 	}
 	}
 
@@ -1221,7 +1342,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 	 * The timer IRQ doesn't have to know that behind the
 	 * scene we may have a 8259A-master in AEOI mode ...
 	 */
-	ioapic_register_intr(0, vector, IOAPIC_EDGE);
+	ioapic_register_intr(0, IOAPIC_EDGE);
 
 	/*
 	 * Add it to the IO-APIC irq-routing table:
@@ -1805,8 +1926,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 	return was_pending;
 }
 
+static void irq_complete_move(unsigned int irq);
 static void ack_ioapic_irq(unsigned int irq)
 {
+	irq_complete_move(irq);
 	move_native_irq(irq);
 	ack_APIC_irq();
 }
@@ -1816,6 +1939,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq)
 	unsigned long v;
 	int i;
 
+	irq_complete_move(irq);
 	move_native_irq(irq);
 /*
  * It appears there is an erratum which affects at least version 0x11
@@ -1858,6 +1982,64 @@ static int ioapic_retrigger_irq(unsigned int irq)
 	return 1;
 }
 
+#ifdef CONFIG_SMP
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+	ack_APIC_irq();
+	irq_enter();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		unsigned int irq;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+		irq = __get_cpu_var(vector_irq)[vector];
+
+		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
+		cfg = irq_cfg(irq);
+		spin_lock(&desc->lock);
+		if (!cfg->move_cleanup_count)
+			goto unlock;
+
+		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+			goto unlock;
+
+		__get_cpu_var(vector_irq)[vector] = -1;
+		cfg->move_cleanup_count--;
+unlock:
+		spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+static void irq_complete_move(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+	unsigned vector, me;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	vector = ~get_irq_regs()->orig_ax;
+	me = smp_processor_id();
+	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
+		cpumask_t cleanup_mask;
+
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+}
+#else
+static inline void irq_complete_move(unsigned int irq) {}
+#endif
+
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name 		= "IO-APIC",
 	.startup 	= startup_ioapic_irq,
@@ -1940,7 +2122,7 @@ static struct irq_chip lapic_chip __read_mostly = {
 	.ack		= ack_lapic_irq,
 };
 
-static void lapic_register_intr(int irq, int vector)
+static void lapic_register_intr(int irq)
 {
 	struct irq_desc *desc;
 
@@ -1948,7 +2130,6 @@ static void lapic_register_intr(int irq, int vector)
 	desc->status &= ~IRQ_LEVEL;
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
-	set_intr_gate(vector, interrupt[irq]);
 }
 
 static void __init setup_nmi(void)
@@ -2036,9 +2217,9 @@ static inline void __init unlock_ExtINT_logic(void)
  */
 static inline void __init check_timer(void)
 {
+	struct irq_cfg *cfg = irq_cfg(0);
 	int apic1, pin1, apic2, pin2;
 	int no_pin1 = 0;
-	int vector;
 	unsigned int ver;
 	unsigned long flags;
 
@@ -2051,8 +2232,7 @@ static inline void __init check_timer(void)
 	 * get/set the timer IRQ vector:
 	 */
 	disable_8259A_irq(0);
-	vector = assign_irq_vector(0);
-	set_intr_gate(vector, interrupt[0]);
+	assign_irq_vector(0, TARGET_CPUS);
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2074,7 +2254,7 @@ static inline void __init check_timer(void)
 
 	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
 		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
-		    vector, apic1, pin1, apic2, pin2);
+		    cfg->vector, apic1, pin1, apic2, pin2);
 
 	/*
 	 * Some BIOS writers are clueless and report the ExtINTA
@@ -2098,7 +2278,7 @@ static inline void __init check_timer(void)
 		 */
 		if (no_pin1) {
 			add_pin_to_irq(0, apic1, pin1);
-			setup_timer_IRQ0_pin(apic1, pin1, vector);
+			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		}
 		unmask_IO_APIC_irq(0);
 		if (timer_irq_works()) {
@@ -2123,7 +2303,7 @@ static inline void __init check_timer(void)
 		 * legacy devices should be connected to IO APIC #0
 		 */
 		replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
-		setup_timer_IRQ0_pin(apic2, pin2, vector);
+		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
 		unmask_IO_APIC_irq(0);
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
@@ -2154,8 +2334,8 @@ static inline void __init check_timer(void)
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
-	lapic_register_intr(0, vector);
-	apic_write(APIC_LVT0, APIC_DM_FIXED | vector);	/* Fixed mode */
+	lapic_register_intr(0);
+	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
 	enable_8259A_irq(0);
 
 	if (timer_irq_works()) {
@@ -2163,7 +2343,7 @@ static inline void __init check_timer(void)
 		goto out;
 	}
 	disable_8259A_irq(0);
-	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
 	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
 
 	apic_printk(APIC_QUIET, KERN_INFO
@@ -2207,12 +2387,6 @@ out:
 
 void __init setup_IO_APIC(void)
 {
-	int i;
-
-	/* Reserve all the system vectors. */
-	for (i = first_system_vector; i < NR_VECTORS; i++)
-		set_bit(i, used_vectors);
-
 	enable_IO_APIC();
 
 	io_apic_irqs = ~PIC_IRQS;
@@ -2334,12 +2508,14 @@ device_initcall(ioapic_init_sysfs);
 unsigned int create_irq_nr(unsigned int irq_want)
 {
 	/* Allocate an unused irq */
-	unsigned int irq, new, vector = 0;
+	unsigned int irq, new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new;
 
+#ifndef CONFIG_HAVE_SPARSE_IRQ
 	/* only can use bus/dev/fn.. when per_cpu vector is used */
 	irq_want = nr_irqs - 1;
+#endif
 
 	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
@@ -2351,15 +2527,13 @@ unsigned int create_irq_nr(unsigned int irq_want)
 			continue;
 		if (!cfg_new)
 			cfg_new = irq_cfg_alloc(new);
-		vector = __assign_irq_vector(new);
-		if (likely(vector > 0))
+		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
 			irq = new;
 		break;
 	}
 	spin_unlock_irqrestore(&vector_lock, flags);
 
 	if (irq > 0) {
-		set_intr_gate(vector, interrupt[irq]);
 		dynamic_irq_init(irq);
 	}
 	return irq;
@@ -2377,8 +2551,7 @@ void destroy_irq(unsigned int irq)
 	dynamic_irq_cleanup(irq);
 
 	spin_lock_irqsave(&vector_lock, flags);
-	clear_bit(irq_cfg(irq)->vector, used_vectors);
-	irq_cfg(irq)->vector = 0;
+	__clear_irq_vector(irq);
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
 
@@ -2388,57 +2561,65 @@ void destroy_irq(unsigned int irq)
 #ifdef CONFIG_PCI_MSI
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
-	int vector;
+	struct irq_cfg *cfg;
+	int err;
 	unsigned dest;
+	cpumask_t tmp;
 
-	vector = assign_irq_vector(irq);
-	if (vector >= 0) {
-		dest = cpu_mask_to_apicid(TARGET_CPUS);
-
-		msg->address_hi = MSI_ADDR_BASE_HI;
-		msg->address_lo =
-			MSI_ADDR_BASE_LO |
-			((INT_DEST_MODE == 0) ?
-MSI_ADDR_DEST_MODE_PHYSICAL:
-				MSI_ADDR_DEST_MODE_LOGICAL) |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				MSI_ADDR_REDIRECTION_CPU:
-				MSI_ADDR_REDIRECTION_LOWPRI) |
-			MSI_ADDR_DEST_ID(dest);
+	tmp = TARGET_CPUS;
+	err = assign_irq_vector(irq, tmp);
+	if (err)
+		return err;
 
-		msg->data =
-			MSI_DATA_TRIGGER_EDGE |
-			MSI_DATA_LEVEL_ASSERT |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-MSI_DATA_DELIVERY_FIXED:
-				MSI_DATA_DELIVERY_LOWPRI) |
-			MSI_DATA_VECTOR(vector);
-	}
-	return vector;
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, tmp);
+	dest = cpu_mask_to_apicid(tmp);
+
+	msg->address_hi = MSI_ADDR_BASE_HI;
+	msg->address_lo =
+		MSI_ADDR_BASE_LO |
+		((INT_DEST_MODE == 0) ?
+			MSI_ADDR_DEST_MODE_PHYSICAL:
+			MSI_ADDR_DEST_MODE_LOGICAL) |
+		((INT_DELIVERY_MODE != dest_LowestPrio) ?
+			MSI_ADDR_REDIRECTION_CPU:
+			MSI_ADDR_REDIRECTION_LOWPRI) |
+		MSI_ADDR_DEST_ID(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		((INT_DELIVERY_MODE != dest_LowestPrio) ?
+			MSI_DATA_DELIVERY_FIXED:
+			MSI_DATA_DELIVERY_LOWPRI) |
+		MSI_DATA_VECTOR(cfg->vector);
+
+	return err;
 }
 
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
-	int vector;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
-		tmp = TARGET_CPUS;
+		return;
 
-	vector = assign_irq_vector(irq);
-	if (vector < 0)
+	if (assign_irq_vector(irq, mask))
 		return;
 
-	dest = cpu_mask_to_apicid(mask);
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
 
 	read_msi_msg(irq, &msg);
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(vector);
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
@@ -2517,15 +2698,15 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_SMP
 
-static void target_ht_irq(unsigned int irq, unsigned int dest)
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
 	struct ht_irq_msg msg;
 	fetch_ht_irq_msg(irq, &msg);
 
-	msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
+	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
 	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
 
-	msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
+	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
 	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
 
 	write_ht_irq_msg(irq, &msg);
@@ -2533,18 +2714,22 @@ static void target_ht_irq(unsigned int irq, unsigned int dest)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
-		tmp = TARGET_CPUS;
+		return;
 
-	cpus_and(mask, tmp, CPU_MASK_ALL);
+	if (assign_irq_vector(irq, mask))
+		return;
 
-	dest = cpu_mask_to_apicid(mask);
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
 
-	target_ht_irq(irq, dest);
+	target_ht_irq(irq, dest, cfg->vector);
 	irq_to_desc(irq)->affinity = mask;
 }
 #endif
@@ -2562,16 +2747,18 @@ static struct irq_chip ht_irq_chip = {
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
-	int vector;
+	struct irq_cfg *cfg;
+	int err;
+	cpumask_t tmp;
 
-	vector = assign_irq_vector(irq);
-	if (vector >= 0) {
+	tmp = TARGET_CPUS;
+	err = assign_irq_vector(irq, tmp);
+	if ( !err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
-		cpumask_t tmp;
 
-		cpus_clear(tmp);
-		cpu_set(vector >> 8, tmp);
+		cfg = irq_cfg(irq);
+		cpus_and(tmp, cfg->domain, tmp);
 		dest = cpu_mask_to_apicid(tmp);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
@@ -2579,7 +2766,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 		msg.address_lo =
 			HT_IRQ_LOW_BASE |
 			HT_IRQ_LOW_DEST_ID(dest) |
-			HT_IRQ_LOW_VECTOR(vector) |
+			HT_IRQ_LOW_VECTOR(cfg->vector) |
 			((INT_DEST_MODE == 0) ?
 				HT_IRQ_LOW_DM_PHYSICAL :
 				HT_IRQ_LOW_DM_LOGICAL) |
@@ -2594,7 +2781,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
 					      handle_edge_irq, "edge");
 	}
-	return vector;
+	return err;
 }
 #endif /* CONFIG_HT_IRQ */
 
@@ -2705,50 +2892,21 @@ int __init io_apic_get_redir_entries(int ioapic)
 }
 
 
-int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
+int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity)
 {
-	struct IO_APIC_route_entry entry;
-
 	if (!IO_APIC_IRQ(irq)) {
 		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
 			ioapic);
 		return -EINVAL;
 	}
 
-	/*
-	 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
-	 * Note that we mask (disable) IRQs now -- these get enabled when the
-	 * corresponding device driver registers for this IRQ.
-	 */
-
-	memset(&entry, 0, sizeof(entry));
-
-	entry.delivery_mode = INT_DELIVERY_MODE;
-	entry.dest_mode = INT_DEST_MODE;
-	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-	entry.trigger = edge_level;
-	entry.polarity = active_high_low;
-	entry.mask  = 1;
-
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
 	if (irq >= 16)
 		add_pin_to_irq(irq, ioapic, pin);
 
-	entry.vector = assign_irq_vector(irq);
-
-	apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
-		"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
-		mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
-		edge_level, active_high_low);
-
-	ioapic_register_intr(irq, entry.vector, edge_level);
-
-	if (!ioapic && (irq < 16))
-		disable_8259A_irq(irq);
-
-	ioapic_write_entry(ioapic, pin, entry);
+	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
 
 	return 0;
 }
@@ -2774,6 +2932,47 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 
 #endif /* CONFIG_ACPI */
 
+/*
+ * This function currently is only a helper for the i386 smp boot process where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online
+ * so mask in all cases should simply be TARGET_CPUS
+ */
+#ifdef CONFIG_SMP
+void __init setup_ioapic_dest(void)
+{
+	int pin, ioapic, irq, irq_entry;
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+
+	if (skip_ioapic_setup == 1)
+		return;
+
+	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+			if (irq_entry == -1)
+				continue;
+			irq = pin_2_irq(irq_entry, ioapic, pin);
+
+			/* setup_IO_APIC_irqs could fail to get vector for some device
+			 * when you have too many devices, because at that time only boot
+			 * cpu is online.
+			 */
+			cfg = irq_cfg(irq);
+			if (!cfg->vector)
+				setup_IO_APIC_irq(ioapic, pin, irq,
+						  irq_trigger(irq_entry),
+						  irq_polarity(irq_entry));
+			else {
+				desc = irq_to_desc(irq);
+				set_ioapic_affinity_irq(irq, TARGET_CPUS);
+			}
+		}
+
+	}
+}
+#endif
+
 static int __init parse_disable_timer_pin_1(char *arg)
 {
 	disable_timer_pin_1 = 1;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 0a57e39159a8..b51ffdcfa31a 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -223,21 +223,25 @@ unsigned int do_IRQ(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs;
 	/* high bit used in ret_from_ code */
-	int overflow, irq = ~regs->orig_ax;
+	int overflow;
+	unsigned vector = ~regs->orig_ax;
 	struct irq_desc *desc;
+	unsigned irq;
 
-	desc = irq_to_desc(irq);
-	if (unlikely(!desc)) {
-		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
-					__func__, irq);
-		BUG();
-	}
 
 	old_regs = set_irq_regs(regs);
 	irq_enter();
+	irq = __get_cpu_var(vector_irq)[vector];
 
 	overflow = check_stack_overflow();
 
+	desc = irq_to_desc(irq);
+	if (unlikely(!desc)) {
+		printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x\n",
+					__func__, irq, vector);
+		BUG();
+	}
+
 	if (!execute_on_irq_stack(overflow, desc, irq)) {
 		if (unlikely(overflow))
 			print_stack_overflow();
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index ded09ac2642e..9092103a18eb 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -90,6 +90,27 @@ static struct irqaction irq2 = {
 	.name = "cascade",
 };
 
+DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
+	[0 ... IRQ0_VECTOR - 1] = -1,
+	[IRQ0_VECTOR] = 0,
+	[IRQ1_VECTOR] = 1,
+	[IRQ2_VECTOR] = 2,
+	[IRQ3_VECTOR] = 3,
+	[IRQ4_VECTOR] = 4,
+	[IRQ5_VECTOR] = 5,
+	[IRQ6_VECTOR] = 6,
+	[IRQ7_VECTOR] = 7,
+	[IRQ8_VECTOR] = 8,
+	[IRQ9_VECTOR] = 9,
+	[IRQ10_VECTOR] = 10,
+	[IRQ11_VECTOR] = 11,
+	[IRQ12_VECTOR] = 12,
+	[IRQ13_VECTOR] = 13,
+	[IRQ14_VECTOR] = 14,
+	[IRQ15_VECTOR] = 15,
+	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
+};
+
 /* Overridden in paravirt.c */
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
@@ -105,22 +126,14 @@ void __init native_init_IRQ(void)
 	 * us. (some of these will be overridden and become
 	 * 'special' SMP interrupts)
 	 */
-	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
-		int vector = FIRST_EXTERNAL_VECTOR + i;
-		if (i >= nr_irqs)
-			break;
+	for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
 		/* SYSCALL_VECTOR was reserved in trap_init. */
-		if (!test_bit(vector, used_vectors))
-			set_intr_gate(vector, interrupt[i]);
+		if (i != SYSCALL_VECTOR)
+			set_intr_gate(i, interrupt[i]);
 	}
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
-	/*
-	 * IRQ0 must be given a fixed assignment and initialized,
-	 * because it's used before the IO-APIC is set up.
-	 */
-	set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
 
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -135,6 +148,9 @@ void __init native_init_IRQ(void)
 
 	/* IPI for single call function */
 	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
+
+	/* Low priority IPI to cleanup after moving an irq */
+	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -168,3 +184,4 @@ void __init native_init_IRQ(void)
 
 	irq_ctx_init(smp_processor_id());
 }
+
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 65f0b8a47bed..48ee4f9435f4 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -582,7 +582,7 @@ static void __init lguest_init_IRQ(void)
 	for (i = 0; i < LGUEST_IRQS; i++) {
 		int vector = FIRST_EXTERNAL_VECTOR + i;
 		if (vector != SYSCALL_VECTOR) {
-			set_intr_gate(vector, interrupt[i]);
+			set_intr_gate(vector, interrupt[vector]);
 			set_irq_chip_and_handler_name(i, &lguest_irq_controller,
 						      handle_level_irq,
 						      "level");
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index df37fc9d6a26..3c3b471ea496 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -41,6 +41,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
 	 { }
 };
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+        return cpumask_of_cpu(cpu);
+}
 
 static int probe_bigsmp(void)
 {
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 6513d41ea21e..28459cab3ddb 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -75,4 +75,18 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 }
 #endif
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt desitination.
+	 */
+	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+	return domain;
+}
+
 struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8cf58394975e..71a309b122e6 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,4 +38,18 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt desitination.
+	 */
+	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+	return domain;
+}
+
 struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6ad6b67a723d..6272b5e69da6 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -23,4 +23,18 @@ static int probe_summit(void)
 	return 0;
 }
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt desitination.
+	 */
+	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+	return domain;
+}
+
 struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/include/asm-x86/bigsmp/apic.h b/include/asm-x86/bigsmp/apic.h
index 0a9cd7c5ca0c..1d9543b9d358 100644
--- a/include/asm-x86/bigsmp/apic.h
+++ b/include/asm-x86/bigsmp/apic.h
@@ -9,22 +9,17 @@ static inline int apic_id_registered(void)
 	return (1);
 }
 
-/* Round robin the irqs amoung the online cpus */
 static inline cpumask_t target_cpus(void)
 {
-	static unsigned long cpu = NR_CPUS;
-	do {
-		if (cpu >= NR_CPUS)
-			cpu = first_cpu(cpu_online_map);
-		else
-			cpu = next_cpu(cpu, cpu_online_map);
-	} while (cpu >= NR_CPUS);
-	return cpumask_of_cpu(cpu);
+#ifdef CONFIG_SMP
+        return cpu_online_map;
+#else
+        return cpumask_of_cpu(0);
+#endif
 }
 
 #undef APIC_DEST_LOGICAL
 #define APIC_DEST_LOGICAL	0
-#define TARGET_CPUS		(target_cpus())
 #define APIC_DFR_VALUE		(APIC_DFR_FLAT)
 #define INT_DELIVERY_MODE	(dest_Fixed)
 #define INT_DEST_MODE		(0)    /* phys delivery to target proc */
diff --git a/include/asm-x86/es7000/apic.h b/include/asm-x86/es7000/apic.h
index bd2c44d1f7ac..750afada5fbf 100644
--- a/include/asm-x86/es7000/apic.h
+++ b/include/asm-x86/es7000/apic.h
@@ -17,7 +17,6 @@ static inline cpumask_t target_cpus(void)
 	return cpumask_of_cpu(smp_processor_id());
 #endif
 }
-#define TARGET_CPUS	(target_cpus())
 
 #if defined CONFIG_ES7000_CLUSTERED_APIC
 #define APIC_DFR_VALUE		(APIC_DFR_CLUSTER)
@@ -81,7 +80,7 @@ static inline void setup_apic_routing(void)
 	int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
 	printk("Enabling APIC mode:  %s.  Using %d I/O APICs, target cpus %lx\n",
 		(apic_version[apic] == 0x14) ?
-		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]);
+		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]);
 }
 
 static inline int multi_timer_check(int apic, int irq)
diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index 34280f027664..6fe4f81bfcf9 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h
@@ -57,6 +57,7 @@ struct genapic {
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
 	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+	cpumask_t (*vector_allocation_domain)(int cpu);
 
 #ifdef CONFIG_SMP
 	/* ipi */
@@ -104,6 +105,7 @@ struct genapic {
 	APICFUNC(get_apic_id)				\
 	.apic_id_mask = APIC_ID_MASK,			\
 	APICFUNC(cpu_mask_to_apicid)			\
+	APICFUNC(vector_allocation_domain)			\
 	APICFUNC(acpi_madt_oem_check)			\
 	IPIFUNC(send_IPI_mask)				\
 	IPIFUNC(send_IPI_allbutself)			\
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 50f6e0316b50..51c787d17cbc 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -116,12 +116,12 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
 
 #ifdef CONFIG_X86_32
 extern void (*const interrupt[NR_IRQS])(void);
-#else
+#endif
+
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
-#endif
 
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_IO_APIC
 extern void lock_vector_lock(void);
 extern void unlock_vector_lock(void);
 extern void __setup_vector_irq(int cpu);
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index cb09802ce651..a8d065d85f57 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -19,19 +19,14 @@
 
 /*
  * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
- * cleanup after irq migration on 64 bit.
+ * cleanup after irq migration.
  */
 #define IRQ_MOVE_CLEANUP_VECTOR	FIRST_EXTERNAL_VECTOR
 
 /*
- * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit.
- * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit.
+ * Vectors 0x30-0x3f are used for ISA interrupts.
  */
-#ifdef CONFIG_X86_32
-#define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR)
-#else
 #define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR + 0x10)
-#endif
 #define IRQ1_VECTOR		(IRQ0_VECTOR + 1)
 #define IRQ2_VECTOR		(IRQ0_VECTOR + 2)
 #define IRQ3_VECTOR		(IRQ0_VECTOR + 3)
@@ -96,11 +91,7 @@
  * start at 0x31(0x41) to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
  */
-#ifdef CONFIG_X86_32
-# define FIRST_DEVICE_VECTOR	0x31
-#else
-# define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
-#endif
+#define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
 
 #define NR_VECTORS		256
 
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
index 9283b60a1dd2..6b1add8e31dd 100644
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
@@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
+BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
 #endif
 
 /*
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index 2a330a41b3dd..3c66f2cdaec1 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -85,6 +85,20 @@ static inline int apicid_to_node(int logical_apicid)
 	return 0;
 #endif
 }
+
+static inline cpumask_t vector_allocation_domain(int cpu)
+{
+        /* Careful. Some cpus do not strictly honor the set of cpus
+         * specified in the interrupt destination when using lowest
+         * priority interrupt delivery mode.
+         *
+         * In particular there was a hyperthreading cpu observed to
+         * deliver interrupts to the wrong hyperthread when only one
+         * hyperthread was specified in the interrupt desitination.
+         */
+        cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+        return domain;
+}
 #endif
 
 static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
@@ -138,6 +152,5 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
 static inline void enable_apic_mode(void)
 {
 }
-
 #endif /* CONFIG_X86_LOCAL_APIC */
 #endif /* ASM_X86__MACH_DEFAULT__MACH_APIC_H */
diff --git a/include/asm-x86/mach-generic/mach_apic.h b/include/asm-x86/mach-generic/mach_apic.h
index 5d010c6881dd..5085b52da301 100644
--- a/include/asm-x86/mach-generic/mach_apic.h
+++ b/include/asm-x86/mach-generic/mach_apic.h
@@ -24,6 +24,7 @@
 #define check_phys_apicid_present (genapic->check_phys_apicid_present)
 #define check_apicid_used (genapic->check_apicid_used)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define vector_allocation_domain (genapic->vector_allocation_domain)
 #define enable_apic_mode (genapic->enable_apic_mode)
 #define phys_pkg_id (genapic->phys_pkg_id)
 
diff --git a/include/asm-x86/numaq/apic.h b/include/asm-x86/numaq/apic.h
index a8344ba6ea15..0bf2a06b7a4e 100644
--- a/include/asm-x86/numaq/apic.h
+++ b/include/asm-x86/numaq/apic.h
@@ -12,8 +12,6 @@ static inline cpumask_t target_cpus(void)
 	return CPU_MASK_ALL;
 }
 
-#define TARGET_CPUS (target_cpus())
-
 #define NO_BALANCE_IRQ (1)
 #define esr_disable (1)
 
diff --git a/include/asm-x86/summit/apic.h b/include/asm-x86/summit/apic.h
index c5b2e4b10358..0f68037b8f24 100644
--- a/include/asm-x86/summit/apic.h
+++ b/include/asm-x86/summit/apic.h
@@ -22,7 +22,6 @@ static inline cpumask_t target_cpus(void)
 	 */
 	return cpumask_of_cpu(0);
 }
-#define TARGET_CPUS	(target_cpus())
 
 #define INT_DELIVERY_MODE (dest_LowestPrio)
 #define INT_DEST_MODE 1     /* logical delivery broadcast to all procs */
-- 
GitLab


From 7a959cff725872ce9c3a534f10724d7bb2cb3c4a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:32 -0700
Subject: [PATCH 420/895] x86: add debug info for 32bit sparse_irq

so could figure out bugs where we get an interrupt, but vector_irq is
not initialized yet.

Signed-off-by: Yinghai Lu  <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 8 ++++++--
 arch/x86/kernel/io_apic_64.c | 2 ++
 arch/x86/kernel/irq_32.c     | 4 ++--
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index ea33d3c74970..3001924bdd36 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1114,8 +1114,12 @@ next:
                         cfg->move_in_progress = 1;
                         cfg->old_domain = cfg->domain;
                 }
-                for_each_cpu_mask_nr(new_cpu, new_mask)
-                        per_cpu(vector_irq, new_cpu)[vector] = irq;
+		printk(KERN_DEBUG "assign_irq_vector: irq %d vector %#x cpu ", irq, vector);
+		for_each_cpu_mask_nr(new_cpu, new_mask) {
+			per_cpu(vector_irq, new_cpu)[vector] = irq;
+			printk(KERN_CONT " %d ", new_cpu);
+		}
+		printk(KERN_CONT "\n");
                 cfg->vector = vector;
                 cfg->domain = domain;
                 return 0;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index b0d4abc55a11..30d2e3811313 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1394,6 +1394,8 @@ __apicdebuginit(void) print_IO_APIC(void)
 	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 
 	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
 	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b51ffdcfa31a..cc929f2f84f1 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -237,8 +237,8 @@ unsigned int do_IRQ(struct pt_regs *regs)
 
 	desc = irq_to_desc(irq);
 	if (unlikely(!desc)) {
-		printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x\n",
-					__func__, irq, vector);
+		printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n",
+					__func__, irq, vector, smp_processor_id());
 		BUG();
 	}
 
-- 
GitLab


From d83e94acd95789829804fd9e442bd18975f4dc89 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:33 -0700
Subject: [PATCH 421/895] x86, io-apic: remove union about dest for log/phy

let user decide the meaning of the bits.

This unifies the 32-bit and 64-bit io-apic code a bit.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 18 +++++++-----------
 include/asm-x86/io_apic.h    | 16 ----------------
 2 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 3001924bdd36..353e586822af 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1224,7 +1224,7 @@ static int setup_ioapic_entry(int apic, int irq,
 
 	entry->delivery_mode = INT_DELIVERY_MODE;
 	entry->dest_mode = INT_DEST_MODE;
-	entry->dest.logical.logical_dest = destination;
+	entry->dest = destination;
 
 	entry->mask = 0;                                /* enable IRQ */
 	entry->trigger = trigger;
@@ -1336,7 +1336,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 	 */
 	entry.dest_mode = INT_DEST_MODE;
 	entry.mask = 1;					/* mask IRQ now */
-	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+	entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
 	entry.delivery_mode = INT_DELIVERY_MODE;
 	entry.polarity = 0;
 	entry.trigger = 0;
@@ -1425,19 +1425,15 @@ __apicdebuginit(void) print_IO_APIC(void)
 
 	printk(KERN_DEBUG ".... IRQ redirection table:\n");
 
-	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
-			  " Stat Dest Deli Vect:   \n");
+	printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
+			  " Stat Dmod Deli Vect:   \n");
 
 	for (i = 0; i <= reg_01.bits.entries; i++) {
 		struct IO_APIC_route_entry entry;
 
 		entry = ioapic_read_entry(apic, i);
 
-		printk(KERN_DEBUG " %02x %03X %02X  ",
-			i,
-			entry.dest.logical.logical_dest,
-			entry.dest.physical.physical_dest
-		);
+		printk(KERN_DEBUG " %02x %02X  ", i, entry.dest);
 
 		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
 			entry.mask,
@@ -1717,7 +1713,7 @@ void disable_IO_APIC(void)
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest.physical.physical_dest = read_apic_id();
+		entry.dest	      = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
@@ -2185,7 +2181,7 @@ static inline void __init unlock_ExtINT_logic(void)
 
 	entry1.dest_mode = 0;			/* physical delivery */
 	entry1.mask = 0;			/* unmask IRQ now */
-	entry1.dest.physical.physical_dest = hard_smp_processor_id();
+	entry1.dest = hard_smp_processor_id();
 	entry1.delivery_mode = dest_ExtINT;
 	entry1.polarity = entry0.polarity;
 	entry1.trigger = 0;
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 8ec68a50cf10..ce818292d2c7 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -87,24 +87,8 @@ struct IO_APIC_route_entry {
 		mask		:  1,	/* 0: enabled, 1: disabled */
 		__reserved_2	: 15;
 
-#ifdef CONFIG_X86_32
-	union {
-		struct {
-			__u32	__reserved_1	: 24,
-				physical_dest	:  4,
-				__reserved_2	:  4;
-		} physical;
-
-		struct {
-			__u32	__reserved_1	: 24,
-				logical_dest	:  8;
-		} logical;
-	} dest;
-#else
 	__u32	__reserved_3	: 24,
 		dest		:  8;
-#endif
-
 } __attribute__ ((packed));
 
 struct IR_IO_APIC_route_entry {
-- 
GitLab


From 1d02519242c23450b043e5e8a9e3cb84a8666fe3 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:34 -0700
Subject: [PATCH 422/895] x86: ordering functions in io_apic_32.c

prepare for unification:

try to make functions be of the same order to io_apic_64.c.

v2: add calling setup_msi_irq back to arch_setup_msi_irq

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 178 ++++++++++++++++++-----------------
 1 file changed, 94 insertions(+), 84 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 353e586822af..9531ef33362b 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1029,23 +1029,6 @@ static int pin_2_irq(int idx, int apic, int pin)
 	return irq;
 }
 
-static inline int IO_APIC_irq_trigger(int irq)
-{
-	int apic, idx, pin;
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			idx = find_irq_entry(apic, pin, mp_INT);
-			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-				return irq_trigger(idx);
-		}
-	}
-	/*
-	 * nonexistent IRQs are edge default
-	 */
-	return 0;
-}
-
 void lock_vector_lock(void)
 {
 	/* Used to the online set of cpus does not change
@@ -1190,6 +1173,23 @@ static struct irq_chip ioapic_chip;
 #define IOAPIC_EDGE	0
 #define IOAPIC_LEVEL	1
 
+static inline int IO_APIC_irq_trigger(int irq)
+{
+	int apic, idx, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			idx = find_irq_entry(apic, pin, mp_INT);
+			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+				return irq_trigger(idx);
+		}
+	}
+	/*
+	 * nonexistent IRQs are edge default
+	 */
+	return 0;
+}
+
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
 	struct irq_desc *desc;
@@ -1926,55 +1926,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 	return was_pending;
 }
 
-static void irq_complete_move(unsigned int irq);
-static void ack_ioapic_irq(unsigned int irq)
-{
-	irq_complete_move(irq);
-	move_native_irq(irq);
-	ack_APIC_irq();
-}
-
-static void ack_ioapic_quirk_irq(unsigned int irq)
-{
-	unsigned long v;
-	int i;
-
-	irq_complete_move(irq);
-	move_native_irq(irq);
-/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets).  Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless.  As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source.  The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually.  We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt.  We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul.  --macro
- */
-	i = irq_cfg(irq)->vector;
-
-	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
-	ack_APIC_irq();
-
-	if (!(v & (1 << (i & 0x1f)))) {
-		atomic_inc(&irq_mis_count);
-		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(irq);
-		__unmask_and_level_IO_APIC_irq(irq);
-		spin_unlock(&ioapic_lock);
-	}
-}
-
 static int ioapic_retrigger_irq(unsigned int irq)
 {
 	send_IPI_self(irq_cfg(irq)->vector);
@@ -2040,13 +1991,61 @@ static void irq_complete_move(unsigned int irq)
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
 
+static void ack_apic_edge(unsigned int irq)
+{
+	irq_complete_move(irq);
+	move_native_irq(irq);
+	ack_APIC_irq();
+}
+
+static void ack_apic_level(unsigned int irq)
+{
+	unsigned long v;
+	int i;
+
+	irq_complete_move(irq);
+	move_native_irq(irq);
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets).  Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source.  The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually.  We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt.  We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul.  --macro
+ */
+	i = irq_cfg(irq)->vector;
+
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+	ack_APIC_irq();
+
+	if (!(v & (1 << (i & 0x1f)))) {
+		atomic_inc(&irq_mis_count);
+		spin_lock(&ioapic_lock);
+		__mask_and_edge_IO_APIC_irq(irq);
+		__unmask_and_level_IO_APIC_irq(irq);
+		spin_unlock(&ioapic_lock);
+	}
+}
+
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name 		= "IO-APIC",
 	.startup 	= startup_ioapic_irq,
 	.mask	 	= mask_IO_APIC_irq,
 	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_ioapic_irq,
-	.eoi 		= ack_ioapic_quirk_irq,
+	.ack 		= ack_apic_edge,
+	.eoi 		= ack_apic_level,
 #ifdef CONFIG_SMP
 	.set_affinity 	= set_ioapic_affinity_irq,
 #endif
@@ -2094,11 +2093,6 @@ static inline void init_IO_APIC_traps(void)
  * The local APIC irq-chip implementation:
  */
 
-static void ack_lapic_irq(unsigned int irq)
-{
-	ack_APIC_irq();
-}
-
 static void mask_lapic_irq(unsigned int irq)
 {
 	unsigned long v;
@@ -2115,6 +2109,11 @@ static void unmask_lapic_irq(unsigned int irq)
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
+static void ack_lapic_irq(unsigned int irq)
+{
+	ack_APIC_irq();
+}
+
 static struct irq_chip lapic_chip __read_mostly = {
 	.name		= "local-APIC",
 	.mask		= mask_lapic_irq,
@@ -2636,13 +2635,31 @@ static struct irq_chip msi_chip = {
 	.name		= "PCI-MSI",
 	.unmask		= unmask_msi_irq,
 	.mask		= mask_msi_irq,
-	.ack		= ack_ioapic_irq,
+	.ack		= ack_apic_edge,
 #ifdef CONFIG_SMP
 	.set_affinity	= set_msi_irq_affinity,
 #endif
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(dev, irq, &msg);
+	if (ret < 0)
+		return ret;
+
+	set_irq_msi(irq, desc);
+	write_msi_msg(irq, &msg);
+
+	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+	return 0;
+}
+
 static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
 {
 	unsigned int irq;
@@ -2657,7 +2674,6 @@ static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
 
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
-	struct msi_msg msg;
 	int irq, ret;
 
 	unsigned int irq_want;
@@ -2669,17 +2685,11 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 	if (irq == 0)
 		return -1;
 
-	ret = msi_compose_msg(dev, irq, &msg);
+	ret = setup_msi_irq(dev, desc, irq);
 	if (ret < 0) {
 		destroy_irq(irq);
 		return ret;
-	}
-
-	set_irq_msi(irq, desc);
-	write_msi_msg(irq, &msg);
-
-	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
-				      "edge");
+        }
 
 	return 0;
 }
@@ -2738,7 +2748,7 @@ static struct irq_chip ht_irq_chip = {
 	.name		= "PCI-HT",
 	.mask		= mask_ht_irq,
 	.unmask		= unmask_ht_irq,
-	.ack		= ack_ioapic_irq,
+	.ack		= ack_apic_edge,
 #ifdef CONFIG_SMP
 	.set_affinity	= set_ht_irq_affinity,
 #endif
-- 
GitLab


From 8ea5371baa82db452a8d93e9977b418d30944e32 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:35 -0700
Subject: [PATCH 423/895] x86: ordering functions in io_apic_64.c

try to make functions have the same order between 32-bit and 64-bit.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 67 ++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 30d2e3811313..07e1e45ee026 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -409,40 +409,6 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 	return false;
 }
 
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL)					\
-									\
-{									\
-	int pin;							\
-	struct irq_cfg *cfg;						\
-	struct irq_pin_list *entry;					\
-									\
-	cfg = irq_cfg(irq);						\
-	entry = cfg->irq_2_pin;						\
-	for (;;) {							\
-		unsigned int reg;					\
-		if (!entry)						\
-			break;						\
-		pin = entry->pin;					\
-		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
-		reg ACTION;						\
-		io_apic_modify(entry->apic, reg);			\
-		FINAL;							\
-		if (!entry->next)					\
-			break;						\
-		entry = entry->next;					\
-	}								\
-}
-
 union entry_union {
 	struct { u32 w1, w2; };
 	struct IO_APIC_route_entry entry;
@@ -627,6 +593,39 @@ static void __init replace_pin_at_irq(unsigned int irq,
 		add_pin_to_irq(irq, newapic, newpin);
 }
 
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	readl(&io_apic->data);
+}
+
+#define __DO_ACTION(R, ACTION, FINAL)					\
+									\
+{									\
+	int pin;							\
+	struct irq_cfg *cfg;						\
+	struct irq_pin_list *entry;					\
+									\
+	cfg = irq_cfg(irq);						\
+	entry = cfg->irq_2_pin;						\
+	for (;;) {							\
+		unsigned int reg;					\
+		if (!entry)						\
+			break;						\
+		pin = entry->pin;					\
+		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
+		reg ACTION;						\
+		io_apic_modify(entry->apic, reg);			\
+		FINAL;							\
+		if (!entry->next)					\
+			break;						\
+		entry = entry->next;					\
+	}								\
+}
 
 #define DO_ACTION(name,R,ACTION, FINAL)					\
 									\
-- 
GitLab


From efa2559f65167989f1893cb065e3126d4f13ba60 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:36 -0700
Subject: [PATCH 424/895] x86: order variables in io_apic_xx.c

move first_system_vector to apic_64.c.

also add #ifdef CONFIG_INTR_REMAP to prepare 32 bit to use
same file.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c    |   5 ++
 arch/x86/kernel/io_apic_32.c |  79 ++++++++---------
 arch/x86/kernel/io_apic_64.c | 160 +++++++++++++++++++----------------
 3 files changed, 127 insertions(+), 117 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 94ddb69ae15e..4d7a188025b3 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -33,6 +33,7 @@
 #include <asm/smp.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
+#include <asm/desc.h>
 #include <asm/hpet.h>
 #include <asm/pgalloc.h>
 #include <asm/nmi.h>
@@ -59,6 +60,10 @@ int x2apic_preenabled;
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
+int first_system_vector = 0xfe;
+
+char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
+
 /*
  * Debug level, exported for io_apic.c
  */
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 9531ef33362b..3010bdd3352d 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -54,24 +54,22 @@
 
 #define __apicdebuginit(type) static type __init
 
-int (*ioapic_renumber_irq)(int ioapic, int irq);
-atomic_t irq_mis_count;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
-
-int timer_through_8259 __initdata;
-
 /*
  *	Is the SiS APIC rmw bug present ?
  *	-1 = don't know, 0 = no, 1 = yes
  */
 int sis_apic_bug = -1;
 
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
+
 int first_free_entry;
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+int pin_map_size;
+
 /*
  * # of IRQ routing registers
  */
@@ -93,7 +91,15 @@ int mp_bus_id_to_type[MAX_MP_BUSSES];
 
 DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
-static int disable_timer_pin_1 __initdata;
+int skip_ioapic_setup;
+
+static int __init parse_noapic(char *arg)
+{
+	/* disable IO-APIC */
+	disable_ioapic_setup();
+	return 0;
+}
+early_param("noapic", parse_noapic);
 
 struct irq_cfg;
 struct irq_pin_list;
@@ -268,13 +274,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 	return cfg;
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask);
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-int pin_map_size;
-
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -465,6 +464,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 		entry = entry->next;
 	}
 }
+
+static int assign_irq_vector(int irq, cpumask_t mask);
+
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
 	struct irq_cfg *cfg;
@@ -677,7 +679,6 @@ void send_IPI_self(int vector)
 #define MAX_PIRQS 8
 static int pirq_entries [MAX_PIRQS];
 static int pirqs_enabled;
-int skip_ioapic_setup;
 
 static int __init ioapic_pirq_setup(char *str)
 {
@@ -981,6 +982,7 @@ static inline int irq_trigger(int idx)
 	return MPBIOS_trigger(idx);
 }
 
+int (*ioapic_renumber_irq)(int ioapic, int irq);
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
@@ -1621,6 +1623,9 @@ __apicdebuginit(int) print_all_ICs(void)
 fs_initcall(print_all_ICs);
 
 
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
 static void __init enable_IO_APIC(void)
 {
 	union IO_APIC_reg_01 reg_01;
@@ -1998,6 +2003,7 @@ static void ack_apic_edge(unsigned int irq)
 	ack_APIC_irq();
 }
 
+atomic_t irq_mis_count;
 static void ack_apic_level(unsigned int irq)
 {
 	unsigned long v;
@@ -2208,6 +2214,17 @@ static inline void __init unlock_ExtINT_logic(void)
 	ioapic_write_entry(apic, pin, entry0);
 }
 
+static int disable_timer_pin_1 __initdata;
+
+static int __init parse_disable_timer_pin_1(char *arg)
+{
+	disable_timer_pin_1 = 1;
+	return 0;
+}
+early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
+
+int timer_through_8259 __initdata;
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
@@ -2983,28 +3000,6 @@ void __init setup_ioapic_dest(void)
 }
 #endif
 
-static int __init parse_disable_timer_pin_1(char *arg)
-{
-	disable_timer_pin_1 = 1;
-	return 0;
-}
-early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
-
-static int __init parse_enable_timer_pin_1(char *arg)
-{
-	disable_timer_pin_1 = -1;
-	return 0;
-}
-early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
-
-static int __init parse_noapic(char *arg)
-{
-	/* disable IO-APIC */
-	disable_ioapic_setup();
-	return 0;
-}
-early_param("noapic", parse_noapic);
-
 void __init ioapic_init_mappings(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 07e1e45ee026..847aa7987645 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -57,6 +57,47 @@
 
 #define __apicdebuginit(type) static type __init
 
+int ioapic_force;
+
+int sis_apic_bug; /* not actually supported, dummy for compile */
+
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
+
+int first_free_entry;
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+int pin_map_size;
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/* I/O APIC entries */
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
+int nr_ioapics;
+
+/* MP IRQ source entries */
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
+int skip_ioapic_setup;
+
+static int __init parse_noapic(char *str)
+{
+	disable_ioapic_setup();
+	return 0;
+}
+early_param("noapic", parse_noapic);
+
+
 struct irq_cfg;
 struct irq_pin_list;
 struct irq_cfg {
@@ -228,53 +269,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 	return cfg;
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask);
-
-int first_system_vector = 0xfe;
-
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
-int sis_apic_bug; /* not actually supported, dummy for compile */
-
-static int no_timer_check;
-
-static int disable_timer_pin_1 __initdata;
-
-int timer_through_8259 __initdata;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/* I/O APIC RTE contents at the OS boot up */
-struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
-
-/* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
-int nr_ioapics;
-
-/* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* # of MP IRQ source entries */
-int mp_irq_entries;
-
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-
-int pin_map_size;
-
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -481,12 +475,16 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 		apic = entry->apic;
 		pin = entry->pin;
+#ifdef CONFIG_INTR_REMAP
 		/*
 		 * With interrupt-remapping, destination information comes
 		 * from interrupt-remapping table entry.
 		 */
 		if (!irq_remapped(irq))
 			io_apic_write(apic, 0x11 + pin*2, dest);
+#else
+		io_apic_write(apic, 0x11 + pin*2, dest);
+#endif
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
@@ -497,6 +495,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 	}
 }
 
+static int assign_irq_vector(int irq, cpumask_t mask);
+
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
 	struct irq_cfg *cfg = irq_cfg(irq);
@@ -533,7 +533,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-int first_free_entry;
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
 	struct irq_cfg *cfg;
@@ -679,6 +678,10 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
+#ifdef CONFIG_INTR_REMAP
+/* I/O APIC RTE contents at the OS boot up */
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /*
  * Saves and masks all the unmasked IO-APIC RTE's
  */
@@ -741,25 +744,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
 	 */
 	restore_IO_APIC_setup();
 }
-
-int skip_ioapic_setup;
-int ioapic_force;
-
-static int __init parse_noapic(char *str)
-{
-	disable_ioapic_setup();
-	return 0;
-}
-early_param("noapic", parse_noapic);
-
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
-	disable_timer_pin_1 = 1;
-	return 1;
-}
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
-
+#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -1327,8 +1312,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
 	struct IO_APIC_route_entry entry;
 
+#ifdef CONFIG_INTR_REMAP
 	if (intr_remapping_enabled)
 		return;
+#endif
 
 	memset(&entry, 0, sizeof(entry));
 
@@ -1601,6 +1588,9 @@ __apicdebuginit(int) print_all_ICs(void)
 fs_initcall(print_all_ICs);
 
 
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
 void __init enable_IO_APIC(void)
 {
 	union IO_APIC_reg_01 reg_01;
@@ -1695,6 +1685,15 @@ void disable_IO_APIC(void)
 	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
+static int no_timer_check;
+
+static int __init notimercheck(char *s)
+{
+	no_timer_check = 1;
+	return 1;
+}
+__setup("no_timer_check", notimercheck);
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1708,6 +1707,9 @@ static int __init timer_irq_works(void)
 	unsigned long t1 = jiffies;
 	unsigned long flags;
 
+	if (no_timer_check)
+		return 1;
+
 	local_save_flags(flags);
 	local_irq_enable();
 	/* Let ten ticks pass... */
@@ -2239,6 +2241,17 @@ static inline void __init unlock_ExtINT_logic(void)
 	ioapic_write_entry(apic, pin, entry0);
 }
 
+static int disable_timer_pin_1 __initdata;
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+	disable_timer_pin_1 = 1;
+	return 0;
+}
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+int timer_through_8259 __initdata;
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
@@ -2286,8 +2299,10 @@ static inline void __init check_timer(void)
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
+#ifdef CONFIG_INTR_REMAP
 		if (intr_remapping_enabled)
 			panic("BIOS bug: timer not connected to IO-APIC");
+#endif
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -2305,7 +2320,7 @@ static inline void __init check_timer(void)
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		}
 		unmask_IO_APIC_irq(0);
-		if (!no_timer_check && timer_irq_works()) {
+		if (timer_irq_works()) {
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
 				enable_8259A_irq(0);
@@ -2314,8 +2329,10 @@ static inline void __init check_timer(void)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
 		}
+#ifdef CONFIG_INTR_REMAP
 		if (intr_remapping_enabled)
 			panic("timer doesn't work through Interrupt-remapped IO-APIC");
+#endif
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
 			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2391,13 +2408,6 @@ out:
 	local_irq_restore(flags);
 }
 
-static int __init notimercheck(char *s)
-{
-	no_timer_check = 1;
-	return 1;
-}
-__setup("no_timer_check", notimercheck);
-
 /*
  * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
  * to devices.  However there may be an I/O APIC pin available for
-- 
GitLab


From e955b5398b660a204854bdff059d050b44090879 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 19 Aug 2008 20:50:37 -0700
Subject: [PATCH 425/895] sparseirq: fix lockdep

-tip testing found this lockdep splat:

[    0.000000] Initializing CPU#0
[    0.000000] found new irq_desc for irq 0
[    0.000000] INFO: trying to register non-static key.
[    0.000000] the code is fine but needs lockdep annotation.
[    0.000000] turning off the locking correctness validator.
[    0.000000] Pid: 0, comm: swapper Not tainted 2.6.27-rc3-tip-00191-g98ccb89-dirty #1
[    0.000000]  [<c0153c22>] register_lock_class+0x3d2/0x400
[    0.000000]  [<c0104d87>] ? mcount_call+0x5/0xa
[    0.000000]  [<c0154f3a>] __lock_acquire+0x22a/0x5d0
[    0.000000]  [<c0104d87>] ? mcount_call+0x5/0xa
[    0.000000]  [<c0155351>] lock_acquire+0x71/0xa0
[    0.000000]  [<c016d61f>] ? set_irq_chip+0x3f/0x90
[    0.000000]  [<c070f148>] _spin_lock_irqsave+0x58/0x90
[    0.000000]  [<c016d61f>] ? set_irq_chip+0x3f/0x90
[    0.000000]  [<c016d61f>] set_irq_chip+0x3f/0x90
[    0.000000]  [<c016d7e0>] ? handle_level_irq+0x0/0xe0
[    0.000000]  [<c016da1a>] set_irq_chip_and_handler_name+0x1a/0x40
[    0.000000]  [<c0a396c1>] init_ISA_irqs+0x51/0xa0
[    0.000000]  [<c0a4a365>] pre_intr_init_hook+0x25/0x30
[    0.000000]  [<c0a39723>] native_init_IRQ+0x13/0x370
[    0.000000]  [<c015569c>] ? lock_release+0xcc/0x1d0
[    0.000000]  [<c0104d87>] ? mcount_call+0x5/0xa
[    0.000000]  [<c070dc22>] ? __mutex_unlock_slowpath+0x92/0x110
[    0.000000]  [<c070dcad>] ? mutex_unlock+0xd/0x10
[    0.000000]  [<c0135f62>] ? cpu_maps_update_done+0x12/0x20
[    0.000000]  [<c06c6743>] ? register_cpu_notifier+0x23/0x30
[    0.000000]  [<c011e8ae>] init_IRQ+0xe/0x10
[    0.000000]  [<c0a357a5>] start_kernel+0x1c5/0x340
[    0.000000]  [<c0a35280>] ? unknown_bootoption+0x0/0x210
[    0.000000]  [<c0a3506b>] i386_start_kernel+0x6b/0x80
[    0.000000]  =======================
[    0.000000] found new irq_desc for irq 1
[    0.000000] found new irq_desc for irq 2
[    0.000000] found new irq_desc for irq 3

this:

 static void init_one_irq_desc(struct irq_desc *desc)
 {
         memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
 #ifdef CONFIG_TRACE_IRQFLAGS
         lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 #endif
 }

should be unconditional.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/handle.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6d174390f3a0..24c83a3cee4d 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,13 +18,10 @@
 
 #include "internals.h"
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-
 /*
  * lockdep: we want to handle all irq_desc locks as a single lock-class:
  */
 static struct lock_class_key irq_desc_lock_class;
-#endif
 
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
@@ -75,9 +72,7 @@ static struct irq_desc irq_desc_init = {
 static void init_one_irq_desc(struct irq_desc *desc)
 {
 	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
-#ifdef CONFIG_TRACE_IRQFLAGS
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-#endif
 }
 
 extern int after_bootmem;
-- 
GitLab


From d4057bdb6a3bb85dd44f9f39f41eac53696fd637 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:38 -0700
Subject: [PATCH 426/895] x86: make headers files the same in io_apic_xx.c

also make no_timer_check to be global on 64 bit, because vmi_32 is using that.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 15 ++++++++++++---
 arch/x86/kernel/io_apic_64.c | 12 +++++++++---
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 3010bdd3352d..48184e126f2c 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -25,28 +25,37 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
-#include <linux/bootmem.h>
+#include <linux/pci.h>
 #include <linux/mc146818rtc.h>
 #include <linux/compiler.h>
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
-#include <linux/pci.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
-#include <linux/jiffies.h>	/* time_after() */
+#include <linux/jiffies.h>      /* time_after() */
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+#include <linux/bootmem.h>
+#include <linux/dmar.h>
 
+#include <asm/idle.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
+#include <asm/proto.h>
+#include <asm/acpi.h>
+#include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
+#include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 847aa7987645..3c66e5a8e899 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -27,12 +27,15 @@
 #include <linux/sched.h>
 #include <linux/pci.h>
 #include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
 #include <linux/acpi.h>
+#include <linux/module.h>
 #include <linux/sysdev.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
-#include <linux/dmar.h>
-#include <linux/jiffies.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h>	/* time_after() */
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
@@ -46,14 +49,17 @@
 #include <asm/proto.h>
 #include <asm/acpi.h>
 #include <asm/dma.h>
+#include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/setup.h>
 #include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
+#include <mach_apicdef.h>
 
 #define __apicdebuginit(type) static type __init
 
@@ -1685,7 +1691,7 @@ void disable_IO_APIC(void)
 	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
-static int no_timer_check;
+int no_timer_check __initdata;
 
 static int __init notimercheck(char *s)
 {
-- 
GitLab


From f876d213a59c363d2492e399cc6c24edd6f3c368 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:39 -0700
Subject: [PATCH 427/895] x86: make 64 handle sis_apic_bug like the 32 bit

do we have 64bit system with sis chipset?

[ mingo@elte.hu: nope, the problem chipset was 32-bit only.
                 The code symmetry is good nevertheless. ]

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 3c66e5a8e899..915af9b87aa4 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -65,7 +65,11 @@
 
 int ioapic_force;
 
-int sis_apic_bug; /* not actually supported, dummy for compile */
+/*
+ *      Is the SiS APIC rmw bug present ?
+ *      -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
 
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(vector_lock);
@@ -373,9 +377,11 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
  * Re-write a value: to be used for read-modify-write
  * cycles where the read already set up the index register.
  */
-static inline void io_apic_modify(unsigned int apic, unsigned int value)
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
+        if (sis_apic_bug)
+                writel(reg, &io_apic->index);
 	writel(value, &io_apic->data);
 }
 
@@ -494,7 +500,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
-		io_apic_modify(apic, reg);
+		io_apic_modify(apic, 0x10 + pin*2, reg);
 		if (!entry->next)
 			break;
 		entry = entry->next;
@@ -624,7 +630,7 @@ static inline void io_apic_sync(unsigned int apic)
 		pin = entry->pin;					\
 		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
 		reg ACTION;						\
-		io_apic_modify(entry->apic, reg);			\
+		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
 		FINAL;							\
 		if (!entry->next)					\
 			break;						\
@@ -2450,6 +2456,20 @@ void __init setup_IO_APIC(void)
 	check_timer();
 }
 
+/*
+ *      Called after all the initialization is done. If we didnt find any
+ *      APIC bugs then we can allow the modify fast path
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+        if (sis_apic_bug == -1)
+                sis_apic_bug = 0;
+        return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
 struct sysfs_ioapic_data {
 	struct sys_device dev;
 	struct IO_APIC_route_entry entry[0];
-- 
GitLab


From aa45f97b1bb40adae1288669e73350907ffae85e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:40 -0700
Subject: [PATCH 428/895] x86: remove ioapic_force

no user left.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c    | 1 -
 arch/x86/kernel/io_apic_64.c | 2 --
 include/asm-x86/apic.h       | 2 --
 3 files changed, 5 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 4d7a188025b3..cd860856a0b7 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1813,7 +1813,6 @@ static int __init apic_set_verbosity(char *arg)
 	if (!arg)  {
 #ifdef CONFIG_X86_64
 		skip_ioapic_setup = 0;
-		ioapic_force = 1;
 		return 0;
 #endif
 		return -EINVAL;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 915af9b87aa4..b70fd8232185 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -63,8 +63,6 @@
 
 #define __apicdebuginit(type) static type __init
 
-int ioapic_force;
-
 /*
  *      Is the SiS APIC rmw bug present ?
  *      -1 = don't know, 0 = no, 1 = yes
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index d76a0839abe9..2d970f6bc2a1 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -40,8 +40,6 @@ extern void generic_apic_probe(void);
 extern unsigned int apic_verbosity;
 extern int local_apic_timer_c2_ok;
 
-extern int ioapic_force;
-
 extern int disable_apic;
 /*
  * Basic functions accessing APICs.
-- 
GitLab


From 047c8fdb8718890e3340073b178d0859d0c7f91f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:41 -0700
Subject: [PATCH 429/895] x86: make io_apic_64.c and io_apic_32.c the same

all the same except INTR_REMAPPING related and ioapic io resource.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 213 ++++++++++-
 arch/x86/kernel/io_apic_64.c | 663 +++++++++++++++++++++++++++++++++--
 2 files changed, 829 insertions(+), 47 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 48184e126f2c..3ed36041c81e 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -123,7 +123,6 @@ struct irq_cfg {
 	u8 move_in_progress : 1;
 };
 
-
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 static struct irq_cfg irq_cfg_legacy[] __initdata = {
 	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
@@ -391,6 +390,38 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 	writel(value, &io_apic->data);
 }
 
+#ifdef CONFIG_X86_64
+static bool io_apic_level_ack_pending(unsigned int irq)
+{
+	struct irq_pin_list *entry;
+	unsigned long flags;
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		unsigned int reg;
+		int pin;
+
+		if (!entry)
+			break;
+		pin = entry->pin;
+		reg = io_apic_read(entry->apic, 0x10 + pin*2);
+		/* Is the remote IRR bit set? */
+		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+			spin_unlock_irqrestore(&ioapic_lock, flags);
+			return true;
+		}
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return false;
+}
+#endif
+
 union entry_union {
 	struct { u32 w1, w2; };
 	struct IO_APIC_route_entry entry;
@@ -483,17 +514,15 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp;
 
-	cfg = irq_cfg(irq);
-
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
+	cfg = irq_cfg(irq);
 	if (assign_irq_vector(irq, mask))
 		return;
 
 	cpus_and(tmp, cfg->domain, mask);
-
 	dest = cpu_mask_to_apicid(tmp);
 	/*
 	 * Only the high 8 bits are valid.
@@ -572,6 +601,54 @@ static void __init replace_pin_at_irq(unsigned int irq,
 		add_pin_to_irq(irq, newapic, newpin);
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	readl(&io_apic->data);
+}
+
+#define __DO_ACTION(R, ACTION, FINAL)					\
+									\
+{									\
+	int pin;							\
+	struct irq_cfg *cfg;						\
+	struct irq_pin_list *entry;					\
+									\
+	cfg = irq_cfg(irq);						\
+	entry = cfg->irq_2_pin;						\
+	for (;;) {							\
+		unsigned int reg;					\
+		if (!entry)						\
+			break;						\
+		pin = entry->pin;					\
+		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
+		reg ACTION;						\
+		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
+		FINAL;							\
+		if (!entry->next)					\
+			break;						\
+		entry = entry->next;					\
+	}								\
+}
+
+#define DO_ACTION(name,R,ACTION, FINAL)					\
+									\
+	static void name##_IO_APIC_irq (unsigned int irq)		\
+	__DO_ACTION(R, ACTION, FINAL)
+
+/* mask = 1 */
+DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
+
+/* mask = 0 */
+DO_ACTION(__unmask,	0, &= ~IO_APIC_REDIR_MASKED, )
+
+#else
+
 static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
 {
 	struct irq_cfg *cfg;
@@ -620,6 +697,8 @@ static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
 				IO_APIC_REDIR_MASKED);
 }
 
+#endif
+
 static void mask_IO_APIC_irq(unsigned int irq)
 {
 	unsigned long flags;
@@ -1055,6 +1134,17 @@ void unlock_vector_lock(void)
 
 static int __assign_irq_vector(int irq, cpumask_t mask)
 {
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
+	 */
         static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
         unsigned int old_vector;
         int cpu;
@@ -1095,9 +1185,13 @@ next:
                 }
                 if (unlikely(current_vector == vector))
                         continue;
-		if (vector == SYSCALL_VECTOR)
+#ifdef CONFIG_X86_64
+                if (vector == IA32_SYSCALL_VECTOR)
                         goto next;
-
+#else
+                if (vector == SYSCALL_VECTOR)
+                        goto next;
+#endif
                 for_each_cpu_mask_nr(new_cpu, new_mask)
                         if (per_cpu(vector_irq, new_cpu)[vector] != -1)
                                 goto next;
@@ -1184,6 +1278,7 @@ static struct irq_chip ioapic_chip;
 #define IOAPIC_EDGE	0
 #define IOAPIC_LEVEL	1
 
+#ifdef CONFIG_X86_32
 static inline int IO_APIC_irq_trigger(int irq)
 {
 	int apic, idx, pin;
@@ -1200,6 +1295,12 @@ static inline int IO_APIC_irq_trigger(int irq)
 	 */
 	return 0;
 }
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+        return 1;
+}
+#endif
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
@@ -1212,15 +1313,18 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 		desc = irq_to_desc_alloc(irq);
 
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-	    trigger == IOAPIC_LEVEL) {
+	    trigger == IOAPIC_LEVEL)
 		desc->status |= IRQ_LEVEL;
+	else
+		desc->status &= ~IRQ_LEVEL;
+
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+	    trigger == IOAPIC_LEVEL)
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					 handle_fasteoi_irq, "fasteoi");
-	} else {
-		desc->status &= ~IRQ_LEVEL;
+	else
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					 handle_edge_irq, "edge");
-	}
 }
 
 static int setup_ioapic_entry(int apic, int irq,
@@ -1662,7 +1766,6 @@ static void __init enable_IO_APIC(void)
 			struct IO_APIC_route_entry entry;
 			entry = ioapic_read_entry(apic, pin);
 
-
 			/* If the interrupt line is enabled and in ExtInt mode
 			 * I have found the pin where the i8259 is connected.
 			 */
@@ -2012,6 +2115,60 @@ static void ack_apic_edge(unsigned int irq)
 	ack_APIC_irq();
 }
 
+#ifdef CONFIG_X86_64
+static void ack_apic_level(unsigned int irq)
+{
+        int do_unmask_irq = 0;
+
+        irq_complete_move(irq);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+        /* If we are moving the irq we need to mask it */
+        if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+                do_unmask_irq = 1;
+                mask_IO_APIC_irq(irq);
+        }
+#endif
+
+        /*
+         * We must acknowledge the irq before we move it or the acknowledge will
+         * not propagate properly.
+         */
+        ack_APIC_irq();
+
+        /* Now we can move and renable the irq */
+        if (unlikely(do_unmask_irq)) {
+                /* Only migrate the irq if the ack has been received.
+                 *
+                 * On rare occasions the broadcast level triggered ack gets
+                 * delayed going to ioapics, and if we reprogram the
+                 * vector while Remote IRR is still set the irq will never
+                 * fire again.
+                 *
+                 * To prevent this scenario we read the Remote IRR bit
+                 * of the ioapic.  This has two effects.
+                 * - On any sane system the read of the ioapic will
+                 *   flush writes (and acks) going to the ioapic from
+                 *   this cpu.
+                 * - We get to see if the ACK has actually been delivered.
+                 *
+                 * Based on failed experiments of reprogramming the
+                 * ioapic entry from outside of irq context starting
+                 * with masking the ioapic entry and then polling until
+                 * Remote IRR was clear before reprogramming the
+                 * ioapic I don't trust the Remote IRR bit to be
+                 * completey accurate.
+                 *
+                 * However there appears to be no other way to plug
+                 * this race, so if the Remote IRR bit is not
+                 * accurate and is causing problems then it is a hardware bug
+                 * and you can go talk to the chipset vendor about it.
+                 */
+                if (!io_apic_level_ack_pending(irq))
+                        move_masked_irq(irq, desc);
+                unmask_IO_APIC_irq(irq);
+        }
+}
+#else
 atomic_t irq_mis_count;
 static void ack_apic_level(unsigned int irq)
 {
@@ -2053,6 +2210,7 @@ static void ack_apic_level(unsigned int irq)
 		spin_unlock(&ioapic_lock);
 	}
 }
+#endif
 
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name 		= "IO-APIC",
@@ -2224,7 +2382,7 @@ static inline void __init unlock_ExtINT_logic(void)
 }
 
 static int disable_timer_pin_1 __initdata;
-
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
 static int __init parse_disable_timer_pin_1(char *arg)
 {
 	disable_timer_pin_1 = 1;
@@ -2244,9 +2402,9 @@ static inline void __init check_timer(void)
 {
 	struct irq_cfg *cfg = irq_cfg(0);
 	int apic1, pin1, apic2, pin2;
-	int no_pin1 = 0;
-	unsigned int ver;
 	unsigned long flags;
+	unsigned int ver;
+	int no_pin1 = 0;
 
 	local_irq_save(flags);
 
@@ -2550,6 +2708,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 		cfg_new = irq_cfg(new);
 		if (cfg_new && cfg_new->vector != 0)
 			continue;
+		/* check if need to create one */
 		if (!cfg_new)
 			cfg_new = irq_cfg_alloc(new);
 		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
@@ -2720,6 +2879,32 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 	return 0;
 }
 
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+        unsigned int irq;
+        int ret, sub_handle;
+        struct msi_desc *desc;
+        unsigned int irq_want;
+
+        irq_want = build_irq_for_pci_dev(dev) + 0x100;
+        sub_handle = 0;
+        list_for_each_entry(desc, &dev->msi_list, list) {
+                irq = create_irq_nr(irq_want--);
+                if (irq == 0)
+                        return -1;
+                ret = setup_msi_irq(dev, desc, irq);
+                if (ret < 0)
+                        goto error;
+                sub_handle++;
+        }
+        return 0;
+
+error:
+        destroy_irq(irq);
+        return ret;
+}
+
+
 void arch_teardown_msi_irq(unsigned int irq)
 {
 	destroy_irq(irq);
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index b70fd8232185..940c4167b325 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -94,18 +94,22 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
+
 DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 int skip_ioapic_setup;
 
 static int __init parse_noapic(char *str)
 {
+	/* disable IO-APIC */
 	disable_ioapic_setup();
 	return 0;
 }
 early_param("noapic", parse_noapic);
 
-
 struct irq_cfg;
 struct irq_pin_list;
 struct irq_cfg {
@@ -374,6 +378,8 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
 /*
  * Re-write a value: to be used for read-modify-write
  * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
  */
 static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
@@ -383,6 +389,7 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 	writel(value, &io_apic->data);
 }
 
+#ifdef CONFIG_X86_64
 static bool io_apic_level_ack_pending(unsigned int irq)
 {
 	struct irq_pin_list *entry;
@@ -412,6 +419,7 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 
 	return false;
 }
+#endif
 
 union entry_union {
 	struct { u32 w1, w2; };
@@ -509,7 +517,7 @@ static int assign_irq_vector(int irq, cpumask_t mask);
 
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_cfg *cfg;
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
@@ -519,12 +527,12 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	if (cpus_empty(tmp))
 		return;
 
+	cfg = irq_cfg(irq);
 	if (assign_irq_vector(irq, mask))
 		return;
 
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
-
 	/*
 	 * Only the high 8 bits are valid.
 	 */
@@ -536,7 +544,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
-#endif
+#endif /* CONFIG_SMP */
 
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -602,6 +610,7 @@ static void __init replace_pin_at_irq(unsigned int irq,
 		add_pin_to_irq(irq, newapic, newpin);
 }
 
+#ifdef CONFIG_X86_64
 /*
  * Synchronize the IO-APIC and the CPU by doing
  * a dummy read from the IO-APIC
@@ -647,6 +656,58 @@ DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
 /* mask = 0 */
 DO_ACTION(__unmask,	0, &= ~IO_APIC_REDIR_MASKED, )
 
+#else
+
+static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
+{
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+	unsigned int pin, reg;
+
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		if (!entry)
+			break;
+		pin = entry->pin;
+		reg = io_apic_read(entry->apic, 0x10 + pin*2);
+		reg &= ~disable;
+		reg |= enable;
+		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+}
+
+/* mask = 1 */
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
+}
+
+/* mask = 0 */
+static void __unmask_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
+}
+
+/* mask = 1, trigger = 0 */
+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
+				IO_APIC_REDIR_LEVEL_TRIGGER);
+}
+
+/* mask = 0, trigger = 1 */
+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
+				IO_APIC_REDIR_MASKED);
+}
+
+#endif
+
 static void mask_IO_APIC_irq (unsigned int irq)
 {
 	unsigned long flags;
@@ -688,6 +749,64 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
+#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
+void send_IPI_self(int vector)
+{
+	unsigned int cfg;
+
+	/*
+	 * Wait for idle.
+	 */
+	apic_wait_icr_idle();
+	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	apic_write(APIC_ICR, cfg);
+}
+#endif /* !CONFIG_SMP && CONFIG_X86_32*/
+
+#ifdef CONFIG_X86_32
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
+
+static int __init ioapic_pirq_setup(char *str)
+{
+	int i, max;
+	int ints[MAX_PIRQS+1];
+
+	get_options(str, ARRAY_SIZE(ints), ints);
+
+	for (i = 0; i < MAX_PIRQS; i++)
+		pirq_entries[i] = -1;
+
+	pirqs_enabled = 1;
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"PIRQ redirection, working around broken MP-BIOS.\n");
+	max = MAX_PIRQS;
+	if (ints[0] < MAX_PIRQS)
+		max = ints[0];
+
+	for (i = 0; i < max; i++) {
+		apic_printk(APIC_VERBOSE, KERN_DEBUG
+				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+		/*
+		 * PIRQs are mapped upside down, usually.
+		 */
+		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+	}
+	return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
 #ifdef CONFIG_INTR_REMAP
 /* I/O APIC RTE contents at the OS boot up */
 static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
@@ -861,18 +980,51 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 	return best_guess;
 }
 
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+	if (irq < 16) {
+		unsigned int port = 0x4d0 + (irq >> 3);
+		return (inb(port) >> (irq & 7)) & 1;
+	}
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"Broken MPtable reports ISA irq %d\n", irq);
+	return 0;
+}
+
+#endif
+
 /* ISA interrupts are always polarity zero edge triggered,
  * when listed as conforming in the MP table. */
 
 #define default_ISA_trigger(idx)	(0)
 #define default_ISA_polarity(idx)	(0)
 
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+#define default_EISA_polarity(idx)	default_ISA_polarity(idx)
+
 /* PCI interrupts are always polarity one level triggered,
  * when listed as conforming in the MP table. */
 
 #define default_PCI_trigger(idx)	(1)
 #define default_PCI_polarity(idx)	(1)
 
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)	(1)
+#define default_MCA_polarity(idx)	default_ISA_polarity(idx)
+
 static int MPBIOS_polarity(int idx)
 {
 	int bus = mp_irqs[idx].mp_srcbus;
@@ -930,6 +1082,36 @@ static int MPBIOS_trigger(int idx)
 				trigger = default_ISA_trigger(idx);
 			else
 				trigger = default_PCI_trigger(idx);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+			switch (mp_bus_id_to_type[bus]) {
+				case MP_BUS_ISA: /* ISA pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_EISA: /* EISA pin */
+				{
+					trigger = default_EISA_trigger(idx);
+					break;
+				}
+				case MP_BUS_PCI: /* PCI pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_MCA: /* MCA pin */
+				{
+					trigger = default_MCA_trigger(idx);
+					break;
+				}
+				default:
+				{
+					printk(KERN_WARNING "broken BIOS!!\n");
+					trigger = 1;
+					break;
+				}
+			}
+#endif
 			break;
 		case 1: /* edge */
 		{
@@ -967,6 +1149,7 @@ static inline int irq_trigger(int idx)
 	return MPBIOS_trigger(idx);
 }
 
+int (*ioapic_renumber_irq)(int ioapic, int irq);
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
@@ -988,7 +1171,32 @@ static int pin_2_irq(int idx, int apic, int pin)
 		while (i < apic)
 			irq += nr_ioapic_registers[i++];
 		irq += pin;
+                /*
+                 * For MPS mode, so far only needed by ES7000 platform
+                 */
+                if (ioapic_renumber_irq)
+                        irq = ioapic_renumber_irq(apic, irq);
 	}
+
+#ifdef CONFIG_X86_32
+	/*
+	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
+	 */
+	if ((pin >= 16) && (pin <= 23)) {
+		if (pirq_entries[pin-16] != -1) {
+			if (!pirq_entries[pin-16]) {
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"disabling PIRQ%d\n", pin-16);
+			} else {
+				irq = pirq_entries[pin-16];
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"using PIRQ%d -> IRQ %d\n",
+						pin-16, irq);
+			}
+		}
+	}
+#endif
+
 	return irq;
 }
 
@@ -1058,8 +1266,13 @@ next:
 		}
 		if (unlikely(current_vector == vector))
 			continue;
+#ifdef CONFIG_X86_64
 		if (vector == IA32_SYSCALL_VECTOR)
 			goto next;
+#else
+		if (vector == SYSCALL_VECTOR)
+			goto next;
+#endif
 		for_each_cpu_mask_nr(new_cpu, new_mask)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
@@ -1140,6 +1353,34 @@ static struct irq_chip ioapic_chip;
 static struct irq_chip ir_ioapic_chip;
 #endif
 
+#define IOAPIC_AUTO     -1
+#define IOAPIC_EDGE     0
+#define IOAPIC_LEVEL    1
+
+#ifdef CONFIG_X86_32
+static inline int IO_APIC_irq_trigger(int irq)
+{
+        int apic, idx, pin;
+
+        for (apic = 0; apic < nr_ioapics; apic++) {
+                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                        idx = find_irq_entry(apic, pin, mp_INT);
+                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+                                return irq_trigger(idx);
+                }
+        }
+        /*
+         * nonexistent IRQs are edge default
+         */
+        return 0;
+}
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+	return 1;
+}
+#endif
+
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
 	struct irq_desc *desc;
@@ -1150,7 +1391,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 	else
 		desc = irq_to_desc_alloc(irq);
 
-	if (trigger)
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+	    trigger == IOAPIC_LEVEL)
 		desc->status |= IRQ_LEVEL;
 	else
 		desc->status &= ~IRQ_LEVEL;
@@ -1168,7 +1410,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 		return;
 	}
 #endif
-	if (trigger)
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+	    trigger == IOAPIC_LEVEL)
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					      handle_fasteoi_irq,
 					      "fasteoi");
@@ -1303,6 +1546,10 @@ static void __init setup_IO_APIC_irqs(void)
 		}
 
 		irq = pin_2_irq(idx, apic, pin);
+#ifdef CONFIG_X86_32
+                if (multi_timer_check(apic, irq))
+                        continue;
+#endif
 		add_pin_to_irq(irq, apic, pin);
 
 		setup_IO_APIC_irq(apic, pin, irq,
@@ -1360,6 +1607,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
 	union IO_APIC_reg_02 reg_02;
+	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
 	struct irq_cfg *cfg;
 
@@ -1384,6 +1632,8 @@ __apicdebuginit(void) print_IO_APIC(void)
 	reg_01.raw = io_apic_read(apic, 1);
 	if (reg_01.bits.version >= 0x10)
 		reg_02.raw = io_apic_read(apic, 2);
+        if (reg_01.bits.version >= 0x20)
+                reg_03.raw = io_apic_read(apic, 3);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	printk("\n");
@@ -1399,11 +1649,27 @@ __apicdebuginit(void) print_IO_APIC(void)
 	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
 	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
 
-	if (reg_01.bits.version >= 0x10) {
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+	 * but the value of reg_02 is read as the previous read register
+	 * value, so ignore it if reg_02 == reg_01.
+	 */
+	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
 		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
 		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
 	}
 
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+	 * or reg_03, but the value of reg_0[23] is read as the previous read
+	 * register value, so ignore it if reg_03 == reg_0[12].
+	 */
+	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+	    reg_03.raw != reg_01.raw) {
+		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
+	}
+
 	printk(KERN_DEBUG ".... IRQ redirection table:\n");
 
 	printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
@@ -1475,7 +1741,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
 	unsigned int v, ver, maxlvt;
-	unsigned long icr;
+	u64 icr;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1492,11 +1758,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	v = apic_read(APIC_TASKPRI);
 	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 
-	v = apic_read(APIC_ARBPRI);
-	printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-		v & APIC_ARBPRI_MASK);
-	v = apic_read(APIC_PROCPRI);
-	printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
+		v = apic_read(APIC_ARBPRI);
+		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+			v & APIC_ARBPRI_MASK);
+		v = apic_read(APIC_PROCPRI);
+		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+	}
 
 	v = apic_read(APIC_EOI);
 	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
@@ -1516,8 +1784,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk(KERN_DEBUG "... APIC IRR field:\n");
 	print_APIC_bitfield(APIC_IRR);
 
-	v = apic_read(APIC_ESR);
-	printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+			apic_write(APIC_ESR, 0);
+
+		v = apic_read(APIC_ESR);
+		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+	}
 
 	icr = apic_icr_read();
 	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
@@ -1608,6 +1881,13 @@ void __init enable_IO_APIC(void)
 	int apic;
 	unsigned long flags;
 
+#ifdef CONFIG_X86_32
+	int i;
+	if (!pirqs_enabled)
+		for (i = 0; i < MAX_PIRQS; i++)
+			pirq_entries[i] = -1;
+#endif
+
 	/*
 	 * The number of IO-APIC IRQ registers (== #pins):
 	 */
@@ -1636,6 +1916,10 @@ void __init enable_IO_APIC(void)
 	}
  found_i8259:
 	/* Look to see what if the MP table has reported the ExtINT */
+	/* If we could not find the appropriate pin by looking at the ioapic
+	 * the i8259 probably is not connected the ioapic but give the
+	 * mptable a chance anyway.
+	 */
 	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
 	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
 	/* Trust the MP table if nothing is setup in the hardware */
@@ -1695,6 +1979,122 @@ void disable_IO_APIC(void)
 	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
+#ifdef CONFIG_X86_32
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+	union IO_APIC_reg_00 reg_00;
+	physid_mask_t phys_id_present_map;
+	int apic;
+	int i;
+	unsigned char old_id;
+	unsigned long flags;
+
+	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+		return;
+
+	/*
+	 * Don't check I/O APIC IDs for xAPIC systems.  They have
+	 * no meaning without the serial APIC bus.
+	 */
+	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return;
+	/*
+	 * This is broken; anything with a real cpu count has to
+	 * circumvent this idiocy regardless.
+	 */
+	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+	/*
+	 * Set the IOAPIC ID to the value stored in the MPC table.
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+
+		/* Read the register 0 value */
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(apic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		old_id = mp_ioapics[apic].mp_apicid;
+
+		if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+				apic, mp_ioapics[apic].mp_apicid);
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				reg_00.bits.ID);
+			mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+		}
+
+		/*
+		 * Sanity check, is the ID really free? Every APIC in a
+		 * system must have a unique ID or we get lots of nice
+		 * 'stuck on smp_invalidate_needed IPI wait' messages.
+		 */
+		if (check_apicid_used(phys_id_present_map,
+					mp_ioapics[apic].mp_apicid)) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+				apic, mp_ioapics[apic].mp_apicid);
+			for (i = 0; i < get_physical_broadcast(); i++)
+				if (!physid_isset(i, phys_id_present_map))
+					break;
+			if (i >= get_physical_broadcast())
+				panic("Max APIC ID exceeded!\n");
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				i);
+			physid_set(i, phys_id_present_map);
+			mp_ioapics[apic].mp_apicid = i;
+		} else {
+			physid_mask_t tmp;
+			tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+			apic_printk(APIC_VERBOSE, "Setting %d in the "
+					"phys_id_present_map\n",
+					mp_ioapics[apic].mp_apicid);
+			physids_or(phys_id_present_map, phys_id_present_map, tmp);
+		}
+
+
+		/*
+		 * We need to adjust the IRQ routing table
+		 * if the ID changed.
+		 */
+		if (old_id != mp_ioapics[apic].mp_apicid)
+			for (i = 0; i < mp_irq_entries; i++)
+				if (mp_irqs[i].mp_dstapic == old_id)
+					mp_irqs[i].mp_dstapic
+						= mp_ioapics[apic].mp_apicid;
+
+		/*
+		 * Read the right value from the MPC table and
+		 * write it into the ID register.
+		 */
+		apic_printk(APIC_VERBOSE, KERN_INFO
+			"...changing IO-APIC physical APIC ID to %d ...",
+			mp_ioapics[apic].mp_apicid);
+
+		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+		spin_lock_irqsave(&ioapic_lock, flags);
+
+		/*
+		 * Sanity check
+		 */
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(apic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+			printk("could not set ID!\n");
+		else
+			apic_printk(APIC_VERBOSE, " ok.\n");
+	}
+}
+#endif
+
 int no_timer_check __initdata;
 
 static int __init notimercheck(char *s)
@@ -1780,8 +2180,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 	return was_pending;
 }
 
+#ifdef CONFIG_X86_64
 static int ioapic_retrigger_irq(unsigned int irq)
 {
+
 	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long flags;
 
@@ -1791,6 +2193,14 @@ static int ioapic_retrigger_irq(unsigned int irq)
 
 	return 1;
 }
+#else
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+        send_IPI_self(irq_cfg(irq)->vector);
+
+        return 1;
+}
+#endif
 
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1952,7 +2362,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
 	ack_APIC_irq();
+#ifdef CONFIG_X86_64
 	exit_idle();
+#endif
 	irq_enter();
 
 	me = smp_processor_id();
@@ -2024,6 +2436,7 @@ static void ack_apic_edge(unsigned int irq)
 	ack_APIC_irq();
 }
 
+#ifdef CONFIG_X86_64
 static void ack_apic_level(unsigned int irq)
 {
 	int do_unmask_irq = 0;
@@ -2076,6 +2489,49 @@ static void ack_apic_level(unsigned int irq)
 		unmask_IO_APIC_irq(irq);
 	}
 }
+#else
+atomic_t irq_mis_count;
+static void ack_apic_level(unsigned int irq)
+{
+	unsigned long v;
+	int i;
+
+	irq_complete_move(irq);
+	move_native_irq(irq);
+	/*
+	* It appears there is an erratum which affects at least version 0x11
+	* of I/O APIC (that's the 82093AA and cores integrated into various
+	* chipsets).  Under certain conditions a level-triggered interrupt is
+	* erroneously delivered as edge-triggered one but the respective IRR
+	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
+	* message but it will never arrive and further interrupts are blocked
+	* from the source.  The exact reason is so far unknown, but the
+	* phenomenon was observed when two consecutive interrupt requests
+	* from a given source get delivered to the same CPU and the source is
+	* temporarily disabled in between.
+	*
+	* A workaround is to simulate an EOI message manually.  We achieve it
+	* by setting the trigger mode to edge and then to level when the edge
+	* trigger mode gets detected in the TMR of a local APIC for a
+	* level-triggered interrupt.  We mask the source for the time of the
+	* operation to prevent an edge-triggered interrupt escaping meanwhile.
+	* The idea is from Manfred Spraul.  --macro
+	*/
+	i = irq_cfg(irq)->vector;
+
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+	ack_APIC_irq();
+
+	if (!(v & (1 << (i & 0x1f)))) {
+		atomic_inc(&irq_mis_count);
+		spin_lock(&ioapic_lock);
+		__mask_and_edge_IO_APIC_irq(irq);
+		__unmask_and_level_IO_APIC_irq(irq);
+		spin_unlock(&ioapic_lock);
+	}
+}
+#endif
 
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name 		= "IO-APIC",
@@ -2141,20 +2597,24 @@ static inline void init_IO_APIC_traps(void)
 	}
 }
 
-static void unmask_lapic_irq(unsigned int irq)
+/*
+ * The local APIC irq-chip implementation:
+ */
+
+static void mask_lapic_irq(unsigned int irq)
 {
 	unsigned long v;
 
 	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
 }
 
-static void mask_lapic_irq(unsigned int irq)
+static void unmask_lapic_irq(unsigned int irq)
 {
 	unsigned long v;
 
 	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
 static void ack_lapic_irq (unsigned int irq)
@@ -2182,19 +2642,19 @@ static void lapic_register_intr(int irq)
 static void __init setup_nmi(void)
 {
 	/*
- 	 * Dirty trick to enable the NMI watchdog ...
+	 * Dirty trick to enable the NMI watchdog ...
 	 * We put the 8259A master into AEOI mode and
 	 * unmask on all local APICs LVT0 as NMI.
 	 *
 	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
 	 * is from Maciej W. Rozycki - so we do not have to EOI from
 	 * the NMI handler or the timer interrupt.
-	 */ 
-	printk(KERN_INFO "activating NMI Watchdog ...");
+	 */
+	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 
 	enable_NMI_through_LVT0();
 
-	printk(" done.\n");
+	apic_printk(APIC_VERBOSE, " done.\n");
 }
 
 /*
@@ -2211,12 +2671,17 @@ static inline void __init unlock_ExtINT_logic(void)
 	unsigned char save_control, save_freq_select;
 
 	pin  = find_isa_irq_pin(8, mp_INT);
+	if (pin == -1) {
+		WARN_ON_ONCE(1);
+		return;
+	}
 	apic = find_isa_irq_apic(8, mp_INT);
-	if (pin == -1)
+	if (apic == -1) {
+		WARN_ON_ONCE(1);
 		return;
+	}
 
 	entry0 = ioapic_read_entry(apic, pin);
-
 	clear_IO_APIC_pin(apic, pin);
 
 	memset(&entry1, 0, sizeof(entry1));
@@ -2268,17 +2733,21 @@ int timer_through_8259 __initdata;
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
  *
- * FIXME: really need to revamp this for modern platforms only.
+ * FIXME: really need to revamp this for all platforms.
  */
 static inline void __init check_timer(void)
 {
 	struct irq_cfg *cfg = irq_cfg(0);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
+	unsigned int ver;
 	int no_pin1 = 0;
 
 	local_irq_save(flags);
 
+        ver = apic_read(APIC_LVR);
+        ver = GET_APIC_VERSION(ver);
+
 	/*
 	 * get/set the timer IRQ vector:
 	 */
@@ -2287,10 +2756,18 @@ static inline void __init check_timer(void)
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
-	 * wire has to be disabled in the local APIC.
+	 * wire has to be disabled in the local APIC.  Also
+	 * timer interrupts need to be acknowledged manually in
+	 * the 8259A for the i82489DX when using the NMI
+	 * watchdog as that APIC treats NMIs as level-triggered.
+	 * The AEOI mode will finish them in the 8259A
+	 * automatically.
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
+#ifdef CONFIG_X86_32
+	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2382,6 +2859,9 @@ static inline void __init check_timer(void)
 			    "through the IO-APIC - disabling NMI Watchdog!\n");
 		nmi_watchdog = NMI_NONE;
 	}
+#ifdef CONFIG_X86_32
+	timer_ack = 0;
+#endif
 
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
@@ -2435,19 +2915,29 @@ out:
  * the I/O APIC in all cases now.  No actual device should request
  * it anyway.  --macro
  */
-#define PIC_IRQS	(1<<2)
+#define PIC_IRQS	(1 << PIC_CASCADE_IR)
 
 void __init setup_IO_APIC(void)
 {
 
+#ifdef CONFIG_X86_32
+	enable_IO_APIC();
+#else
 	/*
 	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
 	 */
+#endif
 
 	io_apic_irqs = ~PIC_IRQS;
 
 	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-
+        /*
+         * Set up IO-APIC IRQ routing.
+         */
+#ifdef CONFIG_X86_32
+        if (!acpi_ioapic)
+                setup_ioapic_ids_from_mpc();
+#endif
 	sync_Arb_IDs();
 	setup_IO_APIC_irqs();
 	init_IO_APIC_traps();
@@ -3128,7 +3618,93 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
 #ifdef CONFIG_ACPI
 
-#define IO_APIC_MAX_ID		0xFE
+#ifdef CONFIG_X86_32
+int __init io_apic_get_unique_id(int ioapic, int apic_id)
+{
+	union IO_APIC_reg_00 reg_00;
+	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+	physid_mask_t tmp;
+	unsigned long flags;
+	int i = 0;
+
+	/*
+	 * The P4 platform supports up to 256 APIC IDs on two separate APIC
+	 * buses (one for LAPICs, one for IOAPICs), where predecessors only
+	 * supports up to 16 on one shared APIC bus.
+	 *
+	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+	 *      advantage of new APIC bus architecture.
+	 */
+
+	if (physids_empty(apic_id_map))
+		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(ioapic, 0);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	if (apic_id >= get_physical_broadcast()) {
+		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+			"%d\n", ioapic, apic_id, reg_00.bits.ID);
+		apic_id = reg_00.bits.ID;
+	}
+
+	/*
+	 * Every APIC in a system must have a unique ID or we get lots of nice
+	 * 'stuck on smp_invalidate_needed IPI wait' messages.
+	 */
+	if (check_apicid_used(apic_id_map, apic_id)) {
+
+		for (i = 0; i < get_physical_broadcast(); i++) {
+			if (!check_apicid_used(apic_id_map, i))
+				break;
+		}
+
+		if (i == get_physical_broadcast())
+			panic("Max apic_id exceeded!\n");
+
+		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+			"trying %d\n", ioapic, apic_id, i);
+
+		apic_id = i;
+	}
+
+	tmp = apicid_to_cpu_present(apic_id);
+	physids_or(apic_id_map, apic_id_map, tmp);
+
+	if (reg_00.bits.ID != apic_id) {
+		reg_00.bits.ID = apic_id;
+
+		spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(ioapic, 0, reg_00.raw);
+		reg_00.raw = io_apic_read(ioapic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		/* Sanity check */
+		if (reg_00.bits.ID != apic_id) {
+			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+			return -1;
+		}
+	}
+
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+	return apic_id;
+}
+
+int __init io_apic_get_version(int ioapic)
+{
+	union IO_APIC_reg_01	reg_01;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_01.raw = io_apic_read(ioapic, 1);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return reg_01.bits.version;
+}
+#endif
 
 int __init io_apic_get_redir_entries (int ioapic)
 {
@@ -3226,6 +3802,7 @@ void __init setup_ioapic_dest(void)
 }
 #endif
 
+#ifdef CONFIG_X86_64
 #define IOAPIC_RESOURCE_NAME_SIZE 11
 
 static struct resource *ioapic_resources;
@@ -3261,36 +3838,56 @@ static struct resource * __init ioapic_setup_resources(void)
 
 	return res;
 }
+#endif
 
 void __init ioapic_init_mappings(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-	struct resource *ioapic_res;
 	int i;
+#ifdef CONFIG_X86_64
+	struct resource *ioapic_res;
 
 	ioapic_res = ioapic_setup_resources();
+#endif
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
 			ioapic_phys = mp_ioapics[i].mp_apicaddr;
+#ifdef CONFIG_X86_32
+                        if (!ioapic_phys) {
+                                printk(KERN_ERR
+                                       "WARNING: bogus zero IO-APIC "
+                                       "address found in MPTABLE, "
+                                       "disabling IO/APIC support!\n");
+                                smp_found_config = 0;
+                                skip_ioapic_setup = 1;
+                                goto fake_ioapic_page;
+                        }
+#endif
 		} else {
+#ifdef CONFIG_X86_32
+fake_ioapic_page:
+#endif
 			ioapic_phys = (unsigned long)
 				alloc_bootmem_pages(PAGE_SIZE);
 			ioapic_phys = __pa(ioapic_phys);
 		}
 		set_fixmap_nocache(idx, ioapic_phys);
 		apic_printk(APIC_VERBOSE,
-			    "mapped IOAPIC to %016lx (%016lx)\n",
+			    "mapped IOAPIC to %08lx (%08lx)\n",
 			    __fix_to_virt(idx), ioapic_phys);
 		idx++;
 
+#ifdef CONFIG_X86_64
 		if (ioapic_res != NULL) {
 			ioapic_res->start = ioapic_phys;
 			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
 			ioapic_res++;
 		}
+#endif
 	}
 }
 
+#ifdef CONFIG_X86_64
 static int __init ioapic_insert_resources(void)
 {
 	int i;
@@ -3313,4 +3910,4 @@ static int __init ioapic_insert_resources(void)
 /* Insert the IO APIC resources after PCI initialization has occured to handle
  * IO APICS that are mapped in on a BAR in PCI space. */
 late_initcall(ioapic_insert_resources);
-
+#endif
-- 
GitLab


From 54168ed7f2a4f3fc2780e645124ae952598da601 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 20 Aug 2008 09:07:45 +0200
Subject: [PATCH 430/895] x86: make io_apic_32.c the same as io_apic_64.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 1521 ++++++++++++++++++++++++----------
 1 file changed, 1104 insertions(+), 417 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 3ed36041c81e..fba6d6ee3480 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -35,7 +35,7 @@
 #include <linux/htirq.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
-#include <linux/jiffies.h>      /* time_after() */
+#include <linux/jiffies.h>	/* time_after() */
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
@@ -64,8 +64,8 @@
 #define __apicdebuginit(type) static type __init
 
 /*
- *	Is the SiS APIC rmw bug present ?
- *	-1 = don't know, 0 = no, 1 = yes
+ *      Is the SiS APIC rmw bug present ?
+ *      -1 = don't know, 0 = no, 1 = yes
  */
 int sis_apic_bug = -1;
 
@@ -102,7 +102,7 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 int skip_ioapic_setup;
 
-static int __init parse_noapic(char *arg)
+static int __init parse_noapic(char *str)
 {
 	/* disable IO-APIC */
 	disable_ioapic_setup();
@@ -188,7 +188,7 @@ static void __init init_work(void *data)
 	irq_cfgx[legacy_count - 1].next = NULL;
 }
 
-#define for_each_irq_cfg(cfg)           \
+#define for_each_irq_cfg(cfg)		\
 	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
 
 DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
@@ -262,7 +262,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 		irq_cfgx = cfg;
 	cfg->irq = irq;
 	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
-
 #ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
 	{
 		/* dump the results */
@@ -384,9 +383,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
  */
 static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
-	volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
-	if (sis_apic_bug)
-		writel(reg, &io_apic->index);
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+        if (sis_apic_bug)
+                writel(reg, &io_apic->index);
 	writel(value, &io_apic->data);
 }
 
@@ -494,11 +493,20 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 		apic = entry->apic;
 		pin = entry->pin;
+#ifdef CONFIG_INTR_REMAP
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(irq))
+			io_apic_write(apic, 0x11 + pin*2, dest);
+#else
 		io_apic_write(apic, 0x11 + pin*2, dest);
+#endif
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
-		io_apic_modify(apic, 0x10 + pin *2, reg);
+		io_apic_modify(apic, 0x10 + pin*2, reg);
 		if (!entry->next)
 			break;
 		entry = entry->next;
@@ -513,6 +521,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -529,12 +538,12 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	 */
 	dest = SET_APIC_LOGICAL_ID(dest);
 
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	irq_to_desc(irq)->affinity = mask;
+	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
-
 #endif /* CONFIG_SMP */
 
 /*
@@ -699,7 +708,7 @@ static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
 
 #endif
 
-static void mask_IO_APIC_irq(unsigned int irq)
+static void mask_IO_APIC_irq (unsigned int irq)
 {
 	unsigned long flags;
 
@@ -708,7 +717,7 @@ static void mask_IO_APIC_irq(unsigned int irq)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void unmask_IO_APIC_irq(unsigned int irq)
+static void unmask_IO_APIC_irq (unsigned int irq)
 {
 	unsigned long flags;
 
@@ -725,14 +734,13 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 	entry = ioapic_read_entry(apic, pin);
 	if (entry.delivery_mode == dest_SMI)
 		return;
-
 	/*
 	 * Disable it in the IO-APIC irq-routing table:
 	 */
 	ioapic_mask_entry(apic, pin);
 }
 
-static void clear_IO_APIC(void)
+static void clear_IO_APIC (void)
 {
 	int apic, pin;
 
@@ -741,7 +749,7 @@ static void clear_IO_APIC(void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
-#ifndef CONFIG_SMP
+#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
 void send_IPI_self(int vector)
 {
 	unsigned int cfg;
@@ -756,9 +764,9 @@ void send_IPI_self(int vector)
 	 */
 	apic_write(APIC_ICR, cfg);
 }
-#endif /* !CONFIG_SMP */
-
+#endif /* !CONFIG_SMP && CONFIG_X86_32*/
 
+#ifdef CONFIG_X86_32
 /*
  * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
  * specific CPU-side IRQs.
@@ -797,6 +805,75 @@ static int __init ioapic_pirq_setup(char *str)
 }
 
 __setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_INTR_REMAP
+/* I/O APIC RTE contents at the OS boot up */
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+	union IO_APIC_reg_01 reg_01;
+	unsigned long flags;
+	int apic, pin;
+
+	/*
+	 * The number of IO-APIC IRQ registers (== #pins):
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_01.raw = io_apic_read(apic, 1);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		early_ioapic_entries[apic] =
+			kzalloc(sizeof(struct IO_APIC_route_entry) *
+				nr_ioapic_registers[apic], GFP_KERNEL);
+		if (!early_ioapic_entries[apic])
+			return -ENOMEM;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			struct IO_APIC_route_entry entry;
+
+			entry = early_ioapic_entries[apic][pin] =
+				ioapic_read_entry(apic, pin);
+			if (!entry.mask) {
+				entry.mask = 1;
+				ioapic_write_entry(apic, pin, entry);
+			}
+		}
+	return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+	int apic, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+			ioapic_write_entry(apic, pin,
+					   early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+	/*
+	 * for now plain restore of previous settings.
+	 * TBD: In the case of OS enabling interrupt-remapping,
+	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
+	 * table entries. for now, do a plain restore, and wait for
+	 * the setup_IO_APIC_irqs() to do proper initialization.
+	 */
+	restore_IO_APIC_setup();
+}
+#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -848,7 +925,7 @@ static int __init find_isa_irq_apic(int irq, int type)
 	}
 	if (i < mp_irq_entries) {
 		int apic;
-		for (apic = 0; apic < nr_ioapics; apic++) {
+		for(apic = 0; apic < nr_ioapics; apic++) {
 			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
 				return apic;
 		}
@@ -867,10 +944,10 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 {
 	int apic, i, best_guess = -1;
 
-	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
-		"slot:%d, pin:%d.\n", bus, slot, pin);
+	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+		bus, slot, pin);
 	if (test_bit(bus, mp_bus_not_pci)) {
-		printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
 		return -1;
 	}
 	for (i = 0; i < mp_irq_entries; i++) {
@@ -885,7 +962,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 		    !mp_irqs[i].mp_irqtype &&
 		    (bus == lbus) &&
 		    (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
+			int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
 
 			if (!(apic || IO_APIC_IRQ(irq)))
 				continue;
@@ -902,6 +979,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 	}
 	return best_guess;
 }
+
 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
@@ -918,6 +996,7 @@ static int EISA_ELCR(unsigned int irq)
 			"Broken MPtable reports ISA irq %d\n", irq);
 	return 0;
 }
+
 #endif
 
 /* ISA interrupts are always polarity zero edge triggered,
@@ -954,36 +1033,36 @@ static int MPBIOS_polarity(int idx)
 	/*
 	 * Determine IRQ line polarity (high active or low active):
 	 */
-	switch (mp_irqs[idx].mp_irqflag & 3) {
-	case 0: /* conforms, ie. bus-type dependent polarity */
-	{
-		polarity = test_bit(bus, mp_bus_not_pci)?
-			default_ISA_polarity(idx):
-			default_PCI_polarity(idx);
-		break;
-	}
-	case 1: /* high active */
-	{
-		polarity = 0;
-		break;
-	}
-	case 2: /* reserved */
-	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		polarity = 1;
-		break;
-	}
-	case 3: /* low active */
-	{
-		polarity = 1;
-		break;
-	}
-	default: /* invalid */
+	switch (mp_irqs[idx].mp_irqflag & 3)
 	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		polarity = 1;
-		break;
-	}
+		case 0: /* conforms, ie. bus-type dependent polarity */
+			if (test_bit(bus, mp_bus_not_pci))
+				polarity = default_ISA_polarity(idx);
+			else
+				polarity = default_PCI_polarity(idx);
+			break;
+		case 1: /* high active */
+		{
+			polarity = 0;
+			break;
+		}
+		case 2: /* reserved */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
+		case 3: /* low active */
+		{
+			polarity = 1;
+			break;
+		}
+		default: /* invalid */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
 	}
 	return polarity;
 }
@@ -996,67 +1075,67 @@ static int MPBIOS_trigger(int idx)
 	/*
 	 * Determine IRQ trigger mode (edge or level sensitive):
 	 */
-	switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
-	case 0: /* conforms, ie. bus-type dependent */
+	switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
 	{
-		trigger = test_bit(bus, mp_bus_not_pci)?
-				default_ISA_trigger(idx):
-				default_PCI_trigger(idx);
+		case 0: /* conforms, ie. bus-type dependent */
+			if (test_bit(bus, mp_bus_not_pci))
+				trigger = default_ISA_trigger(idx);
+			else
+				trigger = default_PCI_trigger(idx);
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-		switch (mp_bus_id_to_type[bus]) {
-		case MP_BUS_ISA: /* ISA pin */
-		{
-			/* set before the switch */
+			switch (mp_bus_id_to_type[bus]) {
+				case MP_BUS_ISA: /* ISA pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_EISA: /* EISA pin */
+				{
+					trigger = default_EISA_trigger(idx);
+					break;
+				}
+				case MP_BUS_PCI: /* PCI pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_MCA: /* MCA pin */
+				{
+					trigger = default_MCA_trigger(idx);
+					break;
+				}
+				default:
+				{
+					printk(KERN_WARNING "broken BIOS!!\n");
+					trigger = 1;
+					break;
+				}
+			}
+#endif
 			break;
-		}
-		case MP_BUS_EISA: /* EISA pin */
+		case 1: /* edge */
 		{
-			trigger = default_EISA_trigger(idx);
+			trigger = 0;
 			break;
 		}
-		case MP_BUS_PCI: /* PCI pin */
+		case 2: /* reserved */
 		{
-			/* set before the switch */
+			printk(KERN_WARNING "broken BIOS!!\n");
+			trigger = 1;
 			break;
 		}
-		case MP_BUS_MCA: /* MCA pin */
+		case 3: /* level */
 		{
-			trigger = default_MCA_trigger(idx);
+			trigger = 1;
 			break;
 		}
-		default:
+		default: /* invalid */
 		{
 			printk(KERN_WARNING "broken BIOS!!\n");
-			trigger = 1;
+			trigger = 0;
 			break;
 		}
 	}
-#endif
-		break;
-	}
-	case 1: /* edge */
-	{
-		trigger = 0;
-		break;
-	}
-	case 2: /* reserved */
-	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		trigger = 1;
-		break;
-	}
-	case 3: /* level */
-	{
-		trigger = 1;
-		break;
-	}
-	default: /* invalid */
-	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		trigger = 0;
-		break;
-	}
-	}
 	return trigger;
 }
 
@@ -1082,9 +1161,9 @@ static int pin_2_irq(int idx, int apic, int pin)
 	if (mp_irqs[idx].mp_dstirq != pin)
 		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
 
-	if (test_bit(bus, mp_bus_not_pci))
+	if (test_bit(bus, mp_bus_not_pci)) {
 		irq = mp_irqs[idx].mp_srcbusirq;
-	else {
+	} else {
 		/*
 		 * PCI IRQs are mapped in order
 		 */
@@ -1092,14 +1171,14 @@ static int pin_2_irq(int idx, int apic, int pin)
 		while (i < apic)
 			irq += nr_ioapic_registers[i++];
 		irq += pin;
-
-		/*
-		 * For MPS mode, so far only needed by ES7000 platform
-		 */
-		if (ioapic_renumber_irq)
-			irq = ioapic_renumber_irq(apic, irq);
+                /*
+                 * For MPS mode, so far only needed by ES7000 platform
+                 */
+                if (ioapic_renumber_irq)
+                        irq = ioapic_renumber_irq(apic, irq);
 	}
 
+#ifdef CONFIG_X86_32
 	/*
 	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
 	 */
@@ -1116,6 +1195,8 @@ static int pin_2_irq(int idx, int apic, int pin)
 			}
 		}
 	}
+#endif
+
 	return irq;
 }
 
@@ -1145,74 +1226,70 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-        static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
-        unsigned int old_vector;
-        int cpu;
-        struct irq_cfg *cfg;
+	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+	unsigned int old_vector;
+	int cpu;
+	struct irq_cfg *cfg;
 
-        cfg = irq_cfg(irq);
+	cfg = irq_cfg(irq);
 
-        /* Only try and allocate irqs on cpus that are present */
-        cpus_and(mask, mask, cpu_online_map);
+	/* Only try and allocate irqs on cpus that are present */
+	cpus_and(mask, mask, cpu_online_map);
 
-        if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-                return -EBUSY;
+	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+		return -EBUSY;
 
-        old_vector = cfg->vector;
-        if (old_vector) {
-                cpumask_t tmp;
-                cpus_and(tmp, cfg->domain, mask);
-                if (!cpus_empty(tmp))
-                        return 0;
-        }
+	old_vector = cfg->vector;
+	if (old_vector) {
+		cpumask_t tmp;
+		cpus_and(tmp, cfg->domain, mask);
+		if (!cpus_empty(tmp))
+			return 0;
+	}
 
-        for_each_cpu_mask_nr(cpu, mask) {
-                cpumask_t domain, new_mask;
-                int new_cpu;
-                int vector, offset;
+	for_each_cpu_mask_nr(cpu, mask) {
+		cpumask_t domain, new_mask;
+		int new_cpu;
+		int vector, offset;
 
-                domain = vector_allocation_domain(cpu);
-                cpus_and(new_mask, domain, cpu_online_map);
+		domain = vector_allocation_domain(cpu);
+		cpus_and(new_mask, domain, cpu_online_map);
 
-                vector = current_vector;
-                offset = current_offset;
+		vector = current_vector;
+		offset = current_offset;
 next:
-                vector += 8;
-                if (vector >= first_system_vector) {
-                        /* If we run out of vectors on large boxen, must share them. */
-                        offset = (offset + 1) % 8;
-                        vector = FIRST_DEVICE_VECTOR + offset;
-                }
-                if (unlikely(current_vector == vector))
-                        continue;
+		vector += 8;
+		if (vector >= first_system_vector) {
+			/* If we run out of vectors on large boxen, must share them. */
+			offset = (offset + 1) % 8;
+			vector = FIRST_DEVICE_VECTOR + offset;
+		}
+		if (unlikely(current_vector == vector))
+			continue;
 #ifdef CONFIG_X86_64
-                if (vector == IA32_SYSCALL_VECTOR)
-                        goto next;
+		if (vector == IA32_SYSCALL_VECTOR)
+			goto next;
 #else
-                if (vector == SYSCALL_VECTOR)
-                        goto next;
+		if (vector == SYSCALL_VECTOR)
+			goto next;
 #endif
-                for_each_cpu_mask_nr(new_cpu, new_mask)
-                        if (per_cpu(vector_irq, new_cpu)[vector] != -1)
-                                goto next;
-                /* Found one! */
-                current_vector = vector;
-                current_offset = offset;
-                if (old_vector) {
-                        cfg->move_in_progress = 1;
-                        cfg->old_domain = cfg->domain;
-                }
-		printk(KERN_DEBUG "assign_irq_vector: irq %d vector %#x cpu ", irq, vector);
-		for_each_cpu_mask_nr(new_cpu, new_mask) {
-			per_cpu(vector_irq, new_cpu)[vector] = irq;
-			printk(KERN_CONT " %d ", new_cpu);
+		for_each_cpu_mask_nr(new_cpu, new_mask)
+			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+				goto next;
+		/* Found one! */
+		current_vector = vector;
+		current_offset = offset;
+		if (old_vector) {
+			cfg->move_in_progress = 1;
+			cfg->old_domain = cfg->domain;
 		}
-		printk(KERN_CONT "\n");
-                cfg->vector = vector;
-                cfg->domain = domain;
-                return 0;
-        }
-        return -ENOSPC;
+		for_each_cpu_mask_nr(new_cpu, new_mask)
+			per_cpu(vector_irq, new_cpu)[vector] = irq;
+		cfg->vector = vector;
+		cfg->domain = domain;
+		return 0;
+	}
+	return -ENOSPC;
 }
 
 static int assign_irq_vector(int irq, cpumask_t mask)
@@ -1223,7 +1300,6 @@ static int assign_irq_vector(int irq, cpumask_t mask)
 	spin_lock_irqsave(&vector_lock, flags);
 	err = __assign_irq_vector(irq, mask);
 	spin_unlock_irqrestore(&vector_lock, flags);
-
 	return err;
 }
 
@@ -1269,36 +1345,39 @@ void __setup_vector_irq(int cpu)
 		cfg = irq_cfg(irq);
 		if (!cpu_isset(cpu, cfg->domain))
 			per_cpu(vector_irq, cpu)[vector] = -1;
-        }
+	}
 }
 
 static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
 
-#define IOAPIC_AUTO	-1
-#define IOAPIC_EDGE	0
-#define IOAPIC_LEVEL	1
+#define IOAPIC_AUTO     -1
+#define IOAPIC_EDGE     0
+#define IOAPIC_LEVEL    1
 
 #ifdef CONFIG_X86_32
 static inline int IO_APIC_irq_trigger(int irq)
 {
-	int apic, idx, pin;
+        int apic, idx, pin;
 
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			idx = find_irq_entry(apic, pin, mp_INT);
-			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-				return irq_trigger(idx);
-		}
-	}
-	/*
-	 * nonexistent IRQs are edge default
-	 */
-	return 0;
+        for (apic = 0; apic < nr_ioapics; apic++) {
+                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                        idx = find_irq_entry(apic, pin, mp_INT);
+                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+                                return irq_trigger(idx);
+                }
+        }
+        /*
+         * nonexistent IRQs are edge default
+         */
+        return 0;
 }
 #else
 static inline int IO_APIC_irq_trigger(int irq)
 {
-        return 1;
+	return 1;
 }
 #endif
 
@@ -1318,13 +1397,27 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 	else
 		desc->status &= ~IRQ_LEVEL;
 
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		desc->status |= IRQ_MOVE_PCNTXT;
+		if (trigger)
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_fasteoi_irq,
+						     "fasteoi");
+		else
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_edge_irq, "edge");
+		return;
+	}
+#endif
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL)
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					 handle_fasteoi_irq, "fasteoi");
+					      handle_fasteoi_irq,
+					      "fasteoi");
 	else
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					 handle_edge_irq, "edge");
+					      handle_edge_irq, "edge");
 }
 
 static int setup_ioapic_entry(int apic, int irq,
@@ -1337,11 +1430,45 @@ static int setup_ioapic_entry(int apic, int irq,
 	 */
 	memset(entry,0,sizeof(*entry));
 
-	entry->delivery_mode = INT_DELIVERY_MODE;
-	entry->dest_mode = INT_DEST_MODE;
-	entry->dest = destination;
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled) {
+		struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+		struct irte irte;
+		struct IR_IO_APIC_route_entry *ir_entry =
+			(struct IR_IO_APIC_route_entry *) entry;
+		int index;
+
+		if (!iommu)
+			panic("No mapping iommu for ioapic %d\n", apic);
+
+		index = alloc_irte(iommu, irq, 1);
+		if (index < 0)
+			panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+		memset(&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = trigger;
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = vector;
+		irte.dest_id = IRTE_DEST(destination);
+
+		modify_irte(irq, &irte);
+
+		ir_entry->index2 = (index >> 15) & 0x1;
+		ir_entry->zero = 0;
+		ir_entry->format = 1;
+		ir_entry->index = (index & 0x7fff);
+	} else
+#endif
+	{
+		entry->delivery_mode = INT_DELIVERY_MODE;
+		entry->dest_mode = INT_DEST_MODE;
+		entry->dest = destination;
+	}
 
-	entry->mask = 0;                                /* enable IRQ */
+	entry->mask = 0;				/* enable IRQ */
 	entry->trigger = trigger;
 	entry->polarity = polarity;
 	entry->vector = vector;
@@ -1351,12 +1478,11 @@ static int setup_ioapic_entry(int apic, int irq,
 	 */
 	if (trigger)
 		entry->mask = 1;
-
 	return 0;
 }
 
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
-                              int trigger, int polarity)
+			      int trigger, int polarity)
 {
 	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
@@ -1420,10 +1546,10 @@ static void __init setup_IO_APIC_irqs(void)
 		}
 
 		irq = pin_2_irq(idx, apic, pin);
-
+#ifdef CONFIG_X86_32
                 if (multi_timer_check(apic, irq))
                         continue;
-
+#endif
 		add_pin_to_irq(irq, apic, pin);
 
 		setup_IO_APIC_irq(apic, pin, irq,
@@ -1443,6 +1569,11 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
 	struct IO_APIC_route_entry entry;
 
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled)
+		return;
+#endif
+
 	memset(&entry, 0, sizeof(entry));
 
 	/*
@@ -1461,7 +1592,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 	 * The timer IRQ doesn't have to know that behind the
 	 * scene we may have a 8259A-master in AEOI mode ...
 	 */
-	ioapic_register_intr(0, IOAPIC_EDGE);
+	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
 
 	/*
 	 * Add it to the IO-APIC irq-routing table:
@@ -1501,17 +1632,18 @@ __apicdebuginit(void) print_IO_APIC(void)
 	reg_01.raw = io_apic_read(apic, 1);
 	if (reg_01.bits.version >= 0x10)
 		reg_02.raw = io_apic_read(apic, 2);
-	if (reg_01.bits.version >= 0x20)
-		reg_03.raw = io_apic_read(apic, 3);
+        if (reg_01.bits.version >= 0x20)
+                reg_03.raw = io_apic_read(apic, 3);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
+	printk("\n");
 	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
 	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 
-	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
+	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
 	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
 
 	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
@@ -1548,7 +1680,10 @@ __apicdebuginit(void) print_IO_APIC(void)
 
 		entry = ioapic_read_entry(apic, i);
 
-		printk(KERN_DEBUG " %02x %02X  ", i, entry.dest);
+		printk(KERN_DEBUG " %02x %03X ",
+			i,
+			entry.dest
+		);
 
 		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
 			entry.mask,
@@ -1567,7 +1702,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 		struct irq_pin_list *entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
-		printk(KERN_DEBUG "IRQ%d ", i);
+		printk(KERN_DEBUG "IRQ%d ", cfg->irq);
 		for (;;) {
 			printk("-> %d:%d", entry->apic, entry->pin);
 			if (!entry->next)
@@ -1614,8 +1749,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
-			GET_APIC_ID(v));
+	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1624,7 +1758,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	v = apic_read(APIC_TASKPRI);
 	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 
-	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
 		v = apic_read(APIC_ARBPRI);
 		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
 			v & APIC_ARBPRI_MASK);
@@ -1650,9 +1784,10 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk(KERN_DEBUG "... APIC IRR field:\n");
 	print_APIC_bitfield(APIC_IRR);
 
-	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
 			apic_write(APIC_ESR, 0);
+
 		v = apic_read(APIC_ESR);
 		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 	}
@@ -1710,11 +1845,11 @@ __apicdebuginit(void) print_PIC(void)
 	v = inb(0xa0) << 8 | inb(0x20);
 	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
 
-	outb(0x0b, 0xa0);
-	outb(0x0b, 0x20);
+	outb(0x0b,0xa0);
+	outb(0x0b,0x20);
 	v = inb(0xa0) << 8 | inb(0x20);
-	outb(0x0a, 0xa0);
-	outb(0x0a, 0x20);
+	outb(0x0a,0xa0);
+	outb(0x0a,0x20);
 
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 
@@ -1739,16 +1874,19 @@ fs_initcall(print_all_ICs);
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
-static void __init enable_IO_APIC(void)
+void __init enable_IO_APIC(void)
 {
 	union IO_APIC_reg_01 reg_01;
 	int i8259_apic, i8259_pin;
-	int i, apic;
+	int apic;
 	unsigned long flags;
 
+#ifdef CONFIG_X86_32
+	int i;
 	if (!pirqs_enabled)
 		for (i = 0; i < MAX_PIRQS; i++)
 			pirq_entries[i] = -1;
+#endif
 
 	/*
 	 * The number of IO-APIC IRQ registers (== #pins):
@@ -1759,7 +1897,7 @@ static void __init enable_IO_APIC(void)
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
 	}
-	for (apic = 0; apic < nr_ioapics; apic++) {
+	for(apic = 0; apic < nr_ioapics; apic++) {
 		int pin;
 		/* See if any of the pins is in ExtINT mode */
 		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
@@ -1830,16 +1968,18 @@ void disable_IO_APIC(void)
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest	      = read_apic_id();
+		entry.dest            = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
 		 */
 		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 	}
+
 	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
+#ifdef CONFIG_X86_32
 /*
  * function to set the IO-APIC physical IDs based on the
  * values stored in the MPC table.
@@ -1940,8 +2080,6 @@ static void __init setup_ioapic_ids_from_mpc(void)
 
 		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
 		spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(apic, 0, reg_00.raw);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
 
 		/*
 		 * Sanity check
@@ -1955,6 +2093,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
 			apic_printk(APIC_VERBOSE, " ok.\n");
 	}
 }
+#endif
 
 int no_timer_check __initdata;
 
@@ -1994,9 +2133,10 @@ static int __init timer_irq_works(void)
 	 * might have cached one ExtINT interrupt.  Finally, at
 	 * least one tick may be lost due to delays.
 	 */
+
+	/* jiffies wrap? */
 	if (time_after(jiffies, t1 + 4))
 		return 1;
-
 	return 0;
 }
 
@@ -2014,8 +2154,6 @@ static int __init timer_irq_works(void)
  */
 
 /*
- * Startup quirk:
- *
  * Starting up a edge-triggered IO-APIC interrupt is
  * nasty - we need to make sure that we get the edge.
  * If it is already asserted for some reason, we need
@@ -2023,9 +2161,8 @@ static int __init timer_irq_works(void)
  *
  * This is not complete - we should be able to fake
  * an edge even if it isn't on the 8259A...
- *
- * (We do this for level-triggered IRQs too - it cannot hurt.)
  */
+
 static unsigned int startup_ioapic_irq(unsigned int irq)
 {
 	int was_pending = 0;
@@ -2043,70 +2180,254 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 	return was_pending;
 }
 
+#ifdef CONFIG_X86_64
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-	send_IPI_self(irq_cfg(irq)->vector);
+
+	struct irq_cfg *cfg = irq_cfg(irq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+	spin_unlock_irqrestore(&vector_lock, flags);
 
 	return 1;
 }
-
-#ifdef CONFIG_SMP
-asmlinkage void smp_irq_move_cleanup_interrupt(void)
+#else
+static int ioapic_retrigger_irq(unsigned int irq)
 {
-	unsigned vector, me;
-	ack_APIC_irq();
-	irq_enter();
-
-	me = smp_processor_id();
-	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		unsigned int irq;
-		struct irq_desc *desc;
-		struct irq_cfg *cfg;
-		irq = __get_cpu_var(vector_irq)[vector];
+        send_IPI_self(irq_cfg(irq)->vector);
 
-		desc = irq_to_desc(irq);
-		if (!desc)
-			continue;
+        return 1;
+}
+#endif
 
-		cfg = irq_cfg(irq);
-		spin_lock(&desc->lock);
-		if (!cfg->move_cleanup_count)
-			goto unlock;
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
 
-		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
-			goto unlock;
+#ifdef CONFIG_SMP
 
-		__get_cpu_var(vector_irq)[vector] = -1;
-		cfg->move_cleanup_count--;
-unlock:
-		spin_unlock(&desc->lock);
-	}
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
 
-	irq_exit();
-}
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
 
-static void irq_complete_move(unsigned int irq)
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update(of vector
+ * and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE aswell with the update
+ * vector information, along with modifying IRTE with vector and destination.
+ * So irq migration for level triggered is little  bit more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, only difference being,
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned vector, me;
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+	int modify_ioapic_rte;
+	unsigned int dest;
+	unsigned long flags;
 
-	if (likely(!cfg->move_in_progress))
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
 		return;
 
-	vector = ~get_irq_regs()->orig_ax;
-	me = smp_processor_id();
-	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
-		cpumask_t cleanup_mask;
+	if (get_irte(irq, &irte))
+		return;
 
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	desc = irq_to_desc(irq);
+	modify_ioapic_rte = desc->status & IRQ_LEVEL;
+	if (modify_ioapic_rte) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		__target_IO_APIC_irq(irq, dest, cfg->vector);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * Modified the IRTE and flushes the Interrupt entry cache.
+	 */
+	modify_irte(irq, &irte);
+
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	desc->affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+	int ret = -1;
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	mask_IO_APIC_irq(irq);
+
+	if (io_apic_level_ack_pending(irq)) {
+		/*
+	 	 * Interrupt in progress. Migrating irq now will change the
+		 * vector information in the IO-APIC RTE and that will confuse
+		 * the EOI broadcast performed by cpu.
+		 * So, delay the irq migration to the next instance.
+		 */
+		schedule_delayed_work(&ir_migration_work, 1);
+		goto unmask;
+	}
+
+	/* everthing is clear. we have right of way */
+	migrate_ioapic_irq(irq, desc->pending_mask);
+
+	ret = 0;
+	desc->status &= ~IRQ_MOVE_PENDING;
+	cpus_clear(desc->pending_mask);
+
+unmask:
+	unmask_IO_APIC_irq(irq);
+	return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+	unsigned int irq;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(irq, desc) {
+		if (desc->status & IRQ_MOVE_PENDING) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&desc->lock, flags);
+			if (!desc->chip->set_affinity ||
+			    !(desc->status & IRQ_MOVE_PENDING)) {
+				desc->status &= ~IRQ_MOVE_PENDING;
+				spin_unlock_irqrestore(&desc->lock, flags);
+				continue;
+			}
+
+			desc->chip->set_affinity(irq, desc->pending_mask);
+			spin_unlock_irqrestore(&desc->lock, flags);
+		}
+	}
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	if (desc->status & IRQ_LEVEL) {
+		desc->status |= IRQ_MOVE_PENDING;
+		desc->pending_mask = mask;
+		migrate_irq_remapped_level(irq);
+		return;
+	}
+
+	migrate_ioapic_irq(irq, mask);
+}
+#endif
+
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+	ack_APIC_irq();
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
+	irq_enter();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		unsigned int irq;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+		irq = __get_cpu_var(vector_irq)[vector];
+
+		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
+		cfg = irq_cfg(irq);
+		spin_lock(&desc->lock);
+		if (!cfg->move_cleanup_count)
+			goto unlock;
+
+		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+			goto unlock;
+
+		__get_cpu_var(vector_irq)[vector] = -1;
+		cfg->move_cleanup_count--;
+unlock:
+		spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+static void irq_complete_move(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+	unsigned vector, me;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	vector = ~get_irq_regs()->orig_ax;
+	me = smp_processor_id();
+	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
+		cpumask_t cleanup_mask;
+
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 		cfg->move_in_progress = 0;
 	}
 }
 #else
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+#endif
 
 static void ack_apic_edge(unsigned int irq)
 {
@@ -2118,55 +2439,55 @@ static void ack_apic_edge(unsigned int irq)
 #ifdef CONFIG_X86_64
 static void ack_apic_level(unsigned int irq)
 {
-        int do_unmask_irq = 0;
+	int do_unmask_irq = 0;
 
-        irq_complete_move(irq);
+	irq_complete_move(irq);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-        /* If we are moving the irq we need to mask it */
-        if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
-                do_unmask_irq = 1;
-                mask_IO_APIC_irq(irq);
-        }
+	/* If we are moving the irq we need to mask it */
+	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+		do_unmask_irq = 1;
+		mask_IO_APIC_irq(irq);
+	}
 #endif
 
-        /*
-         * We must acknowledge the irq before we move it or the acknowledge will
-         * not propagate properly.
-         */
-        ack_APIC_irq();
-
-        /* Now we can move and renable the irq */
-        if (unlikely(do_unmask_irq)) {
-                /* Only migrate the irq if the ack has been received.
-                 *
-                 * On rare occasions the broadcast level triggered ack gets
-                 * delayed going to ioapics, and if we reprogram the
-                 * vector while Remote IRR is still set the irq will never
-                 * fire again.
-                 *
-                 * To prevent this scenario we read the Remote IRR bit
-                 * of the ioapic.  This has two effects.
-                 * - On any sane system the read of the ioapic will
-                 *   flush writes (and acks) going to the ioapic from
-                 *   this cpu.
-                 * - We get to see if the ACK has actually been delivered.
-                 *
-                 * Based on failed experiments of reprogramming the
-                 * ioapic entry from outside of irq context starting
-                 * with masking the ioapic entry and then polling until
-                 * Remote IRR was clear before reprogramming the
-                 * ioapic I don't trust the Remote IRR bit to be
-                 * completey accurate.
-                 *
-                 * However there appears to be no other way to plug
-                 * this race, so if the Remote IRR bit is not
-                 * accurate and is causing problems then it is a hardware bug
-                 * and you can go talk to the chipset vendor about it.
-                 */
-                if (!io_apic_level_ack_pending(irq))
-                        move_masked_irq(irq, desc);
-                unmask_IO_APIC_irq(irq);
-        }
+	/*
+	 * We must acknowledge the irq before we move it or the acknowledge will
+	 * not propagate properly.
+	 */
+	ack_APIC_irq();
+
+	/* Now we can move and renable the irq */
+	if (unlikely(do_unmask_irq)) {
+		/* Only migrate the irq if the ack has been received.
+		 *
+		 * On rare occasions the broadcast level triggered ack gets
+		 * delayed going to ioapics, and if we reprogram the
+		 * vector while Remote IRR is still set the irq will never
+		 * fire again.
+		 *
+		 * To prevent this scenario we read the Remote IRR bit
+		 * of the ioapic.  This has two effects.
+		 * - On any sane system the read of the ioapic will
+		 *   flush writes (and acks) going to the ioapic from
+		 *   this cpu.
+		 * - We get to see if the ACK has actually been delivered.
+		 *
+		 * Based on failed experiments of reprogramming the
+		 * ioapic entry from outside of irq context starting
+		 * with masking the ioapic entry and then polling until
+		 * Remote IRR was clear before reprogramming the
+		 * ioapic I don't trust the Remote IRR bit to be
+		 * completey accurate.
+		 *
+		 * However there appears to be no other way to plug
+		 * this race, so if the Remote IRR bit is not
+		 * accurate and is causing problems then it is a hardware bug
+		 * and you can go talk to the chipset vendor about it.
+		 */
+		if (!io_apic_level_ack_pending(irq))
+			move_masked_irq(irq);
+		unmask_IO_APIC_irq(irq);
+	}
 }
 #else
 atomic_t irq_mis_count;
@@ -2177,25 +2498,25 @@ static void ack_apic_level(unsigned int irq)
 
 	irq_complete_move(irq);
 	move_native_irq(irq);
-/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets).  Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless.  As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source.  The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually.  We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt.  We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul.  --macro
- */
+	/*
+	* It appears there is an erratum which affects at least version 0x11
+	* of I/O APIC (that's the 82093AA and cores integrated into various
+	* chipsets).  Under certain conditions a level-triggered interrupt is
+	* erroneously delivered as edge-triggered one but the respective IRR
+	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
+	* message but it will never arrive and further interrupts are blocked
+	* from the source.  The exact reason is so far unknown, but the
+	* phenomenon was observed when two consecutive interrupt requests
+	* from a given source get delivered to the same CPU and the source is
+	* temporarily disabled in between.
+	*
+	* A workaround is to simulate an EOI message manually.  We achieve it
+	* by setting the trigger mode to edge and then to level when the edge
+	* trigger mode gets detected in the TMR of a local APIC for a
+	* level-triggered interrupt.  We mask the source for the time of the
+	* operation to prevent an edge-triggered interrupt escaping meanwhile.
+	* The idea is from Manfred Spraul.  --macro
+	*/
 	i = irq_cfg(irq)->vector;
 
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
@@ -2225,6 +2546,20 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+	.name 		= "IR-IO-APIC",
+	.startup 	= startup_ioapic_irq,
+	.mask	 	= mask_IO_APIC_irq,
+	.unmask	 	= unmask_IO_APIC_irq,
+	.ack 		= ack_x2apic_edge,
+	.eoi 		= ack_x2apic_level,
+#ifdef CONFIG_SMP
+	.set_affinity 	= set_ir_ioapic_affinity_irq,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+#endif
 
 static inline void init_IO_APIC_traps(void)
 {
@@ -2282,7 +2617,7 @@ static void unmask_lapic_irq(unsigned int irq)
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
-static void ack_lapic_irq(unsigned int irq)
+static void ack_lapic_irq (unsigned int irq)
 {
 	ack_APIC_irq();
 }
@@ -2383,12 +2718,12 @@ static inline void __init unlock_ExtINT_logic(void)
 
 static int disable_timer_pin_1 __initdata;
 /* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init parse_disable_timer_pin_1(char *arg)
+static int __init disable_timer_pin_setup(char *arg)
 {
 	disable_timer_pin_1 = 1;
 	return 0;
 }
-early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
 
 int timer_through_8259 __initdata;
 
@@ -2397,6 +2732,8 @@ int timer_through_8259 __initdata;
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
+ *
+ * FIXME: really need to revamp this for all platforms.
  */
 static inline void __init check_timer(void)
 {
@@ -2408,8 +2745,8 @@ static inline void __init check_timer(void)
 
 	local_irq_save(flags);
 
-	ver = apic_read(APIC_LVR);
-	ver = GET_APIC_VERSION(ver);
+        ver = apic_read(APIC_LVR);
+        ver = GET_APIC_VERSION(ver);
 
 	/*
 	 * get/set the timer IRQ vector:
@@ -2428,7 +2765,9 @@ static inline void __init check_timer(void)
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
+#ifdef CONFIG_X86_32
 	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2447,6 +2786,10 @@ static inline void __init check_timer(void)
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
+#ifdef CONFIG_INTR_REMAP
+		if (intr_remapping_enabled)
+			panic("BIOS bug: timer not connected to IO-APIC");
+#endif
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -2473,6 +2816,10 @@ static inline void __init check_timer(void)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
 		}
+#ifdef CONFIG_INTR_REMAP
+		if (intr_remapping_enabled)
+			panic("timer doesn't work through Interrupt-remapped IO-APIC");
+#endif
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
 			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2512,7 +2859,9 @@ static inline void __init check_timer(void)
 			    "through the IO-APIC - disabling NMI Watchdog!\n");
 		nmi_watchdog = NMI_NONE;
 	}
+#ifdef CONFIG_X86_32
 	timer_ack = 0;
+#endif
 
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
@@ -2570,17 +2919,25 @@ out:
 
 void __init setup_IO_APIC(void)
 {
+
+#ifdef CONFIG_X86_32
 	enable_IO_APIC();
+#else
+	/*
+	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
+	 */
+#endif
 
 	io_apic_irqs = ~PIC_IRQS;
 
-	printk("ENABLING IO-APIC IRQs\n");
-
-	/*
-	 * Set up IO-APIC IRQ routing.
-	 */
-	if (!acpi_ioapic)
-		setup_ioapic_ids_from_mpc();
+	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+        /*
+         * Set up IO-APIC IRQ routing.
+         */
+#ifdef CONFIG_X86_32
+        if (!acpi_ioapic)
+                setup_ioapic_ids_from_mpc();
+#endif
 	sync_Arb_IDs();
 	setup_IO_APIC_irqs();
 	init_IO_APIC_traps();
@@ -2588,15 +2945,15 @@ void __init setup_IO_APIC(void)
 }
 
 /*
- *	Called after all the initialization is done. If we didnt find any
- *	APIC bugs then we can allow the modify fast path
+ *      Called after all the initialization is done. If we didnt find any
+ *      APIC bugs then we can allow the modify fast path
  */
 
 static int __init io_apic_bug_finalize(void)
 {
-	if (sis_apic_bug == -1)
-		sis_apic_bug = 0;
-	return 0;
+        if (sis_apic_bug == -1)
+                sis_apic_bug = 0;
+        return 0;
 }
 
 late_initcall(io_apic_bug_finalize);
@@ -2605,7 +2962,7 @@ struct sysfs_ioapic_data {
 	struct sys_device dev;
 	struct IO_APIC_route_entry entry[0];
 };
-static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
 
 static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
 {
@@ -2615,8 +2972,8 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
 
 	data = container_of(dev, struct sysfs_ioapic_data, dev);
 	entry = data->entry;
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-		entry[i] = ioapic_read_entry(dev->id, i);
+	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
+		*entry = ioapic_read_entry(dev->id, i);
 
 	return 0;
 }
@@ -2653,14 +3010,14 @@ static struct sysdev_class ioapic_sysdev_class = {
 
 static int __init ioapic_init_sysfs(void)
 {
-	struct sys_device *dev;
-	int i, size, error = 0;
+	struct sys_device * dev;
+	int i, size, error;
 
 	error = sysdev_class_register(&ioapic_sysdev_class);
 	if (error)
 		return error;
 
-	for (i = 0; i < nr_ioapics; i++) {
+	for (i = 0; i < nr_ioapics; i++ ) {
 		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
 			* sizeof(struct IO_APIC_route_entry);
 		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
@@ -2691,18 +3048,18 @@ device_initcall(ioapic_init_sysfs);
 unsigned int create_irq_nr(unsigned int irq_want)
 {
 	/* Allocate an unused irq */
-	unsigned int irq, new;
+	unsigned int irq;
+	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new;
 
 #ifndef CONFIG_HAVE_SPARSE_IRQ
-	/* only can use bus/dev/fn.. when per_cpu vector is used */
 	irq_want = nr_irqs - 1;
 #endif
 
 	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (nr_irqs - 1); new > 0; new--) {
+	for (new = irq_want; new > 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
 		cfg_new = irq_cfg(new);
@@ -2725,7 +3082,14 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 int create_irq(void)
 {
-	return create_irq_nr(nr_irqs - 1);
+	int irq;
+
+	irq = create_irq_nr(nr_irqs - 1);
+
+	if (irq == 0)
+		irq = -1;
+
+	return irq;
 }
 
 void destroy_irq(unsigned int irq)
@@ -2734,6 +3098,9 @@ void destroy_irq(unsigned int irq)
 
 	dynamic_irq_cleanup(irq);
 
+#ifdef CONFIG_INTR_REMAP
+	free_irte(irq);
+#endif
 	spin_lock_irqsave(&vector_lock, flags);
 	__clear_irq_vector(irq);
 	spin_unlock_irqrestore(&vector_lock, flags);
@@ -2759,25 +3126,54 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	cpus_and(tmp, cfg->domain, tmp);
 	dest = cpu_mask_to_apicid(tmp);
 
-	msg->address_hi = MSI_ADDR_BASE_HI;
-	msg->address_lo =
-		MSI_ADDR_BASE_LO |
-		((INT_DEST_MODE == 0) ?
-			MSI_ADDR_DEST_MODE_PHYSICAL:
-			MSI_ADDR_DEST_MODE_LOGICAL) |
-		((INT_DELIVERY_MODE != dest_LowestPrio) ?
-			MSI_ADDR_REDIRECTION_CPU:
-			MSI_ADDR_REDIRECTION_LOWPRI) |
-		MSI_ADDR_DEST_ID(dest);
-
-	msg->data =
-		MSI_DATA_TRIGGER_EDGE |
-		MSI_DATA_LEVEL_ASSERT |
-		((INT_DELIVERY_MODE != dest_LowestPrio) ?
-			MSI_DATA_DELIVERY_FIXED:
-			MSI_DATA_DELIVERY_LOWPRI) |
-		MSI_DATA_VECTOR(cfg->vector);
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irte irte;
+		int ir_index;
+		u16 sub_handle;
+
+		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+		BUG_ON(ir_index == -1);
+
+		memset (&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = 0; /* edge */
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = cfg->vector;
+		irte.dest_id = IRTE_DEST(dest);
+
+		modify_irte(irq, &irte);
+
+		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->data = sub_handle;
+		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+				  MSI_ADDR_IR_SHV |
+				  MSI_ADDR_IR_INDEX1(ir_index) |
+				  MSI_ADDR_IR_INDEX2(ir_index);
+	} else
+#endif
+	{
+		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->address_lo =
+			MSI_ADDR_BASE_LO |
+			((INT_DEST_MODE == 0) ?
+				MSI_ADDR_DEST_MODE_PHYSICAL:
+				MSI_ADDR_DEST_MODE_LOGICAL) |
+			((INT_DELIVERY_MODE != dest_LowestPrio) ?
+				MSI_ADDR_REDIRECTION_CPU:
+				MSI_ADDR_REDIRECTION_LOWPRI) |
+			MSI_ADDR_DEST_ID(dest);
 
+		msg->data =
+			MSI_DATA_TRIGGER_EDGE |
+			MSI_DATA_LEVEL_ASSERT |
+			((INT_DELIVERY_MODE != dest_LowestPrio) ?
+				MSI_DATA_DELIVERY_FIXED:
+				MSI_DATA_DELIVERY_LOWPRI) |
+			MSI_DATA_VECTOR(cfg->vector);
+	}
 	return err;
 }
 
@@ -2788,6 +3184,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2808,8 +3205,61 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg(irq, &msg);
-	irq_to_desc(irq)->affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	unsigned int dest;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+	struct irq_desc *desc;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * atomically update the IRTE with the new destination and vector.
+	 */
+	modify_irte(irq, &irte);
+
+	/*
+	 * After this point, all the interrupts will start arriving
+	 * at the new destination. So, time to cleanup the previous
+	 * vector allocation.
+	 */
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+}
+#endif
 #endif /* CONFIG_SMP */
 
 /*
@@ -2827,6 +3277,45 @@ static struct irq_chip msi_chip = {
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+	.name		= "IR-PCI-MSI",
+	.unmask		= unmask_msi_irq,
+	.mask		= mask_msi_irq,
+	.ack		= ack_x2apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity	= ir_set_msi_irq_affinity,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+	struct intel_iommu *iommu;
+	int index;
+
+	iommu = map_dev_to_ir(dev);
+	if (!iommu) {
+		printk(KERN_ERR
+		       "Unable to map PCI %s to iommu\n", pci_name(dev));
+		return -ENOENT;
+	}
+
+	index = alloc_irte(iommu, irq, nvec);
+	if (index < 0) {
+		printk(KERN_ERR
+		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
+		        pci_name(dev));
+		return -ENOSPC;
+	}
+	return index;
+}
+#endif
 
 static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 {
@@ -2840,7 +3329,17 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 	set_irq_msi(irq, desc);
 	write_msi_msg(irq, &msg);
 
-	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irq_desc *desc = irq_to_desc(irq);
+		/*
+		 * irq migration in process context
+		 */
+		desc->status |= IRQ_MOVE_PCNTXT;
+		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+	} else
+#endif
+		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 
 	return 0;
 }
@@ -2859,59 +3358,164 @@ static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
 
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
-	int irq, ret;
-
+	unsigned int irq;
+	int ret;
 	unsigned int irq_want;
 
 	irq_want = build_irq_for_pci_dev(dev) + 0x100;
 
 	irq = create_irq_nr(irq_want);
-
 	if (irq == 0)
 		return -1;
 
+#ifdef CONFIG_INTR_REMAP
+	if (!intr_remapping_enabled)
+		goto no_ir;
+
+	ret = msi_alloc_irte(dev, irq, 1);
+	if (ret < 0)
+		goto error;
+no_ir:
+#endif
 	ret = setup_msi_irq(dev, desc, irq);
 	if (ret < 0) {
 		destroy_irq(irq);
 		return ret;
-        }
-
+	}
 	return 0;
+
+#ifdef CONFIG_INTR_REMAP
+error:
+	destroy_irq(irq);
+	return ret;
+#endif
 }
 
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
-        unsigned int irq;
-        int ret, sub_handle;
-        struct msi_desc *desc;
-        unsigned int irq_want;
-
-        irq_want = build_irq_for_pci_dev(dev) + 0x100;
-        sub_handle = 0;
-        list_for_each_entry(desc, &dev->msi_list, list) {
-                irq = create_irq_nr(irq_want--);
-                if (irq == 0)
-                        return -1;
-                ret = setup_msi_irq(dev, desc, irq);
-                if (ret < 0)
-                        goto error;
-                sub_handle++;
-        }
-        return 0;
+	unsigned int irq;
+	int ret, sub_handle;
+	struct msi_desc *desc;
+	unsigned int irq_want;
+
+#ifdef CONFIG_INTR_REMAP
+	struct intel_iommu *iommu = 0;
+	int index = 0;
+#endif
+
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+	sub_handle = 0;
+	list_for_each_entry(desc, &dev->msi_list, list) {
+		irq = create_irq_nr(irq_want--);
+		if (irq == 0)
+			return -1;
+#ifdef CONFIG_INTR_REMAP
+		if (!intr_remapping_enabled)
+			goto no_ir;
+
+		if (!sub_handle) {
+			/*
+			 * allocate the consecutive block of IRTE's
+			 * for 'nvec'
+			 */
+			index = msi_alloc_irte(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			iommu = map_dev_to_ir(dev);
+			if (!iommu) {
+				ret = -ENOENT;
+				goto error;
+			}
+			/*
+			 * setup the mapping between the irq and the IRTE
+			 * base index, the sub_handle pointing to the
+			 * appropriate interrupt remap table entry.
+			 */
+			set_irte_irq(irq, iommu, index, sub_handle);
+		}
+no_ir:
+#endif
+		ret = setup_msi_irq(dev, desc, irq);
+		if (ret < 0)
+			goto error;
+		sub_handle++;
+	}
+	return 0;
 
 error:
-        destroy_irq(irq);
-        return ret;
+	destroy_irq(irq);
+	return ret;
 }
 
-
 void arch_teardown_msi_irq(unsigned int irq)
 {
 	destroy_irq(irq);
 }
 
-#endif /* CONFIG_PCI_MSI */
+#ifdef CONFIG_DMAR
+#ifdef CONFIG_SMP
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	struct msi_msg msg;
+	unsigned int dest;
+	cpumask_t tmp;
+	struct irq_desc *desc;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	dmar_msi_write(irq, &msg);
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip dmar_msi_type = {
+	.name = "DMAR_MSI",
+	.unmask = dmar_msi_unmask,
+	.mask = dmar_msi_mask,
+	.ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity = dmar_msi_set_affinity,
+#endif
+	.retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
 
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+		"edge");
+	return 0;
+}
+#endif
+
+#endif /* CONFIG_PCI_MSI */
 /*
  * Hypertransport interrupt support
  */
@@ -2938,6 +3542,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2951,7 +3556,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
-	irq_to_desc(irq)->affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif
 
@@ -2974,7 +3580,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
 	tmp = TARGET_CPUS;
 	err = assign_irq_vector(irq, tmp);
-	if ( !err) {
+	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
@@ -3007,11 +3613,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 #endif /* CONFIG_HT_IRQ */
 
 /* --------------------------------------------------------------------------
-			ACPI-based IOAPIC Configuration
+                          ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
 
 #ifdef CONFIG_ACPI
 
+#ifdef CONFIG_X86_32
 int __init io_apic_get_unique_id(int ioapic, int apic_id)
 {
 	union IO_APIC_reg_00 reg_00;
@@ -3086,7 +3693,6 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
 	return apic_id;
 }
 
-
 int __init io_apic_get_version(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
@@ -3098,9 +3704,9 @@ int __init io_apic_get_version(int ioapic)
 
 	return reg_01.bits.version;
 }
+#endif
 
-
-int __init io_apic_get_redir_entries(int ioapic)
+int __init io_apic_get_redir_entries (int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3113,10 +3719,10 @@ int __init io_apic_get_redir_entries(int ioapic)
 }
 
 
-int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity)
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
 {
 	if (!IO_APIC_IRQ(irq)) {
-		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
 			ioapic);
 		return -EINVAL;
 	}
@@ -3132,6 +3738,7 @@ int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int po
 	return 0;
 }
 
+
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
 	int i;
@@ -3163,7 +3770,6 @@ void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 
 	if (skip_ioapic_setup == 1)
 		return;
@@ -3184,43 +3790,124 @@ void __init setup_ioapic_dest(void)
 				setup_IO_APIC_irq(ioapic, pin, irq,
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
-			else {
-				desc = irq_to_desc(irq);
+#ifdef CONFIG_INTR_REMAP
+			else if (intr_remapping_enabled)
+				set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
+			else
 				set_ioapic_affinity_irq(irq, TARGET_CPUS);
-			}
 		}
 
 	}
 }
 #endif
 
+#ifdef CONFIG_X86_64
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+
+static struct resource *ioapic_resources;
+
+static struct resource * __init ioapic_setup_resources(void)
+{
+	unsigned long n;
+	struct resource *res;
+	char *mem;
+	int i;
+
+	if (nr_ioapics <= 0)
+		return NULL;
+
+	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+	n *= nr_ioapics;
+
+	mem = alloc_bootmem(n);
+	res = (void *)mem;
+
+	if (mem != NULL) {
+		mem += sizeof(struct resource) * nr_ioapics;
+
+		for (i = 0; i < nr_ioapics; i++) {
+			res[i].name = mem;
+			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+			sprintf(mem,  "IOAPIC %u", i);
+			mem += IOAPIC_RESOURCE_NAME_SIZE;
+		}
+	}
+
+	ioapic_resources = res;
+
+	return res;
+}
+#endif
+
 void __init ioapic_init_mappings(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
 	int i;
+#ifdef CONFIG_X86_64
+	struct resource *ioapic_res;
 
+	ioapic_res = ioapic_setup_resources();
+#endif
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
 			ioapic_phys = mp_ioapics[i].mp_apicaddr;
-			if (!ioapic_phys) {
-				printk(KERN_ERR
-				       "WARNING: bogus zero IO-APIC "
-				       "address found in MPTABLE, "
-				       "disabling IO/APIC support!\n");
-				smp_found_config = 0;
-				skip_ioapic_setup = 1;
-				goto fake_ioapic_page;
-			}
+#ifdef CONFIG_X86_32
+                        if (!ioapic_phys) {
+                                printk(KERN_ERR
+                                       "WARNING: bogus zero IO-APIC "
+                                       "address found in MPTABLE, "
+                                       "disabling IO/APIC support!\n");
+                                smp_found_config = 0;
+                                skip_ioapic_setup = 1;
+                                goto fake_ioapic_page;
+                        }
+#endif
 		} else {
+#ifdef CONFIG_X86_32
 fake_ioapic_page:
+#endif
 			ioapic_phys = (unsigned long)
-				      alloc_bootmem_pages(PAGE_SIZE);
+				alloc_bootmem_pages(PAGE_SIZE);
 			ioapic_phys = __pa(ioapic_phys);
 		}
 		set_fixmap_nocache(idx, ioapic_phys);
-		printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
-		       __fix_to_virt(idx), ioapic_phys);
+		apic_printk(APIC_VERBOSE,
+			    "mapped IOAPIC to %08lx (%08lx)\n",
+			    __fix_to_virt(idx), ioapic_phys);
 		idx++;
+
+#ifdef CONFIG_X86_64
+		if (ioapic_res != NULL) {
+			ioapic_res->start = ioapic_phys;
+			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+			ioapic_res++;
+		}
+#endif
 	}
 }
 
+#ifdef CONFIG_X86_64
+static int __init ioapic_insert_resources(void)
+{
+	int i;
+	struct resource *r = ioapic_resources;
+
+	if (!r) {
+		printk(KERN_ERR
+		       "IO APIC resources could be not be allocated.\n");
+		return -1;
+	}
+
+	for (i = 0; i < nr_ioapics; i++) {
+		insert_resource(&iomem_resource, r);
+		r++;
+	}
+
+	return 0;
+}
+
+/* Insert the IO APIC resources after PCI initialization has occured to handle
+ * IO APICS that are mapped in on a BAR in PCI space. */
+late_initcall(ioapic_insert_resources);
+#endif
-- 
GitLab


From 26d347c2c035b1f4c5b3c5094f3046db9ec920f5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:42 -0700
Subject: [PATCH 431/895] rename io_apic_64.c and io_apic_32.c to io_apic.c

The two files are now line by line equal. (sans a printk)

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile                    |    2 +-
 arch/x86/kernel/{io_apic_32.c => io_apic.c} |    0
 arch/x86/kernel/io_apic_64.c                | 3913 -------------------
 3 files changed, 1 insertion(+), 3914 deletions(-)
 rename arch/x86/kernel/{io_apic_32.c => io_apic.c} (100%)
 delete mode 100644 arch/x86/kernel/io_apic_64.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0d41f0343dc0..7264f9c3af8f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -61,7 +61,7 @@ obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic_$(BITS).o nmi.o
-obj-$(CONFIG_X86_IO_APIC)	+= io_apic_$(BITS).o
+obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic.c
similarity index 100%
rename from arch/x86/kernel/io_apic_32.c
rename to arch/x86/kernel/io_apic.c
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
deleted file mode 100644
index 940c4167b325..000000000000
--- a/arch/x86/kernel/io_apic_64.c
+++ /dev/null
@@ -1,3913 +0,0 @@
-/*
- *	Intel IO-APIC support for multi-Pentium hosts.
- *
- *	Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
- *
- *	Many thanks to Stig Venaas for trying out countless experimental
- *	patches and reporting/debugging problems patiently!
- *
- *	(c) 1999, Multiple IO-APIC support, developed by
- *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
- *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
- *	further tested and cleaned up by Zach Brown <zab@redhat.com>
- *	and Ingo Molnar <mingo@redhat.com>
- *
- *	Fixes
- *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
- *					thanks to Eric Gilmore
- *					and Rolf G. Tews
- *					for testing these extensively
- *	Paul Diefenbaugh	:	Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/pci.h>
-#include <linux/mc146818rtc.h>
-#include <linux/compiler.h>
-#include <linux/acpi.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/jiffies.h>	/* time_after() */
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-#include <linux/bootmem.h>
-#include <linux/dmar.h>
-
-#include <asm/idle.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/acpi.h>
-#include <asm/dma.h>
-#include <asm/timer.h>
-#include <asm/i8259.h>
-#include <asm/nmi.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
-#include <asm/setup.h>
-#include <asm/irq_remapping.h>
-
-#include <mach_ipi.h>
-#include <mach_apic.h>
-#include <mach_apicdef.h>
-
-#define __apicdebuginit(type) static type __init
-
-/*
- *      Is the SiS APIC rmw bug present ?
- *      -1 = don't know, 0 = no, 1 = yes
- */
-int sis_apic_bug = -1;
-
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
-
-int first_free_entry;
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-int pin_map_size;
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
-int nr_ioapics;
-
-/* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* # of MP IRQ source entries */
-int mp_irq_entries;
-
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
-int mp_bus_id_to_type[MAX_MP_BUSSES];
-#endif
-
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-
-int skip_ioapic_setup;
-
-static int __init parse_noapic(char *str)
-{
-	/* disable IO-APIC */
-	disable_ioapic_setup();
-	return 0;
-}
-early_param("noapic", parse_noapic);
-
-struct irq_cfg;
-struct irq_pin_list;
-struct irq_cfg {
-	unsigned int irq;
-	struct irq_cfg *next;
-	struct irq_pin_list *irq_2_pin;
-	cpumask_t domain;
-	cpumask_t old_domain;
-	unsigned move_cleanup_count;
-	u8 vector;
-	u8 move_in_progress : 1;
-};
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg_legacy[] __initdata = {
-	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
-};
-
-static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
-/* need to be biger than size of irq_cfg_legacy */
-static int nr_irq_cfg = 32;
-
-static int __init parse_nr_irq_cfg(char *arg)
-{
-	if (arg) {
-		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
-		if (nr_irq_cfg < 32)
-			nr_irq_cfg = 32;
-	}
-	return 0;
-}
-
-early_param("nr_irq_cfg", parse_nr_irq_cfg);
-
-static void init_one_irq_cfg(struct irq_cfg *cfg)
-{
-	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
-}
-
-static struct irq_cfg *irq_cfgx;
-static struct irq_cfg *irq_cfgx_free;
-static void __init init_work(void *data)
-{
-	struct dyn_array *da = data;
-	struct irq_cfg *cfg;
-	int legacy_count;
-	int i;
-
-	cfg = *da->name;
-
-	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
-
-	legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
-	for (i = legacy_count; i < *da->nr; i++)
-		init_one_irq_cfg(&cfg[i]);
-
-	for (i = 1; i < *da->nr; i++)
-		cfg[i-1].next = &cfg[i];
-
-	irq_cfgx_free = &irq_cfgx[legacy_count];
-	irq_cfgx[legacy_count - 1].next = NULL;
-}
-
-#define for_each_irq_cfg(cfg)		\
-	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
-
-DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
-
-static struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	struct irq_cfg *cfg;
-
-	cfg = irq_cfgx;
-	while (cfg) {
-		if (cfg->irq == irq)
-			return cfg;
-
-		cfg = cfg->next;
-	}
-
-	return NULL;
-}
-
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
-{
-	struct irq_cfg *cfg, *cfg_pri;
-	int i;
-	int count = 0;
-
-	cfg_pri = cfg = irq_cfgx;
-	while (cfg) {
-		if (cfg->irq == irq)
-			return cfg;
-
-		cfg_pri = cfg;
-		cfg = cfg->next;
-		count++;
-	}
-
-	if (!irq_cfgx_free) {
-		unsigned long phys;
-		unsigned long total_bytes;
-		/*
-		 *  we run out of pre-allocate ones, allocate more
-		 */
-		printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
-
-		total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
-		if (after_bootmem)
-			cfg = kzalloc(total_bytes, GFP_ATOMIC);
-		else
-			cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-		if (!cfg)
-			panic("please boot with nr_irq_cfg= %d\n", count * 2);
-
-		phys = __pa(cfg);
-		printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
-
-		for (i = 0; i < nr_irq_cfg; i++)
-			init_one_irq_cfg(&cfg[i]);
-
-		for (i = 1; i < nr_irq_cfg; i++)
-			cfg[i-1].next = &cfg[i];
-
-		irq_cfgx_free = cfg;
-	}
-
-	cfg = irq_cfgx_free;
-	irq_cfgx_free = irq_cfgx_free->next;
-	cfg->next = NULL;
-	if (cfg_pri)
-		cfg_pri->next = cfg;
-	else
-		irq_cfgx = cfg;
-	cfg->irq = irq;
-	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
-#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
-	{
-		/* dump the results */
-		struct irq_cfg *cfg;
-		unsigned long phys;
-		unsigned long bytes = sizeof(struct irq_cfg);
-
-		printk(KERN_DEBUG "=========================== %d\n", irq);
-		printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
-		for_each_irq_cfg(cfg) {
-			phys = __pa(cfg);
-			printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
-		}
-		printk(KERN_DEBUG "===========================\n");
-	}
-#endif
-	return cfg;
-}
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-struct irq_pin_list {
-	int apic, pin;
-	struct irq_pin_list *next;
-};
-
-static struct irq_pin_list *irq_2_pin_head;
-/* fill one page ? */
-static int nr_irq_2_pin = 0x100;
-static struct irq_pin_list *irq_2_pin_ptr;
-static void __init irq_2_pin_init_work(void *data)
-{
-	struct dyn_array *da = data;
-	struct irq_pin_list *pin;
-	int i;
-
-	pin = *da->name;
-
-	for (i = 1; i < *da->nr; i++)
-		pin[i-1].next = &pin[i];
-
-	irq_2_pin_ptr = &pin[0];
-}
-DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
-
-static struct irq_pin_list *get_one_free_irq_2_pin(void)
-{
-	struct irq_pin_list *pin;
-	int i;
-
-	pin = irq_2_pin_ptr;
-
-	if (pin) {
-		irq_2_pin_ptr = pin->next;
-		pin->next = NULL;
-		return pin;
-	}
-
-	/*
-	 *  we run out of pre-allocate ones, allocate more
-	 */
-	printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
-
-	if (after_bootmem)
-		pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
-				 GFP_ATOMIC);
-	else
-		pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
-				nr_irq_2_pin, PAGE_SIZE, 0);
-
-	if (!pin)
-		panic("can not get more irq_2_pin\n");
-
-	for (i = 1; i < nr_irq_2_pin; i++)
-		pin[i-1].next = &pin[i];
-
-	irq_2_pin_ptr = pin->next;
-	pin->next = NULL;
-
-	return pin;
-}
-
-struct io_apic {
-	unsigned int index;
-	unsigned int unused[3];
-	unsigned int data;
-};
-
-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
-{
-	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-		+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
-}
-
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	writel(reg, &io_apic->index);
-	return readl(&io_apic->data);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	writel(reg, &io_apic->index);
-	writel(value, &io_apic->data);
-}
-
-/*
- * Re-write a value: to be used for read-modify-write
- * cycles where the read already set up the index register.
- *
- * Older SiS APIC requires we rewrite the index register
- */
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-        if (sis_apic_bug)
-                writel(reg, &io_apic->index);
-	writel(value, &io_apic->data);
-}
-
-#ifdef CONFIG_X86_64
-static bool io_apic_level_ack_pending(unsigned int irq)
-{
-	struct irq_pin_list *entry;
-	unsigned long flags;
-	struct irq_cfg *cfg = irq_cfg(irq);
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		unsigned int reg;
-		int pin;
-
-		if (!entry)
-			break;
-		pin = entry->pin;
-		reg = io_apic_read(entry->apic, 0x10 + pin*2);
-		/* Is the remote IRR bit set? */
-		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
-			spin_unlock_irqrestore(&ioapic_lock, flags);
-			return true;
-		}
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return false;
-}
-#endif
-
-union entry_union {
-	struct { u32 w1, w2; };
-	struct IO_APIC_route_entry entry;
-};
-
-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
-{
-	union entry_union eu;
-	unsigned long flags;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
-	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-	return eu.entry;
-}
-
-/*
- * When we write a new IO APIC routing entry, we need to write the high
- * word first! If the mask bit in the low word is clear, we will enable
- * the interrupt, and we need to make sure the entry is fully populated
- * before that happens.
- */
-static void
-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-	union entry_union eu;
-	eu.entry = e;
-	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-}
-
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__ioapic_write_entry(apic, pin, e);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * When we mask an IO APIC routing entry, we need to write the low
- * word first, in order to set the mask bit before we change the
- * high bits!
- */
-static void ioapic_mask_entry(int apic, int pin)
-{
-	unsigned long flags;
-	union entry_union eu = { .entry.mask = 1 };
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_SMP
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
-	int apic, pin;
-	struct irq_cfg *cfg;
-	struct irq_pin_list *entry;
-
-	cfg = irq_cfg(irq);
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		unsigned int reg;
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-#ifdef CONFIG_INTR_REMAP
-		/*
-		 * With interrupt-remapping, destination information comes
-		 * from interrupt-remapping table entry.
-		 */
-		if (!irq_remapped(irq))
-			io_apic_write(apic, 0x11 + pin*2, dest);
-#else
-		io_apic_write(apic, 0x11 + pin*2, dest);
-#endif
-		reg = io_apic_read(apic, 0x10 + pin*2);
-		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-		reg |= vector;
-		io_apic_modify(apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-}
-
-static int assign_irq_vector(int irq, cpumask_t mask);
-
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	cfg = irq_cfg(irq);
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-	/*
-	 * Only the high 8 bits are valid.
-	 */
-	dest = SET_APIC_LOGICAL_ID(dest);
-
-	desc = irq_to_desc(irq);
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	desc->affinity = mask;
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-#endif /* CONFIG_SMP */
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
- * shared ISA-space IRQs, so we have to support them. We are super
- * fast in the common case, and fast for shared ISA-space IRQs.
- */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
-{
-	struct irq_cfg *cfg;
-	struct irq_pin_list *entry;
-
-	/* first time to refer irq_cfg, so with new */
-	cfg = irq_cfg_alloc(irq);
-	entry = cfg->irq_2_pin;
-	if (!entry) {
-		entry = get_one_free_irq_2_pin();
-		cfg->irq_2_pin = entry;
-		entry->apic = apic;
-		entry->pin = pin;
-		printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
-		return;
-	}
-
-	while (entry->next) {
-		/* not again, please */
-		if (entry->apic == apic && entry->pin == pin)
-			return;
-
-		entry = entry->next;
-	}
-
-	entry->next = get_one_free_irq_2_pin();
-	entry = entry->next;
-	entry->apic = apic;
-	entry->pin = pin;
-	printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq(unsigned int irq,
-				      int oldapic, int oldpin,
-				      int newapic, int newpin)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-	struct irq_pin_list *entry = cfg->irq_2_pin;
-	int replaced = 0;
-
-	while (entry) {
-		if (entry->apic == oldapic && entry->pin == oldpin) {
-			entry->apic = newapic;
-			entry->pin = newpin;
-			replaced = 1;
-			/* every one is different, right? */
-			break;
-		}
-		entry = entry->next;
-	}
-
-	/* why? call replace before add? */
-	if (!replaced)
-		add_pin_to_irq(irq, newapic, newpin);
-}
-
-#ifdef CONFIG_X86_64
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL)					\
-									\
-{									\
-	int pin;							\
-	struct irq_cfg *cfg;						\
-	struct irq_pin_list *entry;					\
-									\
-	cfg = irq_cfg(irq);						\
-	entry = cfg->irq_2_pin;						\
-	for (;;) {							\
-		unsigned int reg;					\
-		if (!entry)						\
-			break;						\
-		pin = entry->pin;					\
-		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
-		reg ACTION;						\
-		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
-		FINAL;							\
-		if (!entry->next)					\
-			break;						\
-		entry = entry->next;					\
-	}								\
-}
-
-#define DO_ACTION(name,R,ACTION, FINAL)					\
-									\
-	static void name##_IO_APIC_irq (unsigned int irq)		\
-	__DO_ACTION(R, ACTION, FINAL)
-
-/* mask = 1 */
-DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
-
-/* mask = 0 */
-DO_ACTION(__unmask,	0, &= ~IO_APIC_REDIR_MASKED, )
-
-#else
-
-static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
-{
-	struct irq_cfg *cfg;
-	struct irq_pin_list *entry;
-	unsigned int pin, reg;
-
-	cfg = irq_cfg(irq);
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		if (!entry)
-			break;
-		pin = entry->pin;
-		reg = io_apic_read(entry->apic, 0x10 + pin*2);
-		reg &= ~disable;
-		reg |= enable;
-		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-}
-
-/* mask = 1 */
-static void __mask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
-}
-
-/* mask = 0 */
-static void __unmask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
-}
-
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
-				IO_APIC_REDIR_LEVEL_TRIGGER);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
-				IO_APIC_REDIR_MASKED);
-}
-
-#endif
-
-static void mask_IO_APIC_irq (unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__mask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_IO_APIC_irq (unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__unmask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
-{
-	struct IO_APIC_route_entry entry;
-
-	/* Check delivery_mode to be sure we're not clearing an SMI pin */
-	entry = ioapic_read_entry(apic, pin);
-	if (entry.delivery_mode == dest_SMI)
-		return;
-	/*
-	 * Disable it in the IO-APIC irq-routing table:
-	 */
-	ioapic_mask_entry(apic, pin);
-}
-
-static void clear_IO_APIC (void)
-{
-	int apic, pin;
-
-	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
-			clear_IO_APIC_pin(apic, pin);
-}
-
-#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
-void send_IPI_self(int vector)
-{
-	unsigned int cfg;
-
-	/*
-	 * Wait for idle.
-	 */
-	apic_wait_icr_idle();
-	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
-	/*
-	 * Send the IPI. The write to APIC_ICR fires this off.
-	 */
-	apic_write(APIC_ICR, cfg);
-}
-#endif /* !CONFIG_SMP && CONFIG_X86_32*/
-
-#ifdef CONFIG_X86_32
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-static int pirq_entries [MAX_PIRQS];
-static int pirqs_enabled;
-
-static int __init ioapic_pirq_setup(char *str)
-{
-	int i, max;
-	int ints[MAX_PIRQS+1];
-
-	get_options(str, ARRAY_SIZE(ints), ints);
-
-	for (i = 0; i < MAX_PIRQS; i++)
-		pirq_entries[i] = -1;
-
-	pirqs_enabled = 1;
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"PIRQ redirection, working around broken MP-BIOS.\n");
-	max = MAX_PIRQS;
-	if (ints[0] < MAX_PIRQS)
-		max = ints[0];
-
-	for (i = 0; i < max; i++) {
-		apic_printk(APIC_VERBOSE, KERN_DEBUG
-				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
-		/*
-		 * PIRQs are mapped upside down, usually.
-		 */
-		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
-	}
-	return 1;
-}
-
-__setup("pirq=", ioapic_pirq_setup);
-#endif /* CONFIG_X86_32 */
-
-#ifdef CONFIG_INTR_REMAP
-/* I/O APIC RTE contents at the OS boot up */
-static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
-
-/*
- * Saves and masks all the unmasked IO-APIC RTE's
- */
-int save_mask_IO_APIC_setup(void)
-{
-	union IO_APIC_reg_01 reg_01;
-	unsigned long flags;
-	int apic, pin;
-
-	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_01.raw = io_apic_read(apic, 1);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-	}
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		early_ioapic_entries[apic] =
-			kzalloc(sizeof(struct IO_APIC_route_entry) *
-				nr_ioapic_registers[apic], GFP_KERNEL);
-		if (!early_ioapic_entries[apic])
-			return -ENOMEM;
-	}
-
-	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			struct IO_APIC_route_entry entry;
-
-			entry = early_ioapic_entries[apic][pin] =
-				ioapic_read_entry(apic, pin);
-			if (!entry.mask) {
-				entry.mask = 1;
-				ioapic_write_entry(apic, pin, entry);
-			}
-		}
-	return 0;
-}
-
-void restore_IO_APIC_setup(void)
-{
-	int apic, pin;
-
-	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
-			ioapic_write_entry(apic, pin,
-					   early_ioapic_entries[apic][pin]);
-}
-
-void reinit_intr_remapped_IO_APIC(int intr_remapping)
-{
-	/*
-	 * for now plain restore of previous settings.
-	 * TBD: In the case of OS enabling interrupt-remapping,
-	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
-	 * table entries. for now, do a plain restore, and wait for
-	 * the setup_IO_APIC_irqs() to do proper initialization.
-	 */
-	restore_IO_APIC_setup();
-}
-#endif
-
-/*
- * Find the IRQ entry number of a certain pin.
- */
-static int find_irq_entry(int apic, int pin, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mp_irqtype == type &&
-		    (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
-		     mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
-		    mp_irqs[i].mp_dstirq == pin)
-			return i;
-
-	return -1;
-}
-
-/*
- * Find the pin to which IRQ[irq] (ISA) is connected
- */
-static int __init find_isa_irq_pin(int irq, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mp_irqtype == type) &&
-		    (mp_irqs[i].mp_srcbusirq == irq))
-
-			return mp_irqs[i].mp_dstirq;
-	}
-	return -1;
-}
-
-static int __init find_isa_irq_apic(int irq, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mp_irqtype == type) &&
-		    (mp_irqs[i].mp_srcbusirq == irq))
-			break;
-	}
-	if (i < mp_irq_entries) {
-		int apic;
-		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
-				return apic;
-		}
-	}
-
-	return -1;
-}
-
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-	int apic, i, best_guess = -1;
-
-	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-		bus, slot, pin);
-	if (test_bit(bus, mp_bus_not_pci)) {
-		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-		return -1;
-	}
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
-			    mp_irqs[i].mp_dstapic == MP_APIC_ALL)
-				break;
-
-		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].mp_irqtype &&
-		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
-
-			if (!(apic || IO_APIC_IRQ(irq)))
-				continue;
-
-			if (pin == (mp_irqs[i].mp_srcbusirq & 3))
-				return irq;
-			/*
-			 * Use the first all-but-pin matching entry as a
-			 * best-guess fuzzy result for broken mptables.
-			 */
-			if (best_guess < 0)
-				best_guess = irq;
-		}
-	}
-	return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
-	if (irq < 16) {
-		unsigned int port = 0x4d0 + (irq >> 3);
-		return (inb(port) >> (irq & 7)) & 1;
-	}
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"Broken MPtable reports ISA irq %d\n", irq);
-	return 0;
-}
-
-#endif
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx)	(0)
-#define default_ISA_polarity(idx)	(0)
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value.  If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
-#define default_EISA_polarity(idx)	default_ISA_polarity(idx)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx)	(1)
-#define default_PCI_polarity(idx)	(1)
-
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx)	(1)
-#define default_MCA_polarity(idx)	default_ISA_polarity(idx)
-
-static int MPBIOS_polarity(int idx)
-{
-	int bus = mp_irqs[idx].mp_srcbus;
-	int polarity;
-
-	/*
-	 * Determine IRQ line polarity (high active or low active):
-	 */
-	switch (mp_irqs[idx].mp_irqflag & 3)
-	{
-		case 0: /* conforms, ie. bus-type dependent polarity */
-			if (test_bit(bus, mp_bus_not_pci))
-				polarity = default_ISA_polarity(idx);
-			else
-				polarity = default_PCI_polarity(idx);
-			break;
-		case 1: /* high active */
-		{
-			polarity = 0;
-			break;
-		}
-		case 2: /* reserved */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			polarity = 1;
-			break;
-		}
-		case 3: /* low active */
-		{
-			polarity = 1;
-			break;
-		}
-		default: /* invalid */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			polarity = 1;
-			break;
-		}
-	}
-	return polarity;
-}
-
-static int MPBIOS_trigger(int idx)
-{
-	int bus = mp_irqs[idx].mp_srcbus;
-	int trigger;
-
-	/*
-	 * Determine IRQ trigger mode (edge or level sensitive):
-	 */
-	switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
-	{
-		case 0: /* conforms, ie. bus-type dependent */
-			if (test_bit(bus, mp_bus_not_pci))
-				trigger = default_ISA_trigger(idx);
-			else
-				trigger = default_PCI_trigger(idx);
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-			switch (mp_bus_id_to_type[bus]) {
-				case MP_BUS_ISA: /* ISA pin */
-				{
-					/* set before the switch */
-					break;
-				}
-				case MP_BUS_EISA: /* EISA pin */
-				{
-					trigger = default_EISA_trigger(idx);
-					break;
-				}
-				case MP_BUS_PCI: /* PCI pin */
-				{
-					/* set before the switch */
-					break;
-				}
-				case MP_BUS_MCA: /* MCA pin */
-				{
-					trigger = default_MCA_trigger(idx);
-					break;
-				}
-				default:
-				{
-					printk(KERN_WARNING "broken BIOS!!\n");
-					trigger = 1;
-					break;
-				}
-			}
-#endif
-			break;
-		case 1: /* edge */
-		{
-			trigger = 0;
-			break;
-		}
-		case 2: /* reserved */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			trigger = 1;
-			break;
-		}
-		case 3: /* level */
-		{
-			trigger = 1;
-			break;
-		}
-		default: /* invalid */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			trigger = 0;
-			break;
-		}
-	}
-	return trigger;
-}
-
-static inline int irq_polarity(int idx)
-{
-	return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
-	return MPBIOS_trigger(idx);
-}
-
-int (*ioapic_renumber_irq)(int ioapic, int irq);
-static int pin_2_irq(int idx, int apic, int pin)
-{
-	int irq, i;
-	int bus = mp_irqs[idx].mp_srcbus;
-
-	/*
-	 * Debugging check, we are in big trouble if this message pops up!
-	 */
-	if (mp_irqs[idx].mp_dstirq != pin)
-		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
-	if (test_bit(bus, mp_bus_not_pci)) {
-		irq = mp_irqs[idx].mp_srcbusirq;
-	} else {
-		/*
-		 * PCI IRQs are mapped in order
-		 */
-		i = irq = 0;
-		while (i < apic)
-			irq += nr_ioapic_registers[i++];
-		irq += pin;
-                /*
-                 * For MPS mode, so far only needed by ES7000 platform
-                 */
-                if (ioapic_renumber_irq)
-                        irq = ioapic_renumber_irq(apic, irq);
-	}
-
-#ifdef CONFIG_X86_32
-	/*
-	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
-	 */
-	if ((pin >= 16) && (pin <= 23)) {
-		if (pirq_entries[pin-16] != -1) {
-			if (!pirq_entries[pin-16]) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						"disabling PIRQ%d\n", pin-16);
-			} else {
-				irq = pirq_entries[pin-16];
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						"using PIRQ%d -> IRQ %d\n",
-						pin-16, irq);
-			}
-		}
-	}
-#endif
-
-	return irq;
-}
-
-void lock_vector_lock(void)
-{
-	/* Used to the online set of cpus does not change
-	 * during assign_irq_vector.
-	 */
-	spin_lock(&vector_lock);
-}
-
-void unlock_vector_lock(void)
-{
-	spin_unlock(&vector_lock);
-}
-
-static int __assign_irq_vector(int irq, cpumask_t mask)
-{
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
-	unsigned int old_vector;
-	int cpu;
-	struct irq_cfg *cfg;
-
-	cfg = irq_cfg(irq);
-
-	/* Only try and allocate irqs on cpus that are present */
-	cpus_and(mask, mask, cpu_online_map);
-
-	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-		return -EBUSY;
-
-	old_vector = cfg->vector;
-	if (old_vector) {
-		cpumask_t tmp;
-		cpus_and(tmp, cfg->domain, mask);
-		if (!cpus_empty(tmp))
-			return 0;
-	}
-
-	for_each_cpu_mask_nr(cpu, mask) {
-		cpumask_t domain, new_mask;
-		int new_cpu;
-		int vector, offset;
-
-		domain = vector_allocation_domain(cpu);
-		cpus_and(new_mask, domain, cpu_online_map);
-
-		vector = current_vector;
-		offset = current_offset;
-next:
-		vector += 8;
-		if (vector >= first_system_vector) {
-			/* If we run out of vectors on large boxen, must share them. */
-			offset = (offset + 1) % 8;
-			vector = FIRST_DEVICE_VECTOR + offset;
-		}
-		if (unlikely(current_vector == vector))
-			continue;
-#ifdef CONFIG_X86_64
-		if (vector == IA32_SYSCALL_VECTOR)
-			goto next;
-#else
-		if (vector == SYSCALL_VECTOR)
-			goto next;
-#endif
-		for_each_cpu_mask_nr(new_cpu, new_mask)
-			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
-				goto next;
-		/* Found one! */
-		current_vector = vector;
-		current_offset = offset;
-		if (old_vector) {
-			cfg->move_in_progress = 1;
-			cfg->old_domain = cfg->domain;
-		}
-		for_each_cpu_mask_nr(new_cpu, new_mask)
-			per_cpu(vector_irq, new_cpu)[vector] = irq;
-		cfg->vector = vector;
-		cfg->domain = domain;
-		return 0;
-	}
-	return -ENOSPC;
-}
-
-static int assign_irq_vector(int irq, cpumask_t mask)
-{
-	int err;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, mask);
-	spin_unlock_irqrestore(&vector_lock, flags);
-	return err;
-}
-
-static void __clear_irq_vector(int irq)
-{
-	struct irq_cfg *cfg;
-	cpumask_t mask;
-	int cpu, vector;
-
-	cfg = irq_cfg(irq);
-	BUG_ON(!cfg->vector);
-
-	vector = cfg->vector;
-	cpus_and(mask, cfg->domain, cpu_online_map);
-	for_each_cpu_mask_nr(cpu, mask)
-		per_cpu(vector_irq, cpu)[vector] = -1;
-
-	cfg->vector = 0;
-	cpus_clear(cfg->domain);
-}
-
-void __setup_vector_irq(int cpu)
-{
-	/* Initialize vector_irq on a new cpu */
-	/* This function must be called with vector_lock held */
-	int irq, vector;
-	struct irq_cfg *cfg;
-
-	/* Mark the inuse vectors */
-	for_each_irq_cfg(cfg) {
-		if (!cpu_isset(cpu, cfg->domain))
-			continue;
-		vector = cfg->vector;
-		irq = cfg->irq;
-		per_cpu(vector_irq, cpu)[vector] = irq;
-	}
-	/* Mark the free vectors */
-	for (vector = 0; vector < NR_VECTORS; ++vector) {
-		irq = per_cpu(vector_irq, cpu)[vector];
-		if (irq < 0)
-			continue;
-
-		cfg = irq_cfg(irq);
-		if (!cpu_isset(cpu, cfg->domain))
-			per_cpu(vector_irq, cpu)[vector] = -1;
-	}
-}
-
-static struct irq_chip ioapic_chip;
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip ir_ioapic_chip;
-#endif
-
-#define IOAPIC_AUTO     -1
-#define IOAPIC_EDGE     0
-#define IOAPIC_LEVEL    1
-
-#ifdef CONFIG_X86_32
-static inline int IO_APIC_irq_trigger(int irq)
-{
-        int apic, idx, pin;
-
-        for (apic = 0; apic < nr_ioapics; apic++) {
-                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-                        idx = find_irq_entry(apic, pin, mp_INT);
-                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-                                return irq_trigger(idx);
-                }
-        }
-        /*
-         * nonexistent IRQs are edge default
-         */
-        return 0;
-}
-#else
-static inline int IO_APIC_irq_trigger(int irq)
-{
-	return 1;
-}
-#endif
-
-static void ioapic_register_intr(int irq, unsigned long trigger)
-{
-	struct irq_desc *desc;
-
-	/* first time to use this irq_desc */
-	if (irq < 16)
-		desc = irq_to_desc(irq);
-	else
-		desc = irq_to_desc_alloc(irq);
-
-	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-	    trigger == IOAPIC_LEVEL)
-		desc->status |= IRQ_LEVEL;
-	else
-		desc->status &= ~IRQ_LEVEL;
-
-#ifdef CONFIG_INTR_REMAP
-	if (irq_remapped(irq)) {
-		desc->status |= IRQ_MOVE_PCNTXT;
-		if (trigger)
-			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
-						      handle_fasteoi_irq,
-						     "fasteoi");
-		else
-			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
-						      handle_edge_irq, "edge");
-		return;
-	}
-#endif
-	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-	    trigger == IOAPIC_LEVEL)
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_fasteoi_irq,
-					      "fasteoi");
-	else
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_edge_irq, "edge");
-}
-
-static int setup_ioapic_entry(int apic, int irq,
-			      struct IO_APIC_route_entry *entry,
-			      unsigned int destination, int trigger,
-			      int polarity, int vector)
-{
-	/*
-	 * add it to the IO-APIC irq-routing table:
-	 */
-	memset(entry,0,sizeof(*entry));
-
-#ifdef CONFIG_INTR_REMAP
-	if (intr_remapping_enabled) {
-		struct intel_iommu *iommu = map_ioapic_to_ir(apic);
-		struct irte irte;
-		struct IR_IO_APIC_route_entry *ir_entry =
-			(struct IR_IO_APIC_route_entry *) entry;
-		int index;
-
-		if (!iommu)
-			panic("No mapping iommu for ioapic %d\n", apic);
-
-		index = alloc_irte(iommu, irq, 1);
-		if (index < 0)
-			panic("Failed to allocate IRTE for ioapic %d\n", apic);
-
-		memset(&irte, 0, sizeof(irte));
-
-		irte.present = 1;
-		irte.dst_mode = INT_DEST_MODE;
-		irte.trigger_mode = trigger;
-		irte.dlvry_mode = INT_DELIVERY_MODE;
-		irte.vector = vector;
-		irte.dest_id = IRTE_DEST(destination);
-
-		modify_irte(irq, &irte);
-
-		ir_entry->index2 = (index >> 15) & 0x1;
-		ir_entry->zero = 0;
-		ir_entry->format = 1;
-		ir_entry->index = (index & 0x7fff);
-	} else
-#endif
-	{
-		entry->delivery_mode = INT_DELIVERY_MODE;
-		entry->dest_mode = INT_DEST_MODE;
-		entry->dest = destination;
-	}
-
-	entry->mask = 0;				/* enable IRQ */
-	entry->trigger = trigger;
-	entry->polarity = polarity;
-	entry->vector = vector;
-
-	/* Mask level triggered irqs.
-	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-	 */
-	if (trigger)
-		entry->mask = 1;
-	return 0;
-}
-
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
-			      int trigger, int polarity)
-{
-	struct irq_cfg *cfg;
-	struct IO_APIC_route_entry entry;
-	cpumask_t mask;
-
-	if (!IO_APIC_IRQ(irq))
-		return;
-
-	cfg = irq_cfg(irq);
-
-	mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cpus_and(mask, cfg->domain, mask);
-
-	apic_printk(APIC_VERBOSE,KERN_DEBUG
-		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
-		    "IRQ %d Mode:%i Active:%i)\n",
-		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
-		    irq, trigger, polarity);
-
-
-	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-			       cpu_mask_to_apicid(mask), trigger, polarity,
-			       cfg->vector)) {
-		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
-		       mp_ioapics[apic].mp_apicid, pin);
-		__clear_irq_vector(irq);
-		return;
-	}
-
-	ioapic_register_intr(irq, trigger);
-	if (irq < 16)
-		disable_8259A_irq(irq);
-
-	ioapic_write_entry(apic, pin, entry);
-}
-
-static void __init setup_IO_APIC_irqs(void)
-{
-	int apic, pin, idx, irq, first_notcon = 1;
-
-	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
-		idx = find_irq_entry(apic,pin,mp_INT);
-		if (idx == -1) {
-			if (first_notcon) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
-				first_notcon = 0;
-			} else
-				apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
-			continue;
-		}
-		if (!first_notcon) {
-			apic_printk(APIC_VERBOSE, " not connected.\n");
-			first_notcon = 1;
-		}
-
-		irq = pin_2_irq(idx, apic, pin);
-#ifdef CONFIG_X86_32
-                if (multi_timer_check(apic, irq))
-                        continue;
-#endif
-		add_pin_to_irq(irq, apic, pin);
-
-		setup_IO_APIC_irq(apic, pin, irq,
-				  irq_trigger(idx), irq_polarity(idx));
-	}
-	}
-
-	if (!first_notcon)
-		apic_printk(APIC_VERBOSE, " not connected.\n");
-}
-
-/*
- * Set up the timer pin, possibly with the 8259A-master behind.
- */
-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
-					int vector)
-{
-	struct IO_APIC_route_entry entry;
-
-#ifdef CONFIG_INTR_REMAP
-	if (intr_remapping_enabled)
-		return;
-#endif
-
-	memset(&entry, 0, sizeof(entry));
-
-	/*
-	 * We use logical delivery to get the timer IRQ
-	 * to the first CPU.
-	 */
-	entry.dest_mode = INT_DEST_MODE;
-	entry.mask = 1;					/* mask IRQ now */
-	entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
-	entry.delivery_mode = INT_DELIVERY_MODE;
-	entry.polarity = 0;
-	entry.trigger = 0;
-	entry.vector = vector;
-
-	/*
-	 * The timer IRQ doesn't have to know that behind the
-	 * scene we may have a 8259A-master in AEOI mode ...
-	 */
-	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
-
-	/*
-	 * Add it to the IO-APIC irq-routing table:
-	 */
-	ioapic_write_entry(apic, pin, entry);
-}
-
-
-__apicdebuginit(void) print_IO_APIC(void)
-{
-	int apic, i;
-	union IO_APIC_reg_00 reg_00;
-	union IO_APIC_reg_01 reg_01;
-	union IO_APIC_reg_02 reg_02;
-	union IO_APIC_reg_03 reg_03;
-	unsigned long flags;
-	struct irq_cfg *cfg;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
-	for (i = 0; i < nr_ioapics; i++)
-		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-		       mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
-
-	/*
-	 * We are a bit conservative about what we expect.  We have to
-	 * know about every hardware change ASAP.
-	 */
-	printk(KERN_INFO "testing the IO APIC.......................\n");
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(apic, 0);
-	reg_01.raw = io_apic_read(apic, 1);
-	if (reg_01.bits.version >= 0x10)
-		reg_02.raw = io_apic_read(apic, 2);
-        if (reg_01.bits.version >= 0x20)
-                reg_03.raw = io_apic_read(apic, 3);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	printk("\n");
-	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
-	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
-	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
-	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
-	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
-
-	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
-	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
-
-	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
-	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
-
-	/*
-	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
-	 * but the value of reg_02 is read as the previous read register
-	 * value, so ignore it if reg_02 == reg_01.
-	 */
-	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
-		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
-		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
-	}
-
-	/*
-	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
-	 * or reg_03, but the value of reg_0[23] is read as the previous read
-	 * register value, so ignore it if reg_03 == reg_0[12].
-	 */
-	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
-	    reg_03.raw != reg_01.raw) {
-		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
-		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
-	}
-
-	printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
-	printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
-			  " Stat Dmod Deli Vect:   \n");
-
-	for (i = 0; i <= reg_01.bits.entries; i++) {
-		struct IO_APIC_route_entry entry;
-
-		entry = ioapic_read_entry(apic, i);
-
-		printk(KERN_DEBUG " %02x %03X ",
-			i,
-			entry.dest
-		);
-
-		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
-			entry.mask,
-			entry.trigger,
-			entry.irr,
-			entry.polarity,
-			entry.delivery_status,
-			entry.dest_mode,
-			entry.delivery_mode,
-			entry.vector
-		);
-	}
-	}
-	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for_each_irq_cfg(cfg) {
-		struct irq_pin_list *entry = cfg->irq_2_pin;
-		if (!entry)
-			continue;
-		printk(KERN_DEBUG "IRQ%d ", cfg->irq);
-		for (;;) {
-			printk("-> %d:%d", entry->apic, entry->pin);
-			if (!entry->next)
-				break;
-			entry = entry->next;
-		}
-		printk("\n");
-	}
-
-	printk(KERN_INFO ".................................... done.\n");
-
-	return;
-}
-
-__apicdebuginit(void) print_APIC_bitfield(int base)
-{
-	unsigned int v;
-	int i, j;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-	for (i = 0; i < 8; i++) {
-		v = apic_read(base + i*0x10);
-		for (j = 0; j < 32; j++) {
-			if (v & (1<<j))
-				printk("1");
-			else
-				printk("0");
-		}
-		printk("\n");
-	}
-}
-
-__apicdebuginit(void) print_local_APIC(void *dummy)
-{
-	unsigned int v, ver, maxlvt;
-	u64 icr;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
-		smp_processor_id(), hard_smp_processor_id());
-	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
-	v = apic_read(APIC_LVR);
-	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
-	ver = GET_APIC_VERSION(v);
-	maxlvt = lapic_get_maxlvt();
-
-	v = apic_read(APIC_TASKPRI);
-	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
-	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-		v = apic_read(APIC_ARBPRI);
-		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-			v & APIC_ARBPRI_MASK);
-		v = apic_read(APIC_PROCPRI);
-		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-	}
-
-	v = apic_read(APIC_EOI);
-	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
-	v = apic_read(APIC_RRR);
-	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
-	v = apic_read(APIC_LDR);
-	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-	v = apic_read(APIC_DFR);
-	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
-	v = apic_read(APIC_SPIV);
-	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
-	printk(KERN_DEBUG "... APIC ISR field:\n");
-	print_APIC_bitfield(APIC_ISR);
-	printk(KERN_DEBUG "... APIC TMR field:\n");
-	print_APIC_bitfield(APIC_TMR);
-	printk(KERN_DEBUG "... APIC IRR field:\n");
-	print_APIC_bitfield(APIC_IRR);
-
-	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
-		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-
-		v = apic_read(APIC_ESR);
-		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-	}
-
-	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
-
-	v = apic_read(APIC_LVTT);
-	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
-	if (maxlvt > 3) {                       /* PC is LVT#4. */
-		v = apic_read(APIC_LVTPC);
-		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
-	}
-	v = apic_read(APIC_LVT0);
-	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
-	v = apic_read(APIC_LVT1);
-	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
-	if (maxlvt > 2) {			/* ERR is LVT#3. */
-		v = apic_read(APIC_LVTERR);
-		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_TMICT);
-	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
-	v = apic_read(APIC_TMCCT);
-	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
-	v = apic_read(APIC_TDCR);
-	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-	printk("\n");
-}
-
-__apicdebuginit(void) print_all_local_APICs(void)
-{
-	on_each_cpu(print_local_APIC, NULL, 1);
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
-	unsigned int v;
-	unsigned long flags;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "\nprinting PIC contents\n");
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-
-	v = inb(0xa1) << 8 | inb(0x21);
-	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
-
-	v = inb(0xa0) << 8 | inb(0x20);
-	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
-
-	outb(0x0b,0xa0);
-	outb(0x0b,0x20);
-	v = inb(0xa0) << 8 | inb(0x20);
-	outb(0x0a,0xa0);
-	outb(0x0a,0x20);
-
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
-
-	v = inb(0x4d1) << 8 | inb(0x4d0);
-	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-__apicdebuginit(int) print_all_ICs(void)
-{
-	print_PIC();
-	print_all_local_APICs();
-	print_IO_APIC();
-
-	return 0;
-}
-
-fs_initcall(print_all_ICs);
-
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-void __init enable_IO_APIC(void)
-{
-	union IO_APIC_reg_01 reg_01;
-	int i8259_apic, i8259_pin;
-	int apic;
-	unsigned long flags;
-
-#ifdef CONFIG_X86_32
-	int i;
-	if (!pirqs_enabled)
-		for (i = 0; i < MAX_PIRQS; i++)
-			pirq_entries[i] = -1;
-#endif
-
-	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_01.raw = io_apic_read(apic, 1);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-	}
-	for(apic = 0; apic < nr_ioapics; apic++) {
-		int pin;
-		/* See if any of the pins is in ExtINT mode */
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			struct IO_APIC_route_entry entry;
-			entry = ioapic_read_entry(apic, pin);
-
-			/* If the interrupt line is enabled and in ExtInt mode
-			 * I have found the pin where the i8259 is connected.
-			 */
-			if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
-				ioapic_i8259.apic = apic;
-				ioapic_i8259.pin  = pin;
-				goto found_i8259;
-			}
-		}
-	}
- found_i8259:
-	/* Look to see what if the MP table has reported the ExtINT */
-	/* If we could not find the appropriate pin by looking at the ioapic
-	 * the i8259 probably is not connected the ioapic but give the
-	 * mptable a chance anyway.
-	 */
-	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
-	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
-	/* Trust the MP table if nothing is setup in the hardware */
-	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
-		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
-		ioapic_i8259.pin  = i8259_pin;
-		ioapic_i8259.apic = i8259_apic;
-	}
-	/* Complain if the MP table and the hardware disagree */
-	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
-		(i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
-	{
-		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
-	}
-
-	/*
-	 * Do not trust the IO-APIC being empty at bootup
-	 */
-	clear_IO_APIC();
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
-	/*
-	 * Clear the IO-APIC before rebooting:
-	 */
-	clear_IO_APIC();
-
-	/*
-	 * If the i8259 is routed through an IOAPIC
-	 * Put that IOAPIC in virtual wire mode
-	 * so legacy interrupts can be delivered.
-	 */
-	if (ioapic_i8259.pin != -1) {
-		struct IO_APIC_route_entry entry;
-
-		memset(&entry, 0, sizeof(entry));
-		entry.mask            = 0; /* Enabled */
-		entry.trigger         = 0; /* Edge */
-		entry.irr             = 0;
-		entry.polarity        = 0; /* High */
-		entry.delivery_status = 0;
-		entry.dest_mode       = 0; /* Physical */
-		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
-		entry.vector          = 0;
-		entry.dest            = read_apic_id();
-
-		/*
-		 * Add it to the IO-APIC irq-routing table:
-		 */
-		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
-	}
-
-	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-}
-
-#ifdef CONFIG_X86_32
-/*
- * function to set the IO-APIC physical IDs based on the
- * values stored in the MPC table.
- *
- * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
- */
-
-static void __init setup_ioapic_ids_from_mpc(void)
-{
-	union IO_APIC_reg_00 reg_00;
-	physid_mask_t phys_id_present_map;
-	int apic;
-	int i;
-	unsigned char old_id;
-	unsigned long flags;
-
-	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
-		return;
-
-	/*
-	 * Don't check I/O APIC IDs for xAPIC systems.  They have
-	 * no meaning without the serial APIC bus.
-	 */
-	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return;
-	/*
-	 * This is broken; anything with a real cpu count has to
-	 * circumvent this idiocy regardless.
-	 */
-	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
-
-	/*
-	 * Set the IOAPIC ID to the value stored in the MPC table.
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-
-		/* Read the register 0 value */
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-
-		old_id = mp_ioapics[apic].mp_apicid;
-
-		if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
-			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-				apic, mp_ioapics[apic].mp_apicid);
-			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-				reg_00.bits.ID);
-			mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
-		}
-
-		/*
-		 * Sanity check, is the ID really free? Every APIC in a
-		 * system must have a unique ID or we get lots of nice
-		 * 'stuck on smp_invalidate_needed IPI wait' messages.
-		 */
-		if (check_apicid_used(phys_id_present_map,
-					mp_ioapics[apic].mp_apicid)) {
-			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-				apic, mp_ioapics[apic].mp_apicid);
-			for (i = 0; i < get_physical_broadcast(); i++)
-				if (!physid_isset(i, phys_id_present_map))
-					break;
-			if (i >= get_physical_broadcast())
-				panic("Max APIC ID exceeded!\n");
-			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-				i);
-			physid_set(i, phys_id_present_map);
-			mp_ioapics[apic].mp_apicid = i;
-		} else {
-			physid_mask_t tmp;
-			tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
-			apic_printk(APIC_VERBOSE, "Setting %d in the "
-					"phys_id_present_map\n",
-					mp_ioapics[apic].mp_apicid);
-			physids_or(phys_id_present_map, phys_id_present_map, tmp);
-		}
-
-
-		/*
-		 * We need to adjust the IRQ routing table
-		 * if the ID changed.
-		 */
-		if (old_id != mp_ioapics[apic].mp_apicid)
-			for (i = 0; i < mp_irq_entries; i++)
-				if (mp_irqs[i].mp_dstapic == old_id)
-					mp_irqs[i].mp_dstapic
-						= mp_ioapics[apic].mp_apicid;
-
-		/*
-		 * Read the right value from the MPC table and
-		 * write it into the ID register.
-		 */
-		apic_printk(APIC_VERBOSE, KERN_INFO
-			"...changing IO-APIC physical APIC ID to %d ...",
-			mp_ioapics[apic].mp_apicid);
-
-		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
-		spin_lock_irqsave(&ioapic_lock, flags);
-
-		/*
-		 * Sanity check
-		 */
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
-			printk("could not set ID!\n");
-		else
-			apic_printk(APIC_VERBOSE, " ok.\n");
-	}
-}
-#endif
-
-int no_timer_check __initdata;
-
-static int __init notimercheck(char *s)
-{
-	no_timer_check = 1;
-	return 1;
-}
-__setup("no_timer_check", notimercheck);
-
-/*
- * There is a nasty bug in some older SMP boards, their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- *	- timer IRQ defaults to IO-APIC IRQ
- *	- if this function detects that timer IRQs are defunct, then we fall
- *	  back to ISA timer IRQs
- */
-static int __init timer_irq_works(void)
-{
-	unsigned long t1 = jiffies;
-	unsigned long flags;
-
-	if (no_timer_check)
-		return 1;
-
-	local_save_flags(flags);
-	local_irq_enable();
-	/* Let ten ticks pass... */
-	mdelay((10 * 1000) / HZ);
-	local_irq_restore(flags);
-
-	/*
-	 * Expect a few ticks at least, to be sure some possible
-	 * glue logic does not lock up after one or two first
-	 * ticks in a non-ExtINT mode.  Also the local APIC
-	 * might have cached one ExtINT interrupt.  Finally, at
-	 * least one tick may be lost due to delays.
-	 */
-
-	/* jiffies wrap? */
-	if (time_after(jiffies, t1 + 4))
-		return 1;
-	return 0;
-}
-
-/*
- * In the SMP+IOAPIC case it might happen that there are an unspecified
- * number of pending IRQ events unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
- * better to do it this way as thus we do not have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
-
-/*
- * Starting up a edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * return 1 to indicate that is was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- */
-
-static unsigned int startup_ioapic_irq(unsigned int irq)
-{
-	int was_pending = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	if (irq < 16) {
-		disable_8259A_irq(irq);
-		if (i8259A_irq_pending(irq))
-			was_pending = 1;
-	}
-	__unmask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return was_pending;
-}
-
-#ifdef CONFIG_X86_64
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	return 1;
-}
-#else
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-        send_IPI_self(irq_cfg(irq)->vector);
-
-        return 1;
-}
-#endif
-
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-
-#ifdef CONFIG_SMP
-
-#ifdef CONFIG_INTR_REMAP
-static void ir_irq_migration(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
-
-/*
- * Migrate the IO-APIC irq in the presence of intr-remapping.
- *
- * For edge triggered, irq migration is a simple atomic update(of vector
- * and cpu destination) of IRTE and flush the hardware cache.
- *
- * For level triggered, we need to modify the io-apic RTE aswell with the update
- * vector information, along with modifying IRTE with vector and destination.
- * So irq migration for level triggered is little  bit more complex compared to
- * edge triggered migration. But the good news is, we use the same algorithm
- * for level triggered migration as we have today, only difference being,
- * we now initiate the irq migration from process context instead of the
- * interrupt context.
- *
- * In future, when we do a directed EOI (combined with cpu EOI broadcast
- * suppression) to the IO-APIC, level triggered irq migration will also be
- * as simple as edge triggered migration and we can do the irq migration
- * with a simple atomic update to IO-APIC RTE.
- */
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	struct irq_desc *desc;
-	cpumask_t tmp, cleanup_mask;
-	struct irte irte;
-	int modify_ioapic_rte;
-	unsigned int dest;
-	unsigned long flags;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (get_irte(irq, &irte))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	desc = irq_to_desc(irq);
-	modify_ioapic_rte = desc->status & IRQ_LEVEL;
-	if (modify_ioapic_rte) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		__target_IO_APIC_irq(irq, dest, cfg->vector);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-	}
-
-	irte.vector = cfg->vector;
-	irte.dest_id = IRTE_DEST(dest);
-
-	/*
-	 * Modified the IRTE and flushes the Interrupt entry cache.
-	 */
-	modify_irte(irq, &irte);
-
-	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-
-	desc->affinity = mask;
-}
-
-static int migrate_irq_remapped_level(int irq)
-{
-	int ret = -1;
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	mask_IO_APIC_irq(irq);
-
-	if (io_apic_level_ack_pending(irq)) {
-		/*
-	 	 * Interrupt in progress. Migrating irq now will change the
-		 * vector information in the IO-APIC RTE and that will confuse
-		 * the EOI broadcast performed by cpu.
-		 * So, delay the irq migration to the next instance.
-		 */
-		schedule_delayed_work(&ir_migration_work, 1);
-		goto unmask;
-	}
-
-	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq(irq, desc->pending_mask);
-
-	ret = 0;
-	desc->status &= ~IRQ_MOVE_PENDING;
-	cpus_clear(desc->pending_mask);
-
-unmask:
-	unmask_IO_APIC_irq(irq);
-	return ret;
-}
-
-static void ir_irq_migration(struct work_struct *work)
-{
-	unsigned int irq;
-	struct irq_desc *desc;
-
-	for_each_irq_desc(irq, desc) {
-		if (desc->status & IRQ_MOVE_PENDING) {
-			unsigned long flags;
-
-			spin_lock_irqsave(&desc->lock, flags);
-			if (!desc->chip->set_affinity ||
-			    !(desc->status & IRQ_MOVE_PENDING)) {
-				desc->status &= ~IRQ_MOVE_PENDING;
-				spin_unlock_irqrestore(&desc->lock, flags);
-				continue;
-			}
-
-			desc->chip->set_affinity(irq, desc->pending_mask);
-			spin_unlock_irqrestore(&desc->lock, flags);
-		}
-	}
-}
-
-/*
- * Migrates the IRQ destination in the process context.
- */
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	if (desc->status & IRQ_LEVEL) {
-		desc->status |= IRQ_MOVE_PENDING;
-		desc->pending_mask = mask;
-		migrate_irq_remapped_level(irq);
-		return;
-	}
-
-	migrate_ioapic_irq(irq, mask);
-}
-#endif
-
-asmlinkage void smp_irq_move_cleanup_interrupt(void)
-{
-	unsigned vector, me;
-	ack_APIC_irq();
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-
-	me = smp_processor_id();
-	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		unsigned int irq;
-		struct irq_desc *desc;
-		struct irq_cfg *cfg;
-		irq = __get_cpu_var(vector_irq)[vector];
-
-		desc = irq_to_desc(irq);
-		if (!desc)
-			continue;
-
-		cfg = irq_cfg(irq);
-		spin_lock(&desc->lock);
-		if (!cfg->move_cleanup_count)
-			goto unlock;
-
-		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
-			goto unlock;
-
-		__get_cpu_var(vector_irq)[vector] = -1;
-		cfg->move_cleanup_count--;
-unlock:
-		spin_unlock(&desc->lock);
-	}
-
-	irq_exit();
-}
-
-static void irq_complete_move(unsigned int irq)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned vector, me;
-
-	if (likely(!cfg->move_in_progress))
-		return;
-
-	vector = ~get_irq_regs()->orig_ax;
-	me = smp_processor_id();
-	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
-		cpumask_t cleanup_mask;
-
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-}
-#else
-static inline void irq_complete_move(unsigned int irq) {}
-#endif
-#ifdef CONFIG_INTR_REMAP
-static void ack_x2apic_level(unsigned int irq)
-{
-	ack_x2APIC_irq();
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
-	ack_x2APIC_irq();
-}
-#endif
-
-static void ack_apic_edge(unsigned int irq)
-{
-	irq_complete_move(irq);
-	move_native_irq(irq);
-	ack_APIC_irq();
-}
-
-#ifdef CONFIG_X86_64
-static void ack_apic_level(unsigned int irq)
-{
-	int do_unmask_irq = 0;
-
-	irq_complete_move(irq);
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	/* If we are moving the irq we need to mask it */
-	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
-		do_unmask_irq = 1;
-		mask_IO_APIC_irq(irq);
-	}
-#endif
-
-	/*
-	 * We must acknowledge the irq before we move it or the acknowledge will
-	 * not propagate properly.
-	 */
-	ack_APIC_irq();
-
-	/* Now we can move and renable the irq */
-	if (unlikely(do_unmask_irq)) {
-		/* Only migrate the irq if the ack has been received.
-		 *
-		 * On rare occasions the broadcast level triggered ack gets
-		 * delayed going to ioapics, and if we reprogram the
-		 * vector while Remote IRR is still set the irq will never
-		 * fire again.
-		 *
-		 * To prevent this scenario we read the Remote IRR bit
-		 * of the ioapic.  This has two effects.
-		 * - On any sane system the read of the ioapic will
-		 *   flush writes (and acks) going to the ioapic from
-		 *   this cpu.
-		 * - We get to see if the ACK has actually been delivered.
-		 *
-		 * Based on failed experiments of reprogramming the
-		 * ioapic entry from outside of irq context starting
-		 * with masking the ioapic entry and then polling until
-		 * Remote IRR was clear before reprogramming the
-		 * ioapic I don't trust the Remote IRR bit to be
-		 * completey accurate.
-		 *
-		 * However there appears to be no other way to plug
-		 * this race, so if the Remote IRR bit is not
-		 * accurate and is causing problems then it is a hardware bug
-		 * and you can go talk to the chipset vendor about it.
-		 */
-		if (!io_apic_level_ack_pending(irq))
-			move_masked_irq(irq);
-		unmask_IO_APIC_irq(irq);
-	}
-}
-#else
-atomic_t irq_mis_count;
-static void ack_apic_level(unsigned int irq)
-{
-	unsigned long v;
-	int i;
-
-	irq_complete_move(irq);
-	move_native_irq(irq);
-	/*
-	* It appears there is an erratum which affects at least version 0x11
-	* of I/O APIC (that's the 82093AA and cores integrated into various
-	* chipsets).  Under certain conditions a level-triggered interrupt is
-	* erroneously delivered as edge-triggered one but the respective IRR
-	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
-	* message but it will never arrive and further interrupts are blocked
-	* from the source.  The exact reason is so far unknown, but the
-	* phenomenon was observed when two consecutive interrupt requests
-	* from a given source get delivered to the same CPU and the source is
-	* temporarily disabled in between.
-	*
-	* A workaround is to simulate an EOI message manually.  We achieve it
-	* by setting the trigger mode to edge and then to level when the edge
-	* trigger mode gets detected in the TMR of a local APIC for a
-	* level-triggered interrupt.  We mask the source for the time of the
-	* operation to prevent an edge-triggered interrupt escaping meanwhile.
-	* The idea is from Manfred Spraul.  --macro
-	*/
-	i = irq_cfg(irq)->vector;
-
-	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
-	ack_APIC_irq();
-
-	if (!(v & (1 << (i & 0x1f)))) {
-		atomic_inc(&irq_mis_count);
-		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(irq);
-		__unmask_and_level_IO_APIC_irq(irq);
-		spin_unlock(&ioapic_lock);
-	}
-}
-#endif
-
-static struct irq_chip ioapic_chip __read_mostly = {
-	.name 		= "IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_apic_edge,
-	.eoi 		= ack_apic_level,
-#ifdef CONFIG_SMP
-	.set_affinity 	= set_ioapic_affinity_irq,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip ir_ioapic_chip __read_mostly = {
-	.name 		= "IR-IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_x2apic_edge,
-	.eoi 		= ack_x2apic_level,
-#ifdef CONFIG_SMP
-	.set_affinity 	= set_ir_ioapic_affinity_irq,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-#endif
-
-static inline void init_IO_APIC_traps(void)
-{
-	int irq;
-	struct irq_desc *desc;
-	struct irq_cfg *cfg;
-
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	for_each_irq_cfg(cfg) {
-		irq = cfg->irq;
-		if (IO_APIC_IRQ(irq) && !cfg->vector) {
-			/*
-			 * Hmm.. We don't have an entry for this,
-			 * so default to an old-fashioned 8259
-			 * interrupt if we can..
-			 */
-			if (irq < 16)
-				make_8259A_irq(irq);
-			else {
-				desc = irq_to_desc(irq);
-				/* Strange. Oh, well.. */
-				desc->chip = &no_irq_chip;
-			}
-		}
-	}
-}
-
-/*
- * The local APIC irq-chip implementation:
- */
-
-static void mask_lapic_irq(unsigned int irq)
-{
-	unsigned long v;
-
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void unmask_lapic_irq(unsigned int irq)
-{
-	unsigned long v;
-
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static void ack_lapic_irq (unsigned int irq)
-{
-	ack_APIC_irq();
-}
-
-static struct irq_chip lapic_chip __read_mostly = {
-	.name		= "local-APIC",
-	.mask		= mask_lapic_irq,
-	.unmask		= unmask_lapic_irq,
-	.ack		= ack_lapic_irq,
-};
-
-static void lapic_register_intr(int irq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-	desc->status &= ~IRQ_LEVEL;
-	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
-				      "edge");
-}
-
-static void __init setup_nmi(void)
-{
-	/*
-	 * Dirty trick to enable the NMI watchdog ...
-	 * We put the 8259A master into AEOI mode and
-	 * unmask on all local APICs LVT0 as NMI.
-	 *
-	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-	 * is from Maciej W. Rozycki - so we do not have to EOI from
-	 * the NMI handler or the timer interrupt.
-	 */
-	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-	enable_NMI_through_LVT0();
-
-	apic_printk(APIC_VERBOSE, " done.\n");
-}
-
-/*
- * This looks a bit hackish but it's about the only one way of sending
- * a few INTA cycles to 8259As and any associated glue logic.  ICR does
- * not support the ExtINT mode, unfortunately.  We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA.  --macro
- */
-static inline void __init unlock_ExtINT_logic(void)
-{
-	int apic, pin, i;
-	struct IO_APIC_route_entry entry0, entry1;
-	unsigned char save_control, save_freq_select;
-
-	pin  = find_isa_irq_pin(8, mp_INT);
-	if (pin == -1) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-	apic = find_isa_irq_apic(8, mp_INT);
-	if (apic == -1) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-
-	entry0 = ioapic_read_entry(apic, pin);
-	clear_IO_APIC_pin(apic, pin);
-
-	memset(&entry1, 0, sizeof(entry1));
-
-	entry1.dest_mode = 0;			/* physical delivery */
-	entry1.mask = 0;			/* unmask IRQ now */
-	entry1.dest = hard_smp_processor_id();
-	entry1.delivery_mode = dest_ExtINT;
-	entry1.polarity = entry0.polarity;
-	entry1.trigger = 0;
-	entry1.vector = 0;
-
-	ioapic_write_entry(apic, pin, entry1);
-
-	save_control = CMOS_READ(RTC_CONTROL);
-	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
-		   RTC_FREQ_SELECT);
-	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
-	i = 100;
-	while (i-- > 0) {
-		mdelay(10);
-		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
-			i -= 10;
-	}
-
-	CMOS_WRITE(save_control, RTC_CONTROL);
-	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-	clear_IO_APIC_pin(apic, pin);
-
-	ioapic_write_entry(apic, pin, entry0);
-}
-
-static int disable_timer_pin_1 __initdata;
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
-	disable_timer_pin_1 = 1;
-	return 0;
-}
-early_param("disable_timer_pin_1", disable_timer_pin_setup);
-
-int timer_through_8259 __initdata;
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
- * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- *
- * FIXME: really need to revamp this for all platforms.
- */
-static inline void __init check_timer(void)
-{
-	struct irq_cfg *cfg = irq_cfg(0);
-	int apic1, pin1, apic2, pin2;
-	unsigned long flags;
-	unsigned int ver;
-	int no_pin1 = 0;
-
-	local_irq_save(flags);
-
-        ver = apic_read(APIC_LVR);
-        ver = GET_APIC_VERSION(ver);
-
-	/*
-	 * get/set the timer IRQ vector:
-	 */
-	disable_8259A_irq(0);
-	assign_irq_vector(0, TARGET_CPUS);
-
-	/*
-	 * As IRQ0 is to be enabled in the 8259A, the virtual
-	 * wire has to be disabled in the local APIC.  Also
-	 * timer interrupts need to be acknowledged manually in
-	 * the 8259A for the i82489DX when using the NMI
-	 * watchdog as that APIC treats NMIs as level-triggered.
-	 * The AEOI mode will finish them in the 8259A
-	 * automatically.
-	 */
-	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-	init_8259A(1);
-#ifdef CONFIG_X86_32
-	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-#endif
-
-	pin1  = find_isa_irq_pin(0, mp_INT);
-	apic1 = find_isa_irq_apic(0, mp_INT);
-	pin2  = ioapic_i8259.pin;
-	apic2 = ioapic_i8259.apic;
-
-	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
-		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
-		    cfg->vector, apic1, pin1, apic2, pin2);
-
-	/*
-	 * Some BIOS writers are clueless and report the ExtINTA
-	 * I/O APIC input from the cascaded 8259A as the timer
-	 * interrupt input.  So just in case, if only one pin
-	 * was found above, try it both directly and through the
-	 * 8259A.
-	 */
-	if (pin1 == -1) {
-#ifdef CONFIG_INTR_REMAP
-		if (intr_remapping_enabled)
-			panic("BIOS bug: timer not connected to IO-APIC");
-#endif
-		pin1 = pin2;
-		apic1 = apic2;
-		no_pin1 = 1;
-	} else if (pin2 == -1) {
-		pin2 = pin1;
-		apic2 = apic1;
-	}
-
-	if (pin1 != -1) {
-		/*
-		 * Ok, does IRQ0 through the IOAPIC work?
-		 */
-		if (no_pin1) {
-			add_pin_to_irq(0, apic1, pin1);
-			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
-		}
-		unmask_IO_APIC_irq(0);
-		if (timer_irq_works()) {
-			if (nmi_watchdog == NMI_IO_APIC) {
-				setup_nmi();
-				enable_8259A_irq(0);
-			}
-			if (disable_timer_pin_1 > 0)
-				clear_IO_APIC_pin(0, pin1);
-			goto out;
-		}
-#ifdef CONFIG_INTR_REMAP
-		if (intr_remapping_enabled)
-			panic("timer doesn't work through Interrupt-remapped IO-APIC");
-#endif
-		clear_IO_APIC_pin(apic1, pin1);
-		if (!no_pin1)
-			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
-				    "8254 timer not connected to IO-APIC\n");
-
-		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
-			    "(IRQ0) through the 8259A ...\n");
-		apic_printk(APIC_QUIET, KERN_INFO
-			    "..... (found apic %d pin %d) ...\n", apic2, pin2);
-		/*
-		 * legacy devices should be connected to IO APIC #0
-		 */
-		replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
-		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-		unmask_IO_APIC_irq(0);
-		enable_8259A_irq(0);
-		if (timer_irq_works()) {
-			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
-			timer_through_8259 = 1;
-			if (nmi_watchdog == NMI_IO_APIC) {
-				disable_8259A_irq(0);
-				setup_nmi();
-				enable_8259A_irq(0);
-			}
-			goto out;
-		}
-		/*
-		 * Cleanup, just in case ...
-		 */
-		disable_8259A_irq(0);
-		clear_IO_APIC_pin(apic2, pin2);
-		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
-	}
-
-	if (nmi_watchdog == NMI_IO_APIC) {
-		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-			    "through the IO-APIC - disabling NMI Watchdog!\n");
-		nmi_watchdog = NMI_NONE;
-	}
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-
-	apic_printk(APIC_QUIET, KERN_INFO
-		    "...trying to set up timer as Virtual Wire IRQ...\n");
-
-	lapic_register_intr(0);
-	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
-	enable_8259A_irq(0);
-
-	if (timer_irq_works()) {
-		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-		goto out;
-	}
-	disable_8259A_irq(0);
-	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
-	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
-
-	apic_printk(APIC_QUIET, KERN_INFO
-		    "...trying to set up timer as ExtINT IRQ...\n");
-
-	init_8259A(0);
-	make_8259A_irq(0);
-	apic_write(APIC_LVT0, APIC_DM_EXTINT);
-
-	unlock_ExtINT_logic();
-
-	if (timer_irq_works()) {
-		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-		goto out;
-	}
-	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
-	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
-		"report.  Then try booting with the 'noapic' option.\n");
-out:
-	local_irq_restore(flags);
-}
-
-/*
- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
- * to devices.  However there may be an I/O APIC pin available for
- * this interrupt regardless.  The pin may be left unconnected, but
- * typically it will be reused as an ExtINT cascade interrupt for
- * the master 8259A.  In the MPS case such a pin will normally be
- * reported as an ExtINT interrupt in the MP table.  With ACPI
- * there is no provision for ExtINT interrupts, and in the absence
- * of an override it would be treated as an ordinary ISA I/O APIC
- * interrupt, that is edge-triggered and unmasked by default.  We
- * used to do this, but it caused problems on some systems because
- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
- * the same ExtINT cascade interrupt to drive the local APIC of the
- * bootstrap processor.  Therefore we refrain from routing IRQ2 to
- * the I/O APIC in all cases now.  No actual device should request
- * it anyway.  --macro
- */
-#define PIC_IRQS	(1 << PIC_CASCADE_IR)
-
-void __init setup_IO_APIC(void)
-{
-
-#ifdef CONFIG_X86_32
-	enable_IO_APIC();
-#else
-	/*
-	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
-	 */
-#endif
-
-	io_apic_irqs = ~PIC_IRQS;
-
-	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-        /*
-         * Set up IO-APIC IRQ routing.
-         */
-#ifdef CONFIG_X86_32
-        if (!acpi_ioapic)
-                setup_ioapic_ids_from_mpc();
-#endif
-	sync_Arb_IDs();
-	setup_IO_APIC_irqs();
-	init_IO_APIC_traps();
-	check_timer();
-}
-
-/*
- *      Called after all the initialization is done. If we didnt find any
- *      APIC bugs then we can allow the modify fast path
- */
-
-static int __init io_apic_bug_finalize(void)
-{
-        if (sis_apic_bug == -1)
-                sis_apic_bug = 0;
-        return 0;
-}
-
-late_initcall(io_apic_bug_finalize);
-
-struct sysfs_ioapic_data {
-	struct sys_device dev;
-	struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
-
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
-	int i;
-
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
-		*entry = ioapic_read_entry(dev->id, i);
-
-	return 0;
-}
-
-static int ioapic_resume(struct sys_device *dev)
-{
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
-	unsigned long flags;
-	union IO_APIC_reg_00 reg_00;
-	int i;
-
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(dev->id, 0);
-	if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
-		reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
-		io_apic_write(dev->id, 0, reg_00.raw);
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-		ioapic_write_entry(dev->id, i, entry[i]);
-
-	return 0;
-}
-
-static struct sysdev_class ioapic_sysdev_class = {
-	.name = "ioapic",
-	.suspend = ioapic_suspend,
-	.resume = ioapic_resume,
-};
-
-static int __init ioapic_init_sysfs(void)
-{
-	struct sys_device * dev;
-	int i, size, error;
-
-	error = sysdev_class_register(&ioapic_sysdev_class);
-	if (error)
-		return error;
-
-	for (i = 0; i < nr_ioapics; i++ ) {
-		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
-			* sizeof(struct IO_APIC_route_entry);
-		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
-		if (!mp_ioapic_data[i]) {
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-		dev = &mp_ioapic_data[i]->dev;
-		dev->id = i;
-		dev->cls = &ioapic_sysdev_class;
-		error = sysdev_register(dev);
-		if (error) {
-			kfree(mp_ioapic_data[i]);
-			mp_ioapic_data[i] = NULL;
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-	}
-
-	return 0;
-}
-
-device_initcall(ioapic_init_sysfs);
-
-/*
- * Dynamic irq allocate and deallocation
- */
-unsigned int create_irq_nr(unsigned int irq_want)
-{
-	/* Allocate an unused irq */
-	unsigned int irq;
-	unsigned int new;
-	unsigned long flags;
-	struct irq_cfg *cfg_new;
-
-#ifndef CONFIG_HAVE_SPARSE_IRQ
-	irq_want = nr_irqs - 1;
-#endif
-
-	irq = 0;
-	spin_lock_irqsave(&vector_lock, flags);
-	for (new = irq_want; new > 0; new--) {
-		if (platform_legacy_irq(new))
-			continue;
-		cfg_new = irq_cfg(new);
-		if (cfg_new && cfg_new->vector != 0)
-			continue;
-		/* check if need to create one */
-		if (!cfg_new)
-			cfg_new = irq_cfg_alloc(new);
-		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
-			irq = new;
-		break;
-	}
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	if (irq > 0) {
-		dynamic_irq_init(irq);
-	}
-	return irq;
-}
-
-int create_irq(void)
-{
-	int irq;
-
-	irq = create_irq_nr(nr_irqs - 1);
-
-	if (irq == 0)
-		irq = -1;
-
-	return irq;
-}
-
-void destroy_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	dynamic_irq_cleanup(irq);
-
-#ifdef CONFIG_INTR_REMAP
-	free_irte(irq);
-#endif
-	spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq);
-	spin_unlock_irqrestore(&vector_lock, flags);
-}
-
-/*
- * MSI message composition
- */
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
-{
-	struct irq_cfg *cfg;
-	int err;
-	unsigned dest;
-	cpumask_t tmp;
-
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
-	if (err)
-		return err;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, tmp);
-	dest = cpu_mask_to_apicid(tmp);
-
-#ifdef CONFIG_INTR_REMAP
-	if (irq_remapped(irq)) {
-		struct irte irte;
-		int ir_index;
-		u16 sub_handle;
-
-		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
-		BUG_ON(ir_index == -1);
-
-		memset (&irte, 0, sizeof(irte));
-
-		irte.present = 1;
-		irte.dst_mode = INT_DEST_MODE;
-		irte.trigger_mode = 0; /* edge */
-		irte.dlvry_mode = INT_DELIVERY_MODE;
-		irte.vector = cfg->vector;
-		irte.dest_id = IRTE_DEST(dest);
-
-		modify_irte(irq, &irte);
-
-		msg->address_hi = MSI_ADDR_BASE_HI;
-		msg->data = sub_handle;
-		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
-				  MSI_ADDR_IR_SHV |
-				  MSI_ADDR_IR_INDEX1(ir_index) |
-				  MSI_ADDR_IR_INDEX2(ir_index);
-	} else
-#endif
-	{
-		msg->address_hi = MSI_ADDR_BASE_HI;
-		msg->address_lo =
-			MSI_ADDR_BASE_LO |
-			((INT_DEST_MODE == 0) ?
-				MSI_ADDR_DEST_MODE_PHYSICAL:
-				MSI_ADDR_DEST_MODE_LOGICAL) |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				MSI_ADDR_REDIRECTION_CPU:
-				MSI_ADDR_REDIRECTION_LOWPRI) |
-			MSI_ADDR_DEST_ID(dest);
-
-		msg->data =
-			MSI_DATA_TRIGGER_EDGE |
-			MSI_DATA_LEVEL_ASSERT |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				MSI_DATA_DELIVERY_FIXED:
-				MSI_DATA_DELIVERY_LOWPRI) |
-			MSI_DATA_VECTOR(cfg->vector);
-	}
-	return err;
-}
-
-#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	struct msi_msg msg;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	read_msi_msg(irq, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	write_msi_msg(irq, &msg);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-
-#ifdef CONFIG_INTR_REMAP
-/*
- * Migrate the MSI irq to another cpumask. This migration is
- * done in the process context using interrupt-remapping hardware.
- */
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	unsigned int dest;
-	cpumask_t tmp, cleanup_mask;
-	struct irte irte;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (get_irte(irq, &irte))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	irte.vector = cfg->vector;
-	irte.dest_id = IRTE_DEST(dest);
-
-	/*
-	 * atomically update the IRTE with the new destination and vector.
-	 */
-	modify_irte(irq, &irte);
-
-	/*
-	 * After this point, all the interrupts will start arriving
-	 * at the new destination. So, time to cleanup the previous
-	 * vector allocation.
-	 */
-	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-#endif
-#endif /* CONFIG_SMP */
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
-	.name		= "PCI-MSI",
-	.unmask		= unmask_msi_irq,
-	.mask		= mask_msi_irq,
-	.ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity	= set_msi_irq_affinity,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip msi_ir_chip = {
-	.name		= "IR-PCI-MSI",
-	.unmask		= unmask_msi_irq,
-	.mask		= mask_msi_irq,
-	.ack		= ack_x2apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity	= ir_set_msi_irq_affinity,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-/*
- * Map the PCI dev to the corresponding remapping hardware unit
- * and allocate 'nvec' consecutive interrupt-remapping table entries
- * in it.
- */
-static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
-{
-	struct intel_iommu *iommu;
-	int index;
-
-	iommu = map_dev_to_ir(dev);
-	if (!iommu) {
-		printk(KERN_ERR
-		       "Unable to map PCI %s to iommu\n", pci_name(dev));
-		return -ENOENT;
-	}
-
-	index = alloc_irte(iommu, irq, nvec);
-	if (index < 0) {
-		printk(KERN_ERR
-		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
-		        pci_name(dev));
-		return -ENOSPC;
-	}
-	return index;
-}
-#endif
-
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
-{
-	int ret;
-	struct msi_msg msg;
-
-	ret = msi_compose_msg(dev, irq, &msg);
-	if (ret < 0)
-		return ret;
-
-	set_irq_msi(irq, desc);
-	write_msi_msg(irq, &msg);
-
-#ifdef CONFIG_INTR_REMAP
-	if (irq_remapped(irq)) {
-		struct irq_desc *desc = irq_to_desc(irq);
-		/*
-		 * irq migration in process context
-		 */
-		desc->status |= IRQ_MOVE_PCNTXT;
-		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
-	} else
-#endif
-		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
-
-	return 0;
-}
-
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
-{
-	unsigned int irq;
-
-	irq = dev->bus->number;
-	irq <<= 8;
-	irq |= dev->devfn;
-	irq <<= 12;
-
-	return irq;
-}
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
-{
-	unsigned int irq;
-	int ret;
-	unsigned int irq_want;
-
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
-
-	irq = create_irq_nr(irq_want);
-	if (irq == 0)
-		return -1;
-
-#ifdef CONFIG_INTR_REMAP
-	if (!intr_remapping_enabled)
-		goto no_ir;
-
-	ret = msi_alloc_irte(dev, irq, 1);
-	if (ret < 0)
-		goto error;
-no_ir:
-#endif
-	ret = setup_msi_irq(dev, desc, irq);
-	if (ret < 0) {
-		destroy_irq(irq);
-		return ret;
-	}
-	return 0;
-
-#ifdef CONFIG_INTR_REMAP
-error:
-	destroy_irq(irq);
-	return ret;
-#endif
-}
-
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	unsigned int irq;
-	int ret, sub_handle;
-	struct msi_desc *desc;
-	unsigned int irq_want;
-
-#ifdef CONFIG_INTR_REMAP
-	struct intel_iommu *iommu = 0;
-	int index = 0;
-#endif
-
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
-	sub_handle = 0;
-	list_for_each_entry(desc, &dev->msi_list, list) {
-		irq = create_irq_nr(irq_want--);
-		if (irq == 0)
-			return -1;
-#ifdef CONFIG_INTR_REMAP
-		if (!intr_remapping_enabled)
-			goto no_ir;
-
-		if (!sub_handle) {
-			/*
-			 * allocate the consecutive block of IRTE's
-			 * for 'nvec'
-			 */
-			index = msi_alloc_irte(dev, irq, nvec);
-			if (index < 0) {
-				ret = index;
-				goto error;
-			}
-		} else {
-			iommu = map_dev_to_ir(dev);
-			if (!iommu) {
-				ret = -ENOENT;
-				goto error;
-			}
-			/*
-			 * setup the mapping between the irq and the IRTE
-			 * base index, the sub_handle pointing to the
-			 * appropriate interrupt remap table entry.
-			 */
-			set_irte_irq(irq, iommu, index, sub_handle);
-		}
-no_ir:
-#endif
-		ret = setup_msi_irq(dev, desc, irq);
-		if (ret < 0)
-			goto error;
-		sub_handle++;
-	}
-	return 0;
-
-error:
-	destroy_irq(irq);
-	return ret;
-}
-
-void arch_teardown_msi_irq(unsigned int irq)
-{
-	destroy_irq(irq);
-}
-
-#ifdef CONFIG_DMAR
-#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	struct msi_msg msg;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	dmar_msi_read(irq, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	dmar_msi_write(irq, &msg);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-#endif /* CONFIG_SMP */
-
-struct irq_chip dmar_msi_type = {
-	.name = "DMAR_MSI",
-	.unmask = dmar_msi_unmask,
-	.mask = dmar_msi_mask,
-	.ack = ack_apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity = dmar_msi_set_affinity,
-#endif
-	.retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_dmar_msi(unsigned int irq)
-{
-	int ret;
-	struct msi_msg msg;
-
-	ret = msi_compose_msg(NULL, irq, &msg);
-	if (ret < 0)
-		return ret;
-	dmar_msi_write(irq, &msg);
-	set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
-		"edge");
-	return 0;
-}
-#endif
-
-#endif /* CONFIG_PCI_MSI */
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-#ifdef CONFIG_SMP
-
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
-	struct ht_irq_msg msg;
-	fetch_ht_irq_msg(irq, &msg);
-
-	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
-	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
-	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
-	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
-	write_ht_irq_msg(irq, &msg);
-}
-
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	target_ht_irq(irq, dest, cfg->vector);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-#endif
-
-static struct irq_chip ht_irq_chip = {
-	.name		= "PCI-HT",
-	.mask		= mask_ht_irq,
-	.unmask		= unmask_ht_irq,
-	.ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity	= set_ht_irq_affinity,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
-	struct irq_cfg *cfg;
-	int err;
-	cpumask_t tmp;
-
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
-	if (!err) {
-		struct ht_irq_msg msg;
-		unsigned dest;
-
-		cfg = irq_cfg(irq);
-		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(tmp);
-
-		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
-		msg.address_lo =
-			HT_IRQ_LOW_BASE |
-			HT_IRQ_LOW_DEST_ID(dest) |
-			HT_IRQ_LOW_VECTOR(cfg->vector) |
-			((INT_DEST_MODE == 0) ?
-				HT_IRQ_LOW_DM_PHYSICAL :
-				HT_IRQ_LOW_DM_LOGICAL) |
-			HT_IRQ_LOW_RQEOI_EDGE |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				HT_IRQ_LOW_MT_FIXED :
-				HT_IRQ_LOW_MT_ARBITRATED) |
-			HT_IRQ_LOW_IRQ_MASKED;
-
-		write_ht_irq_msg(irq, &msg);
-
-		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
-					      handle_edge_irq, "edge");
-	}
-	return err;
-}
-#endif /* CONFIG_HT_IRQ */
-
-/* --------------------------------------------------------------------------
-                          ACPI-based IOAPIC Configuration
-   -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-#ifdef CONFIG_X86_32
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
-{
-	union IO_APIC_reg_00 reg_00;
-	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
-	physid_mask_t tmp;
-	unsigned long flags;
-	int i = 0;
-
-	/*
-	 * The P4 platform supports up to 256 APIC IDs on two separate APIC
-	 * buses (one for LAPICs, one for IOAPICs), where predecessors only
-	 * supports up to 16 on one shared APIC bus.
-	 *
-	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
-	 *      advantage of new APIC bus architecture.
-	 */
-
-	if (physids_empty(apic_id_map))
-		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(ioapic, 0);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	if (apic_id >= get_physical_broadcast()) {
-		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
-			"%d\n", ioapic, apic_id, reg_00.bits.ID);
-		apic_id = reg_00.bits.ID;
-	}
-
-	/*
-	 * Every APIC in a system must have a unique ID or we get lots of nice
-	 * 'stuck on smp_invalidate_needed IPI wait' messages.
-	 */
-	if (check_apicid_used(apic_id_map, apic_id)) {
-
-		for (i = 0; i < get_physical_broadcast(); i++) {
-			if (!check_apicid_used(apic_id_map, i))
-				break;
-		}
-
-		if (i == get_physical_broadcast())
-			panic("Max apic_id exceeded!\n");
-
-		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
-			"trying %d\n", ioapic, apic_id, i);
-
-		apic_id = i;
-	}
-
-	tmp = apicid_to_cpu_present(apic_id);
-	physids_or(apic_id_map, apic_id_map, tmp);
-
-	if (reg_00.bits.ID != apic_id) {
-		reg_00.bits.ID = apic_id;
-
-		spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(ioapic, 0, reg_00.raw);
-		reg_00.raw = io_apic_read(ioapic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-
-		/* Sanity check */
-		if (reg_00.bits.ID != apic_id) {
-			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
-			return -1;
-		}
-	}
-
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
-
-	return apic_id;
-}
-
-int __init io_apic_get_version(int ioapic)
-{
-	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_01.raw = io_apic_read(ioapic, 1);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return reg_01.bits.version;
-}
-#endif
-
-int __init io_apic_get_redir_entries (int ioapic)
-{
-	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_01.raw = io_apic_read(ioapic, 1);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return reg_01.bits.entries;
-}
-
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
-	if (!IO_APIC_IRQ(irq)) {
-		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-			ioapic);
-		return -EINVAL;
-	}
-
-	/*
-	 * IRQs < 16 are already in the irq_2_pin[] map
-	 */
-	if (irq >= 16)
-		add_pin_to_irq(irq, ioapic, pin);
-
-	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
-
-	return 0;
-}
-
-
-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
-{
-	int i;
-
-	if (skip_ioapic_setup)
-		return -1;
-
-	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mp_irqtype == mp_INT &&
-		    mp_irqs[i].mp_srcbusirq == bus_irq)
-			break;
-	if (i >= mp_irq_entries)
-		return -1;
-
-	*trigger = irq_trigger(i);
-	*polarity = irq_polarity(i);
-	return 0;
-}
-
-#endif /* CONFIG_ACPI */
-
-/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
-	int pin, ioapic, irq, irq_entry;
-	struct irq_cfg *cfg;
-
-	if (skip_ioapic_setup == 1)
-		return;
-
-	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-			if (irq_entry == -1)
-				continue;
-			irq = pin_2_irq(irq_entry, ioapic, pin);
-
-			/* setup_IO_APIC_irqs could fail to get vector for some device
-			 * when you have too many devices, because at that time only boot
-			 * cpu is online.
-			 */
-			cfg = irq_cfg(irq);
-			if (!cfg->vector)
-				setup_IO_APIC_irq(ioapic, pin, irq,
-						  irq_trigger(irq_entry),
-						  irq_polarity(irq_entry));
-#ifdef CONFIG_INTR_REMAP
-			else if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
-#endif
-			else
-				set_ioapic_affinity_irq(irq, TARGET_CPUS);
-		}
-
-	}
-}
-#endif
-
-#ifdef CONFIG_X86_64
-#define IOAPIC_RESOURCE_NAME_SIZE 11
-
-static struct resource *ioapic_resources;
-
-static struct resource * __init ioapic_setup_resources(void)
-{
-	unsigned long n;
-	struct resource *res;
-	char *mem;
-	int i;
-
-	if (nr_ioapics <= 0)
-		return NULL;
-
-	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
-	n *= nr_ioapics;
-
-	mem = alloc_bootmem(n);
-	res = (void *)mem;
-
-	if (mem != NULL) {
-		mem += sizeof(struct resource) * nr_ioapics;
-
-		for (i = 0; i < nr_ioapics; i++) {
-			res[i].name = mem;
-			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-			sprintf(mem,  "IOAPIC %u", i);
-			mem += IOAPIC_RESOURCE_NAME_SIZE;
-		}
-	}
-
-	ioapic_resources = res;
-
-	return res;
-}
-#endif
-
-void __init ioapic_init_mappings(void)
-{
-	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-	int i;
-#ifdef CONFIG_X86_64
-	struct resource *ioapic_res;
-
-	ioapic_res = ioapic_setup_resources();
-#endif
-	for (i = 0; i < nr_ioapics; i++) {
-		if (smp_found_config) {
-			ioapic_phys = mp_ioapics[i].mp_apicaddr;
-#ifdef CONFIG_X86_32
-                        if (!ioapic_phys) {
-                                printk(KERN_ERR
-                                       "WARNING: bogus zero IO-APIC "
-                                       "address found in MPTABLE, "
-                                       "disabling IO/APIC support!\n");
-                                smp_found_config = 0;
-                                skip_ioapic_setup = 1;
-                                goto fake_ioapic_page;
-                        }
-#endif
-		} else {
-#ifdef CONFIG_X86_32
-fake_ioapic_page:
-#endif
-			ioapic_phys = (unsigned long)
-				alloc_bootmem_pages(PAGE_SIZE);
-			ioapic_phys = __pa(ioapic_phys);
-		}
-		set_fixmap_nocache(idx, ioapic_phys);
-		apic_printk(APIC_VERBOSE,
-			    "mapped IOAPIC to %08lx (%08lx)\n",
-			    __fix_to_virt(idx), ioapic_phys);
-		idx++;
-
-#ifdef CONFIG_X86_64
-		if (ioapic_res != NULL) {
-			ioapic_res->start = ioapic_phys;
-			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
-			ioapic_res++;
-		}
-#endif
-	}
-}
-
-#ifdef CONFIG_X86_64
-static int __init ioapic_insert_resources(void)
-{
-	int i;
-	struct resource *r = ioapic_resources;
-
-	if (!r) {
-		printk(KERN_ERR
-		       "IO APIC resources could be not be allocated.\n");
-		return -1;
-	}
-
-	for (i = 0; i < nr_ioapics; i++) {
-		insert_resource(&iomem_resource, r);
-		r++;
-	}
-
-	return 0;
-}
-
-/* Insert the IO APIC resources after PCI initialization has occured to handle
- * IO APICS that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);
-#endif
-- 
GitLab


From c691cc84529ec88ccb32b174535bb61875888c90 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:43 -0700
Subject: [PATCH 432/895] io_apic: make 32 bit have io_apic resource in
 /proc/iomem

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index fba6d6ee3480..7e303e0967a4 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3802,7 +3802,6 @@ void __init setup_ioapic_dest(void)
 }
 #endif
 
-#ifdef CONFIG_X86_64
 #define IOAPIC_RESOURCE_NAME_SIZE 11
 
 static struct resource *ioapic_resources;
@@ -3838,17 +3837,14 @@ static struct resource * __init ioapic_setup_resources(void)
 
 	return res;
 }
-#endif
 
 void __init ioapic_init_mappings(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
 	int i;
-#ifdef CONFIG_X86_64
 	struct resource *ioapic_res;
 
 	ioapic_res = ioapic_setup_resources();
-#endif
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
 			ioapic_phys = mp_ioapics[i].mp_apicaddr;
@@ -3877,17 +3873,14 @@ fake_ioapic_page:
 			    __fix_to_virt(idx), ioapic_phys);
 		idx++;
 
-#ifdef CONFIG_X86_64
 		if (ioapic_res != NULL) {
 			ioapic_res->start = ioapic_phys;
 			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
 			ioapic_res++;
 		}
-#endif
 	}
 }
 
-#ifdef CONFIG_X86_64
 static int __init ioapic_insert_resources(void)
 {
 	int i;
@@ -3910,4 +3903,3 @@ static int __init ioapic_insert_resources(void)
 /* Insert the IO APIC resources after PCI initialization has occured to handle
  * IO APICS that are mapped in on a BAR in PCI space. */
 late_initcall(ioapic_insert_resources);
-#endif
-- 
GitLab


From 42379b1122bab7f9aefdbd4b7004a6fa89dfbae5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:45 -0700
Subject: [PATCH 433/895] pci: change msi-x vector to 32bit

we are using 28bit pci (bus/dev/fn + 12 bits) as irq number, so the
cache for irq number should be 32 bit too.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/scsi/qla2xxx/qla_def.h | 2 +-
 include/linux/pci.h            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 83c819216771..f25f41a499e5 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2108,7 +2108,7 @@ struct scsi_qla_host;
 
 struct qla_msix_entry {
 	int have_irq;
-	uint16_t msix_vector;
+	uint32_t msix_vector;
 	uint16_t msix_entry;
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 98dc6243a706..1f8db240ca48 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -723,7 +723,7 @@ enum pci_dma_burst_strategy {
 };
 
 struct msix_entry {
-	u16 	vector;	/* kernel uses to write allocated vector */
+	u32	vector;	/* kernel uses to write allocated vector */
 	u16	entry;	/* driver uses to specify entry, OS writes */
 };
 
-- 
GitLab


From bcd562607f17b0c9f9ae96af849894dd06645f63 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:46 -0700
Subject: [PATCH 434/895] x86: irq: interrupt array size should be NR_VECTORS

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/hw_irq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 51c787d17cbc..39c7a4745d25 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -115,7 +115,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
 #ifdef CONFIG_X86_32
-extern void (*const interrupt[NR_IRQS])(void);
+extern void (*const interrupt[NR_VECTORS])(void);
 #endif
 
 typedef int vector_irq_t[NR_VECTORS];
-- 
GitLab


From 4e738e2f307113feaedebae147c3e0d072e39648 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:47 -0700
Subject: [PATCH 435/895] x86: unify mask_IO_APIC_irq

use MACRO for 32 bit too

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 80 ++++++++++-----------------------------
 1 file changed, 21 insertions(+), 59 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7e303e0967a4..099696109ca8 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -610,18 +610,7 @@ static void __init replace_pin_at_irq(unsigned int irq,
 		add_pin_to_irq(irq, newapic, newpin);
 }
 
-#ifdef CONFIG_X86_64
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL)					\
+#define __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL)		\
 									\
 {									\
 	int pin;							\
@@ -636,7 +625,8 @@ static inline void io_apic_sync(unsigned int apic)
 			break;						\
 		pin = entry->pin;					\
 		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
-		reg ACTION;						\
+		reg ACTION_DISABLE;					\
+		reg ACTION_ENABLE;					\
 		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
 		FINAL;							\
 		if (!entry->next)					\
@@ -645,66 +635,38 @@ static inline void io_apic_sync(unsigned int apic)
 	}								\
 }
 
-#define DO_ACTION(name,R,ACTION, FINAL)					\
+#define DO_ACTION(name,R, ACTION_ENABLE, ACTION_DISABLE, FINAL)		\
 									\
 	static void name##_IO_APIC_irq (unsigned int irq)		\
-	__DO_ACTION(R, ACTION, FINAL)
-
-/* mask = 1 */
-DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
+	__DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL)
 
 /* mask = 0 */
-DO_ACTION(__unmask,	0, &= ~IO_APIC_REDIR_MASKED, )
+DO_ACTION(__unmask,	0, |= 0, &= ~IO_APIC_REDIR_MASKED, )
 
-#else
-
-static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
+#ifdef CONFIG_X86_64
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
 {
-	struct irq_cfg *cfg;
-	struct irq_pin_list *entry;
-	unsigned int pin, reg;
-
-	cfg = irq_cfg(irq);
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		if (!entry)
-			break;
-		pin = entry->pin;
-		reg = io_apic_read(entry->apic, 0x10 + pin*2);
-		reg &= ~disable;
-		reg |= enable;
-		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	readl(&io_apic->data);
 }
 
 /* mask = 1 */
-static void __mask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
-}
+DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, &= ~0, io_apic_sync(entry->apic))
 
-/* mask = 0 */
-static void __unmask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
-}
+#else
+
+/* mask = 1 */
+DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, &= ~0, )
 
 /* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
-				IO_APIC_REDIR_LEVEL_TRIGGER);
-}
+DO_ACTION(__mask_and_edge, 0, |= IO_APIC_REDIR_MASKED, &= ~IO_APIC_REDIR_LEVEL_TRIGGER, )
 
 /* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
-				IO_APIC_REDIR_MASKED);
-}
+DO_ACTION(__unmask_and_level, 0, |= IO_APIC_REDIR_LEVEL_TRIGGER, &= ~IO_APIC_REDIR_MASKED, )
 
 #endif
 
-- 
GitLab


From 3eb2cce84beae8fd41de950569cafd5bca7edd5d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:48 -0700
Subject: [PATCH 436/895] x86: unify ack_apic_edge

use code in 64 to replace
	move_native_irq(irq, desc);
in 32 bit

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 73 +++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 099696109ca8..a466b04ad5a4 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -389,7 +389,6 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 	writel(value, &io_apic->data);
 }
 
-#ifdef CONFIG_X86_64
 static bool io_apic_level_ack_pending(unsigned int irq)
 {
 	struct irq_pin_list *entry;
@@ -419,7 +418,6 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 
 	return false;
 }
-#endif
 
 union entry_union {
 	struct { u32 w1, w2; };
@@ -2398,9 +2396,16 @@ static void ack_apic_edge(unsigned int irq)
 	ack_APIC_irq();
 }
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_32
+atomic_t irq_mis_count;
+#endif
+
 static void ack_apic_level(unsigned int irq)
 {
+#ifdef CONFIG_X86_32
+	unsigned long v;
+	int i;
+#endif
 	int do_unmask_irq = 0;
 
 	irq_complete_move(irq);
@@ -2412,6 +2417,31 @@ static void ack_apic_level(unsigned int irq)
 	}
 #endif
 
+#ifdef CONFIG_X86_32
+	/*
+	* It appears there is an erratum which affects at least version 0x11
+	* of I/O APIC (that's the 82093AA and cores integrated into various
+	* chipsets).  Under certain conditions a level-triggered interrupt is
+	* erroneously delivered as edge-triggered one but the respective IRR
+	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
+	* message but it will never arrive and further interrupts are blocked
+	* from the source.  The exact reason is so far unknown, but the
+	* phenomenon was observed when two consecutive interrupt requests
+	* from a given source get delivered to the same CPU and the source is
+	* temporarily disabled in between.
+	*
+	* A workaround is to simulate an EOI message manually.  We achieve it
+	* by setting the trigger mode to edge and then to level when the edge
+	* trigger mode gets detected in the TMR of a local APIC for a
+	* level-triggered interrupt.  We mask the source for the time of the
+	* operation to prevent an edge-triggered interrupt escaping meanwhile.
+	* The idea is from Manfred Spraul.  --macro
+	*/
+	i = irq_cfg(irq)->vector;
+
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+#endif
+
 	/*
 	 * We must acknowledge the irq before we move it or the acknowledge will
 	 * not propagate properly.
@@ -2450,41 +2480,8 @@ static void ack_apic_level(unsigned int irq)
 			move_masked_irq(irq);
 		unmask_IO_APIC_irq(irq);
 	}
-}
-#else
-atomic_t irq_mis_count;
-static void ack_apic_level(unsigned int irq)
-{
-	unsigned long v;
-	int i;
-
-	irq_complete_move(irq);
-	move_native_irq(irq);
-	/*
-	* It appears there is an erratum which affects at least version 0x11
-	* of I/O APIC (that's the 82093AA and cores integrated into various
-	* chipsets).  Under certain conditions a level-triggered interrupt is
-	* erroneously delivered as edge-triggered one but the respective IRR
-	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
-	* message but it will never arrive and further interrupts are blocked
-	* from the source.  The exact reason is so far unknown, but the
-	* phenomenon was observed when two consecutive interrupt requests
-	* from a given source get delivered to the same CPU and the source is
-	* temporarily disabled in between.
-	*
-	* A workaround is to simulate an EOI message manually.  We achieve it
-	* by setting the trigger mode to edge and then to level when the edge
-	* trigger mode gets detected in the TMR of a local APIC for a
-	* level-triggered interrupt.  We mask the source for the time of the
-	* operation to prevent an edge-triggered interrupt escaping meanwhile.
-	* The idea is from Manfred Spraul.  --macro
-	*/
-	i = irq_cfg(irq)->vector;
-
-	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
-	ack_APIC_irq();
 
+#ifdef CONFIG_X86_32
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
 		spin_lock(&ioapic_lock);
@@ -2492,8 +2489,8 @@ static void ack_apic_level(unsigned int irq)
 		__unmask_and_level_IO_APIC_irq(irq);
 		spin_unlock(&ioapic_lock);
 	}
-}
 #endif
+}
 
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name 		= "IO-APIC",
-- 
GitLab


From 29ccbbf232c035b8c7ff0c5060fbe30a66ed9b99 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:49 -0700
Subject: [PATCH 437/895] x86: remove first_free_entry/pin_map_size

no user now

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 7 -------
 arch/x86/kernel/setup.c   | 4 ----
 include/asm-x86/irq.h     | 3 ---
 3 files changed, 14 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index a466b04ad5a4..5de2d38812aa 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -72,13 +72,6 @@ int sis_apic_bug = -1;
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(vector_lock);
 
-int first_free_entry;
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-int pin_map_size;
-
 /*
  * # of IRQ routing registers
  */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 02e3a6697977..d90c659b70e9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1074,10 +1074,6 @@ void __init setup_arch(char **cmdline_p)
 		nr_irqs = 32 * nr_cpu_ids + 224;
 	init_cpu_to_node();
 #endif
-#ifdef CONFIG_X86_IO_APIC
-	pin_map_size = nr_irqs * 2;
-	first_free_entry = nr_irqs;
-#endif
 
 	init_apic_mappings();
 	ioapic_init_mappings();
diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h
index 2a130c44f5de..1e5f2909c1db 100644
--- a/include/asm-x86/irq.h
+++ b/include/asm-x86/irq.h
@@ -10,9 +10,6 @@
 #include <asm/apicdef.h>
 #include <asm/irq_vectors.h>
 
-extern int pin_map_size;
-extern int first_free_entry;
-
 static inline int irq_canonicalize(int irq)
 {
 	return ((irq == 2) ? 9 : irq);
-- 
GitLab


From ffd5aae7817fba22c5c3e304a31c44fa0a4e9a97 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:50 -0700
Subject: [PATCH 438/895] x86: print local APIC of APs one by one

instead of print that of all APs at the time

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 5de2d38812aa..bf0e66d73030 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1777,7 +1777,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 
 __apicdebuginit(void) print_all_local_APICs(void)
 {
-	on_each_cpu(print_local_APIC, NULL, 1);
+	int cpu;
+
+	preempt_disable();
+	for_each_online_cpu(cpu)
+		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+	preempt_enable();
 }
 
 __apicdebuginit(void) print_PIC(void)
-- 
GitLab


From 8f09cd20a24c5f13c571bc73ddcd47be0af3b70f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:51 -0700
Subject: [PATCH 439/895] x86: make HAVE_SPARSE_IRQ support selectable

Ingo said sparse_irq is some intrusive. need to make it selectable

to make it simple, remove irq_desc as parameter in some functions.
(ack, eoi, set_affinity).
may need to make member if irq_chip to take irq_desc, or struct irq later.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/Kconfig              |  3 --
 arch/x86/Kconfig          | 12 ++++++-
 arch/x86/kernel/io_apic.c | 71 +++++++++++++++++++++++++++------------
 arch/x86/kernel/irq_32.c  |  2 +-
 arch/x86/kernel/irq_64.c  |  2 +-
 5 files changed, 62 insertions(+), 28 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index b36762246265..c8a7c2eb6490 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -106,6 +106,3 @@ config HAVE_CLK
 config HAVE_DYN_ARRAY
 	def_bool n
 
-config HAVE_SPARSE_IRQ
-	def_bool n
-
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b157e94637bf..600584b7a497 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,7 +34,6 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_DYN_ARRAY
-	select HAVE_SPARSE_IRQ
 
 config ARCH_DEFCONFIG
 	string
@@ -238,6 +237,17 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
+config HAVE_SPARSE_IRQ
+	bool "Support sparse irq numbering"
+	depends on PCI_MSI || HT_IRQ
+	default y
+	help
+	  This enables support for sparse irq, esp for msi/msi-x. the irq
+	  number will be bus/dev/fn + 12bit. You may need if you have lots of
+	  cards supports msi-x installed.
+
+	  If you don't know what to do here, say Y.
+
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index bf0e66d73030..f853b667fa5c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -107,7 +107,9 @@ struct irq_cfg;
 struct irq_pin_list;
 struct irq_cfg {
 	unsigned int irq;
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 	struct irq_cfg *next;
+#endif
 	struct irq_pin_list *irq_2_pin;
 	cpumask_t domain;
 	cpumask_t old_domain;
@@ -137,20 +139,6 @@ static struct irq_cfg irq_cfg_legacy[] __initdata = {
 };
 
 static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
-/* need to be biger than size of irq_cfg_legacy */
-static int nr_irq_cfg = 32;
-
-static int __init parse_nr_irq_cfg(char *arg)
-{
-	if (arg) {
-		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
-		if (nr_irq_cfg < 32)
-			nr_irq_cfg = 32;
-	}
-	return 0;
-}
-
-early_param("nr_irq_cfg", parse_nr_irq_cfg);
 
 static void init_one_irq_cfg(struct irq_cfg *cfg)
 {
@@ -158,7 +146,9 @@ static void init_one_irq_cfg(struct irq_cfg *cfg)
 }
 
 static struct irq_cfg *irq_cfgx;
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 static struct irq_cfg *irq_cfgx_free;
+#endif
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -174,15 +164,34 @@ static void __init init_work(void *data)
 	for (i = legacy_count; i < *da->nr; i++)
 		init_one_irq_cfg(&cfg[i]);
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 	for (i = 1; i < *da->nr; i++)
 		cfg[i-1].next = &cfg[i];
 
 	irq_cfgx_free = &irq_cfgx[legacy_count];
 	irq_cfgx[legacy_count - 1].next = NULL;
+#endif
+}
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+/* need to be biger than size of irq_cfg_legacy */
+static int nr_irq_cfg = 32;
+
+static int __init parse_nr_irq_cfg(char *arg)
+{
+	if (arg) {
+		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
+		if (nr_irq_cfg < 32)
+			nr_irq_cfg = 32;
+	}
+	return 0;
 }
 
-#define for_each_irq_cfg(cfg)		\
-	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
+early_param("nr_irq_cfg", parse_nr_irq_cfg);
+
+#define for_each_irq_cfg(irqX, cfg)           \
+        for (cfg = irq_cfgx, irqX = cfg->irq; cfg; cfg = cfg->next, irqX = cfg ? cfg->irq : -1U)
+
 
 DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
 
@@ -273,7 +282,26 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 #endif
 	return cfg;
 }
+#else
+
+#define for_each_irq_cfg(irq, cfg)		\
+	for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq])
+
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
 
+struct irq_cfg *irq_cfg(unsigned int irq)
+{
+        if (irq < nr_irqs)
+                return &irq_cfgx[irq];
+
+        return NULL;
+}
+struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+        return irq_cfg(irq);
+}
+
+#endif
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -1282,11 +1310,10 @@ void __setup_vector_irq(int cpu)
 	struct irq_cfg *cfg;
 
 	/* Mark the inuse vectors */
-	for_each_irq_cfg(cfg) {
+	for_each_irq_cfg(irq, cfg) {
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
-		irq = cfg->irq;
 		per_cpu(vector_irq, cpu)[vector] = irq;
 	}
 	/* Mark the free vectors */
@@ -1563,6 +1590,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
 	struct irq_cfg *cfg;
+	unsigned int irq;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1651,11 +1679,11 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for_each_irq_cfg(cfg) {
+	for_each_irq_cfg(irq, cfg) {
 		struct irq_pin_list *entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
-		printk(KERN_DEBUG "IRQ%d ", cfg->irq);
+		printk(KERN_DEBUG "IRQ%d ", irq);
 		for (;;) {
 			printk("-> %d:%d", entry->apic, entry->pin);
 			if (!entry->next)
@@ -2535,8 +2563,7 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for_each_irq_cfg(cfg) {
-		irq = cfg->irq;
+	for_each_irq_cfg(irq, cfg) {
 		if (IO_APIC_IRQ(irq) && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index cc929f2f84f1..b2e1082cf5ad 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -269,7 +269,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	struct irqaction * action;
 	unsigned long flags;
 	unsigned int entries;
-	struct irq_desc *desc;
+	struct irq_desc *desc = NULL;
 	int tail = 0;
 
 #ifdef CONFIG_HAVE_SPARSE_IRQ
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 348a11168c2b..c2fca89a3f7e 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -74,7 +74,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	struct irqaction * action;
 	unsigned long flags;
 	unsigned int entries;
-	struct irq_desc *desc;
+	struct irq_desc *desc = NULL;
 	int tail = 0;
 
 #ifdef CONFIG_HAVE_SPARSE_IRQ
-- 
GitLab


From 9d6a4d0823b3b8e29156f5e698b5a68687afad32 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:52 -0700
Subject: [PATCH 440/895] x86: probe nr_irqs even only mptable is used

for !CONFIG_HAVE_SPARSE_IRQ

fix:

 In file included from arch/x86/kernel/early-quirks.c:18:
 include/asm/io_apic.h: In function 'probe_nr_irqs':
 include/asm/io_apic.h:209: error: 'NR_IRQS' undeclared (first use in this function)
 include/asm/io_apic.h:209: error: (Each undeclared identifier is reported only once
 include/asm/io_apic.h:209: error: for each function it appears in.)

v2: fix by Ingo

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 25 ---------------------
 arch/x86/kernel/io_apic.c   | 43 ++++++++++++++++++++++++++-----------
 arch/x86/kernel/setup.c     |  6 +++---
 include/asm-x86/io_apic.h   |  8 +++++++
 include/asm-x86/mpspec.h    |  1 -
 5 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 3e9d163fd92f..5fef4fece4a5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -957,29 +957,6 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	nr_ioapics++;
 }
 
-int get_nr_irqs_via_madt(void)
-{
-	int idx;
-	int nr = 0;
-
-	for (idx = 0; idx < nr_ioapics; idx++) {
-		if (mp_ioapic_routing[idx].gsi_end > nr)
-			nr = mp_ioapic_routing[idx].gsi_end;
-	}
-
-	nr++;
-
-	/* double it for hotplug and msi and nmi */
-	nr <<= 1;
-
-	/* something wrong ? */
-	if (nr < 32)
-		nr = 32;
-
-	return nr;
-
-}
-
 static void assign_to_mp_irq(struct mp_config_intsrc *m,
 				    struct mp_config_intsrc *mp_irq)
 {
@@ -1278,8 +1255,6 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 	}
 
 
-	nr_irqs = get_nr_irqs_via_madt();
-
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
 				  nr_irqs);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f853b667fa5c..f7e80262cbbb 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3596,6 +3596,36 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
+int __init io_apic_get_redir_entries (int ioapic)
+{
+	union IO_APIC_reg_01	reg_01;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_01.raw = io_apic_read(ioapic, 1);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return reg_01.bits.entries;
+}
+
+int __init probe_nr_irqs(void)
+{
+	int idx;
+	int nr = 0;
+
+	for (idx = 0; idx < nr_ioapics; idx++)
+		nr += io_apic_get_redir_entries(idx);
+
+	/* double it for hotplug and msi and nmi */
+	nr <<= 1;
+
+	/* something wrong ? */
+	if (nr < 32)
+		nr = 32;
+
+	return nr;
+}
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
@@ -3690,19 +3720,6 @@ int __init io_apic_get_version(int ioapic)
 }
 #endif
 
-int __init io_apic_get_redir_entries (int ioapic)
-{
-	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_01.raw = io_apic_read(ioapic, 1);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return reg_01.bits.entries;
-}
-
-
 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
 {
 	if (!IO_APIC_IRQ(irq)) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d90c659b70e9..61335306696a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1069,15 +1069,15 @@ void __init setup_arch(char **cmdline_p)
 	prefill_possible_map();
 
 #ifdef CONFIG_X86_64
-	/* need to wait for nr_cpu_ids settle down */
-	if (nr_irqs == NR_IRQS)
-		nr_irqs = 32 * nr_cpu_ids + 224;
 	init_cpu_to_node();
 #endif
 
 	init_apic_mappings();
 	ioapic_init_mappings();
 
+	/* need to wait for io_apic is mapped */
+	nr_irqs = probe_nr_irqs();
+
 	kvm_guest_init();
 
 	e820_reserve_resources();
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index ce818292d2c7..d35cbd7aa587 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
+#include <asm/irq_vectors.h>
 
 /*
  * Intel IO-APIC support for SMP and UP systems.
@@ -187,10 +188,17 @@ extern void restore_IO_APIC_setup(void);
 extern void reinit_intr_remapped_IO_APIC(int);
 #endif
 
+extern int probe_nr_irqs(void);
+
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
 static const int timer_through_8259 = 0;
 static inline void ioapic_init_mappings(void) { }
+
+static inline int probe_nr_irqs(void)
+{
+	return NR_IRQS;
+}
 #endif
 
 #endif /* ASM_X86__IO_APIC_H */
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index a0748021250b..be2241a818f1 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -60,7 +60,6 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 				   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
 extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
-extern int get_nr_irqs_via_madt(void);
 #ifdef CONFIG_X86_IO_APIC
 extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
 				u32 gsi, int triggering, int polarity);
-- 
GitLab


From 2b46b37de73296018da02c2a421ac2a9cbccfa9f Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Tue, 19 Aug 2008 20:50:53 -0700
Subject: [PATCH 441/895] xen: fix memory access violation bug when
 CONFIG_HAVE_SPARSE_IRQ is enabled

When sparse IRQs are enabled, it is not safe to assume an IRQ descriptor
exists for every possible IRQ.  This patch causes init_evtchn_cpu_bindings
to skip initialisation of IRQ descriptors which don't exist.

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/xen/events.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 56ace47f24d6..e6d47e8ca1ac 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -141,6 +141,8 @@ static void init_evtchn_cpu_bindings(void)
 	/* By default all event channels notify CPU#0. */
 	for (i = 0; i < nr_irqs; i++) {
 		struct irq_desc *desc = irq_to_desc(i);
+		if (!desc)
+			continue;
 		desc->affinity = cpumask_of_cpu(0);
 	}
 #endif
-- 
GitLab


From e7f5ed8d6e921c6200ce5e2549a86db629f2dd11 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 19 Aug 2008 20:50:54 -0700
Subject: [PATCH 442/895] dyn_array: split dyn_array functions from init/main.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/Makefile    |   2 +-
 init/dyn_array.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++
 init/main.c      | 118 --------------------------------------------
 3 files changed, 125 insertions(+), 119 deletions(-)
 create mode 100644 init/dyn_array.c

diff --git a/init/Makefile b/init/Makefile
index 4a243df426f7..dc5eeca6eb6d 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y                          := main.o version.o mounts.o
+obj-y                          := main.o dyn_array.o version.o mounts.o
 ifneq ($(CONFIG_BLK_DEV_INITRD),y)
 obj-y                          += noinitramfs.o
 else
diff --git a/init/dyn_array.c b/init/dyn_array.c
new file mode 100644
index 000000000000..c4cd902a1180
--- /dev/null
+++ b/init/dyn_array.c
@@ -0,0 +1,124 @@
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/kallsyms.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+
+void __init pre_alloc_dyn_array(void)
+{
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long total_size = 0, size, phys;
+	unsigned long max_align = 1;
+	struct dyn_array **daa;
+	char *ptr;
+
+	/* get the total size at first */
+	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("dyn_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
+			da->size, *da->nr, da->align);
+		total_size += roundup(size, da->align);
+		if (da->align > max_align)
+			max_align = da->align;
+	}
+	if (total_size)
+		printk(KERN_DEBUG "dyn_array total_size: %#lx\n",
+			 total_size);
+	else
+		return;
+
+	/* allocate them all together */
+	max_align = max_t(unsigned long, max_align, PAGE_SIZE);
+	ptr = __alloc_bootmem_nopanic(total_size, max_align, 0);
+	if (!ptr)
+		panic("Can not alloc dyn_alloc\n");
+
+	phys = virt_to_phys(ptr);
+	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("dyn_array %s ", da->name);
+
+		phys = roundup(phys, da->align);
+		*da->name = phys_to_virt(phys);
+		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
+
+		phys += size;
+
+		if (da->init_work)
+			da->init_work(da);
+	}
+#else
+#ifdef CONFIF_GENERIC_HARDIRQS
+	unsigned int i;
+
+	for (i = 0; i < NR_IRQS; i++)
+		irq_desc[i].irq = i;
+#endif
+#endif
+}
+
+unsigned long __init per_cpu_dyn_array_size(unsigned long *align)
+{
+	unsigned long total_size = 0;
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long size;
+	struct dyn_array **daa;
+	unsigned max_align = 1;
+
+	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
+			da->size, *da->nr, da->align);
+		total_size += roundup(size, da->align);
+		if (da->align > max_align)
+			max_align = da->align;
+	}
+	if (total_size) {
+		printk(KERN_DEBUG "per_cpu_dyn_array total_size: %#lx\n",
+			 total_size);
+		*align = max_align;
+	}
+#endif
+	return total_size;
+}
+
+void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
+{
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long size, phys;
+	struct dyn_array **daa;
+	unsigned long addr;
+	void **array;
+
+	phys = virt_to_phys(ptr);
+	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
+
+		phys = roundup(phys, da->align);
+		addr = (unsigned long)da->name;
+		addr += per_cpu_offset(cpu);
+		array = (void **)addr;
+		*array = phys_to_virt(phys);
+		*da->name = *array; /* so init_work could use it directly */
+		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
+
+		phys += size;
+
+		if (da->init_work) {
+			da->init_work(da);
+		}
+	}
+#endif
+}
diff --git a/init/main.c b/init/main.c
index 0d2e60144f83..e81cf427d9c7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -542,124 +542,6 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
-void pre_alloc_dyn_array(void)
-{
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long total_size = 0, size, phys;
-	unsigned long max_align = 1;
-	struct dyn_array **daa;
-	char *ptr;
-
-	/* get the total size at first */
-	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("dyn_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
-			da->size, *da->nr, da->align);
-		total_size += roundup(size, da->align);
-		if (da->align > max_align)
-			max_align = da->align;
-	}
-	if (total_size)
-		printk(KERN_DEBUG "dyn_array total_size: %#lx\n",
-			 total_size);
-	else
-		return;
-
-	/* allocate them all together */
-	max_align = max_t(unsigned long, max_align, PAGE_SIZE);
-	ptr = __alloc_bootmem_nopanic(total_size, max_align, 0);
-	if (!ptr)
-		panic("Can not alloc dyn_alloc\n");
-
-	phys = virt_to_phys(ptr);
-	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("dyn_array %s ", da->name);
-
-		phys = roundup(phys, da->align);
-		*da->name = phys_to_virt(phys);
-		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
-
-		phys += size;
-
-		if (da->init_work)
-			da->init_work(da);
-	}
-#else
-#ifdef CONFIF_GENERIC_HARDIRQS
-	unsigned int i;
-
-	for (i = 0; i < NR_IRQS; i++)
-		irq_desc[i].irq = i;
-#endif
-#endif
-}
-
-unsigned long per_cpu_dyn_array_size(unsigned long *align)
-{
-	unsigned long total_size = 0;
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long size;
-	struct dyn_array **daa;
-	unsigned max_align = 1;
-
-	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
-			da->size, *da->nr, da->align);
-		total_size += roundup(size, da->align);
-		if (da->align > max_align)
-			max_align = da->align;
-	}
-	if (total_size) {
-		printk(KERN_DEBUG "per_cpu_dyn_array total_size: %#lx\n",
-			 total_size);
-		*align = max_align;
-	}
-#endif
-	return total_size;
-}
-
-void per_cpu_alloc_dyn_array(int cpu, char *ptr)
-{
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long size, phys;
-	struct dyn_array **daa;
-	unsigned long addr;
-	void **array;
-
-	phys = virt_to_phys(ptr);
-	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
-
-		phys = roundup(phys, da->align);
-		addr = (unsigned long)da->name;
-		addr += per_cpu_offset(cpu);
-		array = (void **)addr;
-		*array = phys_to_virt(phys);
-		*da->name = *array; /* so init_work could use it directly */
-		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
-
-		phys += size;
-
-		if (da->init_work) {
-			da->init_work(da);
-		}
-	}
-#endif
-}
-
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
-- 
GitLab


From bf9d3cf73e8caf0b3ae0b7f508a9f251536f5ff4 Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Mon, 18 Aug 2008 22:17:08 -0700
Subject: [PATCH 443/895] xen: Fix bug `do_IRQ: cannot handle IRQ -1 vector 0x6
 cpu 1'

Following commit 9c3f2468d8339866d9ef6a25aae31a8909c6be0d, do_IRQ()
looks up the IRQ number in the per-cpu variable vector_irq.

This commit makes Xen initialise an identity vector_irq map for both X86_32 and X86_64.

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/irq.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 28b85ab8422e..bb042608c602 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -21,7 +21,6 @@ void xen_force_evtchn_callback(void)
 
 static void __init __xen_init_IRQ(void)
 {
-#ifdef CONFIG_X86_64
 	int i;
 
 	/* Create identity vector->irq map */
@@ -31,7 +30,6 @@ static void __init __xen_init_IRQ(void)
 		for_each_possible_cpu(cpu)
 			per_cpu(vector_irq, cpu)[i] = i;
 	}
-#endif	/* CONFIG_X86_64 */
 
 	xen_init_IRQ();
 }
-- 
GitLab


From 0c425cec64eb0c0d0dd7037c21a25585cbe3636c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 18 Aug 2008 13:04:26 +0200
Subject: [PATCH 444/895] warning: fix arch x86 kernel io_apic c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix warning:

  arch/x86/kernel/io_apic.c: In function â€˜print_local_APICâ€™:
  arch/x86/kernel/io_apic.c:1786: warning: format â€˜%08xâ€™ expects type â€˜unsigned intâ€™, but argument 2 has type â€˜u64â€™
  arch/x86/kernel/io_apic.c:1787: warning: format â€˜%08xâ€™ expects type â€˜unsigned intâ€™, but argument 2 has type â€˜u64â€™

By creating uniform behavior on 32-bit and 64-bit and printing out the ICR
value in two 32-bit words.

Code has changed:

   text	   data	    bss	    dec	    hex	filename
  22901	  19650	  17040	  59591	   e8c7	io_apic.o.before
  22899	  19650	  17040	  59589	   e8c5	io_apic.o.after

Due to the 32-bit cast narrowing the printed out value on 64-bit.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f7e80262cbbb..34c74cf5c244 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1774,8 +1774,8 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	}
 
 	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
+	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
 
 	v = apic_read(APIC_LVTT);
 	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-- 
GitLab


From 7ddfb650c7ef7a33a5ef11c0fdf5b3d837a47dba Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Wed, 20 Aug 2008 17:22:51 -0700
Subject: [PATCH 445/895] sparseirq: fix intr-remap with dyn_array/nr_irqs
 changes]

In irq_2_iommu_alloc() and set_irte_irq(), irq_to_desc or
irq_2_iommu pointers may not be allocated. So use the routines
which will allocate them if they are not already allocated.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/intr_remapping.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 23372c811159..2dcf973890c4 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -76,9 +76,10 @@ static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 	struct irq_desc *desc;
 	struct irq_2_iommu *irq_iommu;
 
-	desc = irq_to_desc(irq);
-
-	BUG_ON(!desc);
+	/*
+	 * alloc irq desc if not allocated already.
+	 */
+	desc = irq_to_desc_alloc(irq);
 
 	irq_iommu = desc->irq_2_iommu;
 
@@ -255,11 +256,8 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	irq_iommu = valid_irq_2_iommu(irq);
-	if (!irq_iommu) {
-		spin_unlock(&irq_2_ir_lock);
-		return -1;
-	}
+
+	irq_iommu = irq_2_iommu_alloc(irq);
 
 	irq_iommu->iommu = iommu;
 	irq_iommu->irte_index = index;
-- 
GitLab


From e89eb43863c2d9f11a3bbe766766fe646e6c50d9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 20 Aug 2008 20:46:25 -0700
Subject: [PATCH 446/895] x86: sparse_irq needs spin_lock in allocations

Suresh Siddha noticed that we should have a spinlock around it.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 13 ++++++++++++-
 kernel/irq/handle.c       | 13 ++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 34c74cf5c244..7ca556690474 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -146,6 +146,12 @@ static void init_one_irq_cfg(struct irq_cfg *cfg)
 }
 
 static struct irq_cfg *irq_cfgx;
+
+/*
+ * Protect the irq_cfgx_free freelist:
+ */
+static DEFINE_SPINLOCK(irq_cfg_lock);
+
 #ifdef CONFIG_HAVE_SPARSE_IRQ
 static struct irq_cfg *irq_cfgx_free;
 #endif
@@ -213,8 +219,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 {
 	struct irq_cfg *cfg, *cfg_pri;
-	int i;
+	unsigned long flags;
 	int count = 0;
+	int i;
 
 	cfg_pri = cfg = irq_cfgx;
 	while (cfg) {
@@ -226,6 +233,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 		count++;
 	}
 
+	spin_lock_irqsave(&irq_cfg_lock, flags);
 	if (!irq_cfgx_free) {
 		unsigned long phys;
 		unsigned long total_bytes;
@@ -263,6 +271,9 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 	else
 		irq_cfgx = cfg;
 	cfg->irq = irq;
+
+	spin_unlock_irqrestore(&irq_cfg_lock, flags);
+
 	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
 #ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
 	{
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 24c83a3cee4d..d638a911cbc1 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -107,6 +107,11 @@ static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 	}
 }
 
+/*
+ * Protect the sparse_irqs_free freelist:
+ */
+static DEFINE_SPINLOCK(sparse_irq_lock);
+
 #ifdef CONFIG_HAVE_SPARSE_IRQ
 static struct irq_desc *sparse_irqs_free;
 struct irq_desc *sparse_irqs;
@@ -166,11 +171,13 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	}
 	return NULL;
 }
+
 struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 {
 	struct irq_desc *desc, *desc_pri;
-	int i;
+	unsigned long flags;
 	int count = 0;
+	int i;
 
 	desc_pri = desc = sparse_irqs;
 	while (desc) {
@@ -182,6 +189,7 @@ struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 		count++;
 	}
 
+	spin_lock_irqsave(&sparse_irq_lock, flags);
 	/*
 	 *  we run out of pre-allocate ones, allocate more
 	 */
@@ -223,6 +231,9 @@ struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 	else
 		sparse_irqs = desc;
 	desc->irq = irq;
+
+	spin_unlock_irqrestore(&sparse_irq_lock, flags);
+
 	printk(KERN_DEBUG "found new irq_desc for irq %d\n", desc->irq);
 #ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
 	{
-- 
GitLab


From a2d332fa3445160519de03c350a59602ac1c3df9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 21 Aug 2008 12:56:32 -0700
Subject: [PATCH 447/895] x86: fix 32-bit ioapic lockup with sparseirqs

Missed two lines when copying.

Fix panic on one of Ingo's machines that need to adjust ioapic id when
acpi off/ 32bit.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7ca556690474..4e44fd1f466e 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2077,6 +2077,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
 
 		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
 		spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(apic, 0, reg_00.raw);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
 
 		/*
 		 * Sanity check
-- 
GitLab


From 052c0bff9b83a578654dfa513d6e3d0b3795f1e8 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 21 Aug 2008 13:10:09 -0700
Subject: [PATCH 448/895] x86: fix probe_nr_irqs for xen

otherwise Xen is _completely_ unusable with 5 or more VCPUs.
(when !CONFIG_HAVE_SPARSE_IRQ).

based on Alex Nixon's patch.

also add +1 offset after redir_entries

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Alex Nixon <alex.nixon@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 4e44fd1f466e..d28128e0392c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3625,16 +3625,21 @@ int __init probe_nr_irqs(void)
 {
 	int idx;
 	int nr = 0;
+#ifndef CONFIG_XEN
+	int nr_min = 32;
+#else
+	int nr_min = NR_IRQS;
+#endif
 
 	for (idx = 0; idx < nr_ioapics; idx++)
-		nr += io_apic_get_redir_entries(idx);
+		nr += io_apic_get_redir_entries(idx) + 1;
 
 	/* double it for hotplug and msi and nmi */
 	nr <<= 1;
 
 	/* something wrong ? */
-	if (nr < 32)
-		nr = 32;
+	if (nr < nr_min)
+		nr = nr_min;
 
 	return nr;
 }
-- 
GitLab


From 2699574b3c04351f5a23c8a842e17c303d7ebb6f Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Thu, 21 Aug 2008 11:26:43 -0700
Subject: [PATCH 449/895] x86: VMI, initialize IRQ vector

Initialize vector_irq for the vmi used vector, to point to correct irq.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/vmiclock_32.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 6953859fe289..254ee07f8635 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -235,11 +235,14 @@ static void __devinit vmi_time_init_clockevent(void)
 
 void __init vmi_time_init(void)
 {
+	unsigned int cpu;
 	/* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */
 	outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
 
 	vmi_time_init_clockevent();
 	setup_irq(0, &vmi_clock_action);
+	for_each_possible_cpu(cpu)
+		per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0;
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-- 
GitLab


From db4b5525caafd846ec20f95afbc6403c792e22cf Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 24 Aug 2008 02:01:39 -0700
Subject: [PATCH 450/895] x86: apic_64.c - setup_APIC_timer has to be __cpuinit
 function

There is no need to hold this code if CPU_HOTPLUG is not
defined.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index cd860856a0b7..8f551276681e 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -400,7 +400,7 @@ static void lapic_timer_broadcast(cpumask_t mask)
  * Setup the local APIC timer for this CPU. Copy the initilized values
  * of the boot CPU and register the clock event in the framework.
  */
-static void setup_APIC_timer(void)
+static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
-- 
GitLab


From 7c37e48b5125fdb0acac846a0a3af42806175d44 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 24 Aug 2008 02:01:40 -0700
Subject: [PATCH 451/895] x86: apic - introduce get_physical_broadcast for
 64bit

We don't really use it now on 64bit mode but
could reserve it for future.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 8f551276681e..bb46711a0b1e 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -244,6 +244,16 @@ void __cpuinit enable_NMI_through_LVT0(void)
 	apic_write(APIC_LVT0, v);
 }
 
+#ifdef CONFIG_X86_32
+/**
+ * get_physical_broadcast - Get number of physical broadcast IDs
+ */
+int get_physical_broadcast(void)
+{
+	return modern_apic() ? 0xff : 0xf;
+}
+#endif
+
 /**
  * lapic_get_maxlvt - get the maximum number of local vector table entries
  */
-- 
GitLab


From 920fa7a507c3b1004a9ebe07a2c9d38605b3406a Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 24 Aug 2008 02:01:41 -0700
Subject: [PATCH 452/895] x86: apic - unify setup_apicpmtimer

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 10 ++++++++++
 arch/x86/kernel/apic_64.c |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 21c831d96af3..df6ed603e547 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1774,6 +1774,16 @@ static int __init parse_nolapic_timer(char *arg)
 }
 early_param("nolapic_timer", parse_nolapic_timer);
 
+#ifdef CONFIG_X86_64
+static __init int setup_apicpmtimer(char *s)
+{
+	apic_calibrate_pmtmr = 1;
+	notsc_setup(NULL);
+	return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
 static int __init apic_set_verbosity(char *arg)
 {
 	if (!arg)  {
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index bb46711a0b1e..ddc5b245faf2 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1810,6 +1810,7 @@ static int __init parse_nolapic_timer(char *arg)
 }
 early_param("nolapic_timer", parse_nolapic_timer);
 
+#ifdef CONFIG_X86_64
 static __init int setup_apicpmtimer(char *s)
 {
 	apic_calibrate_pmtmr = 1;
@@ -1817,6 +1818,7 @@ static __init int setup_apicpmtimer(char *s)
 	return 0;
 }
 __setup("apicpmtimer", setup_apicpmtimer);
+#endif
 
 static int __init apic_set_verbosity(char *arg)
 {
-- 
GitLab


From 80e5609cabd7e4321769701a70297f819a15b08d Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 24 Aug 2008 02:01:42 -0700
Subject: [PATCH 453/895] x86: apic_64.c - add sanity check for spurious vector
 definition

Do not check for SPUTIOUS_APIC_VECTOR definition twice.
Check it once - is what we need.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index ddc5b245faf2..4587e16f73ec 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -46,6 +46,13 @@
 #include <mach_ipi.h>
 #include <mach_apic.h>
 
+/*
+ * Sanity check
+ */
+#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
+# error SPURIOUS_APIC_VECTOR definition error
+#endif
+
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
 static int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
@@ -939,8 +946,6 @@ void __cpuinit setup_local_APIC(void)
 	preempt_disable();
 	value = apic_read(APIC_LVR);
 
-	BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
-
 	/*
 	 * Double-check whether this APIC is really registered.
 	 * This is meaningless in clustered apic mode, so we skip it.
-- 
GitLab


From 89c38c2867ebe37c4c5aee23e7fa1bffb025b171 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 24 Aug 2008 02:01:43 -0700
Subject: [PATCH 454/895] x86: apic - unify setup_local_APIC

- remove useless read of APIC_LVR
- wrap with preempt_disable/enable
- check for integrated APIC just in place

v2: fix by Yinghai Lu.
	fix lapic_is_integrated using
	let 64-bit too have pic_mode

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 21 ++++++++++++----
 arch/x86/kernel/apic_64.c | 51 +++++++++++++++++++++++++++++++++++----
 2 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index df6ed603e547..f8c1251984e2 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1033,9 +1033,10 @@ static void __cpuinit lapic_setup_esr(void)
  */
 void __cpuinit setup_local_APIC(void)
 {
-	unsigned long value, integrated;
+	unsigned int value;
 	int i, j;
 
+#ifdef CONFIG_X86_32
 	/* Pound the ESR really hard over the head with a big hammer - mbligh */
 	if (esr_disable) {
 		apic_write(APIC_ESR, 0);
@@ -1043,14 +1044,16 @@ void __cpuinit setup_local_APIC(void)
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
 	}
+#endif
 
-	integrated = lapic_is_integrated();
+	preempt_disable();
 
 	/*
 	 * Double-check whether this APIC is really registered.
+	 * This is meaningless in clustered apic mode, so we skip it.
 	 */
 	if (!apic_id_registered())
-		WARN_ON_ONCE(1);
+		BUG();
 
 	/*
 	 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1096,6 +1099,7 @@ void __cpuinit setup_local_APIC(void)
 	 */
 	value |= APIC_SPIV_APIC_ENABLED;
 
+#ifdef CONFIG_X86_32
 	/*
 	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
 	 * certain networking cards. If high frequency interrupts are
@@ -1116,8 +1120,13 @@ void __cpuinit setup_local_APIC(void)
 	 * See also the comment in end_level_ioapic_irq().  --macro
 	 */
 
-	/* Enable focus processor (bit==0) */
+	/*
+	 * - enable focus processor (bit==0)
+	 * - 64bit mode always use processor focus
+	 *   so no need to set it
+	 */
 	value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
 
 	/*
 	 * Set spurious IRQ vector
@@ -1154,9 +1163,11 @@ void __cpuinit setup_local_APIC(void)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	if (!integrated)		/* 82489DX */
+	if (!lapic_is_integrated())		/* 82489DX */
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write(APIC_LVT1, value);
+
+	preempt_enable();
 }
 
 void __cpuinit end_local_APIC_setup(void)
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 4587e16f73ec..283968d4e024 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -76,6 +76,8 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
  */
 unsigned int apic_verbosity;
 
+int pic_mode;
+
 /* Have we found an MP table */
 int smp_found_config;
 
@@ -943,8 +945,17 @@ void __cpuinit setup_local_APIC(void)
 	unsigned int value;
 	int i, j;
 
+#ifdef CONFIG_X86_32
+	/* Pound the ESR really hard over the head with a big hammer - mbligh */
+	if (esr_disable) {
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+	}
+#endif
+
 	preempt_disable();
-	value = apic_read(APIC_LVR);
 
 	/*
 	 * Double-check whether this APIC is really registered.
@@ -997,7 +1008,34 @@ void __cpuinit setup_local_APIC(void)
 	 */
 	value |= APIC_SPIV_APIC_ENABLED;
 
-	/* We always use processor focus */
+#ifdef CONFIG_X86_32
+	/*
+	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
+	 * certain networking cards. If high frequency interrupts are
+	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
+	 * entry is masked/unmasked at a high rate as well then sooner or
+	 * later IOAPIC line gets 'stuck', no more interrupts are received
+	 * from the device. If focus CPU is disabled then the hang goes
+	 * away, oh well :-(
+	 *
+	 * [ This bug can be reproduced easily with a level-triggered
+	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
+	 *   BX chipset. ]
+	 */
+	/*
+	 * Actually disabling the focus CPU check just makes the hang less
+	 * frequent as it makes the interrupt distributon model be more
+	 * like LRU than MRU (the short-term load is more even across CPUs).
+	 * See also the comment in end_level_ioapic_irq().  --macro
+	 */
+
+	/*
+	 * - enable focus processor (bit==0)
+	 * - 64bit mode always use processor focus
+	 *   so no need to set it
+	 */
+	value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
 
 	/*
 	 * Set spurious IRQ vector
@@ -1016,14 +1054,14 @@ void __cpuinit setup_local_APIC(void)
 	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
 	 */
 	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-	if (!smp_processor_id() && !value) {
+	if (!smp_processor_id() && (pic_mode || !value)) {
 		value = APIC_DM_EXTINT;
 		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-			    smp_processor_id());
+				smp_processor_id());
 	} else {
 		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
 		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-			    smp_processor_id());
+				smp_processor_id());
 	}
 	apic_write(APIC_LVT0, value);
 
@@ -1034,7 +1072,10 @@ void __cpuinit setup_local_APIC(void)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
+	if (!lapic_is_integrated())		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write(APIC_LVT1, value);
+
 	preempt_enable();
 }
 
-- 
GitLab


From 457cc52d4670bcf1470606a108bbf35aac28eb7f Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 24 Aug 2008 02:01:44 -0700
Subject: [PATCH 455/895] x86: apic_32.c should use __cpuinit section

All callers are __init or __cpuinit so there is no need
to hold this code without CPU_HOTPLUG being set.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index f8c1251984e2..4ca3717b3026 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -383,7 +383,7 @@ static void lapic_timer_broadcast(cpumask_t mask)
  * Setup the local APIC timer for this CPU. Copy the initilized values
  * of the boot CPU and register the clock event in the framework.
  */
-static void __devinit setup_APIC_timer(void)
+static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
@@ -652,7 +652,7 @@ void __init setup_boot_APIC_clock(void)
 	setup_APIC_timer();
 }
 
-void __devinit setup_secondary_APIC_clock(void)
+void __cpuinit setup_secondary_APIC_clock(void)
 {
 	setup_APIC_timer();
 }
@@ -1713,7 +1713,7 @@ static struct sys_device device_lapic = {
 	.cls	= &lapic_sysclass,
 };
 
-static void __devinit apic_pm_activate(void)
+static void __cpuinit apic_pm_activate(void)
 {
 	apic_pm_state.active = 1;
 }
-- 
GitLab


From 6460bc73aac970135104a0bc407c2c8b85394d59 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 24 Aug 2008 02:01:45 -0700
Subject: [PATCH 456/895] x86: apic - unify smp_apic_timer_interrupt

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 3 +++
 arch/x86/kernel/apic_64.c | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4ca3717b3026..913d9924b101 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -718,6 +718,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	local_apic_timer_interrupt();
 	irq_exit();
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 283968d4e024..2e109119f647 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -625,7 +625,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
+#ifdef CONFIG_X86_64
 	exit_idle();
+#endif
 	irq_enter();
 	local_apic_timer_interrupt();
 	irq_exit();
-- 
GitLab


From b3c5117050e8028d48b2fa0ea09c7a50dd7f3414 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 02:01:46 -0700
Subject: [PATCH 457/895] x86: apic_xx.c order variables

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 46 +++++++++++++++++++------------------
 arch/x86/kernel/apic_64.c | 48 ++++++++++++++++++++++++++-------------
 2 files changed, 56 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 913d9924b101..a54512bea629 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -50,16 +50,37 @@
 # error SPURIOUS_APIC_VECTOR definition error
 #endif
 
-unsigned long mp_lapic_addr;
-
+#ifdef CONFIG_X86_32
 /*
  * Knob to control our willingness to enable the local APIC.
  *
  * +1=force-enable
  */
 static int force_enable_local_apic;
-int disable_apic;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+	force_enable_local_apic = 1;
+	return 0;
+}
+early_param("lapic", parse_lapic);
+#endif
 
+#ifdef CONFIG_X86_64
+static int apic_calibrate_pmtmr __initdata;
+static __init int setup_apicpmtimer(char *s)
+{
+	apic_calibrate_pmtmr = 1;
+	notsc_setup(NULL);
+	return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
+unsigned long mp_lapic_addr;
+int disable_apic;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
 static int disable_apic_timer __cpuinitdata;
 /* Local APIC timer works in C2 */
@@ -1742,15 +1763,6 @@ static void apic_pm_activate(void) { }
 
 #endif	/* CONFIG_PM */
 
-/*
- * APIC command line parameters
- */
-static int __init parse_lapic(char *arg)
-{
-	force_enable_local_apic = 1;
-	return 0;
-}
-early_param("lapic", parse_lapic);
 
 static int __init setup_disableapic(char *arg)
 {
@@ -1788,16 +1800,6 @@ static int __init parse_nolapic_timer(char *arg)
 }
 early_param("nolapic_timer", parse_nolapic_timer);
 
-#ifdef CONFIG_X86_64
-static __init int setup_apicpmtimer(char *s)
-{
-	apic_calibrate_pmtmr = 1;
-	notsc_setup(NULL);
-	return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-#endif
-
 static int __init apic_set_verbosity(char *arg)
 {
 	if (!arg)  {
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 2e109119f647..c40a900e155b 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -53,16 +53,44 @@
 # error SPURIOUS_APIC_VECTOR definition error
 #endif
 
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
+#ifdef CONFIG_X86_32
+/*
+ * Knob to control our willingness to enable the local APIC.
+ *
+ * +1=force-enable
+ */
+static int force_enable_local_apic;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+	force_enable_local_apic = 1;
+	return 0;
+}
+early_param("lapic", parse_lapic);
+#endif
+
+#ifdef CONFIG_X86_64
 static int apic_calibrate_pmtmr __initdata;
-int disable_apic;
+static __init int setup_apicpmtimer(char *s)
+{
+	apic_calibrate_pmtmr = 1;
+	notsc_setup(NULL);
+	return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
 int disable_x2apic;
 int x2apic;
-
 /* x2apic enabled before OS handover */
 int x2apic_preenabled;
 
+unsigned long mp_lapic_addr;
+int disable_apic;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk */
+static int disable_apic_timer __cpuinitdata;
 /* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -113,8 +141,6 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
 static unsigned long apic_phys;
 
-unsigned long mp_lapic_addr;
-
 /*
  * Get the LAPIC version
  */
@@ -1858,16 +1884,6 @@ static int __init parse_nolapic_timer(char *arg)
 }
 early_param("nolapic_timer", parse_nolapic_timer);
 
-#ifdef CONFIG_X86_64
-static __init int setup_apicpmtimer(char *s)
-{
-	apic_calibrate_pmtmr = 1;
-	notsc_setup(NULL);
-	return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-#endif
-
 static int __init apic_set_verbosity(char *arg)
 {
 	if (!arg)  {
-- 
GitLab


From 49899eacce79ce39faf531dad3e00f771eba2eb1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 02:01:47 -0700
Subject: [PATCH 458/895] x86: use HAVE_X2APIC in apic_64.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index c40a900e155b..d3ec746aede4 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -82,10 +82,23 @@ static __init int setup_apicpmtimer(char *s)
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
-int disable_x2apic;
+#ifdef CONFIG_X86_64
+#define HAVE_X2APIC
+#endif
+
+#ifdef HAVE_X2APIC
 int x2apic;
 /* x2apic enabled before OS handover */
 int x2apic_preenabled;
+int disable_x2apic;
+static __init int setup_nox2apic(char *str)
+{
+	disable_x2apic = 1;
+	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+	return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
 
 unsigned long mp_lapic_addr;
 int disable_apic;
@@ -228,6 +241,7 @@ static struct apic_ops xapic_ops = {
 struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 EXPORT_SYMBOL_GPL(apic_ops);
 
+#ifdef HAVE_X2APIC
 static void x2apic_wait_icr_idle(void)
 {
 	/* no need to wait for icr idle in x2apic */
@@ -261,6 +275,7 @@ static struct apic_ops x2apic_ops = {
 	.wait_icr_idle = x2apic_wait_icr_idle,
 	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
 };
+#endif
 
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
@@ -1125,6 +1140,7 @@ void __cpuinit end_local_APIC_setup(void)
 	apic_pm_activate();
 }
 
+#ifdef HAVE_X2APIC
 void check_x2apic(void)
 {
 	int msr, msr2;
@@ -1243,6 +1259,7 @@ end:
 
 	return;
 }
+#endif /* HAVE_X2APIC */
 
 /*
  * Detect and enable local APICs on non-SMP boards.
@@ -1291,10 +1308,12 @@ void __init early_init_lapic_mapping(void)
  */
 void __init init_apic_mappings(void)
 {
+#ifdef HAVE_X2APIC
 	if (x2apic) {
 		boot_cpu_physical_apicid = read_apic_id();
 		return;
 	}
+#endif
 
 	/*
 	 * If no local APIC can be found then set up a fake all
@@ -1335,8 +1354,9 @@ int __init APIC_init_uniprocessor(void)
 		printk(KERN_INFO "Apic disabled by BIOS\n");
 		return -1;
 	}
-
+#ifdef HAVE_X2APIC
 	enable_IR_x2apic();
+#endif
 	setup_apic_routing();
 
 	verify_local_APIC();
@@ -1672,7 +1692,7 @@ static int lapic_resume(struct sys_device *dev)
 
 	local_irq_save(flags);
 
-#ifdef CONFIG_X86_64
+#ifdef HAVE_X2APIC
 	if (x2apic)
 		enable_x2apic();
 	else
@@ -1836,15 +1856,6 @@ __cpuinit int apic_is_clustered_box(void)
 	return (clusters > 2);
 }
 
-static __init int setup_nox2apic(char *str)
-{
-	disable_x2apic = 1;
-	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
-	return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-
-
 /*
  * APIC command line parameters
  */
-- 
GitLab


From 3491998dd54f6d4ef7344518fe5463b299fdf537 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 02:01:48 -0700
Subject: [PATCH 459/895] x86: add hard_smp_prossor_id with MACRO in
 io_apic_xx.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 7 +++++++
 arch/x86/kernel/apic_64.c | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index a54512bea629..93718ffb9b9c 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1600,6 +1600,13 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	cpu_set(cpu, cpu_present_map);
 }
 
+#ifdef CONFIG_X86_64
+int hard_smp_processor_id(void)
+{
+	return read_apic_id();
+}
+#endif
+
 /*
  * Power management
  */
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index d3ec746aede4..eeb69838c2f8 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1612,10 +1612,12 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	cpu_set(cpu, cpu_present_map);
 }
 
+#ifdef CONFIG_X86_64
 int hard_smp_processor_id(void)
 {
 	return read_apic_id();
 }
+#endif
 
 /*
  * Power management
-- 
GitLab


From f28c0ae21d80ffd6eb0987901c5273843387e341 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 02:01:49 -0700
Subject: [PATCH 460/895] x86: make apic_32/64.c more like

except x2apic, detec_init_APIC, and calibrating_APIC_clock

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 125 ++++++++++++++++++++++++++++++++++----
 arch/x86/kernel/apic_64.c |  10 ++-
 2 files changed, 122 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 93718ffb9b9c..bfac26a16099 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -66,6 +66,9 @@ static int __init parse_lapic(char *arg)
 	return 0;
 }
 early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
 #endif
 
 #ifdef CONFIG_X86_64
@@ -131,9 +134,6 @@ static struct clock_event_device lapic_clockevent = {
 };
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
-/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
-
 static unsigned long apic_phys;
 
 /*
@@ -240,6 +240,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
 	apic_write(APIC_LVT0, v);
 }
 
+#ifdef CONFIG_X86_32
 /**
  * get_physical_broadcast - Get number of physical broadcast IDs
  */
@@ -247,6 +248,7 @@ int get_physical_broadcast(void)
 {
 	return modern_apic() ? 0xff : 0xf;
 }
+#endif
 
 /**
  * lapic_get_maxlvt - get the maximum number of local vector table entries
@@ -1291,6 +1293,32 @@ no_apic:
 	return -1;
 }
 
+#ifdef CONFIG_X86_64
+void __init early_init_lapic_mapping(void)
+{
+	unsigned long phys_addr;
+
+	/*
+	 * If no local APIC can be found then go out
+	 * : it means there is no mpatable and MADT
+	 */
+	if (!smp_found_config)
+		return;
+
+	phys_addr = mp_lapic_addr;
+
+	set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+		    APIC_BASE, phys_addr);
+
+	/*
+	 * Fetch the APIC ID of the BSP in case we have a
+	 * default configuration (or the MP table is broken).
+	 */
+	boot_cpu_physical_apicid = read_apic_id();
+}
+#endif
+
 /**
  * init_apic_mappings - initialize APIC mappings
  */
@@ -1308,8 +1336,8 @@ void __init init_apic_mappings(void)
 		apic_phys = mp_lapic_addr;
 
 	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
-	       apic_phys);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+				APIC_BASE, apic_phys);
 
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
@@ -1317,14 +1345,12 @@ void __init init_apic_mappings(void)
 	 */
 	if (boot_cpu_physical_apicid == -1U)
 		boot_cpu_physical_apicid = read_apic_id();
-
 }
 
 /*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-
 int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
@@ -1682,11 +1708,6 @@ static int lapic_resume(struct sys_device *dev)
 
 	local_irq_save(flags);
 
-#ifdef CONFIG_X86_64
-	if (x2apic)
-		enable_x2apic();
-	else
-#endif
 	{
 		/*
 		 * Make sure the APICBASE points to the right address
@@ -1770,7 +1791,87 @@ static void apic_pm_activate(void) { }
 
 #endif	/* CONFIG_PM */
 
+#ifdef CONFIG_X86_64
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ *
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis. Use available data to take a good guess.
+ * If in doubt, go HPET.
+ */
+__cpuinit int apic_is_clustered_box(void)
+{
+	int i, clusters, zeros;
+	unsigned id;
+	u16 *bios_cpu_apicid;
+	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
+
+	/*
+	 * there is not this kind of box with AMD CPU yet.
+	 * Some AMD box with quadcore cpu and 8 sockets apicid
+	 * will be [4, 0x23] or [8, 0x27] could be thought to
+	 * vsmp box still need checking...
+	 */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+		return 0;
+
+	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
+	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
+
+	for (i = 0; i < NR_CPUS; i++) {
+		/* are we being called early in kernel startup? */
+		if (bios_cpu_apicid) {
+			id = bios_cpu_apicid[i];
+		}
+		else if (i < nr_cpu_ids) {
+			if (cpu_present(i))
+				id = per_cpu(x86_bios_cpu_apicid, i);
+			else
+				continue;
+		}
+		else
+			break;
 
+		if (id != BAD_APICID)
+			__set_bit(APIC_CLUSTERID(id), clustermap);
+	}
+
+	/* Problem:  Partially populated chassis may not have CPUs in some of
+	 * the APIC clusters they have been allocated.  Only present CPUs have
+	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+	 * Since clusters are allocated sequentially, count zeros only if
+	 * they are bounded by ones.
+	 */
+	clusters = 0;
+	zeros = 0;
+	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+		if (test_bit(i, clustermap)) {
+			clusters += 1 + zeros;
+			zeros = 0;
+		} else
+			++zeros;
+	}
+
+	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+	 * not guaranteed to be synced between boards
+	 */
+	if (is_vsmp_box() && clusters > 1)
+		return 1;
+
+	/*
+	 * If clusters > 2, then should be multi-chassis.
+	 * May have to revisit this when multi-core + hyperthreaded CPUs come
+	 * out, but AFAIK this will work even for them.
+	 */
+	return (clusters > 2);
+}
+#endif
+
+/*
+ * APIC command line parameters
+ */
 static int __init setup_disableapic(char *arg)
 {
 	disable_apic = 1;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index eeb69838c2f8..dca6c246e731 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -69,6 +69,9 @@ static int __init parse_lapic(char *arg)
 	return 0;
 }
 early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
 #endif
 
 #ifdef CONFIG_X86_64
@@ -1279,6 +1282,7 @@ static int __init detect_init_APIC(void)
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
 void __init early_init_lapic_mapping(void)
 {
 	unsigned long phys_addr;
@@ -1302,6 +1306,7 @@ void __init early_init_lapic_mapping(void)
 	 */
 	boot_cpu_physical_apicid = read_apic_id();
 }
+#endif
 
 /**
  * init_apic_mappings - initialize APIC mappings
@@ -1334,7 +1339,8 @@ void __init init_apic_mappings(void)
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	boot_cpu_physical_apicid = read_apic_id();
+	if (boot_cpu_physical_apicid == -1U)
+		boot_cpu_physical_apicid = read_apic_id();
 }
 
 /*
@@ -1782,6 +1788,7 @@ static void apic_pm_activate(void) { }
 
 #endif	/* CONFIG_PM */
 
+#ifdef CONFIG_X86_64
 /*
  * apic_is_clustered_box() -- Check if we can expect good TSC
  *
@@ -1857,6 +1864,7 @@ __cpuinit int apic_is_clustered_box(void)
 	 */
 	return (clusters > 2);
 }
+#endif
 
 /*
  * APIC command line parameters
-- 
GitLab


From fa2bd35a8d5c88c03b638c72daf7f38a132d0e8c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 02:01:50 -0700
Subject: [PATCH 461/895] x86: merge APIC_init_uniprocessor

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 51 ++++++++++++++++++++++++++++++++++-----
 arch/x86/kernel/apic_64.c | 48 +++++++++++++++++++++++++++++++++---
 2 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index bfac26a16099..8f8b0e1f3eb3 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1355,6 +1355,17 @@ int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
 {
+#ifdef CONFIG_X86_64
+	if (disable_apic) {
+		printk(KERN_INFO "Apic disabled\n");
+		return -1;
+	}
+	if (!cpu_has_apic) {
+		disable_apic = 1;
+		printk(KERN_INFO "Apic disabled by BIOS\n");
+		return -1;
+	}
+#else
 	if (!smp_found_config && !cpu_has_apic)
 		return -1;
 
@@ -1368,34 +1379,62 @@ int __init APIC_init_uniprocessor(void)
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 		return -1;
 	}
+#endif
 
+#ifdef HAVE_X2APIC
+	enable_IR_x2apic();
+#endif
+#ifdef CONFIG_X86_64
+	setup_apic_routing();
+#endif
 	verify_local_APIC();
-
 	connect_bsp_APIC();
 
+#ifdef CONFIG_X86_64
+	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
 	/*
 	 * Hack: In case of kdump, after a crash, kernel might be booting
 	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
 	 * might be zero if read from MP tables. Get it from LAPIC.
 	 */
-#ifdef CONFIG_CRASH_DUMP
+# ifdef CONFIG_CRASH_DUMP
 	boot_cpu_physical_apicid = read_apic_id();
+# endif
 #endif
 	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-
 	setup_local_APIC();
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Now enable IO-APICs, actually call clear_IO_APIC
+	 * We need clear_IO_APIC before enabling vector on BP
+	 */
+	if (!skip_ioapic_setup && nr_ioapics)
+		enable_IO_APIC();
+#endif
+
 #ifdef CONFIG_X86_IO_APIC
 	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
 #endif
 		localise_nmi_watchdog();
 	end_local_APIC_setup();
+
 #ifdef CONFIG_X86_IO_APIC
-	if (smp_found_config)
-		if (!skip_ioapic_setup && nr_ioapics)
-			setup_IO_APIC();
+	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+		setup_IO_APIC();
+# ifdef CONFIG_X86_64
+	else
+		nr_ioapics = 0;
+# endif
 #endif
+
+#ifdef CONFIG_X86_64
+	setup_boot_APIC_clock();
+	check_nmi_watchdog();
+#else
 	setup_boot_clock();
+#endif
 
 	return 0;
 }
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index dca6c246e731..16a30c22b07c 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1351,6 +1351,7 @@ int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
 {
+#ifdef CONFIG_X86_64
 	if (disable_apic) {
 		printk(KERN_INFO "Apic disabled\n");
 		return -1;
@@ -1360,37 +1361,78 @@ int __init APIC_init_uniprocessor(void)
 		printk(KERN_INFO "Apic disabled by BIOS\n");
 		return -1;
 	}
+#else
+	if (!smp_found_config && !cpu_has_apic)
+		return -1;
+
+	/*
+	 * Complain if the BIOS pretends there is one.
+	 */
+	if (!cpu_has_apic &&
+	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+		       boot_cpu_physical_apicid);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+		return -1;
+	}
+#endif
+
 #ifdef HAVE_X2APIC
 	enable_IR_x2apic();
 #endif
+#ifdef CONFIG_X86_64
 	setup_apic_routing();
+#endif
 
 	verify_local_APIC();
-
 	connect_bsp_APIC();
 
-	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
+#ifdef CONFIG_X86_64
 	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
-
+#else
+	/*
+	 * Hack: In case of kdump, after a crash, kernel might be booting
+	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
+	 * might be zero if read from MP tables. Get it from LAPIC.
+	 */
+# ifdef CONFIG_CRASH_DUMP
+	boot_cpu_physical_apicid = read_apic_id();
+# endif
+#endif
+	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 	setup_local_APIC();
 
+#ifdef CONFIG_X86_64
 	/*
 	 * Now enable IO-APICs, actually call clear_IO_APIC
 	 * We need clear_IO_APIC before enabling vector on BP
 	 */
 	if (!skip_ioapic_setup && nr_ioapics)
 		enable_IO_APIC();
+#endif
 
+#ifdef CONFIG_X86_IO_APIC
 	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
+#endif
 		localise_nmi_watchdog();
 	end_local_APIC_setup();
 
+#ifdef CONFIG_X86_IO_APIC
 	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
 		setup_IO_APIC();
+# ifdef CONFIG_X86_64
 	else
 		nr_ioapics = 0;
+# endif
+#endif
+
+#ifdef CONFIG_X86_64
 	setup_boot_APIC_clock();
 	check_nmi_watchdog();
+#else
+	setup_boot_clock();
+#endif
+
 	return 0;
 }
 
-- 
GitLab


From be7a656fe131cba088912bcafb079b029320504d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 02:01:51 -0700
Subject: [PATCH 462/895] x86: copy detect_init_APIC to the other

Signed-off-by: Yinghai Lu <yhlu.kernel@mgail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 20 ++++++++++
 arch/x86/kernel/apic_64.c | 82 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 8f8b0e1f3eb3..1103e05aa1ba 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1214,6 +1214,25 @@ void __cpuinit end_local_APIC_setup(void)
 	apic_pm_activate();
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+	if (!cpu_has_apic) {
+		printk(KERN_INFO "No local APIC present\n");
+		return -1;
+	}
+
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+	boot_cpu_physical_apicid = 0;
+	return 0;
+}
+#else
 /*
  * Detect and initialize APIC
  */
@@ -1292,6 +1311,7 @@ no_apic:
 	printk(KERN_INFO "No local APIC present or hardware disabled\n");
 	return -1;
 }
+#endif
 
 #ifdef CONFIG_X86_64
 void __init early_init_lapic_mapping(void)
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 16a30c22b07c..b7268f5c8b18 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -684,7 +684,6 @@ int setup_profiling_timer(unsigned int multiplier)
 	return -EINVAL;
 }
 
-
 /*
  * Local APIC start and shutdown
  */
@@ -1264,6 +1263,7 @@ end:
 }
 #endif /* HAVE_X2APIC */
 
+#ifdef CONFIG_X86_64
 /*
  * Detect and enable local APICs on non-SMP boards.
  * Original code written by Keir Fraser.
@@ -1281,6 +1281,86 @@ static int __init detect_init_APIC(void)
 	boot_cpu_physical_apicid = 0;
 	return 0;
 }
+#else
+/*
+ * Detect and initialize APIC
+ */
+static int __init detect_init_APIC(void)
+{
+	u32 h, l, features;
+
+	/* Disabled by kernel option? */
+	if (disable_apic)
+		return -1;
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
+		    (boot_cpu_data.x86 == 15))
+			break;
+		goto no_apic;
+	case X86_VENDOR_INTEL:
+		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
+		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
+			break;
+		goto no_apic;
+	default:
+		goto no_apic;
+	}
+
+	if (!cpu_has_apic) {
+		/*
+		 * Over-ride BIOS and try to enable the local APIC only if
+		 * "lapic" specified.
+		 */
+		if (!force_enable_local_apic) {
+			printk(KERN_INFO "Local APIC disabled by BIOS -- "
+			       "you can enable it with \"lapic\"\n");
+			return -1;
+		}
+		/*
+		 * Some BIOSes disable the local APIC in the APIC_BASE
+		 * MSR. This can only be done in software for Intel P6 or later
+		 * and AMD K7 (Model > 1) or later.
+		 */
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+			printk(KERN_INFO
+			       "Local APIC disabled by BIOS -- reenabling.\n");
+			l &= ~MSR_IA32_APICBASE_BASE;
+			l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+			wrmsr(MSR_IA32_APICBASE, l, h);
+			enabled_via_apicbase = 1;
+		}
+	}
+	/*
+	 * The APIC feature bit should now be enabled
+	 * in `cpuid'
+	 */
+	features = cpuid_edx(1);
+	if (!(features & (1 << X86_FEATURE_APIC))) {
+		printk(KERN_WARNING "Could not enable APIC!\n");
+		return -1;
+	}
+	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+	/* The BIOS may have set up the APIC at some other address */
+	rdmsr(MSR_IA32_APICBASE, l, h);
+	if (l & MSR_IA32_APICBASE_ENABLE)
+		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+	printk(KERN_INFO "Found and enabled local APIC!\n");
+
+	apic_pm_activate();
+
+	return 0;
+
+no_apic:
+	printk(KERN_INFO "No local APIC present or hardware disabled\n");
+	return -1;
+}
+#endif
 
 #ifdef CONFIG_X86_64
 void __init early_init_lapic_mapping(void)
-- 
GitLab


From 773763df7de881e65ff2600c024c9ce2dde64750 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 02:01:52 -0700
Subject: [PATCH 463/895] x86: merge header files in apic_xx.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 8 ++++++++
 arch/x86/kernel/apic_64.c | 7 ++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 1103e05aa1ba..0ec8321e687c 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -23,11 +23,13 @@
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
+#include <linux/ioport.h>
 #include <linux/cpu.h>
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
 #include <linux/dmi.h>
+#include <linux/dmar.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -36,8 +38,14 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
+#include <asm/pgalloc.h>
 #include <asm/i8253.h>
 #include <asm/nmi.h>
+#include <asm/idle.h>
+#include <asm/proto.h>
+#include <asm/timex.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index b7268f5c8b18..ebe417b4d7fc 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -24,9 +24,11 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/ioport.h>
+#include <linux/cpu.h>
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
+#include <linux/dmi.h>
 #include <linux/dmar.h>
 
 #include <asm/atomic.h>
@@ -34,8 +36,10 @@
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
 #include <asm/desc.h>
+#include <asm/arch_hooks.h>
 #include <asm/hpet.h>
 #include <asm/pgalloc.h>
+#include <asm/i8253.h>
 #include <asm/nmi.h>
 #include <asm/idle.h>
 #include <asm/proto.h>
@@ -43,8 +47,9 @@
 #include <asm/apic.h>
 #include <asm/i8259.h>
 
-#include <mach_ipi.h>
 #include <mach_apic.h>
+#include <mach_apicdef.h>
+#include <mach_ipi.h>
 
 /*
  * Sanity check
-- 
GitLab


From dc1528dd864a0b79fa67b60b3ca5674fe94fdce5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 02:01:53 -0700
Subject: [PATCH 464/895] x86: apic unify smp_spurious/error_interrupt

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 24 +++++++++++++++++++++---
 arch/x86/kernel/apic_64.c | 24 ++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 0ec8321e687c..60a901b2d4fe 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1474,10 +1474,17 @@ int __init APIC_init_uniprocessor(void)
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
+#ifdef CONFIG_X86_64
+asmlinkage void smp_spurious_interrupt(void)
+#else
 void smp_spurious_interrupt(struct pt_regs *regs)
+#endif
 {
-	unsigned long v;
+	u32 v;
 
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
@@ -1488,20 +1495,31 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
 		ack_APIC_irq();
 
+#ifdef CONFIG_X86_64
+	add_pda(irq_spurious_count, 1);
+#else
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
 	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
 	       "should never happen.\n", smp_processor_id());
 	__get_cpu_var(irq_stat).irq_spurious_count++;
+#endif
 	irq_exit();
 }
 
 /*
  * This interrupt should never happen with our APIC/SMP architecture
  */
+#ifdef CONFIG_X86_64
+asmlinkage void smp_error_interrupt(void)
+#else
 void smp_error_interrupt(struct pt_regs *regs)
+#endif
 {
-	unsigned long v, v1;
+	u32 v, v1;
 
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v = apic_read(APIC_ESR);
@@ -1520,7 +1538,7 @@ void smp_error_interrupt(struct pt_regs *regs)
 	   6: Received illegal vector
 	   7: Illegal register address
 	*/
-	printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+	printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
 		smp_processor_id(), v , v1);
 	irq_exit();
 }
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index ebe417b4d7fc..c728885e4f4a 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1528,10 +1528,17 @@ int __init APIC_init_uniprocessor(void)
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
+#ifdef CONFIG_X86_64
 asmlinkage void smp_spurious_interrupt(void)
+#else
+void smp_spurious_interrupt(struct pt_regs *regs)
+#endif
 {
-	unsigned int v;
+	u32 v;
+
+#ifdef CONFIG_X86_64
 	exit_idle();
+#endif
 	irq_enter();
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
@@ -1542,18 +1549,31 @@ asmlinkage void smp_spurious_interrupt(void)
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
 		ack_APIC_irq();
 
+#ifdef CONFIG_X86_64
 	add_pda(irq_spurious_count, 1);
+#else
+	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
+	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
+	       "should never happen.\n", smp_processor_id());
+	__get_cpu_var(irq_stat).irq_spurious_count++;
+#endif
 	irq_exit();
 }
 
 /*
  * This interrupt should never happen with our APIC/SMP architecture
  */
+#ifdef CONFIG_X86_64
 asmlinkage void smp_error_interrupt(void)
+#else
+void smp_error_interrupt(struct pt_regs *regs)
+#endif
 {
-	unsigned int v, v1;
+	u32 v, v1;
 
+#ifdef CONFIG_X86_64
 	exit_idle();
+#endif
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v = apic_read(APIC_ESR);
-- 
GitLab


From 2f04fa888d270951b9e0fe9e641ddd560d77ad1b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 02:01:54 -0700
Subject: [PATCH 465/895] x86: apic copy calibrate_APIC_clock to each other in
 apic_32/64.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c |  86 +++++++++++++++
 arch/x86/kernel/apic_64.c | 215 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 301 insertions(+)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 60a901b2d4fe..05498c37f8d9 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -424,6 +424,90 @@ static void __cpuinit setup_APIC_timer(void)
 	clockevents_register_device(levt);
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+ * to calibrate, since some later bootup code depends on getting
+ * the first irq? Ugh.
+ *
+ * We want to do the calibration only once since we
+ * want to have local timer irqs syncron. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyways, no accidental
+ * APIC irq that way.
+ */
+
+#define TICK_COUNT 100000000
+
+static int __init calibrate_APIC_clock(void)
+{
+	unsigned apic, apic_start;
+	unsigned long tsc, tsc_start;
+	int result;
+
+	local_irq_disable();
+
+	/*
+	 * Put whatever arbitrary (but long enough) timeout
+	 * value into the APIC clock, we just want to get the
+	 * counter running for calibration.
+	 *
+	 * No interrupt enable !
+	 */
+	__setup_APIC_LVTT(250000000, 0, 0);
+
+	apic_start = apic_read(APIC_TMCCT);
+#ifdef CONFIG_X86_PM_TIMER
+	if (apic_calibrate_pmtmr && pmtmr_ioport) {
+		pmtimer_wait(5000);  /* 5ms wait */
+		apic = apic_read(APIC_TMCCT);
+		result = (apic_start - apic) * 1000L / 5;
+	} else
+#endif
+	{
+		rdtscll(tsc_start);
+
+		do {
+			apic = apic_read(APIC_TMCCT);
+			rdtscll(tsc);
+		} while ((tsc - tsc_start) < TICK_COUNT &&
+				(apic_start - apic) < TICK_COUNT);
+
+		result = (apic_start - apic) * 1000L * tsc_khz /
+					(tsc - tsc_start);
+	}
+
+	local_irq_enable();
+
+	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
+
+	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
+		result / 1000 / 1000, result / 1000 % 1000);
+
+	/* Calculate the scaled math multiplication factor */
+	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
+				       lapic_clockevent.shift);
+	lapic_clockevent.max_delta_ns =
+		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+	lapic_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xF, &lapic_clockevent);
+
+	calibration_result = (result * APIC_DIVISOR) / HZ;
+
+	/*
+	 * Do a sanity check on the APIC calibration result
+	 */
+	if (calibration_result < (1000000 / HZ)) {
+		printk(KERN_WARNING
+			"APIC frequency too slow, disabling apic timer\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+#else
 /*
  * In this functions we calibrate APIC bus clocks to the external timer.
  *
@@ -635,6 +719,8 @@ static int __init calibrate_APIC_clock(void)
 	return 0;
 }
 
+#endif
+
 /*
  * Setup the boot APIC
  *
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index c728885e4f4a..6e99af5ce678 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -478,6 +478,7 @@ static void __cpuinit setup_APIC_timer(void)
 	clockevents_register_device(levt);
 }
 
+#ifdef CONFIG_X86_64
 /*
  * In this function we calibrate APIC bus clocks to the external
  * timer. Unfortunately we cannot use jiffies and the timer irq
@@ -560,6 +561,220 @@ static int __init calibrate_APIC_clock(void)
 	return 0;
 }
 
+#else
+/*
+ * In this functions we calibrate APIC bus clocks to the external timer.
+ *
+ * We want to do the calibration only once since we want to have local timer
+ * irqs syncron. CPUs connected by the same APIC bus have the very same bus
+ * frequency.
+ *
+ * This was previously done by reading the PIT/HPET and waiting for a wrap
+ * around to find out, that a tick has elapsed. I have a box, where the PIT
+ * readout is broken, so it never gets out of the wait loop again. This was
+ * also reported by others.
+ *
+ * Monitoring the jiffies value is inaccurate and the clockevents
+ * infrastructure allows us to do a simple substitution of the interrupt
+ * handler.
+ *
+ * The calibration routine also uses the pm_timer when possible, as the PIT
+ * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
+ * back to normal later in the boot process).
+ */
+
+#define LAPIC_CAL_LOOPS		(HZ/10)
+
+static __initdata int lapic_cal_loops = -1;
+static __initdata long lapic_cal_t1, lapic_cal_t2;
+static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
+static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
+static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
+
+/*
+ * Temporary interrupt handler.
+ */
+static void __init lapic_cal_handler(struct clock_event_device *dev)
+{
+	unsigned long long tsc = 0;
+	long tapic = apic_read(APIC_TMCCT);
+	unsigned long pm = acpi_pm_read_early();
+
+	if (cpu_has_tsc)
+		rdtscll(tsc);
+
+	switch (lapic_cal_loops++) {
+	case 0:
+		lapic_cal_t1 = tapic;
+		lapic_cal_tsc1 = tsc;
+		lapic_cal_pm1 = pm;
+		lapic_cal_j1 = jiffies;
+		break;
+
+	case LAPIC_CAL_LOOPS:
+		lapic_cal_t2 = tapic;
+		lapic_cal_tsc2 = tsc;
+		if (pm < lapic_cal_pm1)
+			pm += ACPI_PM_OVRRUN;
+		lapic_cal_pm2 = pm;
+		lapic_cal_j2 = jiffies;
+		break;
+	}
+}
+
+static int __init calibrate_APIC_clock(void)
+{
+	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+	const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
+	const long pm_thresh = pm_100ms/100;
+	void (*real_handler)(struct clock_event_device *dev);
+	unsigned long deltaj;
+	long delta, deltapm;
+	int pm_referenced = 0;
+
+	local_irq_disable();
+
+	/* Replace the global interrupt handler */
+	real_handler = global_clock_event->event_handler;
+	global_clock_event->event_handler = lapic_cal_handler;
+
+	/*
+	 * Setup the APIC counter to 1e9. There is no way the lapic
+	 * can underflow in the 100ms detection time frame
+	 */
+	__setup_APIC_LVTT(1000000000, 0, 0);
+
+	/* Let the interrupts run */
+	local_irq_enable();
+
+	while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+		cpu_relax();
+
+	local_irq_disable();
+
+	/* Restore the real event handler */
+	global_clock_event->event_handler = real_handler;
+
+	/* Build delta t1-t2 as apic timer counts down */
+	delta = lapic_cal_t1 - lapic_cal_t2;
+	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+
+	/* Check, if the PM timer is available */
+	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
+	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+	if (deltapm) {
+		unsigned long mult;
+		u64 res;
+
+		mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+		if (deltapm > (pm_100ms - pm_thresh) &&
+		    deltapm < (pm_100ms + pm_thresh)) {
+			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+		} else {
+			res = (((u64) deltapm) *  mult) >> 22;
+			do_div(res, 1000000);
+			printk(KERN_WARNING "APIC calibration not consistent "
+			       "with PM Timer: %ldms instead of 100ms\n",
+			       (long)res);
+			/* Correct the lapic counter value */
+			res = (((u64) delta) * pm_100ms);
+			do_div(res, deltapm);
+			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+			       "%lu (%ld)\n", (unsigned long) res, delta);
+			delta = (long) res;
+		}
+		pm_referenced = 1;
+	}
+
+	/* Calculate the scaled math multiplication factor */
+	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
+				       lapic_clockevent.shift);
+	lapic_clockevent.max_delta_ns =
+		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+	lapic_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xF, &lapic_clockevent);
+
+	calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+
+	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
+	apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
+	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
+		    calibration_result);
+
+	if (cpu_has_tsc) {
+		delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
+		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
+			    "%ld.%04ld MHz.\n",
+			    (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ),
+			    (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ));
+	}
+
+	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
+		    "%u.%04u MHz.\n",
+		    calibration_result / (1000000 / HZ),
+		    calibration_result % (1000000 / HZ));
+
+	/*
+	 * Do a sanity check on the APIC calibration result
+	 */
+	if (calibration_result < (1000000 / HZ)) {
+		local_irq_enable();
+		printk(KERN_WARNING
+		       "APIC frequency too slow, disabling apic timer\n");
+		return -1;
+	}
+
+	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
+
+	/* We trust the pm timer based calibration */
+	if (!pm_referenced) {
+		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
+
+		/*
+		 * Setup the apic timer manually
+		 */
+		levt->event_handler = lapic_cal_handler;
+		lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
+		lapic_cal_loops = -1;
+
+		/* Let the interrupts run */
+		local_irq_enable();
+
+		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+			cpu_relax();
+
+		local_irq_disable();
+
+		/* Stop the lapic timer */
+		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
+
+		local_irq_enable();
+
+		/* Jiffies delta */
+		deltaj = lapic_cal_j2 - lapic_cal_j1;
+		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+
+		/* Check, if the jiffies result is consistent */
+		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
+			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
+		else
+			levt->features |= CLOCK_EVT_FEAT_DUMMY;
+	} else
+		local_irq_enable();
+
+	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
+		printk(KERN_WARNING
+		       "APIC timer disabled due to verification failure.\n");
+			return -1;
+	}
+
+	return 0;
+}
+
+#endif
+
 /*
  * Setup the boot APIC
  *
-- 
GitLab


From 6c15822752a13748241e1a34068f2b15b660d28e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 02:01:55 -0700
Subject: [PATCH 466/895] x86: apic copy apic_64.c to apic_32.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 188 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 188 insertions(+)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 05498c37f8d9..0b11d6e3f091 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -90,6 +90,24 @@ static __init int setup_apicpmtimer(char *s)
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
+#ifdef CONFIG_X86_64
+#define HAVE_X2APIC
+#endif
+
+#ifdef HAVE_X2APIC
+int x2apic;
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
+int disable_x2apic;
+static __init int setup_nox2apic(char *str)
+{
+	disable_x2apic = 1;
+	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+	return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
+
 unsigned long mp_lapic_addr;
 int disable_apic;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
@@ -231,6 +249,42 @@ static struct apic_ops xapic_ops = {
 struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 EXPORT_SYMBOL_GPL(apic_ops);
 
+#ifdef HAVE_X2APIC
+static void x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+	unsigned long val;
+
+	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+	return val;
+}
+
+static struct apic_ops x2apic_ops = {
+	.read = native_apic_msr_read,
+	.write = native_apic_msr_write,
+	.icr_read = x2apic_icr_read,
+	.icr_write = x2apic_icr_write,
+	.wait_icr_idle = x2apic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+#endif
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -1308,6 +1362,127 @@ void __cpuinit end_local_APIC_setup(void)
 	apic_pm_activate();
 }
 
+#ifdef HAVE_X2APIC
+void check_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+	if (msr & X2APIC_ENABLE) {
+		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+		x2apic_preenabled = x2apic = 1;
+		apic_ops = &x2apic_ops;
+	}
+}
+
+void enable_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (!(msr & X2APIC_ENABLE)) {
+		printk("Enabling x2apic\n");
+		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+	}
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+	int ret;
+	unsigned long flags;
+
+	if (!cpu_has_x2apic)
+		return;
+
+	if (!x2apic_preenabled && disable_x2apic) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of nox2apic\n");
+		return;
+	}
+
+	if (x2apic_preenabled && disable_x2apic)
+		panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+	if (!x2apic_preenabled && skip_ioapic_setup) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of skipping io-apic setup\n");
+		return;
+	}
+
+	ret = dmar_table_init();
+	if (ret) {
+		printk(KERN_INFO
+		       "dmar_table_init() failed with %d:\n", ret);
+
+		if (x2apic_preenabled)
+			panic("x2apic enabled by bios. But IR enabling failed");
+		else
+			printk(KERN_INFO
+			       "Not enabling x2apic,Intr-remapping\n");
+		return;
+	}
+
+	local_irq_save(flags);
+	mask_8259A();
+	save_mask_IO_APIC_setup();
+
+	ret = enable_intr_remapping(1);
+
+	if (ret && x2apic_preenabled) {
+		local_irq_restore(flags);
+		panic("x2apic enabled by bios. But IR enabling failed");
+	}
+
+	if (ret)
+		goto end;
+
+	if (!x2apic) {
+		x2apic = 1;
+		apic_ops = &x2apic_ops;
+		enable_x2apic();
+	}
+end:
+	if (ret)
+		/*
+		 * IR enabling failed
+		 */
+		restore_IO_APIC_setup();
+	else
+		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+	unmask_8259A();
+	local_irq_restore(flags);
+
+	if (!ret) {
+		if (!x2apic_preenabled)
+			printk(KERN_INFO
+			       "Enabled x2apic and interrupt-remapping\n");
+		else
+			printk(KERN_INFO
+			       "Enabled Interrupt-remapping\n");
+	} else
+		printk(KERN_ERR
+		       "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+	if (!cpu_has_x2apic)
+		return;
+
+	if (x2apic_preenabled)
+		panic("x2apic enabled prior OS handover,"
+		      " enable CONFIG_INTR_REMAP");
+
+	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+	       " and x2apic\n");
+#endif
+
+	return;
+}
+#endif /* HAVE_X2APIC */
+
 #ifdef CONFIG_X86_64
 /*
  * Detect and enable local APICs on non-SMP boards.
@@ -1438,6 +1613,13 @@ void __init early_init_lapic_mapping(void)
  */
 void __init init_apic_mappings(void)
 {
+#ifdef HAVE_X2APIC
+	if (x2apic) {
+		boot_cpu_physical_apicid = read_apic_id();
+		return;
+	}
+#endif
+
 	/*
 	 * If no local APIC can be found then set up a fake all
 	 * zeroes page to simulate the local APIC and another
@@ -1501,6 +1683,7 @@ int __init APIC_init_uniprocessor(void)
 #ifdef CONFIG_X86_64
 	setup_apic_routing();
 #endif
+
 	verify_local_APIC();
 	connect_bsp_APIC();
 
@@ -1879,6 +2062,11 @@ static int lapic_resume(struct sys_device *dev)
 
 	local_irq_save(flags);
 
+#ifdef HAVE_X2APIC
+	if (x2apic)
+		enable_x2apic();
+	else
+#endif
 	{
 		/*
 		 * Make sure the APICBASE points to the right address
-- 
GitLab


From 0f611ffaea81b8e1c69682188ba1ccaf7683a2ba Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 02:01:56 -0700
Subject: [PATCH 467/895] x86: rename apic_32.c and apic_64.c to apic.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile              |    2 +-
 arch/x86/kernel/{apic_64.c => apic.c} |    0
 arch/x86/kernel/apic_32.c             | 2312 -------------------------
 3 files changed, 1 insertion(+), 2313 deletions(-)
 rename arch/x86/kernel/{apic_64.c => apic.c} (100%)
 delete mode 100644 arch/x86/kernel/apic_32.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7264f9c3af8f..45cecc59d894 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_X86_32_SMP)	+= smpcommon.o
 obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic_$(BITS).o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic.c
similarity index 100%
rename from arch/x86/kernel/apic_64.c
rename to arch/x86/kernel/apic.c
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
deleted file mode 100644
index 0b11d6e3f091..000000000000
--- a/arch/x86/kernel/apic_32.c
+++ /dev/null
@@ -1,2312 +0,0 @@
-/*
- *	Local APIC handling, local APIC timers
- *
- *	(c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- *	Fixes
- *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
- *					thanks to Eric Gilmore
- *					and Rolf G. Tews
- *					for testing these extensively.
- *	Maciej W. Rozycki	:	Various updates and fixes.
- *	Mikael Pettersson	:	Power Management for UP-APIC.
- *	Pavel Machek and
- *	Mikael Pettersson	:	PM converted to driver model.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/ioport.h>
-#include <linux/cpu.h>
-#include <linux/clockchips.h>
-#include <linux/acpi_pmtmr.h>
-#include <linux/module.h>
-#include <linux/dmi.h>
-#include <linux/dmar.h>
-
-#include <asm/atomic.h>
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/desc.h>
-#include <asm/arch_hooks.h>
-#include <asm/hpet.h>
-#include <asm/pgalloc.h>
-#include <asm/i8253.h>
-#include <asm/nmi.h>
-#include <asm/idle.h>
-#include <asm/proto.h>
-#include <asm/timex.h>
-#include <asm/apic.h>
-#include <asm/i8259.h>
-
-#include <mach_apic.h>
-#include <mach_apicdef.h>
-#include <mach_ipi.h>
-
-/*
- * Sanity check
- */
-#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
-# error SPURIOUS_APIC_VECTOR definition error
-#endif
-
-#ifdef CONFIG_X86_32
-/*
- * Knob to control our willingness to enable the local APIC.
- *
- * +1=force-enable
- */
-static int force_enable_local_apic;
-/*
- * APIC command line parameters
- */
-static int __init parse_lapic(char *arg)
-{
-	force_enable_local_apic = 1;
-	return 0;
-}
-early_param("lapic", parse_lapic);
-/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
-
-#endif
-
-#ifdef CONFIG_X86_64
-static int apic_calibrate_pmtmr __initdata;
-static __init int setup_apicpmtimer(char *s)
-{
-	apic_calibrate_pmtmr = 1;
-	notsc_setup(NULL);
-	return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-#endif
-
-#ifdef CONFIG_X86_64
-#define HAVE_X2APIC
-#endif
-
-#ifdef HAVE_X2APIC
-int x2apic;
-/* x2apic enabled before OS handover */
-int x2apic_preenabled;
-int disable_x2apic;
-static __init int setup_nox2apic(char *str)
-{
-	disable_x2apic = 1;
-	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
-	return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-#endif
-
-unsigned long mp_lapic_addr;
-int disable_apic;
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
-/* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
-EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-
-int first_system_vector = 0xfe;
-
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
-/*
- * Debug level, exported for io_apic.c
- */
-unsigned int apic_verbosity;
-
-int pic_mode;
-
-/* Have we found an MP table */
-int smp_found_config;
-
-static struct resource lapic_resource = {
-	.name = "Local APIC",
-	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
-};
-
-static unsigned int calibration_result;
-
-static int lapic_next_event(unsigned long delta,
-			    struct clock_event_device *evt);
-static void lapic_timer_setup(enum clock_event_mode mode,
-			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
-static void apic_pm_activate(void);
-
-/*
- * The local apic timer can be used for any function which is CPU local.
- */
-static struct clock_event_device lapic_clockevent = {
-	.name		= "lapic",
-	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
-			| CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
-	.shift		= 32,
-	.set_mode	= lapic_timer_setup,
-	.set_next_event	= lapic_next_event,
-	.broadcast	= lapic_timer_broadcast,
-	.rating		= 100,
-	.irq		= -1,
-};
-static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-
-static unsigned long apic_phys;
-
-/*
- * Get the LAPIC version
- */
-static inline int lapic_get_version(void)
-{
-	return GET_APIC_VERSION(apic_read(APIC_LVR));
-}
-
-/*
- * Check, if the APIC is integrated or a separate chip
- */
-static inline int lapic_is_integrated(void)
-{
-#ifdef CONFIG_X86_64
-	return 1;
-#else
-	return APIC_INTEGRATED(lapic_get_version());
-#endif
-}
-
-/*
- * Check, whether this is a modern or a first generation APIC
- */
-static int modern_apic(void)
-{
-	/* AMD systems use old APIC versions, so check the CPU */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-	    boot_cpu_data.x86 >= 0xf)
-		return 1;
-	return lapic_get_version() >= 0x14;
-}
-
-/*
- * Paravirt kernels also might be using these below ops. So we still
- * use generic apic_read()/apic_write(), which might be pointing to different
- * ops in PARAVIRT case.
- */
-void xapic_wait_icr_idle(void)
-{
-	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
-		cpu_relax();
-}
-
-u32 safe_xapic_wait_icr_idle(void)
-{
-	u32 send_status;
-	int timeout;
-
-	timeout = 0;
-	do {
-		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-		if (!send_status)
-			break;
-		udelay(100);
-	} while (timeout++ < 1000);
-
-	return send_status;
-}
-
-void xapic_icr_write(u32 low, u32 id)
-{
-	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
-	apic_write(APIC_ICR, low);
-}
-
-u64 xapic_icr_read(void)
-{
-	u32 icr1, icr2;
-
-	icr2 = apic_read(APIC_ICR2);
-	icr1 = apic_read(APIC_ICR);
-
-	return icr1 | ((u64)icr2 << 32);
-}
-
-static struct apic_ops xapic_ops = {
-	.read = native_apic_mem_read,
-	.write = native_apic_mem_write,
-	.icr_read = xapic_icr_read,
-	.icr_write = xapic_icr_write,
-	.wait_icr_idle = xapic_wait_icr_idle,
-	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
-};
-
-struct apic_ops __read_mostly *apic_ops = &xapic_ops;
-EXPORT_SYMBOL_GPL(apic_ops);
-
-#ifdef HAVE_X2APIC
-static void x2apic_wait_icr_idle(void)
-{
-	/* no need to wait for icr idle in x2apic */
-	return;
-}
-
-static u32 safe_x2apic_wait_icr_idle(void)
-{
-	/* no need to wait for icr idle in x2apic */
-	return 0;
-}
-
-void x2apic_icr_write(u32 low, u32 id)
-{
-	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
-}
-
-u64 x2apic_icr_read(void)
-{
-	unsigned long val;
-
-	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
-	return val;
-}
-
-static struct apic_ops x2apic_ops = {
-	.read = native_apic_msr_read,
-	.write = native_apic_msr_write,
-	.icr_read = x2apic_icr_read,
-	.icr_write = x2apic_icr_write,
-	.wait_icr_idle = x2apic_wait_icr_idle,
-	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
-};
-#endif
-
-/**
- * enable_NMI_through_LVT0 - enable NMI through local vector table 0
- */
-void __cpuinit enable_NMI_through_LVT0(void)
-{
-	unsigned int v;
-
-	/* unmask and set to NMI */
-	v = APIC_DM_NMI;
-
-	/* Level triggered for 82489DX (32bit mode) */
-	if (!lapic_is_integrated())
-		v |= APIC_LVT_LEVEL_TRIGGER;
-
-	apic_write(APIC_LVT0, v);
-}
-
-#ifdef CONFIG_X86_32
-/**
- * get_physical_broadcast - Get number of physical broadcast IDs
- */
-int get_physical_broadcast(void)
-{
-	return modern_apic() ? 0xff : 0xf;
-}
-#endif
-
-/**
- * lapic_get_maxlvt - get the maximum number of local vector table entries
- */
-int lapic_get_maxlvt(void)
-{
-	unsigned int v;
-
-	v = apic_read(APIC_LVR);
-	/*
-	 * - we always have APIC integrated on 64bit mode
-	 * - 82489DXs do not report # of LVT entries
-	 */
-	return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
-}
-
-/*
- * Local APIC timer
- */
-
-/* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
-#define APIC_DIVISOR 16
-#endif
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
-{
-	unsigned int lvtt_value, tmp_value;
-
-	lvtt_value = LOCAL_TIMER_VECTOR;
-	if (!oneshot)
-		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
-	if (!lapic_is_integrated())
-		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
-
-	if (!irqen)
-		lvtt_value |= APIC_LVT_MASKED;
-
-	apic_write(APIC_LVTT, lvtt_value);
-
-	/*
-	 * Divide PICLK by 16
-	 */
-	tmp_value = apic_read(APIC_TDCR);
-	apic_write(APIC_TDCR,
-		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
-		APIC_TDR_DIV_16);
-
-	if (!oneshot)
-		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
-}
-
-/*
- * Setup extended LVT, AMD specific (K8, family 10h)
- *
- * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
- * MCE interrupts are supported. Thus MCE offset must be set to 0.
- *
- * If mask=1, the LVT entry does not generate interrupts while mask=0
- * enables the vector. See also the BKDGs.
- */
-
-#define APIC_EILVT_LVTOFF_MCE 0
-#define APIC_EILVT_LVTOFF_IBS 1
-
-static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
-{
-	unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
-	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
-
-	apic_write(reg, v);
-}
-
-u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
-{
-	setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
-	return APIC_EILVT_LVTOFF_MCE;
-}
-
-u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
-{
-	setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
-	return APIC_EILVT_LVTOFF_IBS;
-}
-EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
-
-/*
- * Program the next event, relative to now
- */
-static int lapic_next_event(unsigned long delta,
-			    struct clock_event_device *evt)
-{
-	apic_write(APIC_TMICT, delta);
-	return 0;
-}
-
-/*
- * Setup the lapic timer in periodic or oneshot mode
- */
-static void lapic_timer_setup(enum clock_event_mode mode,
-			      struct clock_event_device *evt)
-{
-	unsigned long flags;
-	unsigned int v;
-
-	/* Lapic used as dummy for broadcast ? */
-	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
-		return;
-
-	local_irq_save(flags);
-
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-	case CLOCK_EVT_MODE_ONESHOT:
-		__setup_APIC_LVTT(calibration_result,
-				  mode != CLOCK_EVT_MODE_PERIODIC, 1);
-		break;
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-		v = apic_read(APIC_LVTT);
-		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-		apic_write(APIC_LVTT, v);
-		break;
-	case CLOCK_EVT_MODE_RESUME:
-		/* Nothing to do here */
-		break;
-	}
-
-	local_irq_restore(flags);
-}
-
-/*
- * Local APIC timer broadcast function
- */
-static void lapic_timer_broadcast(cpumask_t mask)
-{
-#ifdef CONFIG_SMP
-	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
-#endif
-}
-
-/*
- * Setup the local APIC timer for this CPU. Copy the initilized values
- * of the boot CPU and register the clock event in the framework.
- */
-static void __cpuinit setup_APIC_timer(void)
-{
-	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-
-	memcpy(levt, &lapic_clockevent, sizeof(*levt));
-	levt->cpumask = cpumask_of_cpu(smp_processor_id());
-
-	clockevents_register_device(levt);
-}
-
-#ifdef CONFIG_X86_64
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static int __init calibrate_APIC_clock(void)
-{
-	unsigned apic, apic_start;
-	unsigned long tsc, tsc_start;
-	int result;
-
-	local_irq_disable();
-
-	/*
-	 * Put whatever arbitrary (but long enough) timeout
-	 * value into the APIC clock, we just want to get the
-	 * counter running for calibration.
-	 *
-	 * No interrupt enable !
-	 */
-	__setup_APIC_LVTT(250000000, 0, 0);
-
-	apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
-	if (apic_calibrate_pmtmr && pmtmr_ioport) {
-		pmtimer_wait(5000);  /* 5ms wait */
-		apic = apic_read(APIC_TMCCT);
-		result = (apic_start - apic) * 1000L / 5;
-	} else
-#endif
-	{
-		rdtscll(tsc_start);
-
-		do {
-			apic = apic_read(APIC_TMCCT);
-			rdtscll(tsc);
-		} while ((tsc - tsc_start) < TICK_COUNT &&
-				(apic_start - apic) < TICK_COUNT);
-
-		result = (apic_start - apic) * 1000L * tsc_khz /
-					(tsc - tsc_start);
-	}
-
-	local_irq_enable();
-
-	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
-
-	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
-		result / 1000 / 1000, result / 1000 % 1000);
-
-	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
-				       lapic_clockevent.shift);
-	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-	lapic_clockevent.min_delta_ns =
-		clockevent_delta2ns(0xF, &lapic_clockevent);
-
-	calibration_result = (result * APIC_DIVISOR) / HZ;
-
-	/*
-	 * Do a sanity check on the APIC calibration result
-	 */
-	if (calibration_result < (1000000 / HZ)) {
-		printk(KERN_WARNING
-			"APIC frequency too slow, disabling apic timer\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-#else
-/*
- * In this functions we calibrate APIC bus clocks to the external timer.
- *
- * We want to do the calibration only once since we want to have local timer
- * irqs syncron. CPUs connected by the same APIC bus have the very same bus
- * frequency.
- *
- * This was previously done by reading the PIT/HPET and waiting for a wrap
- * around to find out, that a tick has elapsed. I have a box, where the PIT
- * readout is broken, so it never gets out of the wait loop again. This was
- * also reported by others.
- *
- * Monitoring the jiffies value is inaccurate and the clockevents
- * infrastructure allows us to do a simple substitution of the interrupt
- * handler.
- *
- * The calibration routine also uses the pm_timer when possible, as the PIT
- * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
- * back to normal later in the boot process).
- */
-
-#define LAPIC_CAL_LOOPS		(HZ/10)
-
-static __initdata int lapic_cal_loops = -1;
-static __initdata long lapic_cal_t1, lapic_cal_t2;
-static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
-static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
-static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
-
-/*
- * Temporary interrupt handler.
- */
-static void __init lapic_cal_handler(struct clock_event_device *dev)
-{
-	unsigned long long tsc = 0;
-	long tapic = apic_read(APIC_TMCCT);
-	unsigned long pm = acpi_pm_read_early();
-
-	if (cpu_has_tsc)
-		rdtscll(tsc);
-
-	switch (lapic_cal_loops++) {
-	case 0:
-		lapic_cal_t1 = tapic;
-		lapic_cal_tsc1 = tsc;
-		lapic_cal_pm1 = pm;
-		lapic_cal_j1 = jiffies;
-		break;
-
-	case LAPIC_CAL_LOOPS:
-		lapic_cal_t2 = tapic;
-		lapic_cal_tsc2 = tsc;
-		if (pm < lapic_cal_pm1)
-			pm += ACPI_PM_OVRRUN;
-		lapic_cal_pm2 = pm;
-		lapic_cal_j2 = jiffies;
-		break;
-	}
-}
-
-static int __init calibrate_APIC_clock(void)
-{
-	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-	const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
-	const long pm_thresh = pm_100ms/100;
-	void (*real_handler)(struct clock_event_device *dev);
-	unsigned long deltaj;
-	long delta, deltapm;
-	int pm_referenced = 0;
-
-	local_irq_disable();
-
-	/* Replace the global interrupt handler */
-	real_handler = global_clock_event->event_handler;
-	global_clock_event->event_handler = lapic_cal_handler;
-
-	/*
-	 * Setup the APIC counter to 1e9. There is no way the lapic
-	 * can underflow in the 100ms detection time frame
-	 */
-	__setup_APIC_LVTT(1000000000, 0, 0);
-
-	/* Let the interrupts run */
-	local_irq_enable();
-
-	while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
-		cpu_relax();
-
-	local_irq_disable();
-
-	/* Restore the real event handler */
-	global_clock_event->event_handler = real_handler;
-
-	/* Build delta t1-t2 as apic timer counts down */
-	delta = lapic_cal_t1 - lapic_cal_t2;
-	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
-
-	/* Check, if the PM timer is available */
-	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
-	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
-
-	if (deltapm) {
-		unsigned long mult;
-		u64 res;
-
-		mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
-
-		if (deltapm > (pm_100ms - pm_thresh) &&
-		    deltapm < (pm_100ms + pm_thresh)) {
-			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
-		} else {
-			res = (((u64) deltapm) *  mult) >> 22;
-			do_div(res, 1000000);
-			printk(KERN_WARNING "APIC calibration not consistent "
-			       "with PM Timer: %ldms instead of 100ms\n",
-			       (long)res);
-			/* Correct the lapic counter value */
-			res = (((u64) delta) * pm_100ms);
-			do_div(res, deltapm);
-			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
-			       "%lu (%ld)\n", (unsigned long) res, delta);
-			delta = (long) res;
-		}
-		pm_referenced = 1;
-	}
-
-	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
-				       lapic_clockevent.shift);
-	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-	lapic_clockevent.min_delta_ns =
-		clockevent_delta2ns(0xF, &lapic_clockevent);
-
-	calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
-
-	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
-	apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
-	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
-		    calibration_result);
-
-	if (cpu_has_tsc) {
-		delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
-		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
-			    "%ld.%04ld MHz.\n",
-			    (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ),
-			    (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ));
-	}
-
-	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
-		    "%u.%04u MHz.\n",
-		    calibration_result / (1000000 / HZ),
-		    calibration_result % (1000000 / HZ));
-
-	/*
-	 * Do a sanity check on the APIC calibration result
-	 */
-	if (calibration_result < (1000000 / HZ)) {
-		local_irq_enable();
-		printk(KERN_WARNING
-		       "APIC frequency too slow, disabling apic timer\n");
-		return -1;
-	}
-
-	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
-
-	/* We trust the pm timer based calibration */
-	if (!pm_referenced) {
-		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
-
-		/*
-		 * Setup the apic timer manually
-		 */
-		levt->event_handler = lapic_cal_handler;
-		lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
-		lapic_cal_loops = -1;
-
-		/* Let the interrupts run */
-		local_irq_enable();
-
-		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
-			cpu_relax();
-
-		local_irq_disable();
-
-		/* Stop the lapic timer */
-		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
-
-		local_irq_enable();
-
-		/* Jiffies delta */
-		deltaj = lapic_cal_j2 - lapic_cal_j1;
-		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
-
-		/* Check, if the jiffies result is consistent */
-		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
-			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
-		else
-			levt->features |= CLOCK_EVT_FEAT_DUMMY;
-	} else
-		local_irq_enable();
-
-	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
-		printk(KERN_WARNING
-		       "APIC timer disabled due to verification failure.\n");
-			return -1;
-	}
-
-	return 0;
-}
-
-#endif
-
-/*
- * Setup the boot APIC
- *
- * Calibrate and verify the result.
- */
-void __init setup_boot_APIC_clock(void)
-{
-	/*
-	 * The local apic timer can be disabled via the kernel
-	 * commandline or from the CPU detection code. Register the lapic
-	 * timer as a dummy clock event source on SMP systems, so the
-	 * broadcast mechanism is used. On UP systems simply ignore it.
-	 */
-	if (disable_apic_timer) {
-		printk(KERN_INFO "Disabling APIC timer\n");
-		/* No broadcast on UP ! */
-		if (num_possible_cpus() > 1) {
-			lapic_clockevent.mult = 1;
-			setup_APIC_timer();
-		}
-		return;
-	}
-
-	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
-		    "calibrating APIC timer ...\n");
-
-	if (calibrate_APIC_clock()) {
-		/* No broadcast on UP ! */
-		if (num_possible_cpus() > 1)
-			setup_APIC_timer();
-		return;
-	}
-
-	/*
-	 * If nmi_watchdog is set to IO_APIC, we need the
-	 * PIT/HPET going.  Otherwise register lapic as a dummy
-	 * device.
-	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		printk(KERN_WARNING "APIC timer registered as dummy,"
-			" due to nmi_watchdog=%d!\n", nmi_watchdog);
-
-	/* Setup the lapic or request the broadcast */
-	setup_APIC_timer();
-}
-
-void __cpuinit setup_secondary_APIC_clock(void)
-{
-	setup_APIC_timer();
-}
-
-/*
- * The guts of the apic timer interrupt
- */
-static void local_apic_timer_interrupt(void)
-{
-	int cpu = smp_processor_id();
-	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
-
-	/*
-	 * Normally we should not be here till LAPIC has been initialized but
-	 * in some cases like kdump, its possible that there is a pending LAPIC
-	 * timer interrupt from previous kernel's context and is delivered in
-	 * new kernel the moment interrupts are enabled.
-	 *
-	 * Interrupts are enabled early and LAPIC is setup much later, hence
-	 * its possible that when we get here evt->event_handler is NULL.
-	 * Check for event_handler being NULL and discard the interrupt as
-	 * spurious.
-	 */
-	if (!evt->event_handler) {
-		printk(KERN_WARNING
-		       "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
-		/* Switch it off */
-		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
-		return;
-	}
-
-	/*
-	 * the NMI deadlock-detector uses this.
-	 */
-#ifdef CONFIG_X86_64
-	add_pda(apic_timer_irqs, 1);
-#else
-	per_cpu(irq_stat, cpu).apic_timer_irqs++;
-#endif
-
-	evt->event_handler(evt);
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- *   interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
-	struct pt_regs *old_regs = set_irq_regs(regs);
-
-	/*
-	 * NOTE! We'd better ACK the irq immediately,
-	 * because timer handling can be slow.
-	 */
-	ack_APIC_irq();
-	/*
-	 * update_process_times() expects us to have done irq_enter().
-	 * Besides, if we don't timer interrupts ignore the global
-	 * interrupt lock, which is the WrongThing (tm) to do.
-	 */
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-	local_apic_timer_interrupt();
-	irq_exit();
-
-	set_irq_regs(old_regs);
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-	return -EINVAL;
-}
-
-/*
- * Local APIC start and shutdown
- */
-
-/**
- * clear_local_APIC - shutdown the local APIC
- *
- * This is called, when a CPU is disabled and before rebooting, so the state of
- * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
- * leftovers during boot.
- */
-void clear_local_APIC(void)
-{
-	int maxlvt;
-	u32 v;
-
-	/* APIC hasn't been mapped yet */
-	if (!apic_phys)
-		return;
-
-	maxlvt = lapic_get_maxlvt();
-	/*
-	 * Masking an LVT entry can trigger a local APIC error
-	 * if the vector is zero. Mask LVTERR first to prevent this.
-	 */
-	if (maxlvt >= 3) {
-		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
-		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
-	}
-	/*
-	 * Careful: we have to set masks only first to deassert
-	 * any level-triggered sources.
-	 */
-	v = apic_read(APIC_LVTT);
-	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-	v = apic_read(APIC_LVT1);
-	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
-	if (maxlvt >= 4) {
-		v = apic_read(APIC_LVTPC);
-		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
-	}
-
-	/* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
-	if (maxlvt >= 5) {
-		v = apic_read(APIC_LVTTHMR);
-		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
-	}
-#endif
-	/*
-	 * Clean APIC state for other OSs:
-	 */
-	apic_write(APIC_LVTT, APIC_LVT_MASKED);
-	apic_write(APIC_LVT0, APIC_LVT_MASKED);
-	apic_write(APIC_LVT1, APIC_LVT_MASKED);
-	if (maxlvt >= 3)
-		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
-	if (maxlvt >= 4)
-		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-
-	/* Integrated APIC (!82489DX) ? */
-	if (lapic_is_integrated()) {
-		if (maxlvt > 3)
-			/* Clear ESR due to Pentium errata 3AP and 11AP */
-			apic_write(APIC_ESR, 0);
-		apic_read(APIC_ESR);
-	}
-}
-
-/**
- * disable_local_APIC - clear and disable the local APIC
- */
-void disable_local_APIC(void)
-{
-	unsigned int value;
-
-	clear_local_APIC();
-
-	/*
-	 * Disable APIC (implies clearing of registers
-	 * for 82489DX!).
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_SPIV_APIC_ENABLED;
-	apic_write(APIC_SPIV, value);
-
-#ifdef CONFIG_X86_32
-	/*
-	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
-	 * restore the disabled state.
-	 */
-	if (enabled_via_apicbase) {
-		unsigned int l, h;
-
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		l &= ~MSR_IA32_APICBASE_ENABLE;
-		wrmsr(MSR_IA32_APICBASE, l, h);
-	}
-#endif
-}
-
-/*
- * If Linux enabled the LAPIC against the BIOS default disable it down before
- * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
- * not power-off.  Additionally clear all LVT entries before disable_local_APIC
- * for the case where Linux didn't enable the LAPIC.
- */
-void lapic_shutdown(void)
-{
-	unsigned long flags;
-
-	if (!cpu_has_apic)
-		return;
-
-	local_irq_save(flags);
-
-#ifdef CONFIG_X86_32
-	if (!enabled_via_apicbase)
-		clear_local_APIC();
-	else
-#endif
-		disable_local_APIC();
-
-
-	local_irq_restore(flags);
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-	unsigned int reg0, reg1;
-
-	/*
-	 * The version register is read-only in a real APIC.
-	 */
-	reg0 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-	apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-	reg1 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-	/*
-	 * The two version reads above should print the same
-	 * numbers.  If the second one is different, then we
-	 * poke at a non-APIC.
-	 */
-	if (reg1 != reg0)
-		return 0;
-
-	/*
-	 * Check if the version looks reasonably.
-	 */
-	reg1 = GET_APIC_VERSION(reg0);
-	if (reg1 == 0x00 || reg1 == 0xff)
-		return 0;
-	reg1 = lapic_get_maxlvt();
-	if (reg1 < 0x02 || reg1 == 0xff)
-		return 0;
-
-	/*
-	 * The ID register is read/write in a real APIC.
-	 */
-	reg0 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-	apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-	reg1 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-	apic_write(APIC_ID, reg0);
-	if (reg1 != (reg0 ^ APIC_ID_MASK))
-		return 0;
-
-	/*
-	 * The next two are just to see if we have sane values.
-	 * They're only really relevant if we're in Virtual Wire
-	 * compatibility mode, but most boxes are anymore.
-	 */
-	reg0 = apic_read(APIC_LVT0);
-	apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-	reg1 = apic_read(APIC_LVT1);
-	apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-	return 1;
-}
-
-/**
- * sync_Arb_IDs - synchronize APIC bus arbitration IDs
- */
-void __init sync_Arb_IDs(void)
-{
-	/*
-	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
-	 * needed on AMD.
-	 */
-	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		return;
-
-	/*
-	 * Wait for idle.
-	 */
-	apic_wait_icr_idle();
-
-	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-	apic_write(APIC_ICR, APIC_DEST_ALLINC |
-			APIC_INT_LEVELTRIG | APIC_DM_INIT);
-}
-
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
-	unsigned int value;
-
-	/*
-	 * Don't do the setup now if we have a SMP BIOS as the
-	 * through-I/O-APIC virtual wire mode might be active.
-	 */
-	if (smp_found_config || !cpu_has_apic)
-		return;
-
-	/*
-	 * Do not trust the local APIC being empty at bootup.
-	 */
-	clear_local_APIC();
-
-	/*
-	 * Enable APIC.
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-	/* This bit is reserved on P4/Xeon and should be cleared */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-	    (boot_cpu_data.x86 == 15))
-		value &= ~APIC_SPIV_FOCUS_DISABLED;
-	else
-#endif
-		value |= APIC_SPIV_FOCUS_DISABLED;
-	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
-
-	/*
-	 * Set up the virtual wire mode.
-	 */
-	apic_write(APIC_LVT0, APIC_DM_EXTINT);
-	value = APIC_DM_NMI;
-	if (!lapic_is_integrated())		/* 82489DX */
-		value |= APIC_LVT_LEVEL_TRIGGER;
-	apic_write(APIC_LVT1, value);
-}
-
-static void __cpuinit lapic_setup_esr(void)
-{
-	unsigned long oldvalue, value, maxlvt;
-	if (lapic_is_integrated() && !esr_disable) {
-		if (esr_disable) {
-			/*
-			 * Something untraceable is creating bad interrupts on
-			 * secondary quads ... for the moment, just leave the
-			 * ESR disabled - we can't do anything useful with the
-			 * errors anyway - mbligh
-			 */
-			printk(KERN_INFO "Leaving ESR disabled.\n");
-			return;
-		}
-		/* !82489DX */
-		maxlvt = lapic_get_maxlvt();
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-		oldvalue = apic_read(APIC_ESR);
-
-		/* enables sending errors */
-		value = ERROR_APIC_VECTOR;
-		apic_write(APIC_LVTERR, value);
-		/*
-		 * spec says clear errors after enabling vector.
-		 */
-		if (maxlvt > 3)
-			apic_write(APIC_ESR, 0);
-		value = apic_read(APIC_ESR);
-		if (value != oldvalue)
-			apic_printk(APIC_VERBOSE, "ESR value before enabling "
-				"vector: 0x%08lx  after: 0x%08lx\n",
-				oldvalue, value);
-	} else {
-		printk(KERN_INFO "No ESR for 82489DX.\n");
-	}
-}
-
-
-/**
- * setup_local_APIC - setup the local APIC
- */
-void __cpuinit setup_local_APIC(void)
-{
-	unsigned int value;
-	int i, j;
-
-#ifdef CONFIG_X86_32
-	/* Pound the ESR really hard over the head with a big hammer - mbligh */
-	if (esr_disable) {
-		apic_write(APIC_ESR, 0);
-		apic_write(APIC_ESR, 0);
-		apic_write(APIC_ESR, 0);
-		apic_write(APIC_ESR, 0);
-	}
-#endif
-
-	preempt_disable();
-
-	/*
-	 * Double-check whether this APIC is really registered.
-	 * This is meaningless in clustered apic mode, so we skip it.
-	 */
-	if (!apic_id_registered())
-		BUG();
-
-	/*
-	 * Intel recommends to set DFR, LDR and TPR before enabling
-	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
-	 * document number 292116).  So here it goes...
-	 */
-	init_apic_ldr();
-
-	/*
-	 * Set Task Priority to 'accept all'. We never change this
-	 * later on.
-	 */
-	value = apic_read(APIC_TASKPRI);
-	value &= ~APIC_TPRI_MASK;
-	apic_write(APIC_TASKPRI, value);
-
-	/*
-	 * After a crash, we no longer service the interrupts and a pending
-	 * interrupt from previous kernel might still have ISR bit set.
-	 *
-	 * Most probably by now CPU has serviced that pending interrupt and
-	 * it might not have done the ack_APIC_irq() because it thought,
-	 * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
-	 * does not clear the ISR bit and cpu thinks it has already serivced
-	 * the interrupt. Hence a vector might get locked. It was noticed
-	 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
-	 */
-	for (i = APIC_ISR_NR - 1; i >= 0; i--) {
-		value = apic_read(APIC_ISR + i*0x10);
-		for (j = 31; j >= 0; j--) {
-			if (value & (1<<j))
-				ack_APIC_irq();
-		}
-	}
-
-	/*
-	 * Now that we are all set up, enable the APIC
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	/*
-	 * Enable APIC
-	 */
-	value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
-	 * certain networking cards. If high frequency interrupts are
-	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
-	 * entry is masked/unmasked at a high rate as well then sooner or
-	 * later IOAPIC line gets 'stuck', no more interrupts are received
-	 * from the device. If focus CPU is disabled then the hang goes
-	 * away, oh well :-(
-	 *
-	 * [ This bug can be reproduced easily with a level-triggered
-	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
-	 *   BX chipset. ]
-	 */
-	/*
-	 * Actually disabling the focus CPU check just makes the hang less
-	 * frequent as it makes the interrupt distributon model be more
-	 * like LRU than MRU (the short-term load is more even across CPUs).
-	 * See also the comment in end_level_ioapic_irq().  --macro
-	 */
-
-	/*
-	 * - enable focus processor (bit==0)
-	 * - 64bit mode always use processor focus
-	 *   so no need to set it
-	 */
-	value &= ~APIC_SPIV_FOCUS_DISABLED;
-#endif
-
-	/*
-	 * Set spurious IRQ vector
-	 */
-	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
-
-	/*
-	 * Set up LVT0, LVT1:
-	 *
-	 * set up through-local-APIC on the BP's LINT0. This is not
-	 * strictly necessary in pure symmetric-IO mode, but sometimes
-	 * we delegate interrupts to the 8259A.
-	 */
-	/*
-	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
-	 */
-	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-	if (!smp_processor_id() && (pic_mode || !value)) {
-		value = APIC_DM_EXTINT;
-		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-				smp_processor_id());
-	} else {
-		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-				smp_processor_id());
-	}
-	apic_write(APIC_LVT0, value);
-
-	/*
-	 * only the BP should see the LINT1 NMI signal, obviously.
-	 */
-	if (!smp_processor_id())
-		value = APIC_DM_NMI;
-	else
-		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	if (!lapic_is_integrated())		/* 82489DX */
-		value |= APIC_LVT_LEVEL_TRIGGER;
-	apic_write(APIC_LVT1, value);
-
-	preempt_enable();
-}
-
-void __cpuinit end_local_APIC_setup(void)
-{
-	lapic_setup_esr();
-
-#ifdef CONFIG_X86_32
-	{
-		unsigned int value;
-		/* Disable the local apic timer */
-		value = apic_read(APIC_LVTT);
-		value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-		apic_write(APIC_LVTT, value);
-	}
-#endif
-
-	setup_apic_nmi_watchdog(NULL);
-	apic_pm_activate();
-}
-
-#ifdef HAVE_X2APIC
-void check_x2apic(void)
-{
-	int msr, msr2;
-
-	rdmsr(MSR_IA32_APICBASE, msr, msr2);
-
-	if (msr & X2APIC_ENABLE) {
-		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
-		x2apic_preenabled = x2apic = 1;
-		apic_ops = &x2apic_ops;
-	}
-}
-
-void enable_x2apic(void)
-{
-	int msr, msr2;
-
-	rdmsr(MSR_IA32_APICBASE, msr, msr2);
-	if (!(msr & X2APIC_ENABLE)) {
-		printk("Enabling x2apic\n");
-		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
-	}
-}
-
-void enable_IR_x2apic(void)
-{
-#ifdef CONFIG_INTR_REMAP
-	int ret;
-	unsigned long flags;
-
-	if (!cpu_has_x2apic)
-		return;
-
-	if (!x2apic_preenabled && disable_x2apic) {
-		printk(KERN_INFO
-		       "Skipped enabling x2apic and Interrupt-remapping "
-		       "because of nox2apic\n");
-		return;
-	}
-
-	if (x2apic_preenabled && disable_x2apic)
-		panic("Bios already enabled x2apic, can't enforce nox2apic");
-
-	if (!x2apic_preenabled && skip_ioapic_setup) {
-		printk(KERN_INFO
-		       "Skipped enabling x2apic and Interrupt-remapping "
-		       "because of skipping io-apic setup\n");
-		return;
-	}
-
-	ret = dmar_table_init();
-	if (ret) {
-		printk(KERN_INFO
-		       "dmar_table_init() failed with %d:\n", ret);
-
-		if (x2apic_preenabled)
-			panic("x2apic enabled by bios. But IR enabling failed");
-		else
-			printk(KERN_INFO
-			       "Not enabling x2apic,Intr-remapping\n");
-		return;
-	}
-
-	local_irq_save(flags);
-	mask_8259A();
-	save_mask_IO_APIC_setup();
-
-	ret = enable_intr_remapping(1);
-
-	if (ret && x2apic_preenabled) {
-		local_irq_restore(flags);
-		panic("x2apic enabled by bios. But IR enabling failed");
-	}
-
-	if (ret)
-		goto end;
-
-	if (!x2apic) {
-		x2apic = 1;
-		apic_ops = &x2apic_ops;
-		enable_x2apic();
-	}
-end:
-	if (ret)
-		/*
-		 * IR enabling failed
-		 */
-		restore_IO_APIC_setup();
-	else
-		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
-
-	unmask_8259A();
-	local_irq_restore(flags);
-
-	if (!ret) {
-		if (!x2apic_preenabled)
-			printk(KERN_INFO
-			       "Enabled x2apic and interrupt-remapping\n");
-		else
-			printk(KERN_INFO
-			       "Enabled Interrupt-remapping\n");
-	} else
-		printk(KERN_ERR
-		       "Failed to enable Interrupt-remapping and x2apic\n");
-#else
-	if (!cpu_has_x2apic)
-		return;
-
-	if (x2apic_preenabled)
-		panic("x2apic enabled prior OS handover,"
-		      " enable CONFIG_INTR_REMAP");
-
-	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-	       " and x2apic\n");
-#endif
-
-	return;
-}
-#endif /* HAVE_X2APIC */
-
-#ifdef CONFIG_X86_64
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
- */
-static int __init detect_init_APIC(void)
-{
-	if (!cpu_has_apic) {
-		printk(KERN_INFO "No local APIC present\n");
-		return -1;
-	}
-
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-	boot_cpu_physical_apicid = 0;
-	return 0;
-}
-#else
-/*
- * Detect and initialize APIC
- */
-static int __init detect_init_APIC(void)
-{
-	u32 h, l, features;
-
-	/* Disabled by kernel option? */
-	if (disable_apic)
-		return -1;
-
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
-		    (boot_cpu_data.x86 == 15))
-			break;
-		goto no_apic;
-	case X86_VENDOR_INTEL:
-		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
-		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
-			break;
-		goto no_apic;
-	default:
-		goto no_apic;
-	}
-
-	if (!cpu_has_apic) {
-		/*
-		 * Over-ride BIOS and try to enable the local APIC only if
-		 * "lapic" specified.
-		 */
-		if (!force_enable_local_apic) {
-			printk(KERN_INFO "Local APIC disabled by BIOS -- "
-			       "you can enable it with \"lapic\"\n");
-			return -1;
-		}
-		/*
-		 * Some BIOSes disable the local APIC in the APIC_BASE
-		 * MSR. This can only be done in software for Intel P6 or later
-		 * and AMD K7 (Model > 1) or later.
-		 */
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-			printk(KERN_INFO
-			       "Local APIC disabled by BIOS -- reenabling.\n");
-			l &= ~MSR_IA32_APICBASE_BASE;
-			l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
-			wrmsr(MSR_IA32_APICBASE, l, h);
-			enabled_via_apicbase = 1;
-		}
-	}
-	/*
-	 * The APIC feature bit should now be enabled
-	 * in `cpuid'
-	 */
-	features = cpuid_edx(1);
-	if (!(features & (1 << X86_FEATURE_APIC))) {
-		printk(KERN_WARNING "Could not enable APIC!\n");
-		return -1;
-	}
-	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-	/* The BIOS may have set up the APIC at some other address */
-	rdmsr(MSR_IA32_APICBASE, l, h);
-	if (l & MSR_IA32_APICBASE_ENABLE)
-		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
-	printk(KERN_INFO "Found and enabled local APIC!\n");
-
-	apic_pm_activate();
-
-	return 0;
-
-no_apic:
-	printk(KERN_INFO "No local APIC present or hardware disabled\n");
-	return -1;
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void __init early_init_lapic_mapping(void)
-{
-	unsigned long phys_addr;
-
-	/*
-	 * If no local APIC can be found then go out
-	 * : it means there is no mpatable and MADT
-	 */
-	if (!smp_found_config)
-		return;
-
-	phys_addr = mp_lapic_addr;
-
-	set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-		    APIC_BASE, phys_addr);
-
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	boot_cpu_physical_apicid = read_apic_id();
-}
-#endif
-
-/**
- * init_apic_mappings - initialize APIC mappings
- */
-void __init init_apic_mappings(void)
-{
-#ifdef HAVE_X2APIC
-	if (x2apic) {
-		boot_cpu_physical_apicid = read_apic_id();
-		return;
-	}
-#endif
-
-	/*
-	 * If no local APIC can be found then set up a fake all
-	 * zeroes page to simulate the local APIC and another
-	 * one for the IO-APIC.
-	 */
-	if (!smp_found_config && detect_init_APIC()) {
-		apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-		apic_phys = __pa(apic_phys);
-	} else
-		apic_phys = mp_lapic_addr;
-
-	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-				APIC_BASE, apic_phys);
-
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	if (boot_cpu_physical_apicid == -1U)
-		boot_cpu_physical_apicid = read_apic_id();
-}
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int apic_version[MAX_APICS];
-
-int __init APIC_init_uniprocessor(void)
-{
-#ifdef CONFIG_X86_64
-	if (disable_apic) {
-		printk(KERN_INFO "Apic disabled\n");
-		return -1;
-	}
-	if (!cpu_has_apic) {
-		disable_apic = 1;
-		printk(KERN_INFO "Apic disabled by BIOS\n");
-		return -1;
-	}
-#else
-	if (!smp_found_config && !cpu_has_apic)
-		return -1;
-
-	/*
-	 * Complain if the BIOS pretends there is one.
-	 */
-	if (!cpu_has_apic &&
-	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
-		       boot_cpu_physical_apicid);
-		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-		return -1;
-	}
-#endif
-
-#ifdef HAVE_X2APIC
-	enable_IR_x2apic();
-#endif
-#ifdef CONFIG_X86_64
-	setup_apic_routing();
-#endif
-
-	verify_local_APIC();
-	connect_bsp_APIC();
-
-#ifdef CONFIG_X86_64
-	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
-#else
-	/*
-	 * Hack: In case of kdump, after a crash, kernel might be booting
-	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
-	 * might be zero if read from MP tables. Get it from LAPIC.
-	 */
-# ifdef CONFIG_CRASH_DUMP
-	boot_cpu_physical_apicid = read_apic_id();
-# endif
-#endif
-	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-	setup_local_APIC();
-
-#ifdef CONFIG_X86_64
-	/*
-	 * Now enable IO-APICs, actually call clear_IO_APIC
-	 * We need clear_IO_APIC before enabling vector on BP
-	 */
-	if (!skip_ioapic_setup && nr_ioapics)
-		enable_IO_APIC();
-#endif
-
-#ifdef CONFIG_X86_IO_APIC
-	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
-#endif
-		localise_nmi_watchdog();
-	end_local_APIC_setup();
-
-#ifdef CONFIG_X86_IO_APIC
-	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
-		setup_IO_APIC();
-# ifdef CONFIG_X86_64
-	else
-		nr_ioapics = 0;
-# endif
-#endif
-
-#ifdef CONFIG_X86_64
-	setup_boot_APIC_clock();
-	check_nmi_watchdog();
-#else
-	setup_boot_clock();
-#endif
-
-	return 0;
-}
-
-/*
- * Local APIC interrupts
- */
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-#ifdef CONFIG_X86_64
-asmlinkage void smp_spurious_interrupt(void)
-#else
-void smp_spurious_interrupt(struct pt_regs *regs)
-#endif
-{
-	u32 v;
-
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-	/*
-	 * Check if this really is a spurious interrupt and ACK it
-	 * if it is a vectored one.  Just in case...
-	 * Spurious interrupts should not be ACKed.
-	 */
-	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-		ack_APIC_irq();
-
-#ifdef CONFIG_X86_64
-	add_pda(irq_spurious_count, 1);
-#else
-	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
-	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
-	       "should never happen.\n", smp_processor_id());
-	__get_cpu_var(irq_stat).irq_spurious_count++;
-#endif
-	irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-#ifdef CONFIG_X86_64
-asmlinkage void smp_error_interrupt(void)
-#else
-void smp_error_interrupt(struct pt_regs *regs)
-#endif
-{
-	u32 v, v1;
-
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-	/* First tickle the hardware, only then report what went on. -- REW */
-	v = apic_read(APIC_ESR);
-	apic_write(APIC_ESR, 0);
-	v1 = apic_read(APIC_ESR);
-	ack_APIC_irq();
-	atomic_inc(&irq_err_count);
-
-	/* Here is what the APIC error bits mean:
-	   0: Send CS error
-	   1: Receive CS error
-	   2: Send accept error
-	   3: Receive accept error
-	   4: Reserved
-	   5: Send illegal vector
-	   6: Received illegal vector
-	   7: Illegal register address
-	*/
-	printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
-		smp_processor_id(), v , v1);
-	irq_exit();
-}
-
-/**
- * connect_bsp_APIC - attach the APIC to the interrupt system
- */
-void __init connect_bsp_APIC(void)
-{
-#ifdef CONFIG_X86_32
-	if (pic_mode) {
-		/*
-		 * Do not trust the local APIC being empty at bootup.
-		 */
-		clear_local_APIC();
-		/*
-		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
-		 * local APIC to INT and NMI lines.
-		 */
-		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
-				"enabling APIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x01, 0x23);
-	}
-#endif
-	enable_apic_mode();
-}
-
-/**
- * disconnect_bsp_APIC - detach the APIC from the interrupt system
- * @virt_wire_setup:	indicates, whether virtual wire mode is selected
- *
- * Virtual wire mode is necessary to deliver legacy interrupts even when the
- * APIC is disabled.
- */
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
-	unsigned int value;
-
-#ifdef CONFIG_X86_32
-	if (pic_mode) {
-		/*
-		 * Put the board back into PIC mode (has an effect only on
-		 * certain older boards).  Note that APIC interrupts, including
-		 * IPIs, won't work beyond this point!  The only exception are
-		 * INIT IPIs.
-		 */
-		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
-				"entering PIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x00, 0x23);
-		return;
-	}
-#endif
-
-	/* Go back to Virtual Wire compatibility mode */
-
-	/* For the spurious interrupt use vector F, and enable it */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	value |= APIC_SPIV_APIC_ENABLED;
-	value |= 0xf;
-	apic_write(APIC_SPIV, value);
-
-	if (!virt_wire_setup) {
-		/*
-		 * For LVT0 make it edge triggered, active high,
-		 * external and enabled
-		 */
-		value = apic_read(APIC_LVT0);
-		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-		apic_write(APIC_LVT0, value);
-	} else {
-		/* Disable LVT0 */
-		apic_write(APIC_LVT0, APIC_LVT_MASKED);
-	}
-
-	/*
-	 * For LVT1 make it edge triggered, active high,
-	 * nmi and enabled
-	 */
-	value = apic_read(APIC_LVT1);
-	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-	apic_write(APIC_LVT1, value);
-}
-
-void __cpuinit generic_processor_info(int apicid, int version)
-{
-	int cpu;
-	cpumask_t tmp_map;
-
-	/*
-	 * Validate version
-	 */
-	if (version == 0x0) {
-		printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
-				"fixing up to 0x10. (tell your hw vendor)\n",
-				version);
-		version = 0x10;
-	}
-	apic_version[apicid] = version;
-
-	if (num_processors >= NR_CPUS) {
-		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
-			"  Processor ignored.\n", NR_CPUS);
-		return;
-	}
-
-	num_processors++;
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
-
-	physid_set(apicid, phys_cpu_present_map);
-	if (apicid == boot_cpu_physical_apicid) {
-		/*
-		 * x86_bios_cpu_apicid is required to have processors listed
-		 * in same order as logical cpu numbers. Hence the first
-		 * entry is BSP, and so on.
-		 */
-		cpu = 0;
-	}
-	if (apicid > max_physical_apicid)
-		max_physical_apicid = apicid;
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
-	 * but we need to work other dependencies like SMP_SUSPEND etc
-	 * before this can be done without some confusion.
-	 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
-	 *       - Ashok Raj <ashok.raj@intel.com>
-	 */
-	if (max_physical_apicid >= 8) {
-		switch (boot_cpu_data.x86_vendor) {
-		case X86_VENDOR_INTEL:
-			if (!APIC_XAPIC(version)) {
-				def_to_bigsmp = 0;
-				break;
-			}
-			/* If P4 and above fall through */
-		case X86_VENDOR_AMD:
-			def_to_bigsmp = 1;
-		}
-	}
-#endif
-
-#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
-	/* are we being called early in kernel startup? */
-	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
-		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-
-		cpu_to_apicid[cpu] = apicid;
-		bios_cpu_apicid[cpu] = apicid;
-	} else {
-		per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-		per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
-	}
-#endif
-
-	cpu_set(cpu, cpu_possible_map);
-	cpu_set(cpu, cpu_present_map);
-}
-
-#ifdef CONFIG_X86_64
-int hard_smp_processor_id(void)
-{
-	return read_apic_id();
-}
-#endif
-
-/*
- * Power management
- */
-#ifdef CONFIG_PM
-
-static struct {
-	/*
-	 * 'active' is true if the local APIC was enabled by us and
-	 * not the BIOS; this signifies that we are also responsible
-	 * for disabling it before entering apm/acpi suspend
-	 */
-	int active;
-	/* r/w apic fields */
-	unsigned int apic_id;
-	unsigned int apic_taskpri;
-	unsigned int apic_ldr;
-	unsigned int apic_dfr;
-	unsigned int apic_spiv;
-	unsigned int apic_lvtt;
-	unsigned int apic_lvtpc;
-	unsigned int apic_lvt0;
-	unsigned int apic_lvt1;
-	unsigned int apic_lvterr;
-	unsigned int apic_tmict;
-	unsigned int apic_tdcr;
-	unsigned int apic_thmr;
-} apic_pm_state;
-
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-	unsigned long flags;
-	int maxlvt;
-
-	if (!apic_pm_state.active)
-		return 0;
-
-	maxlvt = lapic_get_maxlvt();
-
-	apic_pm_state.apic_id = apic_read(APIC_ID);
-	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
-	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
-	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
-	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
-	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-	if (maxlvt >= 4)
-		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
-	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
-	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
-	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
-	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
-	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-	if (maxlvt >= 5)
-		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
-#endif
-
-	local_irq_save(flags);
-	disable_local_APIC();
-	local_irq_restore(flags);
-	return 0;
-}
-
-static int lapic_resume(struct sys_device *dev)
-{
-	unsigned int l, h;
-	unsigned long flags;
-	int maxlvt;
-
-	if (!apic_pm_state.active)
-		return 0;
-
-	maxlvt = lapic_get_maxlvt();
-
-	local_irq_save(flags);
-
-#ifdef HAVE_X2APIC
-	if (x2apic)
-		enable_x2apic();
-	else
-#endif
-	{
-		/*
-		 * Make sure the APICBASE points to the right address
-		 *
-		 * FIXME! This will be wrong if we ever support suspend on
-		 * SMP! We'll need to do this as part of the CPU restore!
-		 */
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		l &= ~MSR_IA32_APICBASE_BASE;
-		l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-		wrmsr(MSR_IA32_APICBASE, l, h);
-	}
-
-	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
-	apic_write(APIC_ID, apic_pm_state.apic_id);
-	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
-	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
-	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
-	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
-	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
-	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-	if (maxlvt >= 5)
-		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-#endif
-	if (maxlvt >= 4)
-		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
-	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
-	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
-	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
-	apic_write(APIC_ESR, 0);
-	apic_read(APIC_ESR);
-	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
-	apic_write(APIC_ESR, 0);
-	apic_read(APIC_ESR);
-
-	local_irq_restore(flags);
-
-	return 0;
-}
-
-/*
- * This device has no shutdown method - fully functioning local APICs
- * are needed on every CPU up until machine_halt/restart/poweroff.
- */
-
-static struct sysdev_class lapic_sysclass = {
-	.name		= "lapic",
-	.resume		= lapic_resume,
-	.suspend	= lapic_suspend,
-};
-
-static struct sys_device device_lapic = {
-	.id	= 0,
-	.cls	= &lapic_sysclass,
-};
-
-static void __cpuinit apic_pm_activate(void)
-{
-	apic_pm_state.active = 1;
-}
-
-static int __init init_lapic_sysfs(void)
-{
-	int error;
-
-	if (!cpu_has_apic)
-		return 0;
-	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-
-	error = sysdev_class_register(&lapic_sysclass);
-	if (!error)
-		error = sysdev_register(&device_lapic);
-	return error;
-}
-device_initcall(init_lapic_sysfs);
-
-#else	/* CONFIG_PM */
-
-static void apic_pm_activate(void) { }
-
-#endif	/* CONFIG_PM */
-
-#ifdef CONFIG_X86_64
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
-{
-	int i, clusters, zeros;
-	unsigned id;
-	u16 *bios_cpu_apicid;
-	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
-	/*
-	 * there is not this kind of box with AMD CPU yet.
-	 * Some AMD box with quadcore cpu and 8 sockets apicid
-	 * will be [4, 0x23] or [8, 0x27] could be thought to
-	 * vsmp box still need checking...
-	 */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
-		return 0;
-
-	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
-	for (i = 0; i < NR_CPUS; i++) {
-		/* are we being called early in kernel startup? */
-		if (bios_cpu_apicid) {
-			id = bios_cpu_apicid[i];
-		}
-		else if (i < nr_cpu_ids) {
-			if (cpu_present(i))
-				id = per_cpu(x86_bios_cpu_apicid, i);
-			else
-				continue;
-		}
-		else
-			break;
-
-		if (id != BAD_APICID)
-			__set_bit(APIC_CLUSTERID(id), clustermap);
-	}
-
-	/* Problem:  Partially populated chassis may not have CPUs in some of
-	 * the APIC clusters they have been allocated.  Only present CPUs have
-	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
-	 * Since clusters are allocated sequentially, count zeros only if
-	 * they are bounded by ones.
-	 */
-	clusters = 0;
-	zeros = 0;
-	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
-		if (test_bit(i, clustermap)) {
-			clusters += 1 + zeros;
-			zeros = 0;
-		} else
-			++zeros;
-	}
-
-	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
-	 * not guaranteed to be synced between boards
-	 */
-	if (is_vsmp_box() && clusters > 1)
-		return 1;
-
-	/*
-	 * If clusters > 2, then should be multi-chassis.
-	 * May have to revisit this when multi-core + hyperthreaded CPUs come
-	 * out, but AFAIK this will work even for them.
-	 */
-	return (clusters > 2);
-}
-#endif
-
-/*
- * APIC command line parameters
- */
-static int __init setup_disableapic(char *arg)
-{
-	disable_apic = 1;
-	setup_clear_cpu_cap(X86_FEATURE_APIC);
-	return 0;
-}
-early_param("disableapic", setup_disableapic);
-
-/* same as disableapic, for compatibility */
-static int __init setup_nolapic(char *arg)
-{
-	return setup_disableapic(arg);
-}
-early_param("nolapic", setup_nolapic);
-
-static int __init parse_lapic_timer_c2_ok(char *arg)
-{
-	local_apic_timer_c2_ok = 1;
-	return 0;
-}
-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-
-static int __init parse_disable_apic_timer(char *arg)
-{
-	disable_apic_timer = 1;
-	return 0;
-}
-early_param("noapictimer", parse_disable_apic_timer);
-
-static int __init parse_nolapic_timer(char *arg)
-{
-	disable_apic_timer = 1;
-	return 0;
-}
-early_param("nolapic_timer", parse_nolapic_timer);
-
-static int __init apic_set_verbosity(char *arg)
-{
-	if (!arg)  {
-#ifdef CONFIG_X86_64
-		skip_ioapic_setup = 0;
-		ioapic_force = 1;
-		return 0;
-#endif
-		return -EINVAL;
-	}
-
-	if (strcmp("debug", arg) == 0)
-		apic_verbosity = APIC_DEBUG;
-	else if (strcmp("verbose", arg) == 0)
-		apic_verbosity = APIC_VERBOSE;
-	else {
-		printk(KERN_WARNING "APIC Verbosity level %s not recognised"
-			" use apic=verbose or apic=debug\n", arg);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-early_param("apic", apic_set_verbosity);
-
-static int __init lapic_insert_resource(void)
-{
-	if (!apic_phys)
-		return -1;
-
-	/* Put local APIC into the resource map. */
-	lapic_resource.start = apic_phys;
-	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
-	insert_resource(&iomem_resource, &lapic_resource);
-
-	return 0;
-}
-
-/*
- * need call insert after e820_reserve_resources()
- * that is using request_resource
- */
-late_initcall(lapic_insert_resource);
-- 
GitLab


From e492c5ae85428d4a3815d06bf308c590120b928b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 24 Aug 2008 22:41:26 -0700
Subject: [PATCH 468/895] x86: let 64 bit to use 32 bit calibrate_apic_clock

Use the 32-bit APIC calibration code - it's more mature.

Signed-of-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 88 +-----------------------------------------
 1 file changed, 2 insertions(+), 86 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 6e99af5ce678..ca5ef71f4208 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -478,90 +478,6 @@ static void __cpuinit setup_APIC_timer(void)
 	clockevents_register_device(levt);
 }
 
-#ifdef CONFIG_X86_64
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static int __init calibrate_APIC_clock(void)
-{
-	unsigned apic, apic_start;
-	unsigned long tsc, tsc_start;
-	int result;
-
-	local_irq_disable();
-
-	/*
-	 * Put whatever arbitrary (but long enough) timeout
-	 * value into the APIC clock, we just want to get the
-	 * counter running for calibration.
-	 *
-	 * No interrupt enable !
-	 */
-	__setup_APIC_LVTT(250000000, 0, 0);
-
-	apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
-	if (apic_calibrate_pmtmr && pmtmr_ioport) {
-		pmtimer_wait(5000);  /* 5ms wait */
-		apic = apic_read(APIC_TMCCT);
-		result = (apic_start - apic) * 1000L / 5;
-	} else
-#endif
-	{
-		rdtscll(tsc_start);
-
-		do {
-			apic = apic_read(APIC_TMCCT);
-			rdtscll(tsc);
-		} while ((tsc - tsc_start) < TICK_COUNT &&
-				(apic_start - apic) < TICK_COUNT);
-
-		result = (apic_start - apic) * 1000L * tsc_khz /
-					(tsc - tsc_start);
-	}
-
-	local_irq_enable();
-
-	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
-
-	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
-		result / 1000 / 1000, result / 1000 % 1000);
-
-	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
-				       lapic_clockevent.shift);
-	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-	lapic_clockevent.min_delta_ns =
-		clockevent_delta2ns(0xF, &lapic_clockevent);
-
-	calibration_result = (result * APIC_DIVISOR) / HZ;
-
-	/*
-	 * Do a sanity check on the APIC calibration result
-	 */
-	if (calibration_result < (1000000 / HZ)) {
-		printk(KERN_WARNING
-			"APIC frequency too slow, disabling apic timer\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-#else
 /*
  * In this functions we calibrate APIC bus clocks to the external timer.
  *
@@ -659,6 +575,7 @@ static int __init calibrate_APIC_clock(void)
 	delta = lapic_cal_t1 - lapic_cal_t2;
 	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 
+#ifdef CONFIG_X86_PM_TIMER
 	/* Check, if the PM timer is available */
 	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
 	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
@@ -687,6 +604,7 @@ static int __init calibrate_APIC_clock(void)
 		}
 		pm_referenced = 1;
 	}
+#endif
 
 	/* Calculate the scaled math multiplication factor */
 	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -773,8 +691,6 @@ static int __init calibrate_APIC_clock(void)
 	return 0;
 }
 
-#endif
-
 /*
  * Setup the boot APIC
  *
-- 
GitLab


From 8c464a4b23ca283b414022ebc77787f3c7040fa7 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 25 Aug 2008 12:41:19 -0700
Subject: [PATCH 469/895] sparseirq: move kstat_irqs from kstat to irq_desc -
 fix

fix non-sparseirq architectures.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h         |  4 ++--
 include/linux/kernel_stat.h | 10 ++++++++--
 kernel/irq/chip.c           | 21 ++++++++++++++++++++-
 kernel/irq/handle.c         | 10 ++++++++++
 4 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 2445d2b3d5dc..93fe9a943e71 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -161,8 +161,6 @@ struct irq_desc {
 #endif
 #ifdef CONFIG_HAVE_DYN_ARRAY
 	unsigned int            *kstat_irqs;
-#else
-	unsigned int            kstat_irqs[NR_CPUS];
 #endif
 #if defined(CONFIG_INTR_REMAP) && defined(CONFIG_HAVE_SPARSE_IRQ)
        struct irq_2_iommu      *irq_2_iommu;
@@ -219,8 +217,10 @@ extern struct irq_desc *sparse_irqs;
 
 #endif
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
+#endif
 
 /*
  * Migration helpers for obsolete names, they will go away:
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index f10616712de5..21249d8c1293 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,7 +28,7 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
-#ifndef CONFIG_GENERIC_HARDIRQS
+#ifndef CONFIG_HAVE_DYN_ARRAY
        unsigned int irqs[NR_IRQS];
 #endif
 };
@@ -41,7 +41,13 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 
 extern unsigned long long nr_context_switches(void);
 
-#ifndef CONFIG_GENERIC_HARDIRQS
+#ifndef CONFIG_HAVE_DYN_ARRAY
+#define kstat_irqs_this_cpu(irq) \
+	(kstat_this_cpu.irqs[irq])
+#endif
+
+
+#ifndef CONFIG_HAVE_DYN_ARRAY
 static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
        return kstat_cpu(cpu).irqs[irq];
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 9fc5e69213de..4ef555c50db8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -327,7 +327,11 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 
 	action = desc->action;
 	if (unlikely(!action || (desc->status & IRQ_DISABLED)))
@@ -368,7 +372,11 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 
 	/*
 	 * If its disabled or no action available
@@ -415,7 +423,11 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 		goto out;
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 
 	/*
 	 * If its disabled or no action available
@@ -479,8 +491,11 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		mask_ack_irq(desc, irq);
 		goto out_unlock;
 	}
-
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 
 	/* Start handling the irq */
 	desc->chip->ack(irq);
@@ -535,7 +550,11 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 {
 	irqreturn_t action_ret;
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d638a911cbc1..eae69373a9c6 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -34,7 +34,11 @@ void
 handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 {
 	print_irq_desc(irq, desc);
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 	ack_bad_irq(irq);
 }
 
@@ -401,7 +405,11 @@ unsigned int __do_IRQ(unsigned int irq)
 	struct irqaction *action;
 	unsigned int status;
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 	if (CHECK_IRQ_PER_CPU(desc->status)) {
 		irqreturn_t action_ret;
 
@@ -501,10 +509,12 @@ void early_init_irq_lock_class(void)
 }
 #endif
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	return desc->kstat_irqs[cpu];
 }
+#endif
 EXPORT_SYMBOL(kstat_irqs_cpu);
 
-- 
GitLab


From 1dd6ba2e179773597e20f17f66049a64e6c4b2ec Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Mon, 25 Aug 2008 21:27:26 +0400
Subject: [PATCH 470/895] x86: apic - unify smp_spurious/error_interrupt
 declaration

According to entry_64.S we do pass pt_regs pointer
into interrupt handlers but don't use them. So we
safely may merge the declarations.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c   | 8 --------
 include/asm-x86/hw_irq.h | 5 -----
 2 files changed, 13 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index ca5ef71f4208..0556f375e40b 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1659,11 +1659,7 @@ int __init APIC_init_uniprocessor(void)
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
-#ifdef CONFIG_X86_64
-asmlinkage void smp_spurious_interrupt(void)
-#else
 void smp_spurious_interrupt(struct pt_regs *regs)
-#endif
 {
 	u32 v;
 
@@ -1694,11 +1690,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 /*
  * This interrupt should never happen with our APIC/SMP architecture
  */
-#ifdef CONFIG_X86_64
-asmlinkage void smp_error_interrupt(void)
-#else
 void smp_error_interrupt(struct pt_regs *regs)
-#endif
 {
 	u32 v, v1;
 
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 39c7a4745d25..749d042f0556 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -96,13 +96,8 @@ extern asmlinkage void qic_call_function_interrupt(void);
 
 /* SMP */
 extern void smp_apic_timer_interrupt(struct pt_regs *);
-#ifdef CONFIG_X86_32
 extern void smp_spurious_interrupt(struct pt_regs *);
 extern void smp_error_interrupt(struct pt_regs *);
-#else
-extern asmlinkage void smp_spurious_interrupt(void);
-extern asmlinkage void smp_error_interrupt(void);
-#endif
 #ifdef CONFIG_X86_SMP
 extern void smp_reschedule_interrupt(struct pt_regs *);
 extern void smp_call_function_interrupt(struct pt_regs *);
-- 
GitLab


From c59d85a7b7822b83fc9783314543eea0ca860480 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 28 Aug 2008 08:56:33 +0200
Subject: [PATCH 471/895] sparseirq: export nr_irqs on m68k/sparc/s390

Stephen Rothwell reported such build failures on m68k/sparc/s390:

> ERROR: "nr_irqs" [drivers/net/hamradio/baycom_ser_fdx.ko] undefined!
> ERROR: "nr_irqs" [drivers/net/3c59x.ko] undefined!

export nr_irqs on these architectures too.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/m68k/kernel/ints.c | 1 +
 arch/s390/kernel/irq.c  | 1 +
 arch/sparc/kernel/irq.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c
index 74453d15692e..44169e4cd91d 100644
--- a/arch/m68k/kernel/ints.c
+++ b/arch/m68k/kernel/ints.c
@@ -47,6 +47,7 @@
 #endif
 
 int nr_irqs = NR_IRQS;
+EXPORT_SYMBOL(nr_irqs);
 
 extern u32 auto_irqhandler_fixup[];
 extern u32 user_irqhandler_fixup[];
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 14eb5496c8a8..3624c4a0037a 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -18,6 +18,7 @@
 #include <linux/profile.h>
 
 int nr_irqs = NR_IRQS;
+EXPORT_SYMBOL(nr_irqs);
 
 /*
  * show_interrupts is needed by /proc/interrupts.
diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c
index 059598b7e0f0..4b99e3ce3916 100644
--- a/arch/sparc/kernel/irq.c
+++ b/arch/sparc/kernel/irq.c
@@ -57,6 +57,7 @@
 #endif /* SMP */
 
 int nr_irqs = NR_IRQS;
+EXPORT_SYMBOL(nr_irqs);
 
 unsigned long __raw_local_irq_save(void)
 {
-- 
GitLab


From a11b5abef50722e42a7d13f6b799c4f606fcb797 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 3 Sep 2008 16:58:31 -0700
Subject: [PATCH 472/895] x2apic: fix reserved APIC register accesses in
 print_local_APIC()

APIC_ARBPRI is a reserved register for XAPIC and beyond.
APIC_RRR is a reserved register except for 82489DX, APIC for Pentium processors.
APIC_EOI is a write only register.
APIC_DFR is reserved in x2apic mode.

Access to these registers in x2apic will result in #GP fault. Fix these
apic register accesses.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 27 ++++++++++++++++++---------
 include/asm-x86/apic.h    | 14 ++++++++++++++
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index d28128e0392c..93ceb19d1e90 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1751,21 +1751,30 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 
 	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-		v = apic_read(APIC_ARBPRI);
-		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-			v & APIC_ARBPRI_MASK);
+		if (!APIC_XAPIC(ver)) {
+			v = apic_read(APIC_ARBPRI);
+			printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+			       v & APIC_ARBPRI_MASK);
+		}
 		v = apic_read(APIC_PROCPRI);
 		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
 	}
 
-	v = apic_read(APIC_EOI);
-	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
-	v = apic_read(APIC_RRR);
-	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+	/*
+	 * Remote read supported only in the 82489DX and local APIC for
+	 * Pentium processors.
+	 */
+	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+		v = apic_read(APIC_RRR);
+		printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+	}
+
 	v = apic_read(APIC_LDR);
 	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-	v = apic_read(APIC_DFR);
-	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+	if (!x2apic_enabled()) {
+		v = apic_read(APIC_DFR);
+		printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+	}
 	v = apic_read(APIC_SPIV);
 	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 2d970f6bc2a1..ef1d72dbdfe0 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -98,6 +98,20 @@ extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void enable_IR_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
+static inline int x2apic_enabled(void)
+{
+	int msr, msr2;
+
+	if (!cpu_has_x2apic)
+		return 0;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (msr & X2APIC_ENABLE)
+		return 1;
+	return 0;
+}
+#else
+#define x2apic_enabled()	0
 #endif
 
 struct apic_ops {
-- 
GitLab


From f6dd5c3106fb283e37d915eeb33019ef40510f85 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 3 Sep 2008 16:58:32 -0700
Subject: [PATCH 473/895] dmar: fix using early fixmap mapping for DMAR table
 parsing

Very early detection of the DMAR tables will setup fixmap mapping. For
parsing these tables later (while enabling dma and/or interrupt remapping),
early fixmap mapping shouldn't be used. Fix it by calling table detection
routines again, which will call generic apci_get_table() for setting up
the correct mapping.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/dmar.c   | 49 +++++++++++++++++++++++++-------------------
 include/linux/dmar.h |  1 -
 2 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index bd2c01674f5e..f2c5eb6e78f7 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -289,6 +289,24 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 	}
 }
 
+/**
+ * dmar_table_detect - checks to see if the platform supports DMAR devices
+ */
+static int __init dmar_table_detect(void)
+{
+	acpi_status status = AE_OK;
+
+	/* if we could find DMAR table, then there are DMAR devices */
+	status = acpi_get_table(ACPI_SIG_DMAR, 0,
+				(struct acpi_table_header **)&dmar_tbl);
+
+	if (ACPI_SUCCESS(status) && !dmar_tbl) {
+		printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
+		status = AE_NOT_FOUND;
+	}
+
+	return (ACPI_SUCCESS(status) ? 1 : 0);
+}
 
 /**
  * parse_dmar_table - parses the DMA reporting table
@@ -300,6 +318,12 @@ parse_dmar_table(void)
 	struct acpi_dmar_header *entry_header;
 	int ret = 0;
 
+	/*
+	 * Do it again, earlier dmar_tbl mapping could be mapped with
+	 * fixed map.
+	 */
+	dmar_table_detect();
+
 	dmar = (struct acpi_table_dmar *)dmar_tbl;
 	if (!dmar)
 		return -ENODEV;
@@ -430,30 +454,11 @@ int __init dmar_table_init(void)
 	return 0;
 }
 
-/**
- * early_dmar_detect - checks to see if the platform supports DMAR devices
- */
-int __init early_dmar_detect(void)
-{
-	acpi_status status = AE_OK;
-
-	/* if we could find DMAR table, then there are DMAR devices */
-	status = acpi_get_table(ACPI_SIG_DMAR, 0,
-				(struct acpi_table_header **)&dmar_tbl);
-
-	if (ACPI_SUCCESS(status) && !dmar_tbl) {
-		printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
-		status = AE_NOT_FOUND;
-	}
-
-	return (ACPI_SUCCESS(status) ? 1 : 0);
-}
-
 void __init detect_intel_iommu(void)
 {
 	int ret;
 
-	ret = early_dmar_detect();
+	ret = dmar_table_detect();
 
 #ifdef CONFIG_DMAR
 	{
@@ -479,14 +484,16 @@ void __init detect_intel_iommu(void)
 			       " x2apic support\n");
 
 			dmar_disabled = 1;
-			return;
+			goto end;
 		}
 
 		if (ret && !no_iommu && !iommu_detected && !swiotlb &&
 		    !dmar_disabled)
 			iommu_detected = 1;
 	}
+end:
 #endif
+	dmar_tbl = NULL;
 }
 
 
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index c360c558e59e..f1984fc3e06d 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -45,7 +45,6 @@ extern struct list_head dmar_drhd_units;
 	list_for_each_entry(drhd, &dmar_drhd_units, list)
 
 extern int dmar_table_init(void);
-extern int early_dmar_detect(void);
 extern int dmar_dev_scope_init(void);
 
 /* Intel IOMMU detection */
-- 
GitLab


From 74d04bd7dcb4c6130fd8a314d28bfecc9ae7c360 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 3 Sep 2008 16:58:33 -0700
Subject: [PATCH 474/895] dmar: initialize the return value in dmar_parse_dev()

initialize the return value in dmar_parse_dev()

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/dmar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index f2c5eb6e78f7..d281a03695f7 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -193,7 +193,7 @@ dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 {
 	struct acpi_dmar_hardware_unit *drhd;
 	static int include_all;
-	int ret;
+	int ret = 0;
 
 	drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
 
-- 
GitLab


From 04e2ea67069e285404192a35c24dfe7c53b9c61f Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Wed, 3 Sep 2008 16:58:34 -0700
Subject: [PATCH 475/895] dmar: use list_for_each_entry_safe() in
 dmar_dev_scope_init()

In dmar_dev_scope_init(), functions called under for_each_drhd_unit()/
for_each_rmrr_units() can delete the list entry under some error conditions.

So we should use list_for_each_entry_safe() for safe traversal.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/dmar.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index d281a03695f7..ceb338dfa3f2 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -397,10 +397,10 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev)
 
 int __init dmar_dev_scope_init(void)
 {
-	struct dmar_drhd_unit *drhd;
+	struct dmar_drhd_unit *drhd, *drhd_n;
 	int ret = -ENODEV;
 
-	for_each_drhd_unit(drhd) {
+	list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
 		ret = dmar_parse_dev(drhd);
 		if (ret)
 			return ret;
@@ -408,8 +408,8 @@ int __init dmar_dev_scope_init(void)
 
 #ifdef CONFIG_DMAR
 	{
-		struct dmar_rmrr_unit *rmrr;
-		for_each_rmrr_units(rmrr) {
+		struct dmar_rmrr_unit *rmrr, *rmrr_n;
+		list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
 			ret = rmrr_parse_dev(rmrr);
 			if (ret)
 				return ret;
-- 
GitLab


From 1c7d1bcad218808a4f67a4492a5e1d920e85c239 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Wed, 3 Sep 2008 16:58:35 -0700
Subject: [PATCH 476/895] dmar: fix dmar_parse_dev() devices_cnt error
 condition check

It is possible that,
instead of PCI endpoint/sub-hierarchy structures, only IO-APIC/HPET
devices may be reported under device scope structures. Fix the devices_cnt
error check, which cares about only PCI structures and removes the
dma-remapping unit structure (dmaru) when the devices_cnt is zero
and include_all flag is not set.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/dmar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index ceb338dfa3f2..9527405ae198 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -212,7 +212,7 @@ dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 		include_all = 1;
 	}
 
-	if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
+	if (ret) {
 		list_del(&dmaru->list);
 		kfree(dmaru);
 	}
-- 
GitLab


From e8fc96ed3603924e7aa09fd5e4dbd289b7e69907 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 4 Sep 2008 09:56:10 -0700
Subject: [PATCH 477/895] dyn_array: don't break compiling for !CONFIG_SMP

Impact: build failure on uniprocessor

When compiling for !CONFIG_SMP, per_cpu_alloc_dyn_array() would fail
to compile, since it uses per_cpu_offset, which is not defined for
uniprocessor builds.

Hence, do not compile per_cpu_alloc_dyn_array() for !CONFIG_SMP.
Attempting to call this function in a uniprocessor configuration would
be simply wrong in the first place.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 init/dyn_array.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/init/dyn_array.c b/init/dyn_array.c
index c4cd902a1180..1dc08140f3cb 100644
--- a/init/dyn_array.c
+++ b/init/dyn_array.c
@@ -91,6 +91,7 @@ unsigned long __init per_cpu_dyn_array_size(unsigned long *align)
 	return total_size;
 }
 
+#ifdef CONFIG_SMP
 void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
 {
 #ifdef CONFIG_HAVE_DYN_ARRAY
@@ -122,3 +123,4 @@ void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
 	}
 #endif
 }
+#endif
-- 
GitLab


From 02c1df199c990cd21c09a4dffaa06d4e0b7bf2bf Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 4 Sep 2008 20:57:11 +0200
Subject: [PATCH 478/895] x86: print out if acpi want physical flat of all

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/genapic_flat_64.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 9eca5ba7a6b1..2ec2de8d8c46 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	 * is an example).
 	 */
 	if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
-		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
+		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+		printk(KERN_DEBUG "system APIC only can use physical flat");
 		return 1;
+	}
 #endif
 
 	return 0;
-- 
GitLab


From b558cb35f1ef92837cba0fba9aad267e5eff1f65 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 4 Sep 2008 20:57:13 +0200
Subject: [PATCH 479/895] dyn_array: fix typo

Andrew found the typo could break some platforms.

also fix a checkpatch warning.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/dyn_array.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/init/dyn_array.c b/init/dyn_array.c
index 1dc08140f3cb..cf1e04c83b33 100644
--- a/init/dyn_array.c
+++ b/init/dyn_array.c
@@ -54,7 +54,7 @@ void __init pre_alloc_dyn_array(void)
 			da->init_work(da);
 	}
 #else
-#ifdef CONFIF_GENERIC_HARDIRQS
+#ifdef CONFIG_GENERIC_HARDIRQS
 	unsigned int i;
 
 	for (i = 0; i < NR_IRQS; i++)
@@ -117,9 +117,8 @@ void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
 
 		phys += size;
 
-		if (da->init_work) {
+		if (da->init_work)
 			da->init_work(da);
-		}
 	}
 #endif
 }
-- 
GitLab


From 676f4a920be27160747439fe71026aa15ec78e5a Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Thu, 4 Sep 2008 22:37:49 +0400
Subject: [PATCH 480/895] x86: io-apic - use ARRAY_SIZE macro

Make the code width a bit shorter with ARRAY_SIZE macro.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 93ceb19d1e90..9b01fdadcb9b 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -166,7 +166,7 @@ static void __init init_work(void *data)
 
 	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
 
-	legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+	legacy_count = ARRAY_SIZE(irq_cfg_legacy);
 	for (i = legacy_count; i < *da->nr; i++)
 		init_one_irq_cfg(&cfg[i]);
 
-- 
GitLab


From ac54a6c9371bacb86bee1db23f7d82e8685c7e17 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Thu, 4 Sep 2008 22:37:50 +0400
Subject: [PATCH 481/895] x86: io-apic - declare irq_cfg_lock for SPARSE_IRQ
 only

We use irq_cfg_lock lock in SPARSE_IRQ only context so
move it under #ifdef and compiler will be happy.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9b01fdadcb9b..d22fecf828b8 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -147,14 +147,15 @@ static void init_one_irq_cfg(struct irq_cfg *cfg)
 
 static struct irq_cfg *irq_cfgx;
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 /*
  * Protect the irq_cfgx_free freelist:
  */
 static DEFINE_SPINLOCK(irq_cfg_lock);
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
 static struct irq_cfg *irq_cfgx_free;
 #endif
+
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
-- 
GitLab


From e65ef88c20d5c68bde18f559e0d0ad7d718beb28 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Fri, 5 Sep 2008 09:07:20 -0500
Subject: [PATCH 482/895] irq: error missed ifndef CONFIG_HAVE_SPARSE_IRQ

An error return from create_irq_nr() is 0, but an error return from
create_irq() is -1.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/htirq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 7c5aef13fcdb..7b180e0c634e 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -144,7 +144,7 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 #else
 	irq = create_irq();
 #endif
-	if (irq == 0) {
+	if (irq <= 0) {
 		kfree(cfg);
 		return -EBUSY;
 	}
-- 
GitLab


From 21056830c4e5c6735f1d49453398d7186ca4e8d7 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Fri, 5 Sep 2008 09:10:40 -0500
Subject: [PATCH 483/895] irq: set_irq_chip() has redundant call to
 irq_to_desc()

Extraneous call to irq_to_desc().

Signed-off-by: Dean Nelson <dcn@sgi.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/chip.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 4ef555c50db8..570d1ea1db5d 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -102,7 +102,6 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	if (!chip)
 		chip = &no_irq_chip;
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	irq_chip_set_defaults(chip);
 	desc->chip = chip;
-- 
GitLab


From 1f3addcf2d54394d10713be947eeaf18d2b998a9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Fri, 5 Sep 2008 10:03:37 -0700
Subject: [PATCH 484/895] irq: error missed ifndef CONFIG_HAVE_SPARSE_IRQ, v2

need to change irq to int too

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/htirq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 7b180e0c634e..9e4929a00832 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -109,7 +109,7 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	u32 data;
 	int max_irq;
 	int pos;
-	unsigned int irq;
+	int irq;
 	unsigned int irq_want;
 
 	pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
-- 
GitLab


From 932775a4ab622e3c99bd59f14cc7d96722f79501 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 5 Sep 2008 18:02:15 -0700
Subject: [PATCH 485/895] x86: HPET_MSI change IRQ affinity in process context
 when it is disabled

Change the IRQ affinity in the process context when the IRQ is disabled.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/manage.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ddc956861a58..ad2ce72c83c4 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -87,10 +87,11 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 		return -EINVAL;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (desc->status & IRQ_MOVE_PCNTXT) {
+	if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&desc->lock, flags);
+		desc->affinity = cpumask;
 		desc->chip->set_affinity(irq, cpumask);
 		spin_unlock_irqrestore(&desc->lock, flags);
 	} else
-- 
GitLab


From b40d575bf0679c45aaf9e1161fc51a6b041b7210 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 5 Sep 2008 18:02:16 -0700
Subject: [PATCH 486/895] x86: HPET_MSI Refactor code in preparation for
 HPET_MSI

Preparatory patch before the actual HPET MSI changes. Sets up hpet_set_mode
and hpet_next_event for the MSI related changes. Just the code
refactoring and should be zero functional change.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index acf62fc233da..f7cb5e9e261e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -227,44 +227,44 @@ static void hpet_legacy_clockevent_register(void)
 	printk(KERN_DEBUG "hpet clockevent registered\n");
 }
 
-static void hpet_legacy_set_mode(enum clock_event_mode mode,
-			  struct clock_event_device *evt)
+static void hpet_set_mode(enum clock_event_mode mode,
+			  struct clock_event_device *evt, int timer)
 {
 	unsigned long cfg, cmp, now;
 	uint64_t delta;
 
 	switch(mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
-		delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult;
-		delta >>= hpet_clockevent.shift;
+		delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
+		delta >>= evt->shift;
 		now = hpet_readl(HPET_COUNTER);
 		cmp = now + (unsigned long) delta;
-		cfg = hpet_readl(HPET_T0_CFG);
+		cfg = hpet_readl(HPET_Tn_CFG(timer));
 		cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
 		       HPET_TN_SETVAL | HPET_TN_32BIT;
-		hpet_writel(cfg, HPET_T0_CFG);
+		hpet_writel(cfg, HPET_Tn_CFG(timer));
 		/*
 		 * The first write after writing TN_SETVAL to the
 		 * config register sets the counter value, the second
 		 * write sets the period.
 		 */
-		hpet_writel(cmp, HPET_T0_CMP);
+		hpet_writel(cmp, HPET_Tn_CMP(timer));
 		udelay(1);
-		hpet_writel((unsigned long) delta, HPET_T0_CMP);
+		hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
 		break;
 
 	case CLOCK_EVT_MODE_ONESHOT:
-		cfg = hpet_readl(HPET_T0_CFG);
+		cfg = hpet_readl(HPET_Tn_CFG(timer));
 		cfg &= ~HPET_TN_PERIODIC;
 		cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-		hpet_writel(cfg, HPET_T0_CFG);
+		hpet_writel(cfg, HPET_Tn_CFG(timer));
 		break;
 
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
-		cfg = hpet_readl(HPET_T0_CFG);
+		cfg = hpet_readl(HPET_Tn_CFG(timer));
 		cfg &= ~HPET_TN_ENABLE;
-		hpet_writel(cfg, HPET_T0_CFG);
+		hpet_writel(cfg, HPET_Tn_CFG(timer));
 		break;
 
 	case CLOCK_EVT_MODE_RESUME:
@@ -273,14 +273,14 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode,
 	}
 }
 
-static int hpet_legacy_next_event(unsigned long delta,
-				  struct clock_event_device *evt)
+static int hpet_next_event(unsigned long delta,
+			   struct clock_event_device *evt, int timer)
 {
 	u32 cnt;
 
 	cnt = hpet_readl(HPET_COUNTER);
 	cnt += (u32) delta;
-	hpet_writel(cnt, HPET_T0_CMP);
+	hpet_writel(cnt, HPET_Tn_CMP(timer));
 
 	/*
 	 * We need to read back the CMP register to make sure that
@@ -292,6 +292,18 @@ static int hpet_legacy_next_event(unsigned long delta,
 	return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
 }
 
+static void hpet_legacy_set_mode(enum clock_event_mode mode,
+			struct clock_event_device *evt)
+{
+	hpet_set_mode(mode, evt, 0);
+}
+
+static int hpet_legacy_next_event(unsigned long delta,
+			struct clock_event_device *evt)
+{
+	return hpet_next_event(delta, evt, 0);
+}
+
 /*
  * Clock source related code
  */
-- 
GitLab


From 58ac1e76ce77d515bd5cb65dbc465a040da341c6 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 5 Sep 2008 18:02:17 -0700
Subject: [PATCH 487/895] x86: HPET_MSI Basic HPET_MSI setup code

Basic HPET MSI setup code. Routines to perform basic MSI read write
in HPET memory map and setting up irq_chip for HPET MSI.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c    | 54 +++++++++++++++++++++++++++++++++
 arch/x86/kernel/io_apic.c | 64 +++++++++++++++++++++++++++++++++++++++
 include/asm-x86/hpet.h    | 21 +++++++++++++
 3 files changed, 139 insertions(+)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index f7cb5e9e261e..3f10d16a8348 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -6,6 +6,8 @@
 #include <linux/init.h>
 #include <linux/sysdev.h>
 #include <linux/pm.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
 
 #include <asm/fixmap.h>
 #include <asm/hpet.h>
@@ -25,6 +27,15 @@
 unsigned long hpet_address;
 static void __iomem *hpet_virt_address;
 
+struct hpet_dev {
+	struct clock_event_device evt;
+	unsigned int num;
+	int cpu;
+	unsigned int irq;
+	unsigned int flags;
+	char name[10];
+};
+
 unsigned long hpet_readl(unsigned long a)
 {
 	return readl(hpet_virt_address + a);
@@ -304,6 +315,49 @@ static int hpet_legacy_next_event(unsigned long delta,
 	return hpet_next_event(delta, evt, 0);
 }
 
+/*
+ * HPET MSI Support
+ */
+
+void hpet_msi_unmask(unsigned int irq)
+{
+	struct hpet_dev *hdev = get_irq_data(irq);
+	unsigned long cfg;
+
+	/* unmask it */
+	cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+	cfg |= HPET_TN_FSB;
+	hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_mask(unsigned int irq)
+{
+	unsigned long cfg;
+	struct hpet_dev *hdev = get_irq_data(irq);
+
+	/* mask it */
+	cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+	cfg &= ~HPET_TN_FSB;
+	hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_write(unsigned int irq, struct msi_msg *msg)
+{
+	struct hpet_dev *hdev = get_irq_data(irq);
+
+	hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
+	hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
+}
+
+void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
+{
+	struct hpet_dev *hdev = get_irq_data(irq);
+
+	msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
+	msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
+	msg->address_hi = 0;
+}
+
 /*
  * Clock source related code
  */
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index d22fecf828b8..77fa155becf6 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -41,6 +41,7 @@
 #endif
 #include <linux/bootmem.h>
 #include <linux/dmar.h>
+#include <linux/hpet.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -56,6 +57,7 @@
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
+#include <asm/hpet.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -3522,6 +3524,68 @@ int arch_setup_dmar_msi(unsigned int irq)
 }
 #endif
 
+#ifdef CONFIG_HPET_TIMER
+
+#ifdef CONFIG_SMP
+static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+	struct msi_msg msg;
+	unsigned int dest;
+	cpumask_t tmp;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	hpet_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	hpet_msi_write(irq, &msg);
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip hpet_msi_type = {
+	.name = "HPET_MSI",
+	.unmask = hpet_msi_unmask,
+	.mask = hpet_msi_mask,
+	.ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity = hpet_msi_set_affinity,
+#endif
+	.retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_hpet_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+
+	hpet_msi_write(irq, &msg);
+	set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
+		"edge");
+	return 0;
+}
+#endif
+
 #endif /* CONFIG_PCI_MSI */
 /*
  * Hypertransport interrupt support
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index cbbbb6d4dd32..58b273f6ef07 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -1,6 +1,8 @@
 #ifndef ASM_X86__HPET_H
 #define ASM_X86__HPET_H
 
+#include <linux/msi.h>
+
 #ifdef CONFIG_HPET_TIMER
 
 #define HPET_MMAP_SIZE		1024
@@ -10,6 +12,11 @@
 #define HPET_CFG		0x010
 #define HPET_STATUS		0x020
 #define HPET_COUNTER		0x0f0
+
+#define HPET_Tn_CFG(n)		(0x100 + 0x20 * n)
+#define HPET_Tn_CMP(n)		(0x108 + 0x20 * n)
+#define HPET_Tn_ROUTE(n)	(0x110 + 0x20 * n)
+
 #define HPET_T0_CFG		0x100
 #define HPET_T0_CMP		0x108
 #define HPET_T0_ROUTE		0x110
@@ -65,6 +72,20 @@ extern void hpet_disable(void);
 extern unsigned long hpet_readl(unsigned long a);
 extern void force_hpet_resume(void);
 
+extern void hpet_msi_unmask(unsigned int irq);
+extern void hpet_msi_mask(unsigned int irq);
+extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg);
+extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg);
+
+#ifdef CONFIG_PCI_MSI
+extern int arch_setup_hpet_msi(unsigned int irq);
+#else
+static inline int arch_setup_hpet_msi(unsigned int irq)
+{
+	return -EINVAL;
+}
+#endif
+
 #ifdef CONFIG_HPET_EMULATE_RTC
 
 #include <linux/interrupt.h>
-- 
GitLab


From 4588c1f0354ac96a358b3f9e8e4331c51cf3336f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 6 Sep 2008 14:19:17 +0200
Subject: [PATCH 488/895] x86: HPET_MSI Basic HPET_MSI setup code, cleanups

small style cleanups.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 50 +++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 3f10d16a8348..03d3655734b4 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,39 +1,39 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/sysdev.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/hpet.h>
 #include <linux/init.h>
-#include <linux/sysdev.h>
-#include <linux/pm.h>
-#include <linux/interrupt.h>
 #include <linux/cpu.h>
+#include <linux/pm.h>
+#include <linux/io.h>
 
 #include <asm/fixmap.h>
-#include <asm/hpet.h>
 #include <asm/i8253.h>
-#include <asm/io.h>
+#include <asm/hpet.h>
 
-#define HPET_MASK	CLOCKSOURCE_MASK(32)
-#define HPET_SHIFT	22
+#define HPET_MASK			CLOCKSOURCE_MASK(32)
+#define HPET_SHIFT			22
 
 /* FSEC = 10^-15
    NSEC = 10^-9 */
-#define FSEC_PER_NSEC	1000000L
+#define FSEC_PER_NSEC			1000000L
 
 /*
  * HPET address is set in acpi/boot.c, when an ACPI entry exists
  */
-unsigned long hpet_address;
-static void __iomem *hpet_virt_address;
+unsigned long				hpet_address;
+static void __iomem			*hpet_virt_address;
 
 struct hpet_dev {
-	struct clock_event_device evt;
-	unsigned int num;
-	int cpu;
-	unsigned int irq;
-	unsigned int flags;
-	char name[10];
+	struct clock_event_device	evt;
+	unsigned int			num;
+	int				cpu;
+	unsigned int			irq;
+	unsigned int			flags;
+	char				name[10];
 };
 
 unsigned long hpet_readl(unsigned long a)
@@ -70,7 +70,7 @@ static inline void hpet_clear_mapping(void)
 static int boot_hpet_disable;
 int hpet_force_user;
 
-static int __init hpet_setup(char* str)
+static int __init hpet_setup(char *str)
 {
 	if (str) {
 		if (!strncmp("disable", str, 7))
@@ -91,7 +91,7 @@ __setup("nohpet", disable_hpet);
 
 static inline int is_hpet_capable(void)
 {
-	return (!boot_hpet_disable && hpet_address);
+	return !boot_hpet_disable && hpet_address;
 }
 
 /*
@@ -122,10 +122,10 @@ static void hpet_reserve_platform_timers(unsigned long id)
 
 	nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
 
-	memset(&hd, 0, sizeof (hd));
-	hd.hd_phys_address = hpet_address;
-	hd.hd_address = hpet;
-	hd.hd_nirqs = nrtimers;
+	memset(&hd, 0, sizeof(hd));
+	hd.hd_phys_address	= hpet_address;
+	hd.hd_address		= hpet;
+	hd.hd_nirqs		= nrtimers;
 	hpet_reserve_timer(&hd, 0);
 
 #ifdef CONFIG_HPET_EMULATE_RTC
@@ -141,8 +141,8 @@ static void hpet_reserve_platform_timers(unsigned long id)
 	hd.hd_irq[1] = HPET_LEGACY_RTC;
 
 	for (i = 2; i < nrtimers; timer++, i++) {
-		hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >>
-			Tn_INT_ROUTE_CNF_SHIFT;
+		hd.hd_irq[i] = (readl(&timer->hpet_config) &
+			Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
 	}
 
 	hpet_alloc(&hd);
@@ -244,7 +244,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
 	unsigned long cfg, cmp, now;
 	uint64_t delta;
 
-	switch(mode) {
+	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
 		delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
 		delta >>= evt->shift;
-- 
GitLab


From 26afe5f2fbf06ea0765aaa316640c4dd472310c0 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 5 Sep 2008 18:02:18 -0700
Subject: [PATCH 489/895] x86: HPET_MSI Initialise per-cpu HPET timers

Initialize a per CPU HPET MSI timer when possible. We retain the HPET
timer 0 (IRQ 0) and timer 1 (IRQ 8) as is when legacy mode is being used. We
setup the remaining HPET timers as per CPU MSI based timers. This per CPU
timer will eliminate the need for timer broadcasting with IRQ 0 when there
is non-functional LAPIC timer across CPU deep C-states.

If there are more CPUs than number of available timers, CPUs that do not
find any timer to use will continue using LAPIC and IRQ 0 broadcast.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 295 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 293 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 03d3655734b4..31e9191b7e19 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -21,10 +21,19 @@
    NSEC = 10^-9 */
 #define FSEC_PER_NSEC			1000000L
 
+#define HPET_DEV_USED_BIT		2
+#define HPET_DEV_USED			(1 << HPET_DEV_USED_BIT)
+#define HPET_DEV_VALID			0x8
+#define HPET_DEV_FSB_CAP		0x1000
+#define HPET_DEV_PERI_CAP		0x2000
+
+#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
+
 /*
  * HPET address is set in acpi/boot.c, when an ACPI entry exists
  */
 unsigned long				hpet_address;
+unsigned long				hpet_num_timers;
 static void __iomem			*hpet_virt_address;
 
 struct hpet_dev {
@@ -36,6 +45,10 @@ struct hpet_dev {
 	char				name[10];
 };
 
+static struct hpet_dev			*hpet_devs;
+
+static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
+
 unsigned long hpet_readl(unsigned long a)
 {
 	return readl(hpet_virt_address + a);
@@ -145,6 +158,16 @@ static void hpet_reserve_platform_timers(unsigned long id)
 			Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
 	}
 
+	for (i = 0; i < nrtimers; i++) {
+		struct hpet_dev *hdev = &hpet_devs[i];
+
+		if (!(hdev->flags & HPET_DEV_VALID))
+			continue;
+
+		hd.hd_irq[hdev->num] = hdev->irq;
+		hpet_reserve_timer(&hd, hdev->num);
+	}
+
 	hpet_alloc(&hd);
 
 }
@@ -238,6 +261,8 @@ static void hpet_legacy_clockevent_register(void)
 	printk(KERN_DEBUG "hpet clockevent registered\n");
 }
 
+static int hpet_setup_msi_irq(unsigned int irq);
+
 static void hpet_set_mode(enum clock_event_mode mode,
 			  struct clock_event_device *evt, int timer)
 {
@@ -279,7 +304,15 @@ static void hpet_set_mode(enum clock_event_mode mode,
 		break;
 
 	case CLOCK_EVT_MODE_RESUME:
-		hpet_enable_legacy_int();
+		if (timer == 0) {
+			hpet_enable_legacy_int();
+		} else {
+			struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+			hpet_setup_msi_irq(hdev->irq);
+			disable_irq(hdev->irq);
+			irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+			enable_irq(hdev->irq);
+		}
 		break;
 	}
 }
@@ -318,7 +351,7 @@ static int hpet_legacy_next_event(unsigned long delta,
 /*
  * HPET MSI Support
  */
-
+#ifdef CONFIG_PCI_MSI
 void hpet_msi_unmask(unsigned int irq)
 {
 	struct hpet_dev *hdev = get_irq_data(irq);
@@ -358,6 +391,253 @@ void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
 	msg->address_hi = 0;
 }
 
+static void hpet_msi_set_mode(enum clock_event_mode mode,
+				struct clock_event_device *evt)
+{
+	struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+	hpet_set_mode(mode, evt, hdev->num);
+}
+
+static int hpet_msi_next_event(unsigned long delta,
+				struct clock_event_device *evt)
+{
+	struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+	return hpet_next_event(delta, evt, hdev->num);
+}
+
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+	if (arch_setup_hpet_msi(irq)) {
+		destroy_irq(irq);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int hpet_assign_irq(struct hpet_dev *dev)
+{
+	unsigned int irq;
+
+	irq = create_irq();
+	if (!irq)
+		return -EINVAL;
+
+	set_irq_data(irq, dev);
+
+	if (hpet_setup_msi_irq(irq))
+		return -EINVAL;
+
+	dev->irq = irq;
+	return 0;
+}
+
+static irqreturn_t hpet_interrupt_handler(int irq, void *data)
+{
+	struct hpet_dev *dev = (struct hpet_dev *)data;
+	struct clock_event_device *hevt = &dev->evt;
+
+	if (!hevt->event_handler) {
+		printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
+				dev->num);
+		return IRQ_HANDLED;
+	}
+
+	hevt->event_handler(hevt);
+	return IRQ_HANDLED;
+}
+
+static int hpet_setup_irq(struct hpet_dev *dev)
+{
+
+	if (request_irq(dev->irq, hpet_interrupt_handler,
+			IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
+		return -1;
+
+	disable_irq(dev->irq);
+	irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+	enable_irq(dev->irq);
+
+	return 0;
+}
+
+/* This should be called in specific @cpu */
+static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
+{
+	struct clock_event_device *evt = &hdev->evt;
+	uint64_t hpet_freq;
+
+	WARN_ON(cpu != smp_processor_id());
+	if (!(hdev->flags & HPET_DEV_VALID))
+		return;
+
+	if (hpet_setup_msi_irq(hdev->irq))
+		return;
+
+	hdev->cpu = cpu;
+	per_cpu(cpu_hpet_dev, cpu) = hdev;
+	evt->name = hdev->name;
+	hpet_setup_irq(hdev);
+	evt->irq = hdev->irq;
+
+	evt->rating = 110;
+	evt->features = CLOCK_EVT_FEAT_ONESHOT;
+	if (hdev->flags & HPET_DEV_PERI_CAP)
+		evt->features |= CLOCK_EVT_FEAT_PERIODIC;
+
+	evt->set_mode = hpet_msi_set_mode;
+	evt->set_next_event = hpet_msi_next_event;
+	evt->shift = 32;
+
+	/*
+	 * The period is a femto seconds value. We need to calculate the
+	 * scaled math multiplication factor for nanosecond to hpet tick
+	 * conversion.
+	 */
+	hpet_freq = 1000000000000000ULL;
+	do_div(hpet_freq, hpet_period);
+	evt->mult = div_sc((unsigned long) hpet_freq,
+				      NSEC_PER_SEC, evt->shift);
+	/* Calculate the max delta */
+	evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
+	/* 5 usec minimum reprogramming delta. */
+	evt->min_delta_ns = 5000;
+
+	evt->cpumask = cpumask_of_cpu(hdev->cpu);
+	clockevents_register_device(evt);
+}
+
+#ifdef CONFIG_HPET
+/* Reserve at least one timer for userspace (/dev/hpet) */
+#define RESERVE_TIMERS 1
+#else
+#define RESERVE_TIMERS 0
+#endif
+void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+	unsigned int id;
+	unsigned int num_timers;
+	unsigned int num_timers_used = 0;
+	int i;
+
+	id = hpet_readl(HPET_ID);
+
+	num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+	num_timers++; /* Value read out starts from 0 */
+
+	hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
+	if (!hpet_devs)
+		return;
+
+	hpet_num_timers = num_timers;
+
+	for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
+		struct hpet_dev *hdev = &hpet_devs[num_timers_used];
+		unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
+
+		/* Only consider HPET timer with MSI support */
+		if (!(cfg & HPET_TN_FSB_CAP))
+			continue;
+
+		hdev->flags = 0;
+		if (cfg & HPET_TN_PERIODIC_CAP)
+			hdev->flags |= HPET_DEV_PERI_CAP;
+		hdev->num = i;
+
+		sprintf(hdev->name, "hpet%d", i);
+		if (hpet_assign_irq(hdev))
+			continue;
+
+		hdev->flags |= HPET_DEV_FSB_CAP;
+		hdev->flags |= HPET_DEV_VALID;
+		num_timers_used++;
+		if (num_timers_used == num_possible_cpus())
+			break;
+	}
+
+	printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
+		num_timers, num_timers_used);
+}
+
+static struct hpet_dev *hpet_get_unused_timer(void)
+{
+	int i;
+
+	if (!hpet_devs)
+		return NULL;
+
+	for (i = 0; i < hpet_num_timers; i++) {
+		struct hpet_dev *hdev = &hpet_devs[i];
+
+		if (!(hdev->flags & HPET_DEV_VALID))
+			continue;
+		if (test_and_set_bit(HPET_DEV_USED_BIT,
+			(unsigned long *)&hdev->flags))
+			continue;
+		return hdev;
+	}
+	return NULL;
+}
+
+struct hpet_work_struct {
+	struct delayed_work work;
+	struct completion complete;
+};
+
+static void hpet_work(struct work_struct *w)
+{
+	struct hpet_dev *hdev;
+	int cpu = smp_processor_id();
+	struct hpet_work_struct *hpet_work;
+
+	hpet_work = container_of(w, struct hpet_work_struct, work.work);
+
+	hdev = hpet_get_unused_timer();
+	if (hdev)
+		init_one_hpet_msi_clockevent(hdev, cpu);
+
+	complete(&hpet_work->complete);
+}
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+		unsigned long action, void *hcpu)
+{
+	unsigned long cpu = (unsigned long)hcpu;
+	struct hpet_work_struct work;
+	struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+
+	switch (action & 0xf) {
+	case CPU_ONLINE:
+		INIT_DELAYED_WORK(&work.work, hpet_work);
+		init_completion(&work.complete);
+		/* FIXME: add schedule_work_on() */
+		schedule_delayed_work_on(cpu, &work.work, 0);
+		wait_for_completion(&work.complete);
+		break;
+	case CPU_DEAD:
+		if (hdev) {
+			free_irq(hdev->irq, hdev);
+			hdev->flags &= ~HPET_DEV_USED;
+			per_cpu(cpu_hpet_dev, cpu) = NULL;
+		}
+		break;
+	}
+	return NOTIFY_OK;
+}
+#else
+
+void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+	return;
+}
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+		unsigned long action, void *hcpu)
+{
+	return NOTIFY_OK;
+}
+
+#endif
+
 /*
  * Clock source related code
  */
@@ -493,8 +773,10 @@ int __init hpet_enable(void)
 
 	if (id & HPET_ID_LEGSUP) {
 		hpet_legacy_clockevent_register();
+		hpet_msi_capability_lookup(2);
 		return 1;
 	}
+	hpet_msi_capability_lookup(0);
 	return 0;
 
 out_nohpet:
@@ -511,6 +793,8 @@ out_nohpet:
  */
 static __init int hpet_late_init(void)
 {
+	int cpu;
+
 	if (boot_hpet_disable)
 		return -ENODEV;
 
@@ -526,6 +810,13 @@ static __init int hpet_late_init(void)
 
 	hpet_reserve_platform_timers(hpet_readl(HPET_ID));
 
+	for_each_online_cpu(cpu) {
+		hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
+	}
+
+	/* This notifier should be called after workqueue is ready */
+	hotcpu_notifier(hpet_cpuhp_notify, -20);
+
 	return 0;
 }
 fs_initcall(hpet_late_init);
-- 
GitLab


From 3c2cbd2490656fb4b6ede586c557a2b09811a432 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 6 Sep 2008 14:15:33 +0400
Subject: [PATCH 490/895] x86: io-apic - code style cleaning for
 setup_IO_APIC_irqs

By changing printout form we are able to shrink (and clean up) code a bit.

Former printout example:

	init IO_APIC IRQs
	 IO-APIC (apicid-pin) 1-1, 1-2, 1-3 not connected.
	 IO-APIC (apicid-pin) 2-1, 2-2, 2-3 not connected.

New printout example:

	init IO_APIC IRQs
	 1-1 1-2 1-3 (apicid-pin) not connected
	 2-1 2-2 2-3 (apicid-pin) not connected

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 53 +++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 77fa155becf6..4040d575a21e 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1518,41 +1518,44 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 
 static void __init setup_IO_APIC_irqs(void)
 {
-	int apic, pin, idx, irq, first_notcon = 1;
+	int apic, pin, idx, irq;
+	int notcon = 0;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
 	for (apic = 0; apic < nr_ioapics; apic++) {
-	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
-		idx = find_irq_entry(apic,pin,mp_INT);
-		if (idx == -1) {
-			if (first_notcon) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
-				first_notcon = 0;
-			} else
-				apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
-			continue;
-		}
-		if (!first_notcon) {
-			apic_printk(APIC_VERBOSE, " not connected.\n");
-			first_notcon = 1;
-		}
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+			idx = find_irq_entry(apic, pin, mp_INT);
+			if (idx == -1) {
+				apic_printk(APIC_VERBOSE,
+					KERN_DEBUG " %d-%d",
+					mp_ioapics[apic].mp_apicid, pin);
+				if (!notcon)
+					notcon = 1;
+				continue;
+			}
 
-		irq = pin_2_irq(idx, apic, pin);
+			irq = pin_2_irq(idx, apic, pin);
 #ifdef CONFIG_X86_32
-                if (multi_timer_check(apic, irq))
-                        continue;
+			if (multi_timer_check(apic, irq))
+				continue;
 #endif
-		add_pin_to_irq(irq, apic, pin);
+			add_pin_to_irq(irq, apic, pin);
 
-		setup_IO_APIC_irq(apic, pin, irq,
-				  irq_trigger(idx), irq_polarity(idx));
-	}
+			setup_IO_APIC_irq(apic, pin, irq,
+					irq_trigger(idx), irq_polarity(idx));
+		}
+		if (notcon) {
+			apic_printk(APIC_VERBOSE,
+				KERN_DEBUG " (apicid-pin) not connected\n");
+			notcon = 0;
+		}
 	}
 
-	if (!first_notcon)
-		apic_printk(APIC_VERBOSE, " not connected.\n");
+	if (notcon)
+		apic_printk(APIC_VERBOSE,
+			KERN_DEBUG " (apicid-pin) not connected\n");
 }
 
 /*
-- 
GitLab


From 9e6cad9b1230b5563c4b38335bf1ed0f73c5d74a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 6 Sep 2008 10:26:50 -0700
Subject: [PATCH 491/895] dyn_array: remove one panic

Andrew said, we don't need duplicated panic.
because __alloc_bootmem already have that.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/dyn_array.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/init/dyn_array.c b/init/dyn_array.c
index cf1e04c83b33..778d9d508158 100644
--- a/init/dyn_array.c
+++ b/init/dyn_array.c
@@ -33,11 +33,9 @@ void __init pre_alloc_dyn_array(void)
 
 	/* allocate them all together */
 	max_align = max_t(unsigned long, max_align, PAGE_SIZE);
-	ptr = __alloc_bootmem_nopanic(total_size, max_align, 0);
-	if (!ptr)
-		panic("Can not alloc dyn_alloc\n");
-
+	ptr = __alloc_bootmem(total_size, max_align, 0);
 	phys = virt_to_phys(ptr);
+
 	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
-- 
GitLab


From 79c09698cac8df16c5539447d5e1047fde9742e5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 7 Sep 2008 17:58:57 -0700
Subject: [PATCH 492/895] x86: lapic address print out like io apic addr

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 0556f375e40b..f3f50858048e 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1548,7 +1548,7 @@ void __init init_apic_mappings(void)
 		apic_phys = mp_lapic_addr;
 
 	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+	apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
 				APIC_BASE, apic_phys);
 
 	/*
-- 
GitLab


From 2a554fb132cf804477087057b9b0ff2162984507 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Mon, 8 Sep 2008 19:38:06 +0400
Subject: [PATCH 493/895] x86: io-apic - do not use KERN_DEBUG marker too much

Do not use KERN_DEBUG several times on the same line being printed.
Introduced by mine previous patch, sorry.

Reported-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 4040d575a21e..12e59df026db 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1528,11 +1528,16 @@ static void __init setup_IO_APIC_irqs(void)
 
 			idx = find_irq_entry(apic, pin, mp_INT);
 			if (idx == -1) {
-				apic_printk(APIC_VERBOSE,
-					KERN_DEBUG " %d-%d",
-					mp_ioapics[apic].mp_apicid, pin);
-				if (!notcon)
+				if (!notcon) {
 					notcon = 1;
+					apic_printk(APIC_VERBOSE,
+						KERN_DEBUG " %d-%d",
+						mp_ioapics[apic].mp_apicid,
+						pin);
+				} else
+					apic_printk(APIC_VERBOSE, " %d-%d",
+						mp_ioapics[apic].mp_apicid,
+						pin);
 				continue;
 			}
 
@@ -1548,14 +1553,14 @@ static void __init setup_IO_APIC_irqs(void)
 		}
 		if (notcon) {
 			apic_printk(APIC_VERBOSE,
-				KERN_DEBUG " (apicid-pin) not connected\n");
+				" (apicid-pin) not connected\n");
 			notcon = 0;
 		}
 	}
 
 	if (notcon)
 		apic_printk(APIC_VERBOSE,
-			KERN_DEBUG " (apicid-pin) not connected\n");
+			" (apicid-pin) not connected\n");
 }
 
 /*
-- 
GitLab


From f0ed4e695faf6766927c8cfbda2bc7530c7210c2 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Mon, 8 Sep 2008 10:18:40 -0700
Subject: [PATCH 494/895] x86: using HPET in MSI mode and setting up per CPU
 HPET timers, fix

On Sat, Sep 06, 2008 at 06:03:53AM -0700, Ingo Molnar wrote:
>
> it crashes two testsystems, the fault on a NULL pointer in hpet init,
> with:
>
> initcall print_all_ICs+0x0/0x520 returned 0 after 26 msecs
> calling  hpet_late_init+0x0/0x1c0
> BUG: unable to handle kernel NULL pointer dereference at 000000000000008c
> IP: [<ffffffff80d228be>] hpet_late_init+0xfe/0x1c0
> PGD 0
> Oops: 0000 [1] SMP
> CPU 0
> Modules linked in:
> Pid: 1, comm: swapper Not tainted 2.6.27-rc5 #29725
> RIP: 0010:[<ffffffff80d228be>]  [<ffffffff80d228be>] hpet_late_init+0xfe/0x1c0
> RSP: 0018:ffff88003fa07dd0  EFLAGS: 00010246
> RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000000
> RDX: ffffc20000000160 RSI: 0000000000000000 RDI: 0000000000000003
> RBP: ffff88003fa07e90 R08: 0000000000000000 R09: ffff88003fa07dd0
> R10: 0000000000000001 R11: 0000000000000000 R12: ffff88003fa07dd0
> R13: 0000000000000002 R14: ffffc20000000000 R15: 000000006f57e511
> FS:  0000000000000000(0000) GS:ffffffff80cf6a80(0000) knlGS:0000000000000000
> CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
> CR2: 000000000000008c CR3: 0000000000201000 CR4: 00000000000006e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process swapper (pid: 1, threadinfo ffff88003fa06000, task ffff88003fa08000)
> Stack:  00000000fed00000 ffffc20000000000 0000000100000003 0000000800000002
>  0000000000000000 0000000000000000 0000000000000000 0000000000000000
>  0000000000000000 0000000000000000 0000000000000000 0000000000000000
> Call Trace:
>  [<ffffffff80d227c0>] ? hpet_late_init+0x0/0x1c0
>  [<ffffffff80209045>] do_one_initcall+0x45/0x190
>  [<ffffffff80296f39>] ? register_irq_proc+0x19/0xe0
>  [<ffffffff80d0d140>] ? early_idt_handler+0x0/0x73
>  [<ffffffff80d0dabc>] kernel_init+0x14c/0x1b0
>  [<ffffffff80942ac1>] ? trace_hardirqs_on_thunk+0x3a/0x3f
>  [<ffffffff8020dbd9>] child_rip+0xa/0x11
>  [<ffffffff8020ceee>] ? restore_args+0x0/0x30
>  [<ffffffff80d0d970>] ? kernel_init+0x0/0x1b0
>  [<ffffffff8020dbcf>] ? child_rip+0x0/0x11
> Code: 20 48 83 c1 01 48 39 f1 75 e3 44 89 e8 4c 8b 05 29 29 22 00 31 f6 48 8d 78 01 66 66 90 89 f0 48 8d 04 80 48 c1 e0 05 4a 8d 0c 00 <f6> 81 8c 00 00 00 08 74 26 8b 81 80 00 00 00 8b 91 88 00 00 00
> RIP  [<ffffffff80d228be>] hpet_late_init+0xfe/0x1c0
>  RSP <ffff88003fa07dd0>
> CR2: 000000000000008c
> Kernel panic - not syncing: Fatal exception

There was one code path, with CONFIG_PCI_MSI disabled, where we were accessing
hpet_devs without initialization. That resulted in the above crash. The change
below adds a check for hpet_devs.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 31e9191b7e19..01005aeda7d9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -126,6 +126,24 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
  * timer 0 and timer 1 in case of RTC emulation.
  */
 #ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+	int i;
+
+	if (!hpet_devs)
+		return;
+
+	for (i = 0; i < hpet_num_timers; i++) {
+		struct hpet_dev *hdev = &hpet_devs[i];
+
+		if (!(hdev->flags & HPET_DEV_VALID))
+			continue;
+
+		hd->hd_irq[hdev->num] = hdev->irq;
+		hpet_reserve_timer(hd, hdev->num);
+	}
+}
+
 static void hpet_reserve_platform_timers(unsigned long id)
 {
 	struct hpet __iomem *hpet = hpet_virt_address;
@@ -158,15 +176,7 @@ static void hpet_reserve_platform_timers(unsigned long id)
 			Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
 	}
 
-	for (i = 0; i < nrtimers; i++) {
-		struct hpet_dev *hdev = &hpet_devs[i];
-
-		if (!(hdev->flags & HPET_DEV_VALID))
-			continue;
-
-		hd.hd_irq[hdev->num] = hdev->irq;
-		hpet_reserve_timer(&hd, hdev->num);
-	}
+	hpet_reserve_msi_timers(&hd);
 
 	hpet_alloc(&hd);
 
-- 
GitLab


From ba374c9baef910fbc5373541d98c50f15e82c3f8 Mon Sep 17 00:00:00 2001
From: Steven Noonan <steven@uplinklabs.net>
Date: Mon, 8 Sep 2008 16:19:09 -0700
Subject: [PATCH 495/895] x86: fix HPET compiler error when not using
 CONFIG_PCI_MSI

Added dummy function for hpet_setup_msi_irq().

Signed-off-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 01005aeda7d9..422c577ef691 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -635,6 +635,10 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
 }
 #else
 
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+	return 0;
+}
 void hpet_msi_capability_lookup(unsigned int start_timer)
 {
 	return;
-- 
GitLab


From 87783be4c26d79f5cde245e6e8a9fd2b295e92d7 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 10 Sep 2008 22:19:50 +0400
Subject: [PATCH 496/895] x86: io-apic - get rid of __DO_ACTION macro

Replace __DO_ACTION macro with io_apic_modify_irq function.
This allow us to 'grep' definitions being hided by
__DO_ACTION macro:

	__unmask_IO_APIC_irq
	__mask_IO_APIC_irq
	__mask_and_edge_IO_APIC_irq
	__unmask_and_level_IO_APIC_irq

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 103 +++++++++++++++++++-------------------
 1 file changed, 52 insertions(+), 51 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 12e59df026db..ac47384724e3 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -643,65 +643,66 @@ static void __init replace_pin_at_irq(unsigned int irq,
 		add_pin_to_irq(irq, newapic, newpin);
 }
 
-#define __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL)		\
-									\
-{									\
-	int pin;							\
-	struct irq_cfg *cfg;						\
-	struct irq_pin_list *entry;					\
-									\
-	cfg = irq_cfg(irq);						\
-	entry = cfg->irq_2_pin;						\
-	for (;;) {							\
-		unsigned int reg;					\
-		if (!entry)						\
-			break;						\
-		pin = entry->pin;					\
-		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
-		reg ACTION_DISABLE;					\
-		reg ACTION_ENABLE;					\
-		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
-		FINAL;							\
-		if (!entry->next)					\
-			break;						\
-		entry = entry->next;					\
-	}								\
-}
-
-#define DO_ACTION(name,R, ACTION_ENABLE, ACTION_DISABLE, FINAL)		\
-									\
-	static void name##_IO_APIC_irq (unsigned int irq)		\
-	__DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL)
-
-/* mask = 0 */
-DO_ACTION(__unmask,	0, |= 0, &= ~IO_APIC_REDIR_MASKED, )
+static inline void io_apic_modify_irq(unsigned int irq,
+				int mask_and, int mask_or,
+				void (*final)(struct irq_pin_list *entry))
+{
+	int pin;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+
+	cfg = irq_cfg(irq);
+	for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+		unsigned int reg;
+		pin = entry->pin;
+		reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+		reg &= mask_and;
+		reg |= mask_or;
+		io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+		if (final)
+			final(entry);
+	}
+}
+
+static void __unmask_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+}
 
 #ifdef CONFIG_X86_64
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
+void io_apic_sync(struct irq_pin_list *entry)
 {
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	/*
+	 * Synchronize the IO-APIC and the CPU by doing
+	 * a dummy read from the IO-APIC
+	 */
+	struct io_apic __iomem *io_apic;
+	io_apic = io_apic_base(entry->apic);
 	readl(&io_apic->data);
 }
 
-/* mask = 1 */
-DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, &= ~0, io_apic_sync(entry->apic))
-
-#else
-
-/* mask = 1 */
-DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, &= ~0, )
-
-/* mask = 1, trigger = 0 */
-DO_ACTION(__mask_and_edge, 0, |= IO_APIC_REDIR_MASKED, &= ~IO_APIC_REDIR_LEVEL_TRIGGER, )
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+}
+#else /* CONFIG_X86_32 */
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+}
 
-/* mask = 0, trigger = 1 */
-DO_ACTION(__unmask_and_level, 0, |= IO_APIC_REDIR_LEVEL_TRIGGER, &= ~IO_APIC_REDIR_MASKED, )
+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+			IO_APIC_REDIR_MASKED, NULL);
+}
 
-#endif
+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+			IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
+}
+#endif /* CONFIG_X86_32 */
 
 static void mask_IO_APIC_irq (unsigned int irq)
 {
-- 
GitLab


From 823b259b80158a5fb694f6784e18b5bae669c599 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 10 Sep 2008 21:56:46 -0700
Subject: [PATCH 497/895] x86: print out apic id in hex format

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c      | 2 +-
 arch/x86/kernel/cpu/amd.c   | 2 +-
 arch/x86/kernel/cpu/intel.c | 2 +-
 arch/x86/kernel/smpboot.c   | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index f3f50858048e..11cb18639581 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1586,7 +1586,7 @@ int __init APIC_init_uniprocessor(void)
 	 */
 	if (!cpu_has_apic &&
 	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+		printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
 		       boot_cpu_physical_apicid);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 		return -1;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 32e73520adf7..8f1e31db2ad5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	}
 	numa_set_node(cpu, node);
 
-	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+	printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 99468dbd08da..cce0b6118d55 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void)
 		node = first_node(node_online_map);
 	numa_set_node(cpu, node);
 
-	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+	printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8c3aca7cb343..f84de2ff933c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -543,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid)
 	int timeout;
 	u32 status;
 
-	printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
+	printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
 
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
-		printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]);
+		printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
 
 		/*
 		 * Wait for idle.
@@ -874,7 +874,7 @@ do_rest:
 	start_ip = setup_trampoline();
 
 	/* So we see what's up   */
-	printk(KERN_INFO "Booting processor %d/%d ip %lx\n",
+	printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n",
 			  cpu, apicid, start_ip);
 
 	/*
-- 
GitLab


From c87695ea74f21d180e1df3a1d00ac3dd432ea03b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 14 Sep 2008 02:33:11 -0700
Subject: [PATCH 498/895] dyn_array: use %pF instead of
 print_fn_descriptor_symbol

... and tidy up the printouts. Suggested by Andrew Morton.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/dyn_array.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/init/dyn_array.c b/init/dyn_array.c
index 778d9d508158..c8d5e2a18588 100644
--- a/init/dyn_array.c
+++ b/init/dyn_array.c
@@ -17,10 +17,9 @@ void __init pre_alloc_dyn_array(void)
 	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
+		printk(KERN_DEBUG "dyn_array %pF size:%#lx nr:%d align:%#lx\n",
+			da->name, da->size, *da->nr, da->align);
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("dyn_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
-			da->size, *da->nr, da->align);
 		total_size += roundup(size, da->align);
 		if (da->align > max_align)
 			max_align = da->align;
@@ -40,11 +39,10 @@ void __init pre_alloc_dyn_array(void)
 		struct dyn_array *da = *daa;
 
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("dyn_array %s ", da->name);
-
 		phys = roundup(phys, da->align);
+		printk(KERN_DEBUG "dyn_array %pF ==> [%#lx - %#lx]\n",
+			da->name, phys, phys + size);
 		*da->name = phys_to_virt(phys);
-		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
 
 		phys += size;
 
@@ -72,10 +70,9 @@ unsigned long __init per_cpu_dyn_array_size(unsigned long *align)
 	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
+		printk(KERN_DEBUG "per_cpu_dyn_array %pF size:%#lx nr:%d align:%#lx\n",
+			da->name, da->size, *da->nr, da->align);
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
-			da->size, *da->nr, da->align);
 		total_size += roundup(size, da->align);
 		if (da->align > max_align)
 			max_align = da->align;
@@ -103,15 +100,15 @@ void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
 		struct dyn_array *da = *daa;
 
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
-
 		phys = roundup(phys, da->align);
+		printk(KERN_DEBUG "per_cpu_dyn_array %pF ==> [%#lx - %#lx]\n",
+			da->name, phys, phys + size);
+
 		addr = (unsigned long)da->name;
 		addr += per_cpu_offset(cpu);
 		array = (void **)addr;
 		*array = phys_to_virt(phys);
 		*da->name = *array; /* so init_work could use it directly */
-		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
 
 		phys += size;
 
-- 
GitLab


From 9df08f109572e88fbdab9a61d5cb0f0eae4ac9fa Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 14 Sep 2008 11:55:37 +0400
Subject: [PATCH 499/895] x86: apic - lapic_setup_esr does not handle
 esr_disable - fix it

lapic_setup_esr doesn't handle esr_disable inquire.
The error brought in during unification process.
Fix it.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 63 ++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 11cb18639581..59550cc017dc 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1081,40 +1081,43 @@ void __init init_bsp_APIC(void)
 
 static void __cpuinit lapic_setup_esr(void)
 {
-	unsigned long oldvalue, value, maxlvt;
-	if (lapic_is_integrated() && !esr_disable) {
-		if (esr_disable) {
-			/*
-			 * Something untraceable is creating bad interrupts on
-			 * secondary quads ... for the moment, just leave the
-			 * ESR disabled - we can't do anything useful with the
-			 * errors anyway - mbligh
-			 */
-			printk(KERN_INFO "Leaving ESR disabled.\n");
-			return;
-		}
-		/* !82489DX */
-		maxlvt = lapic_get_maxlvt();
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-		oldvalue = apic_read(APIC_ESR);
+	unsigned int oldvalue, value, maxlvt;
+
+	if (!lapic_is_integrated()) {
+		printk(KERN_INFO "No ESR for 82489DX.\n");
+		return;
+	}
 
-		/* enables sending errors */
-		value = ERROR_APIC_VECTOR;
-		apic_write(APIC_LVTERR, value);
+	if (esr_disable) {
 		/*
-		 * spec says clear errors after enabling vector.
+		 * Something untraceable is creating bad interrupts on
+		 * secondary quads ... for the moment, just leave the
+		 * ESR disabled - we can't do anything useful with the
+		 * errors anyway - mbligh
 		 */
-		if (maxlvt > 3)
-			apic_write(APIC_ESR, 0);
-		value = apic_read(APIC_ESR);
-		if (value != oldvalue)
-			apic_printk(APIC_VERBOSE, "ESR value before enabling "
-				"vector: 0x%08lx  after: 0x%08lx\n",
-				oldvalue, value);
-	} else {
-		printk(KERN_INFO "No ESR for 82489DX.\n");
+		printk(KERN_INFO "Leaving ESR disabled.\n");
+		return;
 	}
+
+	maxlvt = lapic_get_maxlvt();
+	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+		apic_write(APIC_ESR, 0);
+	oldvalue = apic_read(APIC_ESR);
+
+	/* enables sending errors */
+	value = ERROR_APIC_VECTOR;
+	apic_write(APIC_LVTERR, value);
+
+	/*
+	 * spec says clear errors after enabling vector.
+	 */
+	if (maxlvt > 3)
+		apic_write(APIC_ESR, 0);
+	value = apic_read(APIC_ESR);
+	if (value != oldvalue)
+		apic_printk(APIC_VERBOSE, "ESR value before enabling "
+			"vector: 0x%08x  after: 0x%08x\n",
+			oldvalue, value);
 }
 
 
-- 
GitLab


From 08ad776e3c54e6fe8b50939f176538063b69f89e Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 14 Sep 2008 11:55:38 +0400
Subject: [PATCH 500/895] x86: apic - skip writting ESR register if we dont
 have on

On 82489DX we don't have ESR register so we should not
write it.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 59550cc017dc..d730e8d31727 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1131,7 +1131,7 @@ void __cpuinit setup_local_APIC(void)
 
 #ifdef CONFIG_X86_32
 	/* Pound the ESR really hard over the head with a big hammer - mbligh */
-	if (esr_disable) {
+	if (lapic_is_integrated() && esr_disable) {
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
-- 
GitLab


From b189892de4da4634560657aedd774752094dbfa0 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Fri, 12 Sep 2008 23:58:24 +0400
Subject: [PATCH 501/895] x86: apic - fix unused vars warning in
 calibrate_APIC_clock

If we don't have CONFIG_X86_PM_TIMER=y compiler warns about
unused variables. Move PM timer based calibration into a
separate function and make the code cleaner and the compiler
happy as well.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 81 ++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index d730e8d31727..784116933c70 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -538,14 +538,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
 	}
 }
 
+static int __init calibrate_by_pmtimer(long deltapm, long *delta)
+{
+	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
+	const long pm_thresh = pm_100ms / 100;
+	unsigned long mult;
+	u64 res;
+
+#ifndef CONFIG_X86_PM_TIMER
+	return -1;
+#endif
+
+	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+	/* Check, if the PM timer is available */
+	if (!deltapm)
+		return -1;
+
+	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+	if (deltapm > (pm_100ms - pm_thresh) &&
+	    deltapm < (pm_100ms + pm_thresh)) {
+		apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+	} else {
+		res = (((u64)deltapm) *  mult) >> 22;
+		do_div(res, 1000000);
+		printk(KERN_WARNING "APIC calibration not consistent "
+			"with PM Timer: %ldms instead of 100ms\n",
+			(long)res);
+		/* Correct the lapic counter value */
+		res = (((u64)(*delta)) * pm_100ms);
+		do_div(res, deltapm);
+		printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+			"%lu (%ld)\n", (unsigned long)res, *delta);
+		*delta = (long)res;
+	}
+
+	return 0;
+}
+
 static int __init calibrate_APIC_clock(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-	const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
-	const long pm_thresh = pm_100ms/100;
 	void (*real_handler)(struct clock_event_device *dev);
 	unsigned long deltaj;
-	long delta, deltapm;
+	long delta;
 	int pm_referenced = 0;
 
 	local_irq_disable();
@@ -575,36 +612,9 @@ static int __init calibrate_APIC_clock(void)
 	delta = lapic_cal_t1 - lapic_cal_t2;
 	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 
-#ifdef CONFIG_X86_PM_TIMER
-	/* Check, if the PM timer is available */
-	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
-	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
-
-	if (deltapm) {
-		unsigned long mult;
-		u64 res;
-
-		mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
-
-		if (deltapm > (pm_100ms - pm_thresh) &&
-		    deltapm < (pm_100ms + pm_thresh)) {
-			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
-		} else {
-			res = (((u64) deltapm) *  mult) >> 22;
-			do_div(res, 1000000);
-			printk(KERN_WARNING "APIC calibration not consistent "
-			       "with PM Timer: %ldms instead of 100ms\n",
-			       (long)res);
-			/* Correct the lapic counter value */
-			res = (((u64) delta) * pm_100ms);
-			do_div(res, deltapm);
-			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
-			       "%lu (%ld)\n", (unsigned long) res, delta);
-			delta = (long) res;
-		}
-		pm_referenced = 1;
-	}
-#endif
+	/* we trust the PM based calibration if possible */
+	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
+					&delta);
 
 	/* Calculate the scaled math multiplication factor */
 	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -646,7 +656,10 @@ static int __init calibrate_APIC_clock(void)
 
 	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 
-	/* We trust the pm timer based calibration */
+	/*
+	 * PM timer calibration failed or not turned on
+	 * so lets try APIC timer based calibration
+	 */
 	if (!pm_referenced) {
 		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 
-- 
GitLab


From 56ffa1a028b9fce3860a247c6fe79fce7cbf425b Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 13 Sep 2008 13:11:16 +0400
Subject: [PATCH 502/895] x86: io-apic - do not use KERN_DEBUG marker too much,
 fix

Yinghai Lu reported:

| >  0 add_pin_to_irq: irq 15 --> apic 0 pin 15
| > IOAPIC[0]: Set routing entry (8-15 -> 0x3f -> IRQ 15 Mode:0 Active:0)
| >  8-16 8-17 8-18 8-19 8-20 8-21 8-22 8-23 (apicid-pin) not connected
| >  9-0 9-1 9-2 9-3 9-4 9-5 9-6 9-7 9-8 9-9 9-10 9-11 9-12 9-13 9-14 9-15
| > 9-16 9-17 9-18 9-19 9-20 9-21 9-22 9-23 (apicid-pin) not connected
| >
|
| only first one not connected at first, and ...

here is a quick fix for this.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index ac47384724e3..6ce5873a406c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1541,6 +1541,11 @@ static void __init setup_IO_APIC_irqs(void)
 						pin);
 				continue;
 			}
+			if (notcon) {
+				apic_printk(APIC_VERBOSE,
+					" (apicid-pin) not connected\n");
+				notcon = 0;
+			}
 
 			irq = pin_2_irq(idx, apic, pin);
 #ifdef CONFIG_X86_32
@@ -1552,11 +1557,6 @@ static void __init setup_IO_APIC_irqs(void)
 			setup_IO_APIC_irq(apic, pin, irq,
 					irq_trigger(idx), irq_polarity(idx));
 		}
-		if (notcon) {
-			apic_printk(APIC_VERBOSE,
-				" (apicid-pin) not connected\n");
-			notcon = 0;
-		}
 	}
 
 	if (notcon)
-- 
GitLab


From e00585bb7fc3d0b601181b765a254df7ff4ea59b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 15 Sep 2008 01:53:50 -0700
Subject: [PATCH 503/895] irq: fix irqpoll && sparseirq

Steven Noonan reported a boot hang when using irqpoll and
CONFIG_HAVE_SPARSE_IRQ=y.

The irqpoll loop needs to be updated to not iterate from 1 to nr_irqs
but to iterate via for_each_irq_desc(). (in the former case desc can
be NULL which crashes the box)

Reported-by: Steven Noonan <steven@uplinklabs.net>
Tested-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/spurious.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index b5d906002e1d..ec5a4bef3054 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -90,14 +90,15 @@ static int misrouted_irq(int irq)
 {
 	int i;
 	int ok = 0;
+	struct irq_desc *desc;
 
-	for (i = 1; i < nr_irqs; i++) {
-		struct irq_desc *desc;
+	for_each_irq_desc(i, desc) {
+		if (!i)
+			 continue;
 
 		if (i == irq)	/* Already tried */
 			continue;
 
-		desc = irq_to_desc(i);
 		if (try_one_irq(i, desc))
 			ok = 1;
 	}
@@ -108,10 +109,14 @@ static int misrouted_irq(int irq)
 static void poll_spurious_irqs(unsigned long dummy)
 {
 	int i;
-	for (i = 1; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
 		unsigned int status;
 
+		if (!i)
+			 continue;
+
 		/* Racy but it doesn't matter */
 		status = desc->status;
 		barrier();
@@ -278,7 +283,7 @@ static int __init irqfixup_setup(char *str)
 
 __setup("irqfixup", irqfixup_setup);
 module_param(irqfixup, int, 0644);
-MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode 2: irqpoll mode");
+MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode, 2: irqpoll mode");
 
 static int __init irqpoll_setup(char *str)
 {
-- 
GitLab


From 9d98598d2fc286c8dbcd0b681168639528428db0 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Fri, 19 Sep 2008 00:12:26 -0700
Subject: [PATCH 504/895] sparseirq: remove some debug print out

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 19 -------------------
 kernel/irq/handle.c       | 18 ------------------
 2 files changed, 37 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 6ce5873a406c..01419fdc1d5d 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -277,23 +277,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 
 	spin_unlock_irqrestore(&irq_cfg_lock, flags);
 
-	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
-#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
-	{
-		/* dump the results */
-		struct irq_cfg *cfg;
-		unsigned long phys;
-		unsigned long bytes = sizeof(struct irq_cfg);
-
-		printk(KERN_DEBUG "=========================== %d\n", irq);
-		printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
-		for_each_irq_cfg(cfg) {
-			phys = __pa(cfg);
-			printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
-		}
-		printk(KERN_DEBUG "===========================\n");
-	}
-#endif
 	return cfg;
 }
 #else
@@ -597,7 +580,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 		cfg->irq_2_pin = entry;
 		entry->apic = apic;
 		entry->pin = pin;
-		printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
 		return;
 	}
 
@@ -613,7 +595,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
-	printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
 }
 
 /*
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index eae69373a9c6..710db0ec97e3 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -238,24 +238,6 @@ struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 
 	spin_unlock_irqrestore(&sparse_irq_lock, flags);
 
-	printk(KERN_DEBUG "found new irq_desc for irq %d\n", desc->irq);
-#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
-	{
-		/* dump the results */
-		struct irq_desc *desc;
-		unsigned long phys;
-		unsigned long bytes = sizeof(struct irq_desc);
-		unsigned int irqx;
-
-		printk(KERN_DEBUG "=========================== %d\n", irq);
-		printk(KERN_DEBUG "irq_desc dump after get that for %d\n", irq);
-		for_each_irq_desc(irqx, desc) {
-			phys = __pa(desc);
-			printk(KERN_DEBUG "irq_desc %d ==> [%#lx - %#lx]\n", irqx, phys, phys + bytes);
-		}
-		printk(KERN_DEBUG "===========================\n");
-	}
-#endif
 	return desc;
 }
 #else
-- 
GitLab


From 2976fe20125587c944c8df48d991c38f0891fb28 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 18 Sep 2008 16:10:48 -0700
Subject: [PATCH 505/895] fix warning: "x86: sparse_irq needs spin_lock in
 allocations"

caused by

commit a532e19680ada3b8579b81e67e76d3ebd19c340f
Author: Yinghai Lu <yhlu.kernel@gmail.com>
Date:   Wed Aug 20 20:46:25 2008 -0700

    x86: sparse_irq needs spin_lock in allocations

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/handle.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 710db0ec97e3..7f625fbc9aa4 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -111,12 +111,11 @@ static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 	}
 }
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 /*
  * Protect the sparse_irqs_free freelist:
  */
 static DEFINE_SPINLOCK(sparse_irq_lock);
-
-#ifdef CONFIG_HAVE_SPARSE_IRQ
 static struct irq_desc *sparse_irqs_free;
 struct irq_desc *sparse_irqs;
 #endif
-- 
GitLab


From 5ffa4eb2224343ec3fbd492ffe71e28e797435b7 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Thu, 18 Sep 2008 23:37:57 +0400
Subject: [PATCH 506/895] x86: io-apic - interrupt remapping fix

Interrupt remapping could lead to NULL dereference in case of
kzalloc failed and memory leak in other way. So fix the
both cases.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: "Maciej W. Rozycki" <macro@linux-mips.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c    | 13 ++++++++++---
 arch/x86/kernel/io_apic.c | 19 +++++++++++++++++--
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 784116933c70..1f48bd1c9f2d 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1360,7 +1360,12 @@ void enable_IR_x2apic(void)
 
 	local_irq_save(flags);
 	mask_8259A();
-	save_mask_IO_APIC_setup();
+
+	ret = save_mask_IO_APIC_setup();
+	if (ret) {
+		printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+		goto end;
+	}
 
 	ret = enable_intr_remapping(1);
 
@@ -1370,14 +1375,15 @@ void enable_IR_x2apic(void)
 	}
 
 	if (ret)
-		goto end;
+		goto end_restore;
 
 	if (!x2apic) {
 		x2apic = 1;
 		apic_ops = &x2apic_ops;
 		enable_x2apic();
 	}
-end:
+
+end_restore:
 	if (ret)
 		/*
 		 * IR enabling failed
@@ -1386,6 +1392,7 @@ end:
 	else
 		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
 
+end:
 	unmask_8259A();
 	local_irq_restore(flags);
 
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 01419fdc1d5d..4cc9cb64c811 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -812,7 +812,7 @@ int save_mask_IO_APIC_setup(void)
 			kzalloc(sizeof(struct IO_APIC_route_entry) *
 				nr_ioapic_registers[apic], GFP_KERNEL);
 		if (!early_ioapic_entries[apic])
-			return -ENOMEM;
+			goto nomem;
 	}
 
 	for (apic = 0; apic < nr_ioapics; apic++)
@@ -826,17 +826,32 @@ int save_mask_IO_APIC_setup(void)
 				ioapic_write_entry(apic, pin, entry);
 			}
 		}
+
 	return 0;
+
+nomem:
+	for (; apic > 0; apic--)
+		kfree(early_ioapic_entries[apic]);
+	kfree(early_ioapic_entries[apic]);
+	memset(early_ioapic_entries, 0,
+		ARRAY_SIZE(early_ioapic_entries));
+
+	return -ENOMEM;
 }
 
 void restore_IO_APIC_setup(void)
 {
 	int apic, pin;
 
-	for (apic = 0; apic < nr_ioapics; apic++)
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		if (!early_ioapic_entries[apic])
+			break;
 		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
 			ioapic_write_entry(apic, pin,
 					   early_ioapic_entries[apic][pin]);
+		kfree(early_ioapic_entries[apic]);
+		early_ioapic_entries[apic] = NULL;
+	}
 }
 
 void reinit_intr_remapped_IO_APIC(int intr_remapping)
-- 
GitLab


From 8da077d6f31da291ee3a7dd559671cb8ca48cbe2 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 23 Sep 2008 08:42:05 -0500
Subject: [PATCH 507/895] x86, uv: fix ordering of calls to uv_system_init &
 uv_cpu_init

Fix problem caused by reordering of the calls to uv_cpu_init() &
uv_system_init. Originally, uv_cpu_init() was called AFTER uv_system_init.
This order was recently broken as a side-effect of other patches.

With this patch, initialization of cpu 0 is now done by the system_init
call.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/genx2apic_uv_x.c | 34 ++++++++++++++++----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 33581d94a90e..b689075bbe2f 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -356,7 +356,22 @@ static __init void uv_rtc_init(void)
 		sn_rtc_cycles_per_second = ticks_per_sec;
 }
 
-static bool uv_system_inited;
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ * 	ZZZ hotplug not supported yet
+ */
+void __cpuinit uv_cpu_init(void)
+{
+	/* CPU 0 initilization will be done via uv_system_init. */
+	if (!uv_blade_info)
+		return;
+
+	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+		set_x2apic_extra_bits(uv_hub_info->pnode);
+}
+
 
 void __init uv_system_init(void)
 {
@@ -448,21 +463,6 @@ void __init uv_system_init(void)
 	map_mmr_high(max_pnode);
 	map_config_high(max_pnode);
 	map_mmioh_high(max_pnode);
-	uv_system_inited = true;
-}
 
-/*
- * Called on each cpu to initialize the per_cpu UV data area.
- * 	ZZZ hotplug not supported yet
- */
-void __cpuinit uv_cpu_init(void)
-{
-	BUG_ON(!uv_system_inited);
-
-	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
-
-	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
-		set_x2apic_extra_bits(uv_hub_info->pnode);
+	uv_cpu_init();
 }
-
-
-- 
GitLab


From aac3f2b6f6e88827432c4050ac73552f24b19de1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 24 Sep 2008 19:04:35 -0700
Subject: [PATCH 508/895] x86: fix typo in irq_desc array

when SPARSE_IRQ is not used, should still use irq_desc->lock

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/handle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 7f625fbc9aa4..fb6bdb602a93 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -253,7 +253,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 		.chip = &no_irq_chip,
 		.handle_irq = handle_bad_irq,
 		.depth = 1,
-		.lock = __SPIN_LOCK_UNLOCKED(sparse_irqs->lock),
+		.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
 #ifdef CONFIG_SMP
 		.affinity = CPU_MASK_ALL
 #endif
-- 
GitLab


From 7564676813780e0ba4dacf95338202cb72d2172d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 24 Sep 2008 19:04:36 -0700
Subject: [PATCH 509/895] x86: irq no should not use hex in /proc/interrupts

Arjan van de Ven noticed that we changed IRQ numbers from decimal
to hex in /proc/interrupts - that can break user-space utilities
like irqbalanced.

Reported-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_32.c | 2 +-
 arch/x86/kernel/irq_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b2e1082cf5ad..001772ffc918 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -307,7 +307,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
-		seq_printf(p, "%#x: ",i);
+		seq_printf(p, "%3d: ", i);
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index c2fca89a3f7e..ec2661091283 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
-		seq_printf(p, "%#x: ",i);
+		seq_printf(p, "%3d: ", i);
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-- 
GitLab


From c1370b49cc4f09de5b447ddf688a3988a297dbfc Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Tue, 23 Sep 2008 23:00:02 +0400
Subject: [PATCH 510/895] x86: io-apic - interrupt remapping fix

Clean up obscure for() cycle with straight while() form

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: "Maciej W. Rozycki" <macro@linux-mips.org>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 4cc9cb64c811..ed68a6f99dc2 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -830,9 +830,8 @@ int save_mask_IO_APIC_setup(void)
 	return 0;
 
 nomem:
-	for (; apic > 0; apic--)
-		kfree(early_ioapic_entries[apic]);
-	kfree(early_ioapic_entries[apic]);
+	while (apic >= 0)
+		kfree(early_ioapic_entries[apic--]);
 	memset(early_ioapic_entries, 0,
 		ARRAY_SIZE(early_ioapic_entries));
 
-- 
GitLab


From c81bba49a13cb3376654d0cc93dc069fd619ed76 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 25 Sep 2008 11:53:11 -0700
Subject: [PATCH 511/895] x86: print out irq nr for msi/ht, v3

v2: fix hpet compiling error
v3: Bjorn want to use dev_printk instead

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c    | 3 +++
 arch/x86/kernel/io_apic.c | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 422c577ef691..2913913f4a46 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -467,6 +467,9 @@ static int hpet_setup_irq(struct hpet_dev *dev)
 	irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
 	enable_irq(dev->irq);
 
+	printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
+			 dev->name, dev->irq);
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index ed68a6f99dc2..4ee270d30358 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3354,6 +3354,8 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 #endif
 		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 
+	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
 	return 0;
 }
 
@@ -3586,6 +3588,7 @@ int arch_setup_hpet_msi(unsigned int irq)
 	hpet_msi_write(irq, &msg);
 	set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
 		"edge");
+
 	return 0;
 }
 #endif
@@ -3682,6 +3685,8 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
 		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
 					      handle_edge_irq, "edge");
+
+		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
 	}
 	return err;
 }
-- 
GitLab


From 5f79f2f2ad39b5177c52ed08ffd066ea0c1da924 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed, 24 Sep 2008 10:03:17 -0700
Subject: [PATCH 512/895] hpet: clean up warning

Fix the below compile warnings due to recent HPET MSI changes

arch/x86/kernel/hpet.c:48: warning: 'hpet_devs' defined but not used
arch/x86/kernel/hpet.c:50: warning: 'per_cpu__cpu_hpet_dev' defined but not used

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 57 ++++++++++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 2913913f4a46..77017e834cf7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -45,10 +45,6 @@ struct hpet_dev {
 	char				name[10];
 };
 
-static struct hpet_dev			*hpet_devs;
-
-static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
-
 unsigned long hpet_readl(unsigned long a)
 {
 	return readl(hpet_virt_address + a);
@@ -126,23 +122,8 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
  * timer 0 and timer 1 in case of RTC emulation.
  */
 #ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd)
-{
-	int i;
 
-	if (!hpet_devs)
-		return;
-
-	for (i = 0; i < hpet_num_timers; i++) {
-		struct hpet_dev *hdev = &hpet_devs[i];
-
-		if (!(hdev->flags & HPET_DEV_VALID))
-			continue;
-
-		hd->hd_irq[hdev->num] = hdev->irq;
-		hpet_reserve_timer(hd, hdev->num);
-	}
-}
+static void hpet_reserve_msi_timers(struct hpet_data *hd);
 
 static void hpet_reserve_platform_timers(unsigned long id)
 {
@@ -362,6 +343,10 @@ static int hpet_legacy_next_event(unsigned long delta,
  * HPET MSI Support
  */
 #ifdef CONFIG_PCI_MSI
+
+static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
+static struct hpet_dev	*hpet_devs;
+
 void hpet_msi_unmask(unsigned int irq)
 {
 	struct hpet_dev *hdev = get_irq_data(irq);
@@ -525,7 +510,8 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 #else
 #define RESERVE_TIMERS 0
 #endif
-void hpet_msi_capability_lookup(unsigned int start_timer)
+
+static void hpet_msi_capability_lookup(unsigned int start_timer)
 {
 	unsigned int id;
 	unsigned int num_timers;
@@ -571,6 +557,26 @@ void hpet_msi_capability_lookup(unsigned int start_timer)
 		num_timers, num_timers_used);
 }
 
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+	int i;
+
+	if (!hpet_devs)
+		return;
+
+	for (i = 0; i < hpet_num_timers; i++) {
+		struct hpet_dev *hdev = &hpet_devs[i];
+
+		if (!(hdev->flags & HPET_DEV_VALID))
+			continue;
+
+		hd->hd_irq[hdev->num] = hdev->irq;
+		hpet_reserve_timer(hd, hdev->num);
+	}
+}
+#endif
+
 static struct hpet_dev *hpet_get_unused_timer(void)
 {
 	int i;
@@ -642,10 +648,17 @@ static int hpet_setup_msi_irq(unsigned int irq)
 {
 	return 0;
 }
-void hpet_msi_capability_lookup(unsigned int start_timer)
+static void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+	return;
+}
+
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
 {
 	return;
 }
+#endif
 
 static int hpet_cpuhp_notify(struct notifier_block *n,
 		unsigned long action, void *hcpu)
-- 
GitLab


From 4173a0e7371ece227559b44943c6fd456ee470d1 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Thu, 2 Oct 2008 12:18:21 -0500
Subject: [PATCH 513/895] x86, UV: add uv_setup_irq() and uv_teardown_irq()
 functions, v3

Provide a means for UV interrupt MMRs to be setup with the message to be sent
when an MSI is raised.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile    |  2 +-
 arch/x86/kernel/io_apic.c   | 68 ++++++++++++++++++++++++++++++++
 arch/x86/kernel/uv_irq.c    | 77 +++++++++++++++++++++++++++++++++++++
 include/asm-x86/uv/uv_irq.h | 36 +++++++++++++++++
 4 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/uv_irq.c
 create mode 100644 include/asm-x86/uv/uv_irq.h

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 45cecc59d894..acc0e8bf043e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE)			+= microcode.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
         obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
-	obj-y				+= bios_uv.o
+	obj-y				+= bios_uv.o uv_irq.o
         obj-y				+= genx2apic_cluster.o
         obj-y				+= genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 4ee270d30358..260c95a5e6dc 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -58,6 +58,8 @@
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -3692,6 +3694,72 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
+#ifdef CONFIG_X86_64
+/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+		       unsigned long mmr_offset)
+{
+	const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+	struct irq_cfg *cfg;
+	int mmr_pnode;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	unsigned long flags;
+	int err;
+
+	err = assign_irq_vector(irq, *eligible_cpu);
+	if (err != 0)
+		return err;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+				      irq_name);
+	spin_unlock_irqrestore(&vector_lock, flags);
+
+	cfg = irq_cfg(irq);
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+	entry->vector = cfg->vector;
+	entry->delivery_mode = INT_DELIVERY_MODE;
+	entry->dest_mode = INT_DEST_MODE;
+	entry->polarity = 0;
+	entry->trigger = 0;
+	entry->mask = 0;
+	entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+
+	mmr_pnode = uv_blade_to_pnode(mmr_blade);
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * longer allowed to be sent.
+ */
+void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+{
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	int mmr_pnode;
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+	entry->mask = 1;
+
+	mmr_pnode = uv_blade_to_pnode(mmr_blade);
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+#endif /* CONFIG_X86_64 */
+
 int __init io_apic_get_redir_entries (int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
new file mode 100644
index 000000000000..6bd26c91c30b
--- /dev/null
+++ b/arch/x86/kernel/uv_irq.c
@@ -0,0 +1,77 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV IRQ functions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <asm/uv/uv_irq.h>
+
+static void uv_noop(unsigned int irq)
+{
+}
+
+static unsigned int uv_noop_ret(unsigned int irq)
+{
+	return 0;
+}
+
+static void uv_ack_apic(unsigned int irq)
+{
+	ack_APIC_irq();
+}
+
+struct irq_chip uv_irq_chip = {
+	.name		= "UV-CORE",
+	.startup	= uv_noop_ret,
+	.shutdown	= uv_noop,
+	.enable		= uv_noop,
+	.disable	= uv_noop,
+	.ack		= uv_noop,
+	.mask		= uv_noop,
+	.unmask		= uv_noop,
+	.eoi		= uv_ack_apic,
+	.end		= uv_noop,
+};
+
+/*
+ * Set up a mapping of an available irq and vector, and enable the specified
+ * MMR that defines the MSI that is to be sent to the specified CPU when an
+ * interrupt is raised.
+ */
+int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
+		 unsigned long mmr_offset)
+{
+	int irq;
+	int ret;
+
+	irq = create_irq();
+	if (irq <= 0)
+		return -EBUSY;
+
+	ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
+	if (ret != irq)
+		destroy_irq(irq);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(uv_setup_irq);
+
+/*
+ * Tear down a mapping of an irq and vector, and disable the specified MMR that
+ * defined the MSI that was to be sent to the specified CPU when an interrupt
+ * was raised.
+ *
+ * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
+ */
+void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
+{
+	arch_disable_uv_irq(mmr_blade, mmr_offset);
+	destroy_irq(irq);
+}
+EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/include/asm-x86/uv/uv_irq.h b/include/asm-x86/uv/uv_irq.h
new file mode 100644
index 000000000000..8bf5f32da9c6
--- /dev/null
+++ b/include/asm-x86/uv/uv_irq.h
@@ -0,0 +1,36 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV IRQ definitions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef ASM_X86__UV__UV_IRQ_H
+#define ASM_X86__UV__UV_IRQ_H
+
+/* If a generic version of this structure gets defined, eliminate this one. */
+struct uv_IO_APIC_route_entry {
+	__u64	vector		:  8,
+		delivery_mode	:  3,
+		dest_mode	:  1,
+		delivery_status	:  1,
+		polarity	:  1,
+		__reserved_1	:  1,
+		trigger		:  1,
+		mask		:  1,
+		__reserved_2	: 15,
+		dest		: 32;
+};
+
+extern struct irq_chip uv_irq_chip;
+
+extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
+extern void arch_disable_uv_irq(int, unsigned long);
+
+extern int uv_setup_irq(char *, int, int, unsigned long);
+extern void uv_teardown_irq(unsigned int, int, unsigned long);
+
+#endif /* ASM_X86__UV__UV_IRQ_H */
-- 
GitLab


From 37762b6ffb37f9e21cbc6f80902aa06b7a053fd7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Oct 2008 11:38:37 +0200
Subject: [PATCH 514/895] x86, UV: add uv_setup_irq() and uv_teardown_irq()
 functions, v3, fix

fix:

 arch/x86/kernel/uv_irq.c: In function 'uv_ack_apic':
 arch/x86/kernel/uv_irq.c:26: error: implicit declaration of function 'ack_APIC_irq'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/uv_irq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 6bd26c91c30b..aeef529917e4 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -10,6 +10,8 @@
 
 #include <linux/module.h>
 #include <linux/irq.h>
+
+#include <asm/apic.h>
 #include <asm/uv/uv_irq.h>
 
 static void uv_noop(unsigned int irq)
-- 
GitLab


From a50f70b17541c0060967c6df61133e968bad3652 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Fri, 3 Oct 2008 11:58:54 -0500
Subject: [PATCH 515/895] x86: Add UV EFI table entry v4

Look for a UV entry in the EFI tables.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Paul Jackson <pj@sgi.com>
Acked-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/efi.c | 4 ++++
 include/linux/efi.h   | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 945a31cdd81f..1119d247fe11 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -366,6 +366,10 @@ void __init efi_init(void)
 					SMBIOS_TABLE_GUID)) {
 			efi.smbios = config_tables[i].table;
 			printk(" SMBIOS=0x%lx ", config_tables[i].table);
+		} else if (!efi_guidcmp(config_tables[i].guid,
+					UV_SYSTEM_TABLE_GUID)) {
+			efi.uv_systab = config_tables[i].table;
+			printk(" UVsystab=0x%lx ", config_tables[i].table);
 		} else if (!efi_guidcmp(config_tables[i].guid,
 					HCDP_TABLE_GUID)) {
 			efi.hcdp = config_tables[i].table;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 807373d467f7..bb66feb164bd 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -208,6 +208,9 @@ typedef efi_status_t efi_set_virtual_address_map_t (unsigned long memory_map_siz
 #define EFI_GLOBAL_VARIABLE_GUID \
     EFI_GUID(  0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c )
 
+#define UV_SYSTEM_TABLE_GUID \
+    EFI_GUID(  0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 )
+
 typedef struct {
 	efi_guid_t guid;
 	unsigned long table;
@@ -255,6 +258,7 @@ extern struct efi {
 	unsigned long boot_info;	/* boot info table */
 	unsigned long hcdp;		/* HCDP table */
 	unsigned long uga;		/* UGA table */
+	unsigned long uv_systab;	/* UV system table */
 	efi_get_time_t *get_time;
 	efi_set_time_t *set_time;
 	efi_get_wakeup_time_t *get_wakeup_time;
-- 
GitLab


From 7f5942329e0787087a5e4dced838cee711ac2b58 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Fri, 3 Oct 2008 11:59:15 -0500
Subject: [PATCH 516/895] x86: Add UV bios call infrastructure v4

Add the EFI callback function and associated wrapper code.
Initialize SAL system table entry info at boot time.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Paul Jackson <pj@sgi.com>
Acked-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/bios_uv.c        | 101 ++++++++++++++++++++++++-------
 arch/x86/kernel/genx2apic_uv_x.c |   1 +
 include/asm-x86/efi.h            |  13 ++++
 include/asm-x86/uv/bios.h        |  73 ++++++++++++----------
 4 files changed, 135 insertions(+), 53 deletions(-)

diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index fdd585f9c53d..5481eb59f783 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -1,8 +1,6 @@
 /*
  * BIOS run time interface routines.
  *
- *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
- *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
@@ -16,33 +14,94 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
  */
 
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <linux/io.h>
 #include <asm/uv/bios.h>
 
-const char *
-x86_bios_strerror(long status)
+struct uv_systab uv_systab;
+
+s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 {
-	const char *str;
-	switch (status) {
-	case  0: str = "Call completed without error";	break;
-	case -1: str = "Not implemented";		break;
-	case -2: str = "Invalid argument";		break;
-	case -3: str = "Call completed with error";	break;
-	default: str = "Unknown BIOS status code";	break;
-	}
-	return str;
+	struct uv_systab *tab = &uv_systab;
+
+	if (!tab->function)
+		/*
+		 * BIOS does not support UV systab
+		 */
+		return BIOS_STATUS_UNIMPLEMENTED;
+
+	return efi_call6((void *)__va(tab->function),
+					(u64)which, a1, a2, a3, a4, a5);
 }
 
-long
-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
-		   unsigned long *drift_info)
+s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+					u64 a4, u64 a5)
 {
-	struct uv_bios_retval isrv;
+	unsigned long bios_flags;
+	s64 ret;
+
+	local_irq_save(bios_flags);
+	ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+	local_irq_restore(bios_flags);
+
+	return ret;
+}
+
+s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+					u64 a4, u64 a5)
+{
+	s64 ret;
+
+	preempt_disable();
+	ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+	preempt_enable();
 
-	BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
-	*ticks_per_second = isrv.v0;
-	*drift_info = isrv.v1;
-	return isrv.status;
+	return ret;
+}
+
+long
+x86_bios_freq_base(unsigned long clock_type, unsigned long *ticks_per_second,
+					unsigned long *drift_info)
+{
+	return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
+				(u64)ticks_per_second, 0, 0, 0);
 }
 EXPORT_SYMBOL_GPL(x86_bios_freq_base);
+
+
+#ifdef CONFIG_EFI
+void uv_bios_init(void)
+{
+	struct uv_systab *tab;
+
+	if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+	    (efi.uv_systab == (unsigned long)NULL)) {
+		printk(KERN_CRIT "No EFI UV System Table.\n");
+		uv_systab.function = (unsigned long)NULL;
+		return;
+	}
+
+	tab = (struct uv_systab *)ioremap(efi.uv_systab,
+					sizeof(struct uv_systab));
+	if (strncmp(tab->signature, "UVST", 4) != 0)
+		printk(KERN_ERR "bad signature in UV system table!");
+
+	/*
+	 * Copy table to permanent spot for later use.
+	 */
+	memcpy(&uv_systab, tab, sizeof(struct uv_systab));
+	iounmap(tab);
+
+	printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
+}
+#else	/* !CONFIG_EFI */
+
+void uv_bios_init(void) { }
+#endif
+
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index b689075bbe2f..aa2e5e15bf69 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -427,6 +427,7 @@ void __init uv_system_init(void)
 	gnode_upper = (((unsigned long)node_id.s.node_id) &
 		       ~((1 << n_val) - 1)) << m_val;
 
+	uv_bios_init();
 	uv_rtc_init();
 
 	for_each_present_cpu(cpu) {
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index ed2de22e8705..313438e63348 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -94,4 +94,17 @@ extern void efi_reserve_early(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 
+#ifndef CONFIG_EFI
+/*
+ * IF EFI is not configured, have the EFI calls return -ENOSYS.
+ */
+#define efi_call0(_f)					(-ENOSYS)
+#define efi_call1(_f, _a1)				(-ENOSYS)
+#define efi_call2(_f, _a1, _a2)				(-ENOSYS)
+#define efi_call3(_f, _a1, _a2, _a3)			(-ENOSYS)
+#define efi_call4(_f, _a1, _a2, _a3, _a4)		(-ENOSYS)
+#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5)		(-ENOSYS)
+#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6)	(-ENOSYS)
+#endif /* CONFIG_EFI */
+
 #endif /* ASM_X86__EFI_H */
diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index 7cd6d7ec1308..f63e46e5337c 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h
@@ -2,9 +2,7 @@
 #define ASM_X86__UV__BIOS_H
 
 /*
- * BIOS layer definitions.
- *
- *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ * UV BIOS layer definitions.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -19,50 +17,61 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
  */
 
 #include <linux/rtc.h>
 
-#define BIOS_FREQ_BASE			0x01000001
+/*
+ * Values for the BIOS calls.  It is passed as the first * argument in the
+ * BIOS call.  Passing any other value in the first argument will result
+ * in a BIOS_STATUS_UNIMPLEMENTED return status.
+ */
+enum uv_bios_cmd {
+	UV_BIOS_COMMON,
+	UV_BIOS_GET_SN_INFO,
+	UV_BIOS_FREQ_BASE
+};
 
+/*
+ * Status values returned from a BIOS call.
+ */
 enum {
-	BIOS_FREQ_BASE_PLATFORM = 0,
-	BIOS_FREQ_BASE_INTERVAL_TIMER = 1,
-	BIOS_FREQ_BASE_REALTIME_CLOCK = 2
+	BIOS_STATUS_SUCCESS		=  0,
+	BIOS_STATUS_UNIMPLEMENTED	= -ENOSYS,
+	BIOS_STATUS_EINVAL		= -EINVAL,
+	BIOS_STATUS_UNAVAIL		= -EBUSY
 };
 
-# define BIOS_CALL(result, a0, a1, a2, a3, a4, a5, a6, a7)		\
-	do {								\
-		/* XXX - the real call goes here */			\
-		result.status = BIOS_STATUS_UNIMPLEMENTED;		\
-		isrv.v0 = 0;						\
-		isrv.v1 = 0;						\
-	} while (0)
+/*
+ * The UV system table describes specific firmware
+ * capabilities available to the Linux kernel at runtime.
+ */
+struct uv_systab {
+	char signature[4];	/* must be "UVST" */
+	u32 revision;		/* distinguish different firmware revs */
+	u64 function;		/* BIOS runtime callback function ptr */
+};
 
 enum {
-	BIOS_STATUS_SUCCESS		=  0,
-	BIOS_STATUS_UNIMPLEMENTED	= -1,
-	BIOS_STATUS_EINVAL		= -2,
-	BIOS_STATUS_ERROR		= -3
+	BIOS_FREQ_BASE_PLATFORM = 0,
+	BIOS_FREQ_BASE_INTERVAL_TIMER = 1,
+	BIOS_FREQ_BASE_REALTIME_CLOCK = 2
 };
 
-struct uv_bios_retval {
-	/*
-	 * A zero status value indicates call completed without error.
-	 * A negative status value indicates reason of call failure.
-	 * A positive status value indicates success but an
-	 * informational value should be printed (e.g., "reboot for
-	 * change to take effect").
-	 */
-	s64 status;
-	u64 v0;
-	u64 v1;
-	u64 v2;
-};
+/*
+ * bios calls have 6 parameters
+ */
+extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+
+extern void uv_bios_init(void);
 
 extern long
 x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
 		   unsigned long *drift_info);
-extern const char *x86_bios_strerror(long status);
 
 #endif /* ASM_X86__UV__BIOS_H */
-- 
GitLab


From 922402f15a85f7a064926eb1db68cc52bc4d4a91 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Fri, 3 Oct 2008 11:59:33 -0500
Subject: [PATCH 517/895] x86: Add UV partition call v4

Add a bios call to return partitioning related info.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/bios_uv.c        | 44 ++++++++++++++++++++++++++++----
 arch/x86/kernel/genx2apic_uv_x.c | 14 +++++-----
 include/asm-x86/uv/bios.h        | 22 +++++++++++++---
 3 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 5481eb59f783..f0dfe6f17e7e 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -23,6 +23,7 @@
 #include <asm/efi.h>
 #include <linux/io.h>
 #include <asm/uv/bios.h>
+#include <asm/uv/uv_hub.h>
 
 struct uv_systab uv_systab;
 
@@ -65,14 +66,47 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
 	return ret;
 }
 
-long
-x86_bios_freq_base(unsigned long clock_type, unsigned long *ticks_per_second,
-					unsigned long *drift_info)
+
+long sn_partition_id;
+EXPORT_SYMBOL_GPL(sn_partition_id);
+long uv_coherency_id;
+EXPORT_SYMBOL_GPL(uv_coherency_id);
+long uv_region_size;
+EXPORT_SYMBOL_GPL(uv_region_size);
+int uv_type;
+
+
+s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
+		long *region)
+{
+	s64 ret;
+	u64 v0, v1;
+	union partition_info_u part;
+
+	ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
+				(u64)(&v0), (u64)(&v1), 0, 0);
+	if (ret != BIOS_STATUS_SUCCESS)
+		return ret;
+
+	part.val = v0;
+	if (uvtype)
+		*uvtype = part.hub_version;
+	if (partid)
+		*partid = part.partition_id;
+	if (coher)
+		*coher = part.coherence_id;
+	if (region)
+		*region = part.region_size;
+	return ret;
+}
+
+
+s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
 {
 	return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
-				(u64)ticks_per_second, 0, 0, 0);
+			   (u64)ticks_per_second, 0, 0, 0);
 }
-EXPORT_SYMBOL_GPL(x86_bios_freq_base);
+EXPORT_SYMBOL_GPL(uv_bios_freq_base);
 
 
 #ifdef CONFIG_EFI
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index aa2e5e15bf69..bfd532843df6 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode)
 
 static __init void uv_rtc_init(void)
 {
-	long status, ticks_per_sec, drift;
+	long status;
+	u64 ticks_per_sec;
 
-	status =
-	    x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
-					&drift);
-	if (status != 0 || ticks_per_sec < 100000) {
+	status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
+					&ticks_per_sec);
+	if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
 		printk(KERN_WARNING
 			"unable to determine platform RTC clock frequency, "
 			"guessing.\n");
@@ -428,6 +428,8 @@ void __init uv_system_init(void)
 		       ~((1 << n_val) - 1)) << m_val;
 
 	uv_bios_init();
+	uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
+			    &uv_coherency_id, &uv_region_size);
 	uv_rtc_init();
 
 	for_each_present_cpu(cpu) {
@@ -449,7 +451,7 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
-		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+		uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
 		max_pnode = max(pnode, max_pnode);
diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index f63e46e5337c..3b305d897c84 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h
@@ -61,6 +61,16 @@ enum {
 	BIOS_FREQ_BASE_REALTIME_CLOCK = 2
 };
 
+union partition_info_u {
+	u64	val;
+	struct {
+		u64	hub_version	:  8,
+			partition_id	: 16,
+			coherence_id	: 16,
+			region_size	: 24;
+	};
+};
+
 /*
  * bios calls have 6 parameters
  */
@@ -68,10 +78,16 @@ extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
 extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
 extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
 
+extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
+extern s64 uv_bios_freq_base(u64, u64 *);
+
 extern void uv_bios_init(void);
 
-extern long
-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
-		   unsigned long *drift_info);
+extern int uv_type;
+extern long sn_partition_id;
+extern long uv_coherency_id;
+extern long uv_region_size;
+#define partition_coherence_id()	(uv_coherency_id)
+
 
 #endif /* ASM_X86__UV__BIOS_H */
-- 
GitLab


From fc8c2d763bacc02962048fa042e287debb1416aa Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Fri, 3 Oct 2008 11:59:50 -0500
Subject: [PATCH 518/895] x86: Add sysfs entries for UV v4

Create /sys/firmware/sgi_uv sysfs entries for partition_id and coherence_id.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile   |  2 +-
 arch/x86/kernel/uv_sysfs.c | 72 ++++++++++++++++++++++++++++++++++++++
 include/asm-x86/uv/bios.h  |  1 +
 3 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/uv_sysfs.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index acc0e8bf043e..cb6ade443f00 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE)			+= microcode.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
         obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
-	obj-y				+= bios_uv.o uv_irq.o
+	obj-y				+= bios_uv.o uv_irq.o uv_sysfs.o
         obj-y				+= genx2apic_cluster.o
         obj-y				+= genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
new file mode 100644
index 000000000000..67f9b9dbf800
--- /dev/null
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -0,0 +1,72 @@
+/*
+ * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
+ */
+
+#include <linux/sysdev.h>
+#include <asm/uv/bios.h>
+
+struct kobject *sgi_uv_kobj;
+
+static ssize_t partition_id_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
+}
+
+static ssize_t coherence_id_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
+}
+
+static struct kobj_attribute partition_id_attr =
+	__ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
+
+static struct kobj_attribute coherence_id_attr =
+	__ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
+
+
+static int __init sgi_uv_sysfs_init(void)
+{
+	unsigned long ret;
+
+	if (!sgi_uv_kobj)
+		sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
+	if (!sgi_uv_kobj) {
+		printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n");
+		return -EINVAL;
+	}
+
+	ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
+	if (ret) {
+		printk(KERN_WARNING "sysfs_create_file partition_id failed \n");
+		return ret;
+	}
+
+	ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
+	if (ret) {
+		printk(KERN_WARNING "sysfs_create_file coherence_id failed \n");
+		return ret;
+	}
+
+	return 0;
+}
+
+device_initcall(sgi_uv_sysfs_init);
diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index 3b305d897c84..215f1969c266 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h
@@ -89,5 +89,6 @@ extern long uv_coherency_id;
 extern long uv_region_size;
 #define partition_coherence_id()	(uv_coherency_id)
 
+extern struct kobject *sgi_uv_kobj;	/* /sys/firmware/sgi_uv */
 
 #endif /* ASM_X86__UV__BIOS_H */
-- 
GitLab


From 4c66a73f0796dacc2ff0d4af75794ec843ceb3d1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 4 Oct 2008 15:52:15 -0700
Subject: [PATCH 519/895] x86: sparse_irq: fix typo in debug print out

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 260c95a5e6dc..f959acbc0db2 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -257,7 +257,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 			panic("please boot with nr_irq_cfg= %d\n", count * 2);
 
 		phys = __pa(cfg);
-		printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+		printk(KERN_DEBUG "irq_cfg ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
 
 		for (i = 0; i < nr_irq_cfg; i++)
 			init_one_irq_cfg(&cfg[i]);
-- 
GitLab


From 81608f3c254512b906ab78082ec5966b376aacd5 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Fri, 10 Oct 2008 19:00:17 +0400
Subject: [PATCH 520/895] x86: apic - unify APIC_DIVISOR

Use APIC_DIVISOR being set to 16 for both 32/64bit
mode. To escape APIC timer underflow during calibration
set it to the maximum possible value.

Also typo error (CONFG instead of proper CONFIG) fixed.
The error was caught by Venkatesh Pallipadi, thanks a lot Venkatesh!

See details on http://lkml.org/lkml/2008/10/9/425

Reported-by: Venkatesh Pallipad <venkatesh.pallipadi@intel.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: "Maciej W. Rozycki" <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 1f48bd1c9f2d..04a7f960bbc0 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -332,11 +332,7 @@ int lapic_get_maxlvt(void)
  */
 
 /* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
 #define APIC_DIVISOR 16
-#endif
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -592,10 +588,10 @@ static int __init calibrate_APIC_clock(void)
 	global_clock_event->event_handler = lapic_cal_handler;
 
 	/*
-	 * Setup the APIC counter to 1e9. There is no way the lapic
+	 * Setup the APIC counter to maximum. There is no way the lapic
 	 * can underflow in the 100ms detection time frame
 	 */
-	__setup_APIC_LVTT(1000000000, 0, 0);
+	__setup_APIC_LVTT(0xffffffff, 0, 0);
 
 	/* Let the interrupts run */
 	local_irq_enable();
-- 
GitLab


From 7ef0c30dbf96a8d9a234e90c248eb19df3c031be Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Oct 2008 13:07:35 +0200
Subject: [PATCH 521/895] genirq: define nr_irqs for architectures with
 GENERIC_HARDIRQS=n

Revert the sparse irq changes in m68k/s390/sparc and just define
nr_irqs as NR_IRQS for those architectures.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/m68k/kernel/ints.c   | 3 ---
 arch/s390/kernel/irq.c    | 3 ---
 arch/sparc/kernel/irq.c   | 4 ----
 include/linux/interrupt.h | 8 +++++---
 4 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c
index 44169e4cd91d..7e8a0d394e61 100644
--- a/arch/m68k/kernel/ints.c
+++ b/arch/m68k/kernel/ints.c
@@ -46,9 +46,6 @@
 #include <asm/q40ints.h>
 #endif
 
-int nr_irqs = NR_IRQS;
-EXPORT_SYMBOL(nr_irqs);
-
 extern u32 auto_irqhandler_fixup[];
 extern u32 user_irqhandler_fixup[];
 extern u16 user_irqvec_fixup[];
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 3624c4a0037a..e7c5bfb7c755 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -17,9 +17,6 @@
 #include <linux/proc_fs.h>
 #include <linux/profile.h>
 
-int nr_irqs = NR_IRQS;
-EXPORT_SYMBOL(nr_irqs);
-
 /*
  * show_interrupts is needed by /proc/interrupts.
  */
diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c
index 4b99e3ce3916..93e1d1c65290 100644
--- a/arch/sparc/kernel/irq.c
+++ b/arch/sparc/kernel/irq.c
@@ -55,10 +55,6 @@
 #define SMP_NOP2
 #define SMP_NOP3
 #endif /* SMP */
-
-int nr_irqs = NR_IRQS;
-EXPORT_SYMBOL(nr_irqs);
-
 unsigned long __raw_local_irq_save(void)
 {
 	unsigned long retval;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index d4039a0b23f4..5a57df2ee922 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -15,11 +15,13 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
-extern int nr_irqs;
-
 #ifndef CONFIG_GENERIC_HARDIRQS
-#define for_each_irq_desc(irq, desc)		\
+# define for_each_irq_desc(irq, desc)		\
 	for (irq = 0; irq < nr_irqs; irq++)
+
+# define nr_irqs		NR_IRQS
+#else
+extern int nr_irqs;
 #endif
 
 /*
-- 
GitLab


From 3235e936c0cc3589309280b6f59e5096779adae3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Oct 2008 13:16:00 +0200
Subject: [PATCH 522/895] x86: remove sparse irq from Kconfig

This code is not ready yet.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 600584b7a497..8636ddf2f4a4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -237,17 +237,6 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
-config HAVE_SPARSE_IRQ
-	bool "Support sparse irq numbering"
-	depends on PCI_MSI || HT_IRQ
-	default y
-	help
-	  This enables support for sparse irq, esp for msi/msi-x. the irq
-	  number will be bus/dev/fn + 12bit. You may need if you have lots of
-	  cards supports msi-x installed.
-
-	  If you don't know what to do here, say Y.
-
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
-- 
GitLab


From 70dd4d992ab324a59cdcd6bedc3f4e729863d514 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Oct 2008 15:39:27 +0200
Subject: [PATCH 523/895] genirq: consolidate nr_irqs and for_each_irq_desc()

Move all of those to linux/irq.h where they belong.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h |  9 ---------
 include/linux/irq.h       | 17 ++++++++++++-----
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5a57df2ee922..58ff4e74b2f3 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -15,15 +15,6 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
-#ifndef CONFIG_GENERIC_HARDIRQS
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0; irq < nr_irqs; irq++)
-
-# define nr_irqs		NR_IRQS
-#else
-extern int nr_irqs;
-#endif
-
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
  * linux/ioport.h to select the interrupt line behaviour.  When
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 93fe9a943e71..dbe8734ae86c 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -11,6 +11,18 @@
 
 #include <linux/smp.h>
 
+#ifndef CONFIG_GENERIC_HARDIRQS
+# define nr_irqs		NR_IRQS
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0; irq < nr_irqs; irq++)
+#else
+extern int nr_irqs;
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+#endif
+
 #ifndef CONFIG_S390
 
 #include <linux/linkage.h>
@@ -204,11 +216,6 @@ extern struct irq_desc irq_desc[NR_IRQS];
 extern struct irq_desc *irq_desc;
 #endif
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-#define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc = &irq_desc[irq])
-#endif
-
 #else
 
 extern struct irq_desc *sparse_irqs;
-- 
GitLab


From c6b7674f323622d86316bf7951ad9cae1ce24642 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Oct 2008 14:31:29 +0200
Subject: [PATCH 524/895] genirq: use inline function for irq_to_desc

For the non sparse irq case an inline function is perfectly fine.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 15 +++++++++++++--
 kernel/irq/handle.c | 14 --------------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index dbe8734ae86c..7d1adacaadb4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -204,8 +204,6 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
-extern struct irq_desc *irq_to_desc(unsigned int irq);
-extern struct irq_desc *irq_to_desc_alloc(unsigned int irq);
 
 #ifndef CONFIG_HAVE_SPARSE_IRQ
 
@@ -216,8 +214,21 @@ extern struct irq_desc irq_desc[NR_IRQS];
 extern struct irq_desc *irq_desc;
 #endif
 
+static inline struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	return (irq < nr_irqs) ? irq_desc + irq : NULL;
+}
+
+static inline struct irq_desc *irq_to_desc_alloc(unsigned int irq)
+{
+	return irq_to_desc(irq);
+}
+
 #else
 
+extern struct irq_desc *irq_to_desc(unsigned int irq);
+extern struct irq_desc *irq_to_desc_alloc(unsigned int irq);
+
 extern struct irq_desc *sparse_irqs;
 #define for_each_irq_desc(irqX, desc)		\
 	for (desc = sparse_irqs, irqX = desc->irq; desc; desc = desc->next, irqX = desc ? desc->irq : -1U)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index fb6bdb602a93..c19896f895f9 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -262,20 +262,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 
 #endif
 
-#ifndef CONFIG_HAVE_SPARSE_IRQ
-struct irq_desc *irq_to_desc(unsigned int irq)
-{
-	if (irq < nr_irqs)
-		return &irq_desc[irq];
-
-	return NULL;
-}
-struct irq_desc *irq_to_desc_alloc(unsigned int irq)
-{
-	return irq_to_desc(irq);
-}
-#endif
-
 /*
  * What should we do if we get a hw irq event on an illegal vector?
  * Each architecture has to answer this themself.
-- 
GitLab


From 2cc21ef843d4fb7da122239b644a1f6f0aca60a6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Oct 2008 14:16:55 +0200
Subject: [PATCH 525/895] genirq: remove sparse irq code

This code is not ready, but we need to rip it out instead of rebasing
as we would lose the APIC/IO_APIC unification otherwise.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/io_apic.c    | 130 ++---------------------------------
 arch/x86/kernel/irq_32.c     |   8 ---
 arch/x86/kernel/irq_64.c     |   8 ---
 drivers/char/random.c        |  31 ---------
 drivers/pci/htirq.c          |  19 +----
 drivers/pci/intr_remapping.c |  75 --------------------
 fs/proc/proc_misc.c          |  43 ++----------
 include/linux/irq.h          |  20 ------
 kernel/irq/handle.c          | 114 ------------------------------
 9 files changed, 10 insertions(+), 438 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f959acbc0db2..683610517d2a 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -111,9 +111,6 @@ struct irq_cfg;
 struct irq_pin_list;
 struct irq_cfg {
 	unsigned int irq;
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	struct irq_cfg *next;
-#endif
 	struct irq_pin_list *irq_2_pin;
 	cpumask_t domain;
 	cpumask_t old_domain;
@@ -151,15 +148,6 @@ static void init_one_irq_cfg(struct irq_cfg *cfg)
 
 static struct irq_cfg *irq_cfgx;
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-/*
- * Protect the irq_cfgx_free freelist:
- */
-static DEFINE_SPINLOCK(irq_cfg_lock);
-
-static struct irq_cfg *irq_cfgx_free;
-#endif
-
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -174,114 +162,7 @@ static void __init init_work(void *data)
 	legacy_count = ARRAY_SIZE(irq_cfg_legacy);
 	for (i = legacy_count; i < *da->nr; i++)
 		init_one_irq_cfg(&cfg[i]);
-
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	for (i = 1; i < *da->nr; i++)
-		cfg[i-1].next = &cfg[i];
-
-	irq_cfgx_free = &irq_cfgx[legacy_count];
-	irq_cfgx[legacy_count - 1].next = NULL;
-#endif
-}
-
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-/* need to be biger than size of irq_cfg_legacy */
-static int nr_irq_cfg = 32;
-
-static int __init parse_nr_irq_cfg(char *arg)
-{
-	if (arg) {
-		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
-		if (nr_irq_cfg < 32)
-			nr_irq_cfg = 32;
-	}
-	return 0;
-}
-
-early_param("nr_irq_cfg", parse_nr_irq_cfg);
-
-#define for_each_irq_cfg(irqX, cfg)           \
-        for (cfg = irq_cfgx, irqX = cfg->irq; cfg; cfg = cfg->next, irqX = cfg ? cfg->irq : -1U)
-
-
-DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
-
-static struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	struct irq_cfg *cfg;
-
-	cfg = irq_cfgx;
-	while (cfg) {
-		if (cfg->irq == irq)
-			return cfg;
-
-		cfg = cfg->next;
-	}
-
-	return NULL;
-}
-
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
-{
-	struct irq_cfg *cfg, *cfg_pri;
-	unsigned long flags;
-	int count = 0;
-	int i;
-
-	cfg_pri = cfg = irq_cfgx;
-	while (cfg) {
-		if (cfg->irq == irq)
-			return cfg;
-
-		cfg_pri = cfg;
-		cfg = cfg->next;
-		count++;
-	}
-
-	spin_lock_irqsave(&irq_cfg_lock, flags);
-	if (!irq_cfgx_free) {
-		unsigned long phys;
-		unsigned long total_bytes;
-		/*
-		 *  we run out of pre-allocate ones, allocate more
-		 */
-		printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
-
-		total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
-		if (after_bootmem)
-			cfg = kzalloc(total_bytes, GFP_ATOMIC);
-		else
-			cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-		if (!cfg)
-			panic("please boot with nr_irq_cfg= %d\n", count * 2);
-
-		phys = __pa(cfg);
-		printk(KERN_DEBUG "irq_cfg ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
-
-		for (i = 0; i < nr_irq_cfg; i++)
-			init_one_irq_cfg(&cfg[i]);
-
-		for (i = 1; i < nr_irq_cfg; i++)
-			cfg[i-1].next = &cfg[i];
-
-		irq_cfgx_free = cfg;
-	}
-
-	cfg = irq_cfgx_free;
-	irq_cfgx_free = irq_cfgx_free->next;
-	cfg->next = NULL;
-	if (cfg_pri)
-		cfg_pri->next = cfg;
-	else
-		irq_cfgx = cfg;
-	cfg->irq = irq;
-
-	spin_unlock_irqrestore(&irq_cfg_lock, flags);
-
-	return cfg;
 }
-#else
 
 #define for_each_irq_cfg(irq, cfg)		\
 	for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq])
@@ -290,17 +171,16 @@ DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work
 
 struct irq_cfg *irq_cfg(unsigned int irq)
 {
-        if (irq < nr_irqs)
-                return &irq_cfgx[irq];
+	if (irq < nr_irqs)
+		return &irq_cfgx[irq];
 
-        return NULL;
+	return NULL;
 }
 struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 {
-        return irq_cfg(irq);
+	return irq_cfg(irq);
 }
 
-#endif
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -3068,9 +2948,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 	unsigned long flags;
 	struct irq_cfg *cfg_new;
 
-#ifndef CONFIG_HAVE_SPARSE_IRQ
 	irq_want = nr_irqs - 1;
-#endif
 
 	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 001772ffc918..ccf6c1bf7120 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -272,20 +272,12 @@ int show_interrupts(struct seq_file *p, void *v)
 	struct irq_desc *desc = NULL;
 	int tail = 0;
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	desc = (struct irq_desc *)v;
-	entries = -1U;
-	i = desc->irq;
-	if (!desc->next)
-		tail = 1;
-#else
 	entries = nr_irqs - 1;
 	i = *(loff_t *) v;
 	if (i == nr_irqs)
 		tail = 1;
 	else
 		desc = irq_to_desc(i);
-#endif
 
 	if (i == 0) {
 		seq_printf(p, "           ");
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index ec2661091283..21f53b911113 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -77,20 +77,12 @@ int show_interrupts(struct seq_file *p, void *v)
 	struct irq_desc *desc = NULL;
 	int tail = 0;
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	desc = (struct irq_desc *)v;
-	entries = -1U;
-	i = desc->irq;
-	if (!desc->next)
-		tail = 1;
-#else
 	entries = nr_irqs - 1;
 	i = *(loff_t *) v;
 	if (i == nr_irqs)
 		tail = 1;
 	else
 		desc = irq_to_desc(i);
-#endif
 
 	if (i == 0) {
 		seq_printf(p, "           ");
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 60c9c7ee6b2c..9ce80213007b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -558,8 +558,6 @@ struct timer_rand_state {
 	unsigned dont_count_entropy:1;
 };
 
-#ifndef CONFIG_HAVE_SPARSE_IRQ
-
 #ifdef CONFIG_HAVE_DYN_ARRAY
 static struct timer_rand_state **irq_timer_state;
 DEFINE_DYN_ARRAY(irq_timer_state, sizeof(struct timer_rand_state *), nr_irqs, PAGE_SIZE, NULL);
@@ -583,33 +581,6 @@ static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *stat
 	irq_timer_state[irq] = state;
 }
 
-#else
-
-static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	if (!desc)
-		return NULL;
-
-	return desc->timer_rand_state;
-}
-
-static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	if (!desc)
-		return;
-
-	desc->timer_rand_state = state;
-}
-#endif
-
 static struct timer_rand_state input_timer_state;
 
 /*
@@ -967,10 +938,8 @@ void rand_initialize_irq(int irq)
 {
 	struct timer_rand_state *state;
 
-#ifndef CONFIG_HAVE_SPARSE_IRQ
 	if (irq >= nr_irqs)
 		return;
-#endif
 
 	state = get_timer_rand_state(irq);
 
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 9e4929a00832..bf7d6ce9bbb3 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -82,18 +82,6 @@ void unmask_ht_irq(unsigned int irq)
 	write_ht_irq_msg(irq, &msg);
 }
 
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
-{
-	unsigned int irq;
-
-	irq = dev->bus->number;
-	irq <<= 8;
-	irq |= dev->devfn;
-	irq <<= 12;
-
-	return irq;
-}
-
 /**
  * __ht_create_irq - create an irq and attach it to a device.
  * @dev: The hypertransport device to find the irq capability on.
@@ -110,7 +98,6 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	int max_irq;
 	int pos;
 	int irq;
-	unsigned int irq_want;
 
 	pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
 	if (!pos)
@@ -138,12 +125,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	cfg->msg.address_lo = 0xffffffff;
 	cfg->msg.address_hi = 0xffffffff;
 
-	irq_want= build_irq_for_pci_dev(dev);
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	irq = create_irq_nr(irq_want + idx);
-#else
 	irq = create_irq();
-#endif
+
 	if (irq <= 0) {
 		kfree(cfg);
 		return -EBUSY;
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 2dcf973890c4..0f43b265eee6 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -19,78 +19,6 @@ struct irq_2_iommu {
 	u8  irte_mask;
 };
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-static struct irq_2_iommu *irq_2_iommuX;
-/* fill one page ? */
-static int nr_irq_2_iommu = 0x100;
-static int irq_2_iommu_index;
-DEFINE_DYN_ARRAY(irq_2_iommuX, sizeof(struct irq_2_iommu), nr_irq_2_iommu, PAGE_SIZE, NULL);
-
-extern void *__alloc_bootmem_nopanic(unsigned long size,
-				     unsigned long align,
-				     unsigned long goal);
-
-static struct irq_2_iommu *get_one_free_irq_2_iommu(int not_used)
-{
-	struct irq_2_iommu *iommu;
-	unsigned long total_bytes;
-
-	if (irq_2_iommu_index >= nr_irq_2_iommu) {
-		/*
-		 *  we run out of pre-allocate ones, allocate more
-		 */
-		printk(KERN_DEBUG "try to get more irq_2_iommu %d\n", nr_irq_2_iommu);
-
-		total_bytes = sizeof(struct irq_2_iommu)*nr_irq_2_iommu;
-
-		if (after_bootmem)
-			iommu = kzalloc(total_bytes, GFP_ATOMIC);
-		else
-			iommu = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-		if (!iommu)
-			panic("can not get more irq_2_iommu\n");
-
-		irq_2_iommuX = iommu;
-		irq_2_iommu_index = 0;
-	}
-
-	iommu = &irq_2_iommuX[irq_2_iommu_index];
-	irq_2_iommu_index++;
-	return iommu;
-}
-
-static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	BUG_ON(!desc);
-
-	return desc->irq_2_iommu;
-}
-
-static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
-{
-	struct irq_desc *desc;
-	struct irq_2_iommu *irq_iommu;
-
-	/*
-	 * alloc irq desc if not allocated already.
-	 */
-	desc = irq_to_desc_alloc(irq);
-
-	irq_iommu = desc->irq_2_iommu;
-
-	if (!irq_iommu)
-		desc->irq_2_iommu = get_one_free_irq_2_iommu(irq);
-
-	return desc->irq_2_iommu;
-}
-
-#else /* !CONFIG_HAVE_SPARSE_IRQ */
-
 #ifdef CONFIG_HAVE_DYN_ARRAY
 static struct irq_2_iommu *irq_2_iommuX;
 DEFINE_DYN_ARRAY(irq_2_iommuX, sizeof(struct irq_2_iommu), nr_irqs, PAGE_SIZE, NULL);
@@ -109,7 +37,6 @@ static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 {
 	return irq_2_iommu(irq);
 }
-#endif
 
 static DEFINE_SPINLOCK(irq_2_ir_lock);
 
@@ -166,11 +93,9 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	if (!count)
 		return -1;
 
-#ifndef	CONFIG_HAVE_SPARSE_IRQ
 	/* protect irq_2_iommu_alloc later */
 	if (irq >= nr_irqs)
 		return -1;
-#endif
 
 	/*
 	 * start the IRTE search from index 0.
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index d68c3592fe4a..3f5c7b9d1a70 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -529,13 +529,10 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
+
 		for_each_irq_desc(j, desc)
-		{
-			unsigned int temp;
+			sum += kstat_irqs_cpu(j, i);
 
-			temp = kstat_irqs_cpu(j, i);
-			sum += temp;
-		}
 		sum += arch_irq_stat_cpu(i);
 	}
 	sum += arch_irq_stat();
@@ -578,21 +575,13 @@ static int show_stat(struct seq_file *p, void *v)
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
 	/* sum again ? it could be updated? */
-	for_each_irq_desc(j, desc)
-	{
+	for_each_irq_desc(j, desc) {
 		per_irq_sum = 0;
-		for_each_possible_cpu(i) {
-			unsigned int temp;
 
-			temp = kstat_irqs_cpu(j, i);
-			per_irq_sum += temp;
-		}
+		for_each_possible_cpu(i)
+			per_irq_sum += kstat_irqs_cpu(j, i);
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-		seq_printf(p, " %#x:%u", j, per_irq_sum);
-#else
 		seq_printf(p, " %u", per_irq_sum);
-#endif
 	}
 
 	seq_printf(p,
@@ -645,36 +634,14 @@ static const struct file_operations proc_stat_operations = {
  */
 static void *int_seq_start(struct seq_file *f, loff_t *pos)
 {
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	struct irq_desc *desc;
-	int irq;
-	int count = *pos;
-
-	for_each_irq_desc(irq, desc) {
-		if (count-- == 0)
-			return desc;
-	}
-
-	return NULL;
-#else
 	return (*pos <= nr_irqs) ? pos : NULL;
-#endif
 }
 
 
 static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	struct irq_desc *desc;
-
-	desc = ((struct irq_desc *)v)->next;
-	(*pos)++;
-
-	return desc;
-#else
 	(*pos)++;
 	return (*pos <= nr_irqs) ? pos : NULL;
-#endif
 }
 
 static void int_seq_stop(struct seq_file *f, void *v)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 7d1adacaadb4..68e0f3f9df30 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -167,15 +167,8 @@ struct irq_2_iommu;
  */
 struct irq_desc {
 	unsigned int		irq;
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	struct irq_desc		*next;
-	struct timer_rand_state *timer_rand_state;
-#endif
 #ifdef CONFIG_HAVE_DYN_ARRAY
 	unsigned int            *kstat_irqs;
-#endif
-#if defined(CONFIG_INTR_REMAP) && defined(CONFIG_HAVE_SPARSE_IRQ)
-       struct irq_2_iommu      *irq_2_iommu;
 #endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
@@ -205,8 +198,6 @@ struct irq_desc {
 } ____cacheline_internodealigned_in_smp;
 
 
-#ifndef CONFIG_HAVE_SPARSE_IRQ
-
 #ifndef CONFIG_HAVE_DYN_ARRAY
 /* could be removed if we get rid of all irq_desc reference */
 extern struct irq_desc irq_desc[NR_IRQS];
@@ -224,17 +215,6 @@ static inline struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 	return irq_to_desc(irq);
 }
 
-#else
-
-extern struct irq_desc *irq_to_desc(unsigned int irq);
-extern struct irq_desc *irq_to_desc_alloc(unsigned int irq);
-
-extern struct irq_desc *sparse_irqs;
-#define for_each_irq_desc(irqX, desc)		\
-	for (desc = sparse_irqs, irqX = desc->irq; desc; desc = desc->next, irqX = desc ? desc->irq : -1U)
-
-#endif
-
 #ifdef CONFIG_HAVE_DYN_ARRAY
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c19896f895f9..f837133cdfbe 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -111,15 +111,6 @@ static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 	}
 }
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-/*
- * Protect the sparse_irqs_free freelist:
- */
-static DEFINE_SPINLOCK(sparse_irq_lock);
-static struct irq_desc *sparse_irqs_free;
-struct irq_desc *sparse_irqs;
-#endif
-
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -130,121 +121,16 @@ static void __init init_work(void *data)
 
 	for (i = 0; i < *da->nr; i++) {
 		init_one_irq_desc(&desc[i]);
-#ifndef CONFIG_HAVE_SPARSE_IRQ
 		desc[i].irq = i;
-#endif
 	}
 
 	/* init kstat_irqs, nr_cpu_ids is ready already */
 	init_kstat_irqs(desc, *da->nr, nr_cpu_ids);
-
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	for (i = 1; i < *da->nr; i++)
-		desc[i-1].next = &desc[i];
-
-	sparse_irqs_free = sparse_irqs;
-	sparse_irqs = NULL;
-#endif
-}
-
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-static int nr_irq_desc = 32;
-
-static int __init parse_nr_irq_desc(char *arg)
-{
-	if (arg)
-		nr_irq_desc = simple_strtoul(arg, NULL, 0);
-	return 0;
-}
-
-early_param("nr_irq_desc", parse_nr_irq_desc);
-
-DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
-
-struct irq_desc *irq_to_desc(unsigned int irq)
-{
-	struct irq_desc *desc;
-
-	desc = sparse_irqs;
-	while (desc) {
-		if (desc->irq == irq)
-			return desc;
-
-		desc = desc->next;
-	}
-	return NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc(unsigned int irq)
-{
-	struct irq_desc *desc, *desc_pri;
-	unsigned long flags;
-	int count = 0;
-	int i;
-
-	desc_pri = desc = sparse_irqs;
-	while (desc) {
-		if (desc->irq == irq)
-			return desc;
-
-		desc_pri = desc;
-		desc = desc->next;
-		count++;
-	}
-
-	spin_lock_irqsave(&sparse_irq_lock, flags);
-	/*
-	 *  we run out of pre-allocate ones, allocate more
-	 */
-	if (!sparse_irqs_free) {
-		unsigned long phys;
-		unsigned long total_bytes;
-
-		printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
-
-		total_bytes = sizeof(struct irq_desc) * nr_irq_desc;
-		if (after_bootmem)
-			desc = kzalloc(total_bytes, GFP_ATOMIC);
-		else
-			desc = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-		if (!desc)
-			panic("please boot with nr_irq_desc= %d\n", count * 2);
-
-		phys = __pa(desc);
-		printk(KERN_DEBUG "irq_desc ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
-
-		for (i = 0; i < nr_irq_desc; i++)
-			init_one_irq_desc(&desc[i]);
-
-		for (i = 1; i < nr_irq_desc; i++)
-			desc[i-1].next = &desc[i];
-
-		/* init kstat_irqs, nr_cpu_ids is ready already */
-		init_kstat_irqs(desc, nr_irq_desc, nr_cpu_ids);
-
-		sparse_irqs_free = desc;
-	}
-
-	desc = sparse_irqs_free;
-	sparse_irqs_free = sparse_irqs_free->next;
-	desc->next = NULL;
-	if (desc_pri)
-		desc_pri->next = desc;
-	else
-		sparse_irqs = desc;
-	desc->irq = irq;
-
-	spin_unlock_irqrestore(&sparse_irq_lock, flags);
-
-	return desc;
-}
-#else
 struct irq_desc *irq_desc;
 DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
 
-#endif
-
 #else
 
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
-- 
GitLab


From ee32c9732244bde4b9b59eeac2814c23e2b71f8d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Oct 2008 14:34:09 +0200
Subject: [PATCH 526/895] genirq: remove irq_to_desc_alloc

Remove the leftover of sparseirqs.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/io_apic.c    | 6 +-----
 arch/x86/kernel/irqinit_32.c | 2 +-
 arch/x86/kernel/irqinit_64.c | 2 +-
 include/linux/irq.h          | 5 -----
 kernel/irq/chip.c            | 2 +-
 5 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 683610517d2a..e03bc0f87eef 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1257,11 +1257,7 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 {
 	struct irq_desc *desc;
 
-	/* first time to use this irq_desc */
-	if (irq < 16)
-		desc = irq_to_desc(irq);
-	else
-		desc = irq_to_desc_alloc(irq);
+	desc = irq_to_desc(irq);
 
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL)
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 9092103a18eb..a8d35998d308 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -70,7 +70,7 @@ void __init init_ISA_irqs (void)
 	 */
 	for (i = 0; i < 16; i++) {
 		/* first time call this irq_desc */
-		struct irq_desc *desc = irq_to_desc_alloc(i);
+		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
 		desc->action = NULL;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index d17fbc26d96f..ff0235391285 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -144,7 +144,7 @@ void __init init_ISA_irqs(void)
 
 	for (i = 0; i < 16; i++) {
 		/* first time call this irq_desc */
-		struct irq_desc *desc = irq_to_desc_alloc(i);
+		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
 		desc->action = NULL;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 68e0f3f9df30..3f33c7790300 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -210,11 +210,6 @@ static inline struct irq_desc *irq_to_desc(unsigned int irq)
 	return (irq < nr_irqs) ? irq_desc + irq : NULL;
 }
 
-static inline struct irq_desc *irq_to_desc_alloc(unsigned int irq)
-{
-	return irq_to_desc(irq);
-}
-
 #ifdef CONFIG_HAVE_DYN_ARRAY
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 570d1ea1db5d..e6f73dbfcc3d 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -28,7 +28,7 @@ void dynamic_irq_init(unsigned int irq)
 	unsigned long flags;
 
 	/* first time to use this irq_desc */
-	desc = irq_to_desc_alloc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
-- 
GitLab


From d6c88a507ef0b6afdb013cba4e7804ba7324d99a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Oct 2008 15:27:23 +0200
Subject: [PATCH 527/895] genirq: revert dynarray

Revert the dynarray changes. They need more thought and polishing.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/Kconfig                      |   4 -
 arch/x86/Kconfig                  |   1 -
 arch/x86/kernel/io_apic.c         | 199 +++++++++++-------------------
 arch/x86/kernel/setup_percpu.c    |   8 +-
 arch/x86/kernel/visws_quirks.c    |   2 +-
 arch/x86/kernel/vmlinux_32.lds.S  |   1 -
 arch/x86/kernel/vmlinux_64.lds.S  |   2 -
 arch/x86/xen/spinlock.c           |   2 +-
 drivers/char/random.c             |   5 -
 drivers/pci/intr_remapping.c      |  11 +-
 include/asm-generic/vmlinux.lds.h |  13 --
 include/linux/init.h              |  43 -------
 include/linux/irq.h               |  15 ---
 include/linux/kernel_stat.h       |  16 +--
 init/Makefile                     |   2 +-
 init/dyn_array.c                  | 120 ------------------
 init/main.c                       |  11 +-
 kernel/irq/chip.c                 |  30 +----
 kernel/irq/handle.c               | 114 ++---------------
 19 files changed, 103 insertions(+), 496 deletions(-)
 delete mode 100644 init/dyn_array.c

diff --git a/arch/Kconfig b/arch/Kconfig
index c8a7c2eb6490..071004d3a1b1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -102,7 +102,3 @@ config HAVE_CLK
 	help
 	  The <linux/clk.h> calls support software clock gating and
 	  thus are a key power management tool on many systems.
-
-config HAVE_DYN_ARRAY
-	def_bool n
-
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8636ddf2f4a4..8da6123a60d0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -33,7 +33,6 @@ config X86
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
-	select HAVE_DYN_ARRAY
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index e03bc0f87eef..6f80dc2f137e 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -107,7 +107,6 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-struct irq_cfg;
 struct irq_pin_list;
 struct irq_cfg {
 	unsigned int irq;
@@ -120,7 +119,7 @@ struct irq_cfg {
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg_legacy[] __initdata = {
+static struct irq_cfg irq_cfgx[NR_IRQS] = {
 	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
 	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
 	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
@@ -139,48 +138,26 @@ static struct irq_cfg irq_cfg_legacy[] __initdata = {
 	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
-
-static void init_one_irq_cfg(struct irq_cfg *cfg)
-{
-	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
-}
-
-static struct irq_cfg *irq_cfgx;
-
-static void __init init_work(void *data)
-{
-	struct dyn_array *da = data;
-	struct irq_cfg *cfg;
-	int legacy_count;
-	int i;
-
-	cfg = *da->name;
-
-	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
-
-	legacy_count = ARRAY_SIZE(irq_cfg_legacy);
-	for (i = legacy_count; i < *da->nr; i++)
-		init_one_irq_cfg(&cfg[i]);
-}
-
 #define for_each_irq_cfg(irq, cfg)		\
-	for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq])
+	for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
 
-DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
-
-struct irq_cfg *irq_cfg(unsigned int irq)
+static struct irq_cfg *irq_cfg(unsigned int irq)
 {
-	if (irq < nr_irqs)
-		return &irq_cfgx[irq];
-
-	return NULL;
+	return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
-struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 {
 	return irq_cfg(irq);
 }
 
+/*
+ * Rough estimation of how many shared IRQs there are, can be changed
+ * anytime.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -193,59 +170,29 @@ struct irq_pin_list {
 	struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *irq_2_pin_head;
-/* fill one page ? */
-static int nr_irq_2_pin = 0x100;
+static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
 static struct irq_pin_list *irq_2_pin_ptr;
-static void __init irq_2_pin_init_work(void *data)
+
+static void __init irq_2_pin_init(void)
 {
-	struct dyn_array *da = data;
-	struct irq_pin_list *pin;
+	struct irq_pin_list *pin = irq_2_pin_head;
 	int i;
 
-	pin = *da->name;
-
-	for (i = 1; i < *da->nr; i++)
+	for (i = 1; i < PIN_MAP_SIZE; i++)
 		pin[i-1].next = &pin[i];
 
 	irq_2_pin_ptr = &pin[0];
 }
-DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
 
 static struct irq_pin_list *get_one_free_irq_2_pin(void)
 {
-	struct irq_pin_list *pin;
-	int i;
-
-	pin = irq_2_pin_ptr;
-
-	if (pin) {
-		irq_2_pin_ptr = pin->next;
-		pin->next = NULL;
-		return pin;
-	}
-
-	/*
-	 *  we run out of pre-allocate ones, allocate more
-	 */
-	printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
-
-	if (after_bootmem)
-		pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
-				 GFP_ATOMIC);
-	else
-		pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
-				nr_irq_2_pin, PAGE_SIZE, 0);
+	struct irq_pin_list *pin = irq_2_pin_ptr;
 
 	if (!pin)
 		panic("can not get more irq_2_pin\n");
 
-	for (i = 1; i < nr_irq_2_pin; i++)
-		pin[i-1].next = &pin[i];
-
 	irq_2_pin_ptr = pin->next;
 	pin->next = NULL;
-
 	return pin;
 }
 
@@ -284,8 +231,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
 static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
-        if (sis_apic_bug)
-                writel(reg, &io_apic->index);
+
+	if (sis_apic_bug)
+		writel(reg, &io_apic->index);
 	writel(value, &io_apic->data);
 }
 
@@ -1044,11 +992,11 @@ static int pin_2_irq(int idx, int apic, int pin)
 		while (i < apic)
 			irq += nr_ioapic_registers[i++];
 		irq += pin;
-                /*
+		/*
                  * For MPS mode, so far only needed by ES7000 platform
                  */
-                if (ioapic_renumber_irq)
-                        irq = ioapic_renumber_irq(apic, irq);
+		if (ioapic_renumber_irq)
+			irq = ioapic_renumber_irq(apic, irq);
 	}
 
 #ifdef CONFIG_X86_32
@@ -1232,19 +1180,19 @@ static struct irq_chip ir_ioapic_chip;
 #ifdef CONFIG_X86_32
 static inline int IO_APIC_irq_trigger(int irq)
 {
-        int apic, idx, pin;
+	int apic, idx, pin;
 
-        for (apic = 0; apic < nr_ioapics; apic++) {
-                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-                        idx = find_irq_entry(apic, pin, mp_INT);
-                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-                                return irq_trigger(idx);
-                }
-        }
-        /*
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			idx = find_irq_entry(apic, pin, mp_INT);
+			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+				return irq_trigger(idx);
+		}
+	}
+	/*
          * nonexistent IRQs are edge default
          */
-        return 0;
+	return 0;
 }
 #else
 static inline int IO_APIC_irq_trigger(int irq)
@@ -1509,8 +1457,8 @@ __apicdebuginit(void) print_IO_APIC(void)
 	reg_01.raw = io_apic_read(apic, 1);
 	if (reg_01.bits.version >= 0x10)
 		reg_02.raw = io_apic_read(apic, 2);
-        if (reg_01.bits.version >= 0x20)
-                reg_03.raw = io_apic_read(apic, 3);
+	if (reg_01.bits.version >= 0x20)
+		reg_03.raw = io_apic_read(apic, 3);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	printk("\n");
@@ -2089,9 +2037,9 @@ static int ioapic_retrigger_irq(unsigned int irq)
 #else
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-        send_IPI_self(irq_cfg(irq)->vector);
+	send_IPI_self(irq_cfg(irq)->vector);
 
-        return 1;
+	return 1;
 }
 #endif
 
@@ -2189,7 +2137,7 @@ static int migrate_irq_remapped_level(int irq)
 
 	if (io_apic_level_ack_pending(irq)) {
 		/*
-	 	 * Interrupt in progress. Migrating irq now will change the
+		 * Interrupt in progress. Migrating irq now will change the
 		 * vector information in the IO-APIC RTE and that will confuse
 		 * the EOI broadcast performed by cpu.
 		 * So, delay the irq migration to the next instance.
@@ -2426,28 +2374,28 @@ static void ack_apic_level(unsigned int irq)
 }
 
 static struct irq_chip ioapic_chip __read_mostly = {
-	.name 		= "IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_apic_edge,
-	.eoi 		= ack_apic_level,
+	.name		= "IO-APIC",
+	.startup	= startup_ioapic_irq,
+	.mask		= mask_IO_APIC_irq,
+	.unmask		= unmask_IO_APIC_irq,
+	.ack		= ack_apic_edge,
+	.eoi		= ack_apic_level,
 #ifdef CONFIG_SMP
-	.set_affinity 	= set_ioapic_affinity_irq,
+	.set_affinity	= set_ioapic_affinity_irq,
 #endif
 	.retrigger	= ioapic_retrigger_irq,
 };
 
 #ifdef CONFIG_INTR_REMAP
 static struct irq_chip ir_ioapic_chip __read_mostly = {
-	.name 		= "IR-IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_x2apic_edge,
-	.eoi 		= ack_x2apic_level,
+	.name		= "IR-IO-APIC",
+	.startup	= startup_ioapic_irq,
+	.mask		= mask_IO_APIC_irq,
+	.unmask		= unmask_IO_APIC_irq,
+	.ack		= ack_x2apic_edge,
+	.eoi		= ack_x2apic_level,
 #ifdef CONFIG_SMP
-	.set_affinity 	= set_ir_ioapic_affinity_irq,
+	.set_affinity	= set_ir_ioapic_affinity_irq,
 #endif
 	.retrigger	= ioapic_retrigger_irq,
 };
@@ -2636,8 +2584,8 @@ static inline void __init check_timer(void)
 
 	local_irq_save(flags);
 
-        ver = apic_read(APIC_LVR);
-        ver = GET_APIC_VERSION(ver);
+	ver = apic_read(APIC_LVR);
+	ver = GET_APIC_VERSION(ver);
 
 	/*
 	 * get/set the timer IRQ vector:
@@ -2822,12 +2770,12 @@ void __init setup_IO_APIC(void)
 	io_apic_irqs = ~PIC_IRQS;
 
 	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-        /*
+	/*
          * Set up IO-APIC IRQ routing.
          */
 #ifdef CONFIG_X86_32
-        if (!acpi_ioapic)
-                setup_ioapic_ids_from_mpc();
+	if (!acpi_ioapic)
+		setup_ioapic_ids_from_mpc();
 #endif
 	sync_Arb_IDs();
 	setup_IO_APIC_irqs();
@@ -2842,9 +2790,9 @@ void __init setup_IO_APIC(void)
 
 static int __init io_apic_bug_finalize(void)
 {
-        if (sis_apic_bug == -1)
-                sis_apic_bug = 0;
-        return 0;
+	if (sis_apic_bug == -1)
+		sis_apic_bug = 0;
+	return 0;
 }
 
 late_initcall(io_apic_bug_finalize);
@@ -3199,7 +3147,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
 	if (index < 0) {
 		printk(KERN_ERR
 		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
-		        pci_name(dev));
+		       pci_name(dev));
 		return -ENOSPC;
 	}
 	return index;
@@ -3885,23 +3833,24 @@ static struct resource * __init ioapic_setup_resources(void)
 void __init ioapic_init_mappings(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-	int i;
 	struct resource *ioapic_res;
+	int i;
 
+	irq_2_pin_init();
 	ioapic_res = ioapic_setup_resources();
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
 			ioapic_phys = mp_ioapics[i].mp_apicaddr;
 #ifdef CONFIG_X86_32
-                        if (!ioapic_phys) {
-                                printk(KERN_ERR
-                                       "WARNING: bogus zero IO-APIC "
-                                       "address found in MPTABLE, "
-                                       "disabling IO/APIC support!\n");
-                                smp_found_config = 0;
-                                skip_ioapic_setup = 1;
-                                goto fake_ioapic_page;
-                        }
+			if (!ioapic_phys) {
+				printk(KERN_ERR
+				       "WARNING: bogus zero IO-APIC "
+				       "address found in MPTABLE, "
+				       "disabling IO/APIC support!\n");
+				smp_found_config = 0;
+				skip_ioapic_setup = 1;
+				goto fake_ioapic_page;
+			}
 #endif
 		} else {
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 2b7dab699e83..410c88f0bfeb 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -140,7 +140,7 @@ static void __init setup_cpu_pda_map(void)
  */
 void __init setup_per_cpu_areas(void)
 {
-	ssize_t size, old_size, da_size;
+	ssize_t size, old_size;
 	char *ptr;
 	int cpu;
 	unsigned long align = 1;
@@ -150,9 +150,8 @@ void __init setup_per_cpu_areas(void)
 
 	/* Copy section for each CPU (we discard the original) */
 	old_size = PERCPU_ENOUGH_ROOM;
-	da_size = per_cpu_dyn_array_size(&align);
 	align = max_t(unsigned long, PAGE_SIZE, align);
-	size = roundup(old_size + da_size, align);
+	size = roundup(old_size, align);
 	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
@@ -182,9 +181,6 @@ void __init setup_per_cpu_areas(void)
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-
-		per_cpu_alloc_dyn_array(cpu, ptr + old_size);
-
 	}
 
 	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 817aa55a1209..0c9667f0752a 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -633,7 +633,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 	/*
 	 * handle this 'virtual interrupt' as a Cobalt one now.
 	 */
-	kstat_irqs_this_cpu(desc)++;
+	kstat_incr_irqs_this_cpu(realirq, desc);
 
 	if (likely(desc->action != NULL))
 		handle_IRQ_event(realirq, desc->action);
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index c36007ab3940..a9b8560adbc2 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -145,7 +145,6 @@ SECTIONS
 	*(.x86_cpu_dev.init)
 	__x86_cpu_dev_end = .;
   }
-  DYN_ARRAY_INIT(8)
   SECURITY_INIT
   . = ALIGN(4);
   .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 30973dbac8c2..3245ad72594a 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -174,8 +174,6 @@ SECTIONS
   }
   __x86_cpu_dev_end = .;
 
-  DYN_ARRAY_INIT(8)
-
   SECURITY_INIT
 
   . = ALIGN(8);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index bb6bc721b13d..5601506f2dd9 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
 		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
 	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
 
-	kstat_irqs_this_cpu(irq_to_desc(irq))++;
+	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
 
 out:
 	raw_local_irq_restore(flags);
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 9ce80213007b..1137d2976043 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -558,12 +558,7 @@ struct timer_rand_state {
 	unsigned dont_count_entropy:1;
 };
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-static struct timer_rand_state **irq_timer_state;
-DEFINE_DYN_ARRAY(irq_timer_state, sizeof(struct timer_rand_state *), nr_irqs, PAGE_SIZE, NULL);
-#else
 static struct timer_rand_state *irq_timer_state[NR_IRQS];
-#endif
 
 static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
 {
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 0f43b265eee6..950769e87475 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -19,20 +19,13 @@ struct irq_2_iommu {
 	u8  irte_mask;
 };
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-static struct irq_2_iommu *irq_2_iommuX;
-DEFINE_DYN_ARRAY(irq_2_iommuX, sizeof(struct irq_2_iommu), nr_irqs, PAGE_SIZE, NULL);
-#else
 static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
-#endif
 
 static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 {
-	if (irq < nr_irqs)
-		return &irq_2_iommuX[irq];
-
-	return NULL;
+	return (irq < nr_irqs) ?: irq_2_iommuX + irq : NULL;
 }
+
 static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 {
 	return irq_2_iommu(irq);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c68eda9d9a90..7440a0dceddb 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -210,19 +210,6 @@
  * All archs are supposed to use RO_DATA() */
 #define RODATA RO_DATA(4096)
 
-#define DYN_ARRAY_INIT(align)							\
-	. = ALIGN((align));						\
-	.dyn_array.init : AT(ADDR(.dyn_array.init) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__dyn_array_start) = .;			\
-		*(.dyn_array.init)					\
-		VMLINUX_SYMBOL(__dyn_array_end) = .;			\
-	}								\
-	. = ALIGN((align));						\
-	.per_cpu_dyn_array.init : AT(ADDR(.per_cpu_dyn_array.init) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__per_cpu_dyn_array_start) = .;		\
-		*(.per_cpu_dyn_array.init)				\
-		VMLINUX_SYMBOL(__per_cpu_dyn_array_end) = .;		\
-	}
 #define SECURITY_INIT							\
 	.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
 		VMLINUX_SYMBOL(__security_initcall_start) = .;		\
diff --git a/include/linux/init.h b/include/linux/init.h
index 59fbb4aaba6a..70ad53e1eab8 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -247,49 +247,6 @@ struct obs_kernel_param {
 /* Relies on boot_command_line being set */
 void __init parse_early_param(void);
 
-struct dyn_array {
-	void **name;
-	unsigned long size;
-	unsigned int *nr;
-	unsigned long align;
-	void (*init_work)(void *);
-};
-extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[];
-extern struct dyn_array *__per_cpu_dyn_array_start[], *__per_cpu_dyn_array_end[];
-
-#define DEFINE_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \
-		static struct dyn_array __dyn_array_##nameX __initdata = \
-		{	.name = (void **)&(nameX),\
-			.size = sizeX,\
-			.nr   = &(nrX),\
-			.align = alignX,\
-			.init_work = init_workX,\
-		}; \
-		static struct dyn_array *__dyn_array_ptr_##nameX __used \
-		__attribute__((__section__(".dyn_array.init"))) = \
-			&__dyn_array_##nameX
-
-#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
-	DEFINE_DYN_ARRAY_ADDR(nameX, nameX, sizeX, nrX, alignX, init_workX)
-
-#define DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \
-		static struct dyn_array __per_cpu_dyn_array_##nameX __initdata = \
-		{	.name = (void **)&(addrX),\
-			.size = sizeX,\
-			.nr   = &(nrX),\
-			.align = alignX,\
-			.init_work = init_workX,\
-		}; \
-		static struct dyn_array *__per_cpu_dyn_array_ptr_##nameX __used \
-		__attribute__((__section__(".per_cpu_dyn_array.init"))) = \
-			&__per_cpu_dyn_array_##nameX
-
-#define DEFINE_PER_CPU_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
-	DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, nameX, nrX, alignX, init_workX)
-
-extern void pre_alloc_dyn_array(void);
-extern unsigned long per_cpu_dyn_array_size(unsigned long *align);
-extern void per_cpu_alloc_dyn_array(int cpu, char *ptr);
 #endif /* __ASSEMBLY__ */
 
 /**
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3f33c7790300..38bf89f2ade0 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -139,8 +139,6 @@ struct irq_chip {
 	const char	*typename;
 };
 
-struct timer_rand_state;
-struct irq_2_iommu;
 /**
  * struct irq_desc - interrupt descriptor
  *
@@ -167,9 +165,6 @@ struct irq_2_iommu;
  */
 struct irq_desc {
 	unsigned int		irq;
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned int            *kstat_irqs;
-#endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
 	struct msi_desc		*msi_desc;
@@ -198,23 +193,13 @@ struct irq_desc {
 } ____cacheline_internodealigned_in_smp;
 
 
-#ifndef CONFIG_HAVE_DYN_ARRAY
-/* could be removed if we get rid of all irq_desc reference */
 extern struct irq_desc irq_desc[NR_IRQS];
-#else
-extern struct irq_desc *irq_desc;
-#endif
 
 static inline struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	return (irq < nr_irqs) ? irq_desc + irq : NULL;
 }
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-#define kstat_irqs_this_cpu(DESC) \
-	((DESC)->kstat_irqs[smp_processor_id()])
-#endif
-
 /*
  * Migration helpers for obsolete names, they will go away:
  */
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 21249d8c1293..a9d0d360b776 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,9 +28,7 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
-#ifndef CONFIG_HAVE_DYN_ARRAY
        unsigned int irqs[NR_IRQS];
-#endif
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
@@ -41,20 +39,18 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 
 extern unsigned long long nr_context_switches(void);
 
-#ifndef CONFIG_HAVE_DYN_ARRAY
-#define kstat_irqs_this_cpu(irq) \
-	(kstat_this_cpu.irqs[irq])
-#endif
+struct irq_desc;
 
+static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
+					    struct irq_desc *desc)
+{
+	kstat_this_cpu.irqs[irq]++;
+}
 
-#ifndef CONFIG_HAVE_DYN_ARRAY
 static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
        return kstat_cpu(cpu).irqs[irq];
 }
-#else
-extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
-#endif
 
 /*
  * Number of interrupts per specific IRQ source, since bootup
diff --git a/init/Makefile b/init/Makefile
index dc5eeca6eb6d..4a243df426f7 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y                          := main.o dyn_array.o version.o mounts.o
+obj-y                          := main.o version.o mounts.o
 ifneq ($(CONFIG_BLK_DEV_INITRD),y)
 obj-y                          += noinitramfs.o
 else
diff --git a/init/dyn_array.c b/init/dyn_array.c
deleted file mode 100644
index c8d5e2a18588..000000000000
--- a/init/dyn_array.c
+++ /dev/null
@@ -1,120 +0,0 @@
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/kallsyms.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-void __init pre_alloc_dyn_array(void)
-{
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long total_size = 0, size, phys;
-	unsigned long max_align = 1;
-	struct dyn_array **daa;
-	char *ptr;
-
-	/* get the total size at first */
-	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		printk(KERN_DEBUG "dyn_array %pF size:%#lx nr:%d align:%#lx\n",
-			da->name, da->size, *da->nr, da->align);
-		size = da->size * (*da->nr);
-		total_size += roundup(size, da->align);
-		if (da->align > max_align)
-			max_align = da->align;
-	}
-	if (total_size)
-		printk(KERN_DEBUG "dyn_array total_size: %#lx\n",
-			 total_size);
-	else
-		return;
-
-	/* allocate them all together */
-	max_align = max_t(unsigned long, max_align, PAGE_SIZE);
-	ptr = __alloc_bootmem(total_size, max_align, 0);
-	phys = virt_to_phys(ptr);
-
-	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		phys = roundup(phys, da->align);
-		printk(KERN_DEBUG "dyn_array %pF ==> [%#lx - %#lx]\n",
-			da->name, phys, phys + size);
-		*da->name = phys_to_virt(phys);
-
-		phys += size;
-
-		if (da->init_work)
-			da->init_work(da);
-	}
-#else
-#ifdef CONFIG_GENERIC_HARDIRQS
-	unsigned int i;
-
-	for (i = 0; i < NR_IRQS; i++)
-		irq_desc[i].irq = i;
-#endif
-#endif
-}
-
-unsigned long __init per_cpu_dyn_array_size(unsigned long *align)
-{
-	unsigned long total_size = 0;
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long size;
-	struct dyn_array **daa;
-	unsigned max_align = 1;
-
-	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		printk(KERN_DEBUG "per_cpu_dyn_array %pF size:%#lx nr:%d align:%#lx\n",
-			da->name, da->size, *da->nr, da->align);
-		size = da->size * (*da->nr);
-		total_size += roundup(size, da->align);
-		if (da->align > max_align)
-			max_align = da->align;
-	}
-	if (total_size) {
-		printk(KERN_DEBUG "per_cpu_dyn_array total_size: %#lx\n",
-			 total_size);
-		*align = max_align;
-	}
-#endif
-	return total_size;
-}
-
-#ifdef CONFIG_SMP
-void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
-{
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long size, phys;
-	struct dyn_array **daa;
-	unsigned long addr;
-	void **array;
-
-	phys = virt_to_phys(ptr);
-	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		phys = roundup(phys, da->align);
-		printk(KERN_DEBUG "per_cpu_dyn_array %pF ==> [%#lx - %#lx]\n",
-			da->name, phys, phys + size);
-
-		addr = (unsigned long)da->name;
-		addr += per_cpu_offset(cpu);
-		array = (void **)addr;
-		*array = phys_to_virt(phys);
-		*da->name = *array; /* so init_work could use it directly */
-
-		phys += size;
-
-		if (da->init_work)
-			da->init_work(da);
-	}
-#endif
-}
-#endif
diff --git a/init/main.c b/init/main.c
index e81cf427d9c7..27f6bf6108e9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -391,23 +391,17 @@ EXPORT_SYMBOL(__per_cpu_offset);
 
 static void __init setup_per_cpu_areas(void)
 {
-	unsigned long size, i, old_size;
+	unsigned long size, i;
 	char *ptr;
 	unsigned long nr_possible_cpus = num_possible_cpus();
-	unsigned long align = 1;
-	unsigned da_size;
 
 	/* Copy section for each CPU (we discard the original) */
-	old_size = PERCPU_ENOUGH_ROOM;
-	da_size = per_cpu_dyn_array_size(&align);
-	align = max_t(unsigned long, PAGE_SIZE, align);
-	size = ALIGN(old_size + da_size, align);
+	size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
 	ptr = alloc_bootmem_pages(size * nr_possible_cpus);
 
 	for_each_possible_cpu(i) {
 		__per_cpu_offset[i] = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-		per_cpu_alloc_dyn_array(i, ptr + old_size);
 		ptr += size;
 	}
 }
@@ -573,7 +567,6 @@ asmlinkage void __init start_kernel(void)
 	printk(KERN_NOTICE);
 	printk(linux_banner);
 	setup_arch(&command_line);
-	pre_alloc_dyn_array();
 	mm_init_owner(&init_mm, &init_task);
 	setup_command_line(command_line);
 	unwind_setup();
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index e6f73dbfcc3d..d96d6f687c48 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -326,11 +326,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	action = desc->action;
 	if (unlikely(!action || (desc->status & IRQ_DISABLED)))
@@ -371,11 +367,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/*
 	 * If its disabled or no action available
@@ -422,11 +414,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 		goto out;
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/*
 	 * If its disabled or no action available
@@ -490,11 +478,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		mask_ack_irq(desc, irq);
 		goto out_unlock;
 	}
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/* Start handling the irq */
 	desc->chip->ack(irq);
@@ -549,11 +533,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 {
 	irqreturn_t action_ret;
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index f837133cdfbe..9fe86b3a60a5 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,11 +18,6 @@
 
 #include "internals.h"
 
-/*
- * lockdep: we want to handle all irq_desc locks as a single lock-class:
- */
-static struct lock_class_key irq_desc_lock_class;
-
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
  * @irq:       the interrupt number
@@ -30,15 +25,10 @@ static struct lock_class_key irq_desc_lock_class;
  *
  * Handles spurious and unhandled IRQ's. It also prints a debugmessage.
  */
-void
-handle_bad_irq(unsigned int irq, struct irq_desc *desc)
+void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 {
 	print_irq_desc(irq, desc);
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 	ack_bad_irq(irq);
 }
 
@@ -59,80 +49,6 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-static struct irq_desc irq_desc_init = {
-	.irq = -1U,
-	.status = IRQ_DISABLED,
-	.chip = &no_irq_chip,
-	.handle_irq = handle_bad_irq,
-	.depth = 1,
-	.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
-#ifdef CONFIG_SMP
-	.affinity = CPU_MASK_ALL
-#endif
-};
-
-
-static void init_one_irq_desc(struct irq_desc *desc)
-{
-	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
-	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-}
-
-extern int after_bootmem;
-extern void *__alloc_bootmem_nopanic(unsigned long size,
-			     unsigned long align,
-			     unsigned long goal);
-
-static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
-{
-	unsigned long bytes, total_bytes;
-	char *ptr;
-	int i;
-	unsigned long phys;
-
-	/* Compute how many bytes we need per irq and allocate them */
-	bytes = nr * sizeof(unsigned int);
-	total_bytes = bytes * nr_desc;
-	if (after_bootmem)
-		ptr = kzalloc(total_bytes, GFP_ATOMIC);
-	else
-		ptr = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-	if (!ptr)
-		panic(" can not allocate kstat_irqs\n");
-
-	phys = __pa(ptr);
-	printk(KERN_DEBUG "kstat_irqs ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
-
-	for (i = 0; i < nr_desc; i++) {
-		desc[i].kstat_irqs = (unsigned int *)ptr;
-		ptr += bytes;
-	}
-}
-
-static void __init init_work(void *data)
-{
-	struct dyn_array *da = data;
-	int i;
-	struct  irq_desc *desc;
-
-	desc = *da->name;
-
-	for (i = 0; i < *da->nr; i++) {
-		init_one_irq_desc(&desc[i]);
-		desc[i].irq = i;
-	}
-
-	/* init kstat_irqs, nr_cpu_ids is ready already */
-	init_kstat_irqs(desc, *da->nr, nr_cpu_ids);
-}
-
-struct irq_desc *irq_desc;
-DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
-
-#else
-
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
 		.status = IRQ_DISABLED,
@@ -146,8 +62,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	}
 };
 
-#endif
-
 /*
  * What should we do if we get a hw irq event on an illegal vector?
  * Each architecture has to answer this themself.
@@ -258,11 +172,8 @@ unsigned int __do_IRQ(unsigned int irq)
 	struct irqaction *action;
 	unsigned int status;
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
+
 	if (CHECK_IRQ_PER_CPU(desc->status)) {
 		irqreturn_t action_ret;
 
@@ -351,23 +262,16 @@ out:
 
 
 #ifdef CONFIG_TRACE_IRQFLAGS
+/*
+ * lockdep: we want to handle all irq_desc locks as a single lock-class:
+ */
+static struct lock_class_key irq_desc_lock_class;
+
 void early_init_irq_lock_class(void)
 {
-#ifndef CONFIG_HAVE_DYN_ARRAY
 	int i;
 
 	for (i = 0; i < nr_irqs; i++)
 		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
-#endif
 }
 #endif
-
-#ifdef CONFIG_HAVE_DYN_ARRAY
-unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	return desc->kstat_irqs[cpu];
-}
-#endif
-EXPORT_SYMBOL(kstat_irqs_cpu);
-
-- 
GitLab


From a1aca5de08a0cb840a90fb3f729a5940f8d21185 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 15 Oct 2008 19:29:15 +0200
Subject: [PATCH 528/895] genirq: remove artifacts from sparseirq removal

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/Kconfig                     | 1 +
 arch/x86/kernel/acpi/boot.c      | 1 -
 arch/x86/kernel/irqinit_32.c     | 1 -
 arch/x86/kernel/vmlinux_64.lds.S | 1 -
 arch/x86/mm/init_32.c            | 3 ---
 include/linux/init.h             | 1 -
 include/linux/kernel_stat.h      | 2 +-
 7 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 071004d3a1b1..0267babe5eb9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -102,3 +102,4 @@ config HAVE_CLK
 	help
 	  The <linux/clk.h> calls support software clock gating and
 	  thus are a key power management tool on many systems.
+
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 5fef4fece4a5..0d1c26a583c5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1254,7 +1254,6 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 		return count;
 	}
 
-
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
 				  nr_irqs);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index a8d35998d308..845aa9803e80 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -184,4 +184,3 @@ void __init native_init_IRQ(void)
 
 	irq_ctx_init(smp_processor_id());
 }
-
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 3245ad72594a..46e05447405b 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -173,7 +173,6 @@ SECTIONS
 	*(.x86_cpu_dev.init)
   }
   __x86_cpu_dev_end = .;
-
   SECURITY_INIT
 
   . = ALIGN(8);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 91343c2694b4..8396868e82c5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -66,7 +66,6 @@ static unsigned long __meminitdata table_end;
 static unsigned long __meminitdata table_top;
 
 static int __initdata after_init_bootmem;
-int after_bootmem;
 
 static __init void *alloc_low_page(unsigned long *phys)
 {
@@ -989,8 +988,6 @@ void __init mem_init(void)
 
 	set_highmem_pages_init();
 
-	after_bootmem = 1;
-
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
diff --git a/include/linux/init.h b/include/linux/init.h
index 70ad53e1eab8..93538b696e3d 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -246,7 +246,6 @@ struct obs_kernel_param {
 
 /* Relies on boot_command_line being set */
 void __init parse_early_param(void);
-
 #endif /* __ASSEMBLY__ */
 
 /**
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index a9d0d360b776..89b6ecd41473 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,7 +28,7 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
-       unsigned int irqs[NR_IRQS];
+	unsigned int irqs[NR_IRQS];
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
-- 
GitLab


From d3c60047bdb03199b93497ac40bd531315d43a86 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 16 Oct 2008 09:55:00 +0200
Subject: [PATCH 529/895] genirq: cleanup the sparseirq modifications

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/chip.c     | 43 +++++++++++++------------------------------
 kernel/irq/handle.c   |  3 +--
 kernel/irq/manage.c   | 38 +++++++++++++++++++++++---------------
 kernel/irq/proc.c     |  1 +
 kernel/irq/spurious.c | 18 ++++++++++--------
 5 files changed, 48 insertions(+), 55 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d96d6f687c48..4895fde4eb93 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -24,11 +24,9 @@
  */
 void dynamic_irq_init(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	/* first time to use this irq_desc */
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
@@ -58,10 +56,9 @@ void dynamic_irq_init(unsigned int irq)
  */
 void dynamic_irq_cleanup(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
 		return;
@@ -90,10 +87,9 @@ void dynamic_irq_cleanup(unsigned int irq)
  */
 int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
 		return -EINVAL;
@@ -118,11 +114,10 @@ EXPORT_SYMBOL(set_irq_chip);
  */
 int set_irq_type(unsigned int irq, unsigned int type)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 	int ret = -ENXIO;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
 		return -ENODEV;
@@ -147,10 +142,9 @@ EXPORT_SYMBOL(set_irq_type);
  */
 int set_irq_data(unsigned int irq, void *data)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install controller data for IRQ%d\n", irq);
@@ -173,10 +167,9 @@ EXPORT_SYMBOL(set_irq_data);
  */
 int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install msi data for IRQ%d\n", irq);
@@ -200,10 +193,9 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
  */
 int set_irq_chip_data(unsigned int irq, void *data)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install chip data for IRQ%d\n", irq);
@@ -228,9 +220,8 @@ EXPORT_SYMBOL(set_irq_chip_data);
  */
 static void default_enable(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	desc = irq_to_desc(irq);
 	desc->chip->unmask(irq);
 	desc->status &= ~IRQ_MASKED;
 }
@@ -247,11 +238,9 @@ static void default_disable(unsigned int irq)
  */
 static unsigned int default_startup(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	desc = irq_to_desc(irq);
 	desc->chip->enable(irq);
-
 	return 0;
 }
 
@@ -260,9 +249,8 @@ static unsigned int default_startup(unsigned int irq)
  */
 static void default_shutdown(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	desc = irq_to_desc(irq);
 	desc->chip->mask(irq);
 	desc->status |= IRQ_MASKED;
 }
@@ -550,10 +538,9 @@ void
 __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		  const char *name)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install type control for IRQ%d\n", irq);
@@ -614,13 +601,11 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
 
 void __init set_irq_noprobe(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
-
 		return;
 	}
 
@@ -631,13 +616,11 @@ void __init set_irq_noprobe(unsigned int irq)
 
 void __init set_irq_probe(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
-
 		return;
 	}
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 9fe86b3a60a5..a69368ff607f 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -68,9 +68,8 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
  */
 static void ack_bad(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	desc = irq_to_desc(irq);
 	print_irq_desc(irq, desc);
 	ack_bad_irq(irq);
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ad2ce72c83c4..c498a1b8c621 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -140,10 +140,9 @@ int irq_select_affinity(unsigned int irq)
  */
 void disable_irq_nosync(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -170,9 +169,8 @@ EXPORT_SYMBOL(disable_irq_nosync);
  */
 void disable_irq(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -213,10 +211,9 @@ static void __enable_irq(struct irq_desc *desc, unsigned int irq)
  */
 void enable_irq(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -291,10 +288,9 @@ EXPORT_SYMBOL(set_irq_wake);
  */
 int can_request_irq(unsigned int irq, unsigned long irqflags)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return 0;
 
@@ -355,16 +351,15 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
  * Internal function to register an irqaction - typically used to
  * allocate special interrupts that are part of the architecture.
  */
-int setup_irq(unsigned int irq, struct irqaction *new)
+static int
+__setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
 {
-	struct irq_desc *desc;
 	struct irqaction *old, **p;
 	const char *old_name = NULL;
 	unsigned long flags;
 	int shared = 0;
 	int ret;
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return -EINVAL;
 
@@ -503,6 +498,20 @@ mismatch:
 	return -EBUSY;
 }
 
+/**
+ *	setup_irq - setup an interrupt
+ *	@irq: Interrupt line to setup
+ *	@act: irqaction for the interrupt
+ *
+ * Used to statically setup interrupts in the early boot process.
+ */
+int setup_irq(unsigned int irq, struct irqaction *act)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	return __setup_irq(irq, desc, act);
+}
+
 /**
  *	free_irq - free an interrupt
  *	@irq: Interrupt line to free
@@ -519,13 +528,12 @@ mismatch:
  */
 void free_irq(unsigned int irq, void *dev_id)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction **p;
 	unsigned long flags;
 
 	WARN_ON(in_interrupt());
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -624,8 +632,8 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 		unsigned long irqflags, const char *devname, void *dev_id)
 {
 	struct irqaction *action;
-	int retval;
 	struct irq_desc *desc;
+	int retval;
 
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -662,7 +670,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	action->next = NULL;
 	action->dev_id = dev_id;
 
-	retval = setup_irq(irq, action);
+	retval = __setup_irq(irq, desc, action);
 	if (retval)
 		kfree(action);
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index bc0993d86c8b..fac014a81b24 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -215,6 +215,7 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
 {
 	if (action->dir) {
 		struct irq_desc *desc = irq_to_desc(irq);
+
 		remove_proc_entry(action->dir->name, desc->dir);
 	}
 }
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index ec5a4bef3054..dd364c11e56e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -26,8 +26,7 @@ static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
 static int try_one_irq(int irq, struct irq_desc *desc)
 {
 	struct irqaction *action;
-	int ok = 0;
-	int work = 0;	/* Did we do work for a real IRQ */
+	int ok = 0, work = 0;
 
 	spin_lock(&desc->lock);
 	/* Already running on another processor */
@@ -88,9 +87,8 @@ static int try_one_irq(int irq, struct irq_desc *desc)
 
 static int misrouted_irq(int irq)
 {
-	int i;
-	int ok = 0;
 	struct irq_desc *desc;
+	int i, ok = 0;
 
 	for_each_irq_desc(i, desc) {
 		if (!i)
@@ -108,8 +106,8 @@ static int misrouted_irq(int irq)
 
 static void poll_spurious_irqs(unsigned long dummy)
 {
-	int i;
 	struct irq_desc *desc;
+	int i;
 
 	for_each_irq_desc(i, desc) {
 		unsigned int status;
@@ -126,7 +124,8 @@ static void poll_spurious_irqs(unsigned long dummy)
 		try_one_irq(i, desc);
 	}
 
-	mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
+	mod_timer(&poll_spurious_irq_timer,
+		  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
 }
 
 /*
@@ -177,7 +176,9 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
 	}
 }
 
-static inline int try_misrouted_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
+static inline int
+try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
+		  irqreturn_t action_ret)
 {
 	struct irqaction *action;
 
@@ -253,7 +254,8 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		desc->depth++;
 		desc->chip->disable(irq);
 
-		mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
+		mod_timer(&poll_spurious_irq_timer,
+			  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
 	}
 	desc->irqs_unhandled = 0;
 }
-- 
GitLab


From c0c168ca26b54a4a6ad34fc813fe00f275fbc94c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 16 Oct 2008 11:15:28 +0200
Subject: [PATCH 530/895] x86: cleanup show_interrupts

The sparseirq patches introduced some more ugliness in show_interrupts().
Clean it up all together and make the code easier to read by splitting out
the "tail" function  which prints the special interrupts.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/irq_32.c | 160 +++++++++++++++++++--------------------
 arch/x86/kernel/irq_64.c | 153 ++++++++++++++++++-------------------
 2 files changed, 154 insertions(+), 159 deletions(-)

diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index ccf6c1bf7120..8d525765a6c4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -263,22 +263,67 @@ atomic_t irq_err_count;
  * /proc/interrupts printing:
  */
 
+static int show_other_interrupts(struct seq_file *p)
+{
+	int j;
+
+	seq_printf(p, "NMI: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", nmi_count(j));
+	seq_printf(p, "  Non-maskable interrupts\n");
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "LOC: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs);
+	seq_printf(p, "  Local timer interrupts\n");
+#endif
+#ifdef CONFIG_SMP
+	seq_printf(p, "RES: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_resched_count);
+	seq_printf(p, "  Rescheduling interrupts\n");
+	seq_printf(p, "CAL: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_call_count);
+	seq_printf(p, "  Function call interrupts\n");
+	seq_printf(p, "TLB: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_tlb_count);
+	seq_printf(p, "  TLB shootdowns\n");
+#endif
+#ifdef CONFIG_X86_MCE
+	seq_printf(p, "TRM: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count);
+	seq_printf(p, "  Thermal event interrupts\n");
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "SPU: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count);
+	seq_printf(p, "  Spurious interrupts\n");
+#endif
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+	seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+	return 0;
+}
+
 int show_interrupts(struct seq_file *p, void *v)
 {
+	unsigned long flags, any_count = 0;
 	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
-	unsigned int entries;
-	struct irq_desc *desc = NULL;
-	int tail = 0;
+	struct irqaction *action;
+	struct irq_desc *desc;
+
+	if (i > nr_irqs)
+		return 0;
 
-	entries = nr_irqs - 1;
-	i = *(loff_t *) v;
 	if (i == nr_irqs)
-		tail = 1;
-	else
-		desc = irq_to_desc(i);
+		return show_other_interrupts(p);
 
+	/* print header */
 	if (i == 0) {
 		seq_printf(p, "           ");
 		for_each_online_cpu(j)
@@ -286,88 +331,37 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i <= entries) {
-		unsigned any_count = 0;
-
-		spin_lock_irqsave(&desc->lock, flags);
+	desc = irq_to_desc(i);
+	spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
-		any_count = kstat_irqs(i);
+	any_count = kstat_irqs(i);
 #else
-		for_each_online_cpu(j)
-			any_count |= kstat_irqs_cpu(i, j);
+	for_each_online_cpu(j)
+		any_count |= kstat_irqs_cpu(i, j);
 #endif
-		action = desc->action;
-		if (!action && !any_count)
-			goto skip;
-		seq_printf(p, "%3d: ", i);
+	action = desc->action;
+	if (!action && !any_count)
+		goto out;
+
+	seq_printf(p, "%3d: ", i);
 #ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
+	seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
-		seq_printf(p, " %8s", desc->chip->name);
-		seq_printf(p, "-%-8s", desc->name);
-
-		if (action) {
-			seq_printf(p, "  %s", action->name);
-			while ((action = action->next) != NULL)
-				seq_printf(p, ", %s", action->name);
-		}
+	seq_printf(p, " %8s", desc->chip->name);
+	seq_printf(p, "-%-8s", desc->name);
 
-		seq_putc(p, '\n');
-skip:
-		spin_unlock_irqrestore(&desc->lock, flags);
+	if (action) {
+		seq_printf(p, "  %s", action->name);
+		while ((action = action->next) != NULL)
+			seq_printf(p, ", %s", action->name);
 	}
 
-	if (tail) {
-		seq_printf(p, "NMI: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", nmi_count(j));
-		seq_printf(p, "  Non-maskable interrupts\n");
-#ifdef CONFIG_X86_LOCAL_APIC
-		seq_printf(p, "LOC: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).apic_timer_irqs);
-		seq_printf(p, "  Local timer interrupts\n");
-#endif
-#ifdef CONFIG_SMP
-		seq_printf(p, "RES: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_resched_count);
-		seq_printf(p, "  Rescheduling interrupts\n");
-		seq_printf(p, "CAL: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_call_count);
-		seq_printf(p, "  Function call interrupts\n");
-		seq_printf(p, "TLB: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_tlb_count);
-		seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-		seq_printf(p, "TRM: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_thermal_count);
-		seq_printf(p, "  Thermal event interrupts\n");
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-		seq_printf(p, "SPU: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_spurious_count);
-		seq_printf(p, "  Spurious interrupts\n");
-#endif
-		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
-		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
-	}
+	seq_putc(p, '\n');
+out:
+	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 21f53b911113..4f374294f292 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -68,22 +68,65 @@ static inline void stack_overflow_check(struct pt_regs *regs)
  * Generic, controller-independent functions:
  */
 
+static int show_other_interrupts(struct seq_file *p)
+{
+	int j;
+
+	seq_printf(p, "NMI: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
+	seq_printf(p, "  Non-maskable interrupts\n");
+	seq_printf(p, "LOC: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
+	seq_printf(p, "  Local timer interrupts\n");
+#ifdef CONFIG_SMP
+	seq_printf(p, "RES: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
+	seq_printf(p, "  Rescheduling interrupts\n");
+	seq_printf(p, "CAL: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
+	seq_printf(p, "  Function call interrupts\n");
+	seq_printf(p, "TLB: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
+	seq_printf(p, "  TLB shootdowns\n");
+#endif
+#ifdef CONFIG_X86_MCE
+	seq_printf(p, "TRM: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
+	seq_printf(p, "  Thermal event interrupts\n");
+	seq_printf(p, "THR: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
+	seq_printf(p, "  Threshold APIC interrupts\n");
+#endif
+	seq_printf(p, "SPU: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
+	seq_printf(p, "  Spurious interrupts\n");
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+
+	return 0;
+}
+
 int show_interrupts(struct seq_file *p, void *v)
 {
-	int i, j;
-	struct irqaction * action;
-	unsigned long flags;
-	unsigned int entries;
-	struct irq_desc *desc = NULL;
-	int tail = 0;
-
-	entries = nr_irqs - 1;
-	i = *(loff_t *) v;
+	unsigned long flags, any_count = 0;
+	int i = *(loff_t *) v, j;
+	struct irqaction *action;
+	struct irq_desc *desc;
+
+	if (i > nr_irqs)
+		return 0;
+
 	if (i == nr_irqs)
-		tail = 1;
-	else
-		desc = irq_to_desc(i);
+		return show_other_interrupts(p);
 
+	/* print header */
 	if (i == 0) {
 		seq_printf(p, "           ");
 		for_each_online_cpu(j)
@@ -91,79 +134,37 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i <= entries) {
-		unsigned any_count = 0;
-
-		spin_lock_irqsave(&desc->lock, flags);
+	desc = irq_to_desc(i);
+	spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
-		any_count = kstat_irqs(i);
+	any_count = kstat_irqs(i);
 #else
-		for_each_online_cpu(j)
-			any_count |= kstat_irqs_cpu(i, j);
+	for_each_online_cpu(j)
+		any_count |= kstat_irqs_cpu(i, j);
 #endif
-		action = desc->action;
-		if (!action && !any_count)
-			goto skip;
-		seq_printf(p, "%3d: ", i);
+	action = desc->action;
+	if (!action && !any_count)
+		goto out;
+
+	seq_printf(p, "%3d: ", i);
 #ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
+	seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
-		seq_printf(p, " %8s", desc->chip->name);
-		seq_printf(p, "-%-8s", desc->name);
+	seq_printf(p, " %8s", desc->chip->name);
+	seq_printf(p, "-%-8s", desc->name);
 
-		if (action) {
-			seq_printf(p, "  %s", action->name);
-			while ((action = action->next) != NULL)
-				seq_printf(p, ", %s", action->name);
-		}
-		seq_putc(p, '\n');
-skip:
-		spin_unlock_irqrestore(&desc->lock, flags);
-	}
-
-	if (tail) {
-		seq_printf(p, "NMI: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
-		seq_printf(p, "  Non-maskable interrupts\n");
-		seq_printf(p, "LOC: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
-		seq_printf(p, "  Local timer interrupts\n");
-#ifdef CONFIG_SMP
-		seq_printf(p, "RES: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
-		seq_printf(p, "  Rescheduling interrupts\n");
-		seq_printf(p, "CAL: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
-		seq_printf(p, "  Function call interrupts\n");
-		seq_printf(p, "TLB: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
-		seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-		seq_printf(p, "TRM: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
-		seq_printf(p, "  Thermal event interrupts\n");
-		seq_printf(p, "THR: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
-		seq_printf(p, "  Threshold APIC interrupts\n");
-#endif
-		seq_printf(p, "SPU: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
-		seq_printf(p, "  Spurious interrupts\n");
-		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+	if (action) {
+		seq_printf(p, "  %s", action->name);
+		while ((action = action->next) != NULL)
+			seq_printf(p, ", %s", action->name);
 	}
 
+	seq_putc(p, '\n');
+out:
+	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 
-- 
GitLab


From 6b39ba771e3c78d00e0abcebad270bd4212b28bc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 16 Oct 2008 11:32:24 +0200
Subject: [PATCH 531/895] x86: unify show_interrupts() and proc helpers

show_interrupts() and proc helpers are basically the same for
32 and 64 bit. Move them to a shared source file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile |   2 +-
 arch/x86/kernel/irq.c    | 166 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/irq_32.c | 146 ----------------------------------
 arch/x86/kernel/irq_64.c | 132 -------------------------------
 4 files changed, 167 insertions(+), 279 deletions(-)
 create mode 100644 arch/x86/kernel/irq.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cb6ade443f00..d7e5a58ee22f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_hpet.o		:= $(nostackp)
 CFLAGS_tsc.o		:= $(nostackp)
 
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
-obj-y			+= traps.o irq_$(BITS).o dumpstack_$(BITS).o
+obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o
 obj-y			+= setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
new file mode 100644
index 000000000000..3b9128498976
--- /dev/null
+++ b/arch/x86/kernel/irq.c
@@ -0,0 +1,166 @@
+/*
+ * Common interrupt code for 32 and 64 bit
+ */
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/seq_file.h>
+
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/smp.h>
+
+atomic_t irq_err_count;
+
+#ifdef CONFIG_X86_32
+# define irq_stats(x)		(&per_cpu(irq_stat,x))
+#else
+# define irq_stats(x)		cpu_pda(x)
+#endif
+/*
+ * /proc/interrupts printing:
+ */
+static int show_other_interrupts(struct seq_file *p)
+{
+	int j;
+
+	seq_printf(p, "NMI: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
+	seq_printf(p, "  Non-maskable interrupts\n");
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "LOC: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
+	seq_printf(p, "  Local timer interrupts\n");
+#endif
+#ifdef CONFIG_SMP
+	seq_printf(p, "RES: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
+	seq_printf(p, "  Rescheduling interrupts\n");
+	seq_printf(p, "CAL: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+	seq_printf(p, "  Function call interrupts\n");
+	seq_printf(p, "TLB: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
+	seq_printf(p, "  TLB shootdowns\n");
+#endif
+#ifdef CONFIG_X86_MCE
+	seq_printf(p, "TRM: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
+	seq_printf(p, "  Thermal event interrupts\n");
+# ifdef CONFIG_X86_64
+	seq_printf(p, "THR: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
+	seq_printf(p, "  Threshold APIC interrupts\n");
+# endif
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "SPU: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
+	seq_printf(p, "  Spurious interrupts\n");
+#endif
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+	seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+	return 0;
+}
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+	unsigned long flags, any_count = 0;
+	int i = *(loff_t *) v, j;
+	struct irqaction *action;
+	struct irq_desc *desc;
+
+	if (i > nr_irqs)
+		return 0;
+
+	if (i == nr_irqs)
+		return show_other_interrupts(p);
+
+	/* print header */
+	if (i == 0) {
+		seq_printf(p, "           ");
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%-8d",j);
+		seq_putc(p, '\n');
+	}
+
+	desc = irq_to_desc(i);
+	spin_lock_irqsave(&desc->lock, flags);
+#ifndef CONFIG_SMP
+	any_count = kstat_irqs(i);
+#else
+	for_each_online_cpu(j)
+		any_count |= kstat_irqs_cpu(i, j);
+#endif
+	action = desc->action;
+	if (!action && !any_count)
+		goto out;
+
+	seq_printf(p, "%3d: ", i);
+#ifndef CONFIG_SMP
+	seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+#endif
+	seq_printf(p, " %8s", desc->chip->name);
+	seq_printf(p, "-%-8s", desc->name);
+
+	if (action) {
+		seq_printf(p, "  %s", action->name);
+		while ((action = action->next) != NULL)
+			seq_printf(p, ", %s", action->name);
+	}
+
+	seq_putc(p, '\n');
+out:
+	spin_unlock_irqrestore(&desc->lock, flags);
+	return 0;
+}
+
+/*
+ * /proc/stat helpers
+ */
+u64 arch_irq_stat_cpu(unsigned int cpu)
+{
+	u64 sum = irq_stats(cpu)->__nmi_count;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	sum += irq_stats(cpu)->apic_timer_irqs;
+#endif
+#ifdef CONFIG_SMP
+	sum += irq_stats(cpu)->irq_resched_count;
+	sum += irq_stats(cpu)->irq_call_count;
+	sum += irq_stats(cpu)->irq_tlb_count;
+#endif
+#ifdef CONFIG_X86_MCE
+	sum += irq_stats(cpu)->irq_thermal_count;
+# ifdef CONFIG_X86_64
+	sum += irq_stats(cpu)->irq_threshold_count;
+#endif
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+	sum += irq_stats(cpu)->irq_spurious_count;
+#endif
+	return sum;
+}
+
+u64 arch_irq_stat(void)
+{
+	u64 sum = atomic_read(&irq_err_count);
+
+#ifdef CONFIG_X86_IO_APIC
+	sum += atomic_read(&irq_mis_count);
+#endif
+	return sum;
+}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 8d525765a6c4..6d9bf3936c78 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -253,152 +253,6 @@ unsigned int do_IRQ(struct pt_regs *regs)
 	return 1;
 }
 
-/*
- * Interrupt statistics:
- */
-
-atomic_t irq_err_count;
-
-/*
- * /proc/interrupts printing:
- */
-
-static int show_other_interrupts(struct seq_file *p)
-{
-	int j;
-
-	seq_printf(p, "NMI: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", nmi_count(j));
-	seq_printf(p, "  Non-maskable interrupts\n");
-#ifdef CONFIG_X86_LOCAL_APIC
-	seq_printf(p, "LOC: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs);
-	seq_printf(p, "  Local timer interrupts\n");
-#endif
-#ifdef CONFIG_SMP
-	seq_printf(p, "RES: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_resched_count);
-	seq_printf(p, "  Rescheduling interrupts\n");
-	seq_printf(p, "CAL: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_call_count);
-	seq_printf(p, "  Function call interrupts\n");
-	seq_printf(p, "TLB: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_tlb_count);
-	seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-	seq_printf(p, "TRM: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count);
-	seq_printf(p, "  Thermal event interrupts\n");
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-	seq_printf(p, "SPU: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count);
-	seq_printf(p, "  Spurious interrupts\n");
-#endif
-	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
-	seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
-	return 0;
-}
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-	unsigned long flags, any_count = 0;
-	int i = *(loff_t *) v, j;
-	struct irqaction *action;
-	struct irq_desc *desc;
-
-	if (i > nr_irqs)
-		return 0;
-
-	if (i == nr_irqs)
-		return show_other_interrupts(p);
-
-	/* print header */
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%-8d",j);
-		seq_putc(p, '\n');
-	}
-
-	desc = irq_to_desc(i);
-	spin_lock_irqsave(&desc->lock, flags);
-#ifndef CONFIG_SMP
-	any_count = kstat_irqs(i);
-#else
-	for_each_online_cpu(j)
-		any_count |= kstat_irqs_cpu(i, j);
-#endif
-	action = desc->action;
-	if (!action && !any_count)
-		goto out;
-
-	seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
-	seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#endif
-	seq_printf(p, " %8s", desc->chip->name);
-	seq_printf(p, "-%-8s", desc->name);
-
-	if (action) {
-		seq_printf(p, "  %s", action->name);
-		while ((action = action->next) != NULL)
-			seq_printf(p, ", %s", action->name);
-	}
-
-	seq_putc(p, '\n');
-out:
-	spin_unlock_irqrestore(&desc->lock, flags);
-	return 0;
-}
-
-/*
- * /proc/stat helpers
- */
-u64 arch_irq_stat_cpu(unsigned int cpu)
-{
-	u64 sum = nmi_count(cpu);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
-#endif
-#ifdef CONFIG_SMP
-	sum += per_cpu(irq_stat, cpu).irq_resched_count;
-	sum += per_cpu(irq_stat, cpu).irq_call_count;
-	sum += per_cpu(irq_stat, cpu).irq_tlb_count;
-#endif
-#ifdef CONFIG_X86_MCE
-	sum += per_cpu(irq_stat, cpu).irq_thermal_count;
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-	sum += per_cpu(irq_stat, cpu).irq_spurious_count;
-#endif
-	return sum;
-}
-
-u64 arch_irq_stat(void)
-{
-	u64 sum = atomic_read(&irq_err_count);
-
-#ifdef CONFIG_X86_IO_APIC
-	sum += atomic_read(&irq_mis_count);
-#endif
-	return sum;
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 #include <mach_apic.h>
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 4f374294f292..39ef7feb9ea4 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,8 +18,6 @@
 #include <asm/idle.h>
 #include <asm/smp.h>
 
-atomic_t irq_err_count;
-
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
  * each architecture has to answer this themselves.
@@ -64,136 +62,6 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 }
 #endif
 
-/*
- * Generic, controller-independent functions:
- */
-
-static int show_other_interrupts(struct seq_file *p)
-{
-	int j;
-
-	seq_printf(p, "NMI: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
-	seq_printf(p, "  Non-maskable interrupts\n");
-	seq_printf(p, "LOC: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
-	seq_printf(p, "  Local timer interrupts\n");
-#ifdef CONFIG_SMP
-	seq_printf(p, "RES: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
-	seq_printf(p, "  Rescheduling interrupts\n");
-	seq_printf(p, "CAL: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
-	seq_printf(p, "  Function call interrupts\n");
-	seq_printf(p, "TLB: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
-	seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-	seq_printf(p, "TRM: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
-	seq_printf(p, "  Thermal event interrupts\n");
-	seq_printf(p, "THR: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
-	seq_printf(p, "  Threshold APIC interrupts\n");
-#endif
-	seq_printf(p, "SPU: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
-	seq_printf(p, "  Spurious interrupts\n");
-	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-
-	return 0;
-}
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-	unsigned long flags, any_count = 0;
-	int i = *(loff_t *) v, j;
-	struct irqaction *action;
-	struct irq_desc *desc;
-
-	if (i > nr_irqs)
-		return 0;
-
-	if (i == nr_irqs)
-		return show_other_interrupts(p);
-
-	/* print header */
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%-8d",j);
-		seq_putc(p, '\n');
-	}
-
-	desc = irq_to_desc(i);
-	spin_lock_irqsave(&desc->lock, flags);
-#ifndef CONFIG_SMP
-	any_count = kstat_irqs(i);
-#else
-	for_each_online_cpu(j)
-		any_count |= kstat_irqs_cpu(i, j);
-#endif
-	action = desc->action;
-	if (!action && !any_count)
-		goto out;
-
-	seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
-	seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#endif
-	seq_printf(p, " %8s", desc->chip->name);
-	seq_printf(p, "-%-8s", desc->name);
-
-	if (action) {
-		seq_printf(p, "  %s", action->name);
-		while ((action = action->next) != NULL)
-			seq_printf(p, ", %s", action->name);
-	}
-
-	seq_putc(p, '\n');
-out:
-	spin_unlock_irqrestore(&desc->lock, flags);
-	return 0;
-}
-
-/*
- * /proc/stat helpers
- */
-u64 arch_irq_stat_cpu(unsigned int cpu)
-{
-	u64 sum = cpu_pda(cpu)->__nmi_count;
-
-	sum += cpu_pda(cpu)->apic_timer_irqs;
-#ifdef CONFIG_SMP
-	sum += cpu_pda(cpu)->irq_resched_count;
-	sum += cpu_pda(cpu)->irq_call_count;
-	sum += cpu_pda(cpu)->irq_tlb_count;
-#endif
-#ifdef CONFIG_X86_MCE
-	sum += cpu_pda(cpu)->irq_thermal_count;
-	sum += cpu_pda(cpu)->irq_threshold_count;
-#endif
-	sum += cpu_pda(cpu)->irq_spurious_count;
-	return sum;
-}
-
-u64 arch_irq_stat(void)
-{
-	return atomic_read(&irq_err_count);
-}
-
 /*
  * do_IRQ handles all normal device IRQ's (the special
  * SMP cross-CPU interrupts have their own specific
-- 
GitLab


From 249f6d9eab372790579ada8991bba3384c204e06 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 16 Oct 2008 12:18:50 +0200
Subject: [PATCH 532/895] x86: move ack_bad_irq() to irq.c

Share more duplicated code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/irq.c    | 23 +++++++++++++++++++++++
 arch/x86/kernel/irq_32.c | 23 -----------------------
 arch/x86/kernel/irq_64.c | 20 --------------------
 3 files changed, 23 insertions(+), 43 deletions(-)

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3b9128498976..ccf6c503fc3b 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -12,6 +12,29 @@
 
 atomic_t irq_err_count;
 
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * Currently unexpected vectors happen only on SMP and APIC.
+	 * We _must_ ack these because every local APIC has only N
+	 * irq slots per priority level, and a 'hanging, unacked' IRQ
+	 * holds up an irq slot - in excessive cases (when multiple
+	 * unexpected vectors occur) that might lock up the APIC
+	 * completely.
+	 * But only ack when the APIC is enabled -AK
+	 */
+	if (cpu_has_apic)
+		ack_APIC_irq();
+#endif
+}
+
 #ifdef CONFIG_X86_32
 # define irq_stats(x)		(&per_cpu(irq_stat,x))
 #else
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 6d9bf3936c78..a51382672de0 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
 DEFINE_PER_CPU(struct pt_regs *, irq_regs);
 EXPORT_PER_CPU_SYMBOL(irq_regs);
 
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	/*
-	 * Currently unexpected vectors happen only on SMP and APIC.
-	 * We _must_ ack these because every local APIC has only N
-	 * irq slots per priority level, and a 'hanging, unacked' IRQ
-	 * holds up an irq slot - in excessive cases (when multiple
-	 * unexpected vectors occur) that might lock up the APIC
-	 * completely.
-	 * But only ack when the APIC is enabled -AK
-	 */
-	if (cpu_has_apic)
-		ack_APIC_irq();
-#endif
-}
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 /* Debugging check for stack overflow: is there less than 1KB free? */
 static int check_stack_overflow(void)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 39ef7feb9ea4..60eb84eb77a0 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,26 +18,6 @@
 #include <asm/idle.h>
 #include <asm/smp.h>
 
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq);
-	/*
-	 * Currently unexpected vectors happen only on SMP and APIC.
-	 * We _must_ ack these because every local APIC has only N
-	 * irq slots per priority level, and a 'hanging, unacked' IRQ
-	 * holds up an irq slot - in excessive cases (when multiple
-	 * unexpected vectors occur) that might lock up the APIC
-	 * completely.
-	 * But don't ack when the APIC is disabled. -AK
-	 */
-	if (!disable_apic)
-		ack_APIC_irq();
-}
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 /*
  * Probabilistic stack overflow check:
-- 
GitLab


From 811410fdb6b9d82a518542289efe9b2a51e3cbfb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 16 Oct 2008 14:16:11 +0200
Subject: [PATCH 533/895] genirq: add reverse iterator for irq_desc

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 38bf89f2ade0..31632aa65d16 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -21,6 +21,10 @@ extern int nr_irqs;
 
 # define for_each_irq_desc(irq, desc)		\
 	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+
+# define for_each_irq_desc_reverse(irq, desc)			\
+	for (irq = nr_irqs -1, desc = irq_desc + (nr_irqs -1 );	\
+	     irq > 0; irq--, desc--)
 #endif
 
 #ifndef CONFIG_S390
-- 
GitLab


From 2be3b52a5785a6a5c5349fbd315f57595f7074be Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 16 Oct 2008 14:50:27 +0200
Subject: [PATCH 534/895] proc: fixup irq iterator

There is no need for irq_desc here. Even for sparse_irq we can
handle this clever in for_each_irq_nr().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/proc/proc_misc.c | 7 ++-----
 include/linux/irq.h | 3 +++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 3f5c7b9d1a70..97b4579134d5 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -509,9 +509,6 @@ static int show_stat(struct seq_file *p, void *v)
 	u64 sum = 0;
 	struct timespec boottime;
 	unsigned int per_irq_sum;
-#ifdef CONFIG_GENERIC_HARDIRQS
-	struct irq_desc *desc;
-#endif
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
@@ -530,7 +527,7 @@ static int show_stat(struct seq_file *p, void *v)
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
 
-		for_each_irq_desc(j, desc)
+		for_each_irq_nr(j)
 			sum += kstat_irqs_cpu(j, i);
 
 		sum += arch_irq_stat_cpu(i);
@@ -575,7 +572,7 @@ static int show_stat(struct seq_file *p, void *v)
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
 	/* sum again ? it could be updated? */
-	for_each_irq_desc(j, desc) {
+	for_each_irq_nr(j) {
 		per_irq_sum = 0;
 
 		for_each_possible_cpu(i)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 31632aa65d16..0618fb362cb4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -27,6 +27,9 @@ extern int nr_irqs;
 	     irq > 0; irq--, desc--)
 #endif
 
+#define for_each_irq_nr(irq)			\
+	for (irq = 0; irq < nr_irqs; irq++)
+
 #ifndef CONFIG_S390
 
 #include <linux/linkage.h>
-- 
GitLab


From 10e580842ec8e53dddf62e1ab1871f4906477376 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 16 Oct 2008 14:19:04 +0200
Subject: [PATCH 535/895] genirq: use iterators for irq_desc loops

Use for_each_irq_desc[_reverse] for all the iteration loops.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/xen/events.c   | 14 ++++++-------
 kernel/irq/autoprobe.c | 45 +++++++++++-------------------------------
 kernel/irq/handle.c    |  5 +++--
 3 files changed, 21 insertions(+), 43 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index e6d47e8ca1ac..9ce1ab6c268d 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -137,14 +137,12 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 static void init_evtchn_cpu_bindings(void)
 {
 #ifdef CONFIG_SMP
+	struct irq_desc *desc;
 	int i;
+
 	/* By default all event channels notify CPU#0. */
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-		if (!desc)
-			continue;
+	for_each_irq_desc(i, desc)
 		desc->affinity = cpumask_of_cpu(0);
-	}
 #endif
 
 	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@ -233,7 +231,7 @@ static int find_unbound_irq(void)
 	int irq;
 
 	/* Only allocate from dynirq range */
-	for (irq = 0; irq < nr_irqs; irq++)
+	for_each_irq_nr(irq)
 		if (irq_bindcount[irq] == 0)
 			break;
 
@@ -794,7 +792,7 @@ void xen_irq_resume(void)
 		mask_evtchn(evtchn);
 
 	/* No IRQ <-> event-channel mappings. */
-	for (irq = 0; irq < nr_irqs; irq++)
+	for_each_irq_nr(irq)
 		irq_info[irq].evtchn = 0; /* zap event-channel binding */
 
 	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
@@ -826,7 +824,7 @@ void __init xen_init_IRQ(void)
 		mask_evtchn(i);
 
 	/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-	for (i = 0; i < nr_irqs; i++)
+	for_each_irq_nr(i)
 		irq_bindcount[i] = 0;
 
 	irq_ctx_init(smp_processor_id());
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index b3a5549ea81e..0cbff18efb6d 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -30,19 +30,16 @@ static DEFINE_MUTEX(probing_active);
 unsigned long probe_irq_on(void)
 {
 	struct irq_desc *desc;
-	unsigned long mask;
-	unsigned int i;
+	unsigned long mask = 0;
+	unsigned int status;
+	int i;
 
 	mutex_lock(&probing_active);
 	/*
 	 * something may have generated an irq long ago and we want to
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
-	for (i = nr_irqs-1; i > 0; i--) {
-		desc = irq_to_desc(i);
-		if (!desc)
-			continue;
-
+	for_each_irq_desc_reverse(i, desc) {
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			/*
@@ -70,11 +67,7 @@ unsigned long probe_irq_on(void)
 	 * (we must startup again here because if a longstanding irq
 	 * happened in the previous stage, it may have masked itself)
 	 */
-	for (i = nr_irqs-1; i > 0; i--) {
-		desc = irq_to_desc(i);
-		if (!desc)
-			continue;
-
+	for_each_irq_desc_reverse(i, desc) {
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -92,13 +85,7 @@ unsigned long probe_irq_on(void)
 	/*
 	 * Now filter out any obviously spurious interrupts
 	 */
-	mask = 0;
-	for (i = 0; i < nr_irqs; i++) {
-		unsigned int status;
-
-		desc = irq_to_desc(i);
-		if (!desc)
-			continue;
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -132,16 +119,11 @@ EXPORT_SYMBOL(probe_irq_on);
  */
 unsigned int probe_irq_mask(unsigned long val)
 {
-	unsigned int mask;
+	unsigned int status, mask = 0;
+	struct irq_desc *desc;
 	int i;
 
-	mask = 0;
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-		unsigned int status;
-
-		if (!desc)
-			continue;
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -180,13 +162,10 @@ EXPORT_SYMBOL(probe_irq_mask);
 int probe_irq_off(unsigned long val)
 {
 	int i, irq_found = 0, nr_irqs = 0;
+	struct irq_desc *desc;
+	unsigned int status;
 
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-		unsigned int status;
-
-		if (!desc)
-			continue;
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index a69368ff607f..c815b42d0f5b 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -268,9 +268,10 @@ static struct lock_class_key irq_desc_lock_class;
 
 void early_init_irq_lock_class(void)
 {
+	struct irq_desc *desc;
 	int i;
 
-	for (i = 0; i < nr_irqs; i++)
-		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
+	for_each_irq_desc(i, desc)
+		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 }
 #endif
-- 
GitLab


From 63d659d556f145d33798b8ad19ced10c254fe445 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 16 Oct 2008 15:33:18 +0200
Subject: [PATCH 536/895] genirq: fix name space collision of nr_irqs in
 autoprobe.c

probe_irq_off() is disfunctional as the local nr_irqs is referenced
instead of the global one for the for_each_irq_desc() iterator.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/autoprobe.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 0cbff18efb6d..cc0f7321b8ce 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -161,7 +161,7 @@ EXPORT_SYMBOL(probe_irq_mask);
  */
 int probe_irq_off(unsigned long val)
 {
-	int i, irq_found = 0, nr_irqs = 0;
+	int i, irq_found = 0, nr_of_irqs = 0;
 	struct irq_desc *desc;
 	unsigned int status;
 
@@ -171,9 +171,9 @@ int probe_irq_off(unsigned long val)
 
 		if (status & IRQ_AUTODETECT) {
 			if (!(status & IRQ_WAITING)) {
-				if (!nr_irqs)
+				if (!nr_of_irqs)
 					irq_found = i;
-				nr_irqs++;
+				nr_of_irqs++;
 			}
 			desc->status = status & ~IRQ_AUTODETECT;
 			desc->chip->shutdown(i);
@@ -182,7 +182,7 @@ int probe_irq_off(unsigned long val)
 	}
 	mutex_unlock(&probing_active);
 
-	if (nr_irqs > 1)
+	if (nr_of_irqs > 1)
 		irq_found = -irq_found;
 
 	return irq_found;
-- 
GitLab


From 4b1135a277f4b38f60b9c9f28adae467feb07856 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 16 Oct 2008 15:33:18 +0200
Subject: [PATCH 537/895] genirq: fix name space collisions of nr_irqs in
 arch/*

local shadows of global variables are _bad_

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/alpha/kernel/sys_sable.c    | 6 +++---
 arch/arm/mach-ixp2000/ixdp2x00.c | 4 ++--
 arch/arm/mach-omap2/irq.c        | 8 ++++----
 arch/avr32/mach-at32ap/extint.c  | 8 ++++----
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index 99a7f19da13a..a4555f497639 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -47,7 +47,7 @@ typedef struct irq_swizzle_struct
 
 static irq_swizzle_t *sable_lynx_irq_swizzle;
 
-static void sable_lynx_init_irq(int nr_irqs);
+static void sable_lynx_init_irq(int nr_of_irqs);
 
 #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE)
 
@@ -530,11 +530,11 @@ sable_lynx_srm_device_interrupt(unsigned long vector)
 }
 
 static void __init
-sable_lynx_init_irq(int nr_irqs)
+sable_lynx_init_irq(int nr_of_irqs)
 {
 	long i;
 
-	for (i = 0; i < nr_irqs; ++i) {
+	for (i = 0; i < nr_of_irqs; ++i) {
 		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
 		irq_desc[i].chip = &sable_lynx_irq_type;
 	}
diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c
index b0653a87159a..30451300751b 100644
--- a/arch/arm/mach-ixp2000/ixdp2x00.c
+++ b/arch/arm/mach-ixp2000/ixdp2x00.c
@@ -143,7 +143,7 @@ static struct irq_chip ixdp2x00_cpld_irq_chip = {
 	.unmask	= ixdp2x00_irq_unmask
 };
 
-void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs)
+void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_of_irqs)
 {
 	unsigned int irq;
 
@@ -154,7 +154,7 @@ void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigne
 
 	board_irq_stat = stat_reg;
 	board_irq_mask = mask_reg;
-	board_irq_count = nr_irqs;
+	board_irq_count = nr_of_irqs;
 
 	*board_irq_mask = 0xffffffff;
 
diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c
index 196a9565a8dc..003901f1e2da 100644
--- a/arch/arm/mach-omap2/irq.c
+++ b/arch/arm/mach-omap2/irq.c
@@ -111,7 +111,7 @@ static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank)
 
 void __init omap_init_irq(void)
 {
-	unsigned long nr_irqs = 0;
+	unsigned long nr_of_irqs = 0;
 	unsigned int nr_banks = 0;
 	int i;
 
@@ -124,14 +124,14 @@ void __init omap_init_irq(void)
 
 		omap_irq_bank_init_one(bank);
 
-		nr_irqs += bank->nr_irqs;
+		nr_of_irqs += bank->nr_irqs;
 		nr_banks++;
 	}
 
 	printk(KERN_INFO "Total of %ld interrupts on %d active controller%s\n",
-	       nr_irqs, nr_banks, nr_banks > 1 ? "s" : "");
+	       nr_of_irqs, nr_banks, nr_banks > 1 ? "s" : "");
 
-	for (i = 0; i < nr_irqs; i++) {
+	for (i = 0; i < nr_of_irqs; i++) {
 		set_irq_chip(i, &omap_irq_chip);
 		set_irq_handler(i, handle_level_irq);
 		set_irq_flags(i, IRQF_VALID);
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
index c36a6d59d6f0..310477ba1bbf 100644
--- a/arch/avr32/mach-at32ap/extint.c
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -191,7 +191,7 @@ static int __init eic_probe(struct platform_device *pdev)
 	struct eic *eic;
 	struct resource *regs;
 	unsigned int i;
-	unsigned int nr_irqs;
+	unsigned int nr_of_irqs;
 	unsigned int int_irq;
 	int ret;
 	u32 pattern;
@@ -224,7 +224,7 @@ static int __init eic_probe(struct platform_device *pdev)
 	eic_writel(eic, IDR, ~0UL);
 	eic_writel(eic, MODE, ~0UL);
 	pattern = eic_readl(eic, MODE);
-	nr_irqs = fls(pattern);
+	nr_of_irqs = fls(pattern);
 
 	/* Trigger on low level unless overridden by driver */
 	eic_writel(eic, EDGE, 0UL);
@@ -232,7 +232,7 @@ static int __init eic_probe(struct platform_device *pdev)
 
 	eic->chip = &eic_chip;
 
-	for (i = 0; i < nr_irqs; i++) {
+	for (i = 0; i < nr_of_irqs; i++) {
 		set_irq_chip_and_handler(eic->first_irq + i, &eic_chip,
 					 handle_level_irq);
 		set_irq_chip_data(eic->first_irq + i, eic);
@@ -256,7 +256,7 @@ static int __init eic_probe(struct platform_device *pdev)
 		 eic->regs, int_irq);
 	dev_info(&pdev->dev,
 		 "Handling %u external IRQs, starting with IRQ %u\n",
-		 nr_irqs, eic->first_irq);
+		 nr_of_irqs, eic->first_irq);
 
 	return 0;
 
-- 
GitLab


From 10e0298686be6249b0665465737a6b83446c4d35 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 16 Oct 2008 17:04:22 +0200
Subject: [PATCH 538/895] io_apic: make irq_mis_count available on 64-bit too

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 6f80dc2f137e..b764d7429c61 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2277,9 +2277,7 @@ static void ack_apic_edge(unsigned int irq)
 	ack_APIC_irq();
 }
 
-#ifdef CONFIG_X86_32
 atomic_t irq_mis_count;
-#endif
 
 static void ack_apic_level(unsigned int irq)
 {
-- 
GitLab


From cc8e920aaf5558f87851169b33c420cc4516c253 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 16 Oct 2008 17:05:27 +0200
Subject: [PATCH 539/895] intr_remapping: fix typo

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/intr_remapping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 950769e87475..d6040dd3ff5b 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -23,7 +23,7 @@ static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
 
 static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 {
-	return (irq < nr_irqs) ?: irq_2_iommuX + irq : NULL;
+	return (irq < nr_irqs) ? irq_2_iommuX + irq : NULL;
 }
 
 static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
-- 
GitLab


From 3baf63a507094992a5bf238ba3bcea71f458b1e8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 16 Oct 2008 19:00:57 +0200
Subject: [PATCH 540/895] m32r: fix build due to notify_cpu_starting() change

fix:

arch/m32r/kernel/smpboot.c:501: err
or: implicit declaration of function 'notify_cpu_starting'
make[2]: *** [arch/m32r/kernel/smpboot.o] Error 1
make[2]: *** Waiting for unfinished jobs....

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/m32r/kernel/smpboot.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index fc2994811f15..39cb6da72dcb 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -40,6 +40,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/cpu.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-- 
GitLab


From 0d4a7bc12ffecd3ba41dd94179cc5b272b71ce8a Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 26 Aug 2008 17:15:45 -0600
Subject: [PATCH 541/895] UIO: BKL removal

Fill in needed locking around idr accesses, then remove the big kernel lock
from the UIO driver.  Since there are no in-tree UIO drivers with open()
methods, no further BKL pushdown is required.

Acked-by: Hans J. Koch <hjk@linutronix.de>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 drivers/uio/uio.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 3a6934bf7131..4f28f4bf8366 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -47,6 +47,9 @@ static struct uio_class {
 	struct class *class;
 } *uio_class;
 
+/* Protect idr accesses */
+static DEFINE_MUTEX(minor_lock);
+
 /*
  * attributes
  */
@@ -231,7 +234,6 @@ static void uio_dev_del_attributes(struct uio_device *idev)
 
 static int uio_get_minor(struct uio_device *idev)
 {
-	static DEFINE_MUTEX(minor_lock);
 	int retval = -ENOMEM;
 	int id;
 
@@ -253,7 +255,9 @@ exit:
 
 static void uio_free_minor(struct uio_device *idev)
 {
+	mutex_lock(&minor_lock);
 	idr_remove(&uio_idr, idev->minor);
+	mutex_unlock(&minor_lock);
 }
 
 /**
@@ -297,8 +301,9 @@ static int uio_open(struct inode *inode, struct file *filep)
 	struct uio_listener *listener;
 	int ret = 0;
 
-	lock_kernel();
+	mutex_lock(&minor_lock);
 	idev = idr_find(&uio_idr, iminor(inode));
+	mutex_unlock(&minor_lock);
 	if (!idev) {
 		ret = -ENODEV;
 		goto out;
@@ -324,18 +329,15 @@ static int uio_open(struct inode *inode, struct file *filep)
 		if (ret)
 			goto err_infoopen;
 	}
-	unlock_kernel();
 	return 0;
 
 err_infoopen:
-
 	kfree(listener);
-err_alloc_listener:
 
+err_alloc_listener:
 	module_put(idev->owner);
 
 out:
-	unlock_kernel();
 	return ret;
 }
 
-- 
GitLab


From 3038edabf48f01421c621cb77a712b446d3a5d67 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 17 Oct 2008 01:26:27 +0200
Subject: [PATCH 542/895] x86 ACPI: fix breakage of resume on 64-bit UP systems
 with SMP kernel

x86 ACPI: Fix breakage of resume on 64-bit UP systems with SMP kernel

We are now using per CPU GDT tables in head_64.S and the original
early_gdt_descr.address is invalidated after boot by
setup_per_cpu_areas().  This breaks resume from suspend to RAM on
x86_64 UP systems using SMP kernels, because this part of head_64.S
is also executed during the resume and the invalid GDT address
causes the system to crash.  It doesn't break on 'true' SMP systems,
because early_gdt_descr.address is modified every time
native_cpu_up() runs.  However, during resume it should point to the
GDT of the boot CPU rather than to another CPU's GDT.

For this reason, during suspend to RAM always make
early_gdt_descr.address point to the boot CPU's GDT.

This fixes http://bugzilla.kernel.org/show_bug.cgi?id=11568, which
is a regression from 2.6.26.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Reported-and-tested-by: Andy Wettstein <ajw1980@gmail.com>
---
 arch/x86/kernel/acpi/sleep.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 426e5d91b63a..c44cd6dbfa14 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -10,6 +10,7 @@
 #include <linux/dmi.h>
 #include <linux/cpumask.h>
 #include <asm/segment.h>
+#include <asm/desc.h>
 
 #include "realmode/wakeup.h"
 #include "sleep.h"
@@ -98,6 +99,8 @@ int acpi_save_state_mem(void)
 	header->trampoline_segment = setup_trampoline() >> 4;
 #ifdef CONFIG_SMP
 	stack_start.sp = temp_stack + 4096;
+	early_gdt_descr.address =
+			(unsigned long)get_cpu_gdt_table(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
 	saved_magic = 0x123456789abcdef0;
-- 
GitLab


From faa5c2a15e14b6a4e59fcae65dec5258e723ea9f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Fri, 17 Oct 2008 16:19:45 +0200
Subject: [PATCH 543/895] [JFFS2] Correct parameter names of jffs2_compress()
 in comments

Make the parameter names of jffs2_compress() in its comments match with the
actual implementation

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 fs/jffs2/compr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 86739ee53b37..f25e70c1b51c 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -53,8 +53,8 @@ static int jffs2_is_best_compression(struct jffs2_compressor *this,
 }
 
 /* jffs2_compress:
- * @data: Pointer to uncompressed data
- * @cdata: Pointer to returned pointer to buffer for compressed data
+ * @data_in: Pointer to uncompressed data
+ * @cpage_out: Pointer to returned pointer to buffer for compressed data
  * @datalen: On entry, holds the amount of data available for compression.
  *	On exit, expected to hold the amount of data actually compressed.
  * @cdatalen: On entry, holds the amount of space available for compressed
-- 
GitLab


From 992b3f1dbeec401e19a80bdb8c81e5df5381f4c5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 13 Oct 2008 18:45:25 -0500
Subject: [PATCH 544/895] 9p-trans_fd: use single poller

trans_fd used pool of upto 100 pollers to monitor the r/w fds.  The
approach makes sense in userspace back when the only available
interfaces were poll(2) and select(2).  As each event monitor -
trigger - handling iteration took O(n) where `n' is the number of
watched fds, it makes sense to spread them to many pollers such that
the `n' can be divided by the number of pollers.  However, this
doesn't make any sense in kernel because persistent edge triggered
event monitoring is how the whole thing is implemented in the kernel
in the first place.

This patch converts trans_fd to use single poller which watches all
the fds instead of the poll of pollers approach.  All the fds are
registered for monitoring on creation and only the fds with pending
events are scanned when something happens much like how epoll is
implemented.

This change makes trans_fd fd monitoring more efficient and simpler.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 252 ++++++++++++++++------------------------------
 1 file changed, 86 insertions(+), 166 deletions(-)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6dabbdb66651..f84592345573 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -44,7 +44,6 @@
 #define P9_PORT 564
 #define MAX_SOCK_BUF (64*1024)
 #define ERREQFLUSH	1
-#define SCHED_TIMEOUT	10
 #define MAXPOLLWADDR	2
 
 /**
@@ -135,17 +134,16 @@ struct p9_req {
 	struct list_head req_list;
 };
 
-struct p9_mux_poll_task {
-	struct task_struct *task;
-	struct list_head mux_list;
-	int muxnum;
+struct p9_poll_wait {
+	struct p9_conn *conn;
+	wait_queue_t wait;
+	wait_queue_head_t *wait_addr;
 };
 
 /**
  * struct p9_conn - fd mux connection state information
  * @lock: protects mux_list (?)
  * @mux_list: list link for mux to manage multiple connections (?)
- * @poll_task: task polling on this connection
  * @msize: maximum size for connection (dup)
  * @extended: 9p2000.u flag (dup)
  * @trans: reference to transport instance for this connection
@@ -171,7 +169,6 @@ struct p9_mux_poll_task {
 struct p9_conn {
 	spinlock_t lock; /* protect lock structure */
 	struct list_head mux_list;
-	struct p9_mux_poll_task *poll_task;
 	int msize;
 	unsigned char extended;
 	struct p9_trans *trans;
@@ -185,8 +182,8 @@ struct p9_conn {
 	int wpos;
 	int wsize;
 	char *wbuf;
-	wait_queue_t poll_wait[MAXPOLLWADDR];
-	wait_queue_head_t *poll_waddr[MAXPOLLWADDR];
+	struct list_head poll_pending_link;
+	struct p9_poll_wait poll_wait[MAXPOLLWADDR];
 	poll_table pt;
 	struct work_struct rq;
 	struct work_struct wq;
@@ -220,12 +217,10 @@ static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address,
 static int p9_fd_write(struct p9_trans *trans, void *v, int len);
 static int p9_fd_read(struct p9_trans *trans, void *v, int len);
 
-static DEFINE_MUTEX(p9_mux_task_lock);
+static DEFINE_SPINLOCK(p9_poll_lock);
+static LIST_HEAD(p9_poll_pending_list);
 static struct workqueue_struct *p9_mux_wq;
-
-static int p9_mux_num;
-static int p9_mux_poll_task_num;
-static struct p9_mux_poll_task p9_mux_poll_tasks[100];
+static struct task_struct *p9_poll_task;
 
 static void p9_conn_destroy(struct p9_conn *);
 static unsigned int p9_fd_poll(struct p9_trans *trans,
@@ -255,130 +250,23 @@ static void p9_mux_put_tag(struct p9_conn *m, u16 tag)
 		p9_idpool_put(tag, m->tagpool);
 }
 
-/**
- * p9_mux_calc_poll_procs - calculates the number of polling procs
- * @muxnum: number of mounts
- *
- * Calculation is based on the number of mounted v9fs filesystems.
- * The current implementation returns sqrt of the number of mounts.
- */
-
-static int p9_mux_calc_poll_procs(int muxnum)
-{
-	int n;
-
-	if (p9_mux_poll_task_num)
-		n = muxnum / p9_mux_poll_task_num +
-		    (muxnum % p9_mux_poll_task_num ? 1 : 0);
-	else
-		n = 1;
-
-	if (n > ARRAY_SIZE(p9_mux_poll_tasks))
-		n = ARRAY_SIZE(p9_mux_poll_tasks);
-
-	return n;
-}
-
-static int p9_mux_poll_start(struct p9_conn *m)
+static void p9_mux_poll_stop(struct p9_conn *m)
 {
-	int i, n;
-	struct p9_mux_poll_task *vpt, *vptlast;
-	struct task_struct *pproc;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, p9_mux_num,
-		p9_mux_poll_task_num);
-	mutex_lock(&p9_mux_task_lock);
-
-	n = p9_mux_calc_poll_procs(p9_mux_num + 1);
-	if (n > p9_mux_poll_task_num) {
-		for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) {
-			if (p9_mux_poll_tasks[i].task == NULL) {
-				vpt = &p9_mux_poll_tasks[i];
-				P9_DPRINTK(P9_DEBUG_MUX, "create proc %p\n",
-									vpt);
-				pproc = kthread_create(p9_poll_proc, vpt,
-								"v9fs-poll");
-
-				if (!IS_ERR(pproc)) {
-					vpt->task = pproc;
-					INIT_LIST_HEAD(&vpt->mux_list);
-					vpt->muxnum = 0;
-					p9_mux_poll_task_num++;
-					wake_up_process(vpt->task);
-				}
-				break;
-			}
-		}
-
-		if (i >= ARRAY_SIZE(p9_mux_poll_tasks))
-			P9_DPRINTK(P9_DEBUG_ERROR,
-					"warning: no free poll slots\n");
-	}
+	unsigned long flags;
+	int i;
 
-	n = (p9_mux_num + 1) / p9_mux_poll_task_num +
-	    ((p9_mux_num + 1) % p9_mux_poll_task_num ? 1 : 0);
-
-	vptlast = NULL;
-	for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) {
-		vpt = &p9_mux_poll_tasks[i];
-		if (vpt->task != NULL) {
-			vptlast = vpt;
-			if (vpt->muxnum < n) {
-				P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i);
-				list_add(&m->mux_list, &vpt->mux_list);
-				vpt->muxnum++;
-				m->poll_task = vpt;
-				memset(&m->poll_waddr, 0,
-							sizeof(m->poll_waddr));
-				init_poll_funcptr(&m->pt, p9_pollwait);
-				break;
-			}
-		}
-	}
+	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+		struct p9_poll_wait *pwait = &m->poll_wait[i];
 
-	if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) {
-		if (vptlast == NULL) {
-			mutex_unlock(&p9_mux_task_lock);
-			return -ENOMEM;
+		if (pwait->wait_addr) {
+			remove_wait_queue(pwait->wait_addr, &pwait->wait);
+			pwait->wait_addr = NULL;
 		}
-
-		P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i);
-		list_add(&m->mux_list, &vptlast->mux_list);
-		vptlast->muxnum++;
-		m->poll_task = vptlast;
-		memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
-		init_poll_funcptr(&m->pt, p9_pollwait);
 	}
 
-	p9_mux_num++;
-	mutex_unlock(&p9_mux_task_lock);
-
-	return 0;
-}
-
-static void p9_mux_poll_stop(struct p9_conn *m)
-{
-	int i;
-	struct p9_mux_poll_task *vpt;
-
-	mutex_lock(&p9_mux_task_lock);
-	vpt = m->poll_task;
-	list_del(&m->mux_list);
-	for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
-		if (m->poll_waddr[i] != NULL) {
-			remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
-			m->poll_waddr[i] = NULL;
-		}
-	}
-	vpt->muxnum--;
-	if (!vpt->muxnum) {
-		P9_DPRINTK(P9_DEBUG_MUX, "destroy proc %p\n", vpt);
-		kthread_stop(vpt->task);
-		vpt->task = NULL;
-		p9_mux_poll_task_num--;
-	}
-	p9_mux_num--;
-	mutex_unlock(&p9_mux_task_lock);
+	spin_lock_irqsave(&p9_poll_lock, flags);
+	list_del_init(&m->poll_pending_link);
+	spin_unlock_irqrestore(&p9_poll_lock, flags);
 }
 
 /**
@@ -414,11 +302,8 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 	INIT_LIST_HEAD(&m->unsent_req_list);
 	INIT_WORK(&m->rq, p9_read_work);
 	INIT_WORK(&m->wq, p9_write_work);
-	n = p9_mux_poll_start(m);
-	if (n) {
-		kfree(m);
-		return ERR_PTR(n);
-	}
+	INIT_LIST_HEAD(&m->poll_pending_link);
+	init_poll_funcptr(&m->pt, p9_pollwait);
 
 	n = p9_fd_poll(trans, &m->pt);
 	if (n & POLLIN) {
@@ -431,11 +316,12 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 		set_bit(Wpending, &m->wsched);
 	}
 
-	for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
-		if (IS_ERR(m->poll_waddr[i])) {
+	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+		if (IS_ERR(m->poll_wait[i].wait_addr)) {
 			p9_mux_poll_stop(m);
 			kfree(m);
-			return (void *)m->poll_waddr;	/* the error code */
+			/* return the error code */
+			return (void *)m->poll_wait[i].wait_addr;
 		}
 	}
 
@@ -464,6 +350,23 @@ static void p9_conn_destroy(struct p9_conn *m)
 	kfree(m);
 }
 
+static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	struct p9_poll_wait *pwait =
+		container_of(wait, struct p9_poll_wait, wait);
+	struct p9_conn *m = pwait->conn;
+	unsigned long flags;
+	DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
+
+	spin_lock_irqsave(&p9_poll_lock, flags);
+	if (list_empty(&m->poll_pending_link))
+		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
+	spin_unlock_irqrestore(&p9_poll_lock, flags);
+
+	/* perform the default wake up operation */
+	return default_wake_function(&dummy_wait, mode, sync, key);
+}
+
 /**
  * p9_pollwait - add poll task to the wait queue
  * @filp: file pointer being polled
@@ -476,29 +379,32 @@ static void p9_conn_destroy(struct p9_conn *m)
 static void
 p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 {
+	struct p9_conn *m = container_of(p, struct p9_conn, pt);
+	struct p9_poll_wait *pwait = NULL;
 	int i;
-	struct p9_conn *m;
 
-	m = container_of(p, struct p9_conn, pt);
-	for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
-		if (m->poll_waddr[i] == NULL)
+	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+		if (m->poll_wait[i].wait_addr == NULL) {
+			pwait = &m->poll_wait[i];
 			break;
+		}
+	}
 
-	if (i >= ARRAY_SIZE(m->poll_waddr)) {
+	if (!pwait) {
 		P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n");
 		return;
 	}
 
-	m->poll_waddr[i] = wait_address;
-
 	if (!wait_address) {
 		P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n");
-		m->poll_waddr[i] = ERR_PTR(-EIO);
+		pwait->wait_addr = ERR_PTR(-EIO);
 		return;
 	}
 
-	init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
-	add_wait_queue(wait_address, &m->poll_wait[i]);
+	pwait->conn = m;
+	pwait->wait_addr = wait_address;
+	init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
+	add_wait_queue(wait_address, &pwait->wait);
 }
 
 /**
@@ -553,23 +459,34 @@ static void p9_poll_mux(struct p9_conn *m)
 
 static int p9_poll_proc(void *a)
 {
-	struct p9_conn *m, *mtmp;
-	struct p9_mux_poll_task *vpt;
+	unsigned long flags;
 
-	vpt = a;
-	P9_DPRINTK(P9_DEBUG_MUX, "start %p %p\n", current, vpt);
-	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
+	P9_DPRINTK(P9_DEBUG_MUX, "start %p\n", current);
+ repeat:
+	spin_lock_irqsave(&p9_poll_lock, flags);
+	while (!list_empty(&p9_poll_pending_list)) {
+		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
+							struct p9_conn,
+							poll_pending_link);
+		list_del_init(&conn->poll_pending_link);
+		spin_unlock_irqrestore(&p9_poll_lock, flags);
 
-		list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
-			p9_poll_mux(m);
-		}
+		p9_poll_mux(conn);
 
-		P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n");
-		schedule_timeout(SCHED_TIMEOUT * HZ);
+		spin_lock_irqsave(&p9_poll_lock, flags);
 	}
+	spin_unlock_irqrestore(&p9_poll_lock, flags);
 
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (list_empty(&p9_poll_pending_list)) {
+		P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n");
+		schedule();
+	}
 	__set_current_state(TASK_RUNNING);
+
+	if (!kthread_should_stop())
+		goto repeat;
+
 	P9_DPRINTK(P9_DEBUG_MUX, "finish\n");
 	return 0;
 }
@@ -1602,17 +1519,19 @@ static struct p9_trans_module p9_fd_trans = {
 
 int p9_trans_fd_init(void)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++)
-		p9_mux_poll_tasks[i].task = NULL;
-
 	p9_mux_wq = create_workqueue("v9fs");
 	if (!p9_mux_wq) {
 		printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
 		return -ENOMEM;
 	}
 
+	p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll");
+	if (IS_ERR(p9_poll_task)) {
+		destroy_workqueue(p9_mux_wq);
+		printk(KERN_WARNING "v9fs: mux: creating poll task failed\n");
+		return PTR_ERR(p9_poll_task);
+	}
+
 	v9fs_register_trans(&p9_tcp_trans);
 	v9fs_register_trans(&p9_unix_trans);
 	v9fs_register_trans(&p9_fd_trans);
@@ -1622,6 +1541,7 @@ int p9_trans_fd_init(void)
 
 void p9_trans_fd_exit(void)
 {
+	kthread_stop(p9_poll_task);
 	v9fs_unregister_trans(&p9_tcp_trans);
 	v9fs_unregister_trans(&p9_unix_trans);
 	v9fs_unregister_trans(&p9_fd_trans);
-- 
GitLab


From 8b81ef589ad1483dd977ef47fe00d4ce4d91a0ab Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@ericvh-desktop.austin.ibm.com>
Date: Mon, 13 Oct 2008 18:45:25 -0500
Subject: [PATCH 545/895] 9p: consolidate transport structure

Right now there is a transport module structure which provides per-transport
type functions and data and a transport structure which contains per-instance
public data as well as function pointers to instance specific functions.

This patch moves public transport visible instance data to the client
structure (which in some cases had duplicate data) and consolidates the
functions into the transport module structure.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/v9fs.c               |   2 +-
 include/net/9p/client.h    |  19 +++-
 include/net/9p/transport.h |  55 ++--------
 net/9p/client.c            |  21 ++--
 net/9p/mod.c               |   1 +
 net/9p/trans_fd.c          | 205 +++++++++++++++++--------------------
 net/9p/trans_virtio.c      |  50 ++++-----
 7 files changed, 146 insertions(+), 207 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index c061c3f18e7c..b6b85cf01e0d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -30,8 +30,8 @@
 #include <linux/parser.h>
 #include <linux/idr.h>
 #include <net/9p/9p.h>
-#include <net/9p/transport.h>
 #include <net/9p/client.h>
+#include <net/9p/transport.h>
 #include "v9fs.h"
 #include "v9fs_vfs.h"
 
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index c936dd14de41..c35fb548e7cf 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -26,6 +26,22 @@
 #ifndef NET_9P_CLIENT_H
 #define NET_9P_CLIENT_H
 
+/**
+ * enum p9_trans_status - different states of underlying transports
+ * @Connected: transport is connected and healthy
+ * @Disconnected: transport has been disconnected
+ * @Hung: transport is connected by wedged
+ *
+ * This enumeration details the various states a transport
+ * instatiation can be in.
+ */
+
+enum p9_trans_status {
+	Connected,
+	Disconnected,
+	Hung,
+};
+
 /**
  * struct p9_client - per client instance state
  * @lock: protect @fidlist
@@ -48,7 +64,8 @@ struct p9_client {
 	int msize;
 	unsigned char dotu;
 	struct p9_trans_module *trans_mod;
-	struct p9_trans *trans;
+	enum p9_trans_status status;
+	void *trans;
 	struct p9_conn *conn;
 
 	struct p9_idpool *fidpool;
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 3ca737120a90..3e0f2f6beba2 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -26,52 +26,6 @@
 #ifndef NET_9P_TRANSPORT_H
 #define NET_9P_TRANSPORT_H
 
-#include <linux/module.h>
-
-/**
- * enum p9_trans_status - different states of underlying transports
- * @Connected: transport is connected and healthy
- * @Disconnected: transport has been disconnected
- * @Hung: transport is connected by wedged
- *
- * This enumeration details the various states a transport
- * instatiation can be in.
- */
-
-enum p9_trans_status {
-	Connected,
-	Disconnected,
-	Hung,
-};
-
-/**
- * struct p9_trans - per-transport state and API
- * @status: transport &p9_trans_status
- * @msize: negotiated maximum packet size (duplicate from client)
- * @extended: negotiated protocol extensions (duplicate from client)
- * @priv: transport private data
- * @close: member function to disconnect and close the transport
- * @rpc: member function to issue a request to the transport
- *
- * This is the basic API for a transport instance.  It is used as
- * a handle by the client to issue requests.  This interface is currently
- * in flux during reorganization.
- *
- * Bugs: there is lots of duplicated data here and its not clear that
- * the member functions need to be per-instance versus per transport
- * module.
- */
-
-struct p9_trans {
-	enum p9_trans_status status;
-	int msize;
-	unsigned char extended;
-	void *priv;
-	void (*close) (struct p9_trans *);
-	int (*rpc) (struct p9_trans *t, struct p9_fcall *tc,
-							struct p9_fcall **rc);
-};
-
 /**
  * struct p9_trans_module - transport module interface
  * @list: used to maintain a list of currently available transports
@@ -79,12 +33,14 @@ struct p9_trans {
  * @maxsize: transport provided maximum packet size
  * @def: set if this transport should be considered the default
  * @create: member function to create a new connection on this transport
+ * @close: member function to disconnect and close the transport
+ * @rpc: member function to issue a request to the transport
  *
  * This is the basic API for a transport module which is registered by the
  * transport module with the 9P core network module and used by the client
  * to instantiate a new connection on a transport.
  *
- * Bugs: the transport module list isn't protected.
+ * BUGS: the transport module list isn't protected.
  */
 
 struct p9_trans_module {
@@ -92,8 +48,11 @@ struct p9_trans_module {
 	char *name;		/* name of transport */
 	int maxsize;		/* max message size of transport */
 	int def;		/* this transport should be default */
-	struct p9_trans * (*create)(const char *, char *, int, unsigned char);
 	struct module *owner;
+	int (*create)(struct p9_client *, const char *, char *);
+	void (*close) (struct p9_client *);
+	int (*rpc) (struct p9_client *t, struct p9_fcall *tc,
+							struct p9_fcall **rc);
 };
 
 void v9fs_register_trans(struct p9_trans_module *m);
diff --git a/net/9p/client.c b/net/9p/client.c
index e053e06028a5..f1a52a7ed724 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -33,8 +33,8 @@
 #include <linux/uaccess.h>
 #include <net/9p/9p.h>
 #include <linux/parser.h>
-#include <net/9p/transport.h>
 #include <net/9p/client.h>
+#include <net/9p/transport.h>
 
 static struct p9_fid *p9_fid_create(struct p9_client *clnt);
 static void p9_fid_destroy(struct p9_fid *fid);
@@ -136,7 +136,7 @@ int
 p9_client_rpc(struct p9_client *c, struct p9_fcall *tc,
 	struct p9_fcall **rc)
 {
-	return c->trans->rpc(c->trans, tc, rc);
+	return c->trans_mod->rpc(c, tc, rc);
 }
 
 struct p9_client *p9_client_create(const char *dev_name, char *options)
@@ -179,13 +179,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		clnt, clnt->trans_mod, clnt->msize, clnt->dotu);
 
 
-	clnt->trans = clnt->trans_mod->create(dev_name, options, clnt->msize,
-								clnt->dotu);
-	if (IS_ERR(clnt->trans)) {
-		err = PTR_ERR(clnt->trans);
-		clnt->trans = NULL;
+	err = clnt->trans_mod->create(clnt, dev_name, options);
+	if (err)
 		goto error;
-	}
 
 	if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize)
 		clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ;
@@ -233,11 +229,8 @@ void p9_client_destroy(struct p9_client *clnt)
 
 	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
 
-	if (clnt->trans) {
-		clnt->trans->close(clnt->trans);
-		kfree(clnt->trans);
-		clnt->trans = NULL;
-	}
+	if (clnt->trans_mod)
+		clnt->trans_mod->close(clnt);
 
 	v9fs_put_trans(clnt->trans_mod);
 
@@ -254,7 +247,7 @@ EXPORT_SYMBOL(p9_client_destroy);
 void p9_client_disconnect(struct p9_client *clnt)
 {
 	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
-	clnt->trans->status = Disconnected;
+	clnt->status = Disconnected;
 }
 EXPORT_SYMBOL(p9_client_disconnect);
 
diff --git a/net/9p/mod.c b/net/9p/mod.c
index 1084feb24cb0..cf8a4128cd5c 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -29,6 +29,7 @@
 #include <net/9p/9p.h>
 #include <linux/fs.h>
 #include <linux/parser.h>
+#include <net/9p/client.h>
 #include <net/9p/transport.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index f84592345573..d09389f08382 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -39,6 +39,7 @@
 #include <linux/file.h>
 #include <linux/parser.h>
 #include <net/9p/9p.h>
+#include <net/9p/client.h>
 #include <net/9p/transport.h>
 
 #define P9_PORT 564
@@ -146,7 +147,7 @@ struct p9_poll_wait {
  * @mux_list: list link for mux to manage multiple connections (?)
  * @msize: maximum size for connection (dup)
  * @extended: 9p2000.u flag (dup)
- * @trans: reference to transport instance for this connection
+ * @client: reference to client instance for this connection
  * @tagpool: id accounting for transactions
  * @err: error state
  * @req_list: accounting for requests which have been sent
@@ -171,7 +172,7 @@ struct p9_conn {
 	struct list_head mux_list;
 	int msize;
 	unsigned char extended;
-	struct p9_trans *trans;
+	struct p9_client *client;
 	struct p9_idpool *tagpool;
 	int err;
 	struct list_head req_list;
@@ -214,8 +215,8 @@ static void p9_read_work(struct work_struct *work);
 static void p9_write_work(struct work_struct *work);
 static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address,
 								poll_table *p);
-static int p9_fd_write(struct p9_trans *trans, void *v, int len);
-static int p9_fd_read(struct p9_trans *trans, void *v, int len);
+static int p9_fd_write(struct p9_client *client, void *v, int len);
+static int p9_fd_read(struct p9_client *client, void *v, int len);
 
 static DEFINE_SPINLOCK(p9_poll_lock);
 static LIST_HEAD(p9_poll_pending_list);
@@ -223,7 +224,7 @@ static struct workqueue_struct *p9_mux_wq;
 static struct task_struct *p9_poll_task;
 
 static void p9_conn_destroy(struct p9_conn *);
-static unsigned int p9_fd_poll(struct p9_trans *trans,
+static unsigned int p9_fd_poll(struct p9_client *client,
 						struct poll_table_struct *pt);
 
 #ifdef P9_NONBLOCK
@@ -271,27 +272,26 @@ static void p9_mux_poll_stop(struct p9_conn *m)
 
 /**
  * p9_conn_create - allocate and initialize the per-session mux data
- * @trans: transport structure
+ * @client: client instance
  *
  * Note: Creates the polling task if this is the first session.
  */
 
-static struct p9_conn *p9_conn_create(struct p9_trans *trans)
+static struct p9_conn *p9_conn_create(struct p9_client *client)
 {
 	int i, n;
 	struct p9_conn *m;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans,
-								trans->msize);
+	P9_DPRINTK(P9_DEBUG_MUX, "client %p msize %d\n", client, client->msize);
 	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
 	if (!m)
 		return ERR_PTR(-ENOMEM);
 
 	spin_lock_init(&m->lock);
 	INIT_LIST_HEAD(&m->mux_list);
-	m->msize = trans->msize;
-	m->extended = trans->extended;
-	m->trans = trans;
+	m->msize = client->msize;
+	m->extended = client->dotu;
+	m->client = client;
 	m->tagpool = p9_idpool_create();
 	if (IS_ERR(m->tagpool)) {
 		kfree(m);
@@ -305,7 +305,7 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 	INIT_LIST_HEAD(&m->poll_pending_link);
 	init_poll_funcptr(&m->pt, p9_pollwait);
 
-	n = p9_fd_poll(trans, &m->pt);
+	n = p9_fd_poll(client, &m->pt);
 	if (n & POLLIN) {
 		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m);
 		set_bit(Rpending, &m->wsched);
@@ -345,7 +345,7 @@ static void p9_conn_destroy(struct p9_conn *m)
 
 	p9_conn_cancel(m, -ECONNRESET);
 
-	m->trans = NULL;
+	m->client = NULL;
 	p9_idpool_destroy(m->tagpool);
 	kfree(m);
 }
@@ -420,7 +420,7 @@ static void p9_poll_mux(struct p9_conn *m)
 	if (m->err < 0)
 		return;
 
-	n = p9_fd_poll(m->trans, NULL);
+	n = p9_fd_poll(m->client, NULL);
 	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
 		P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n);
 		if (n >= 0)
@@ -533,7 +533,7 @@ again:
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos,
 								m->wsize);
 	clear_bit(Wpending, &m->wsched);
-	err = p9_fd_write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
+	err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
 	if (err == -EAGAIN) {
 		clear_bit(Wworksched, &m->wsched);
@@ -555,7 +555,7 @@ again:
 		if (test_and_clear_bit(Wpending, &m->wsched))
 			n = POLLOUT;
 		else
-			n = p9_fd_poll(m->trans, NULL);
+			n = p9_fd_poll(m->client, NULL);
 
 		if (n & POLLOUT) {
 			P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m);
@@ -640,7 +640,7 @@ static void p9_read_work(struct work_struct *work)
 	}
 
 	clear_bit(Rpending, &m->wsched);
-	err = p9_fd_read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
+	err = p9_fd_read(m->client, m->rbuf + m->rpos, m->msize - m->rpos);
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err);
 	if (err == -EAGAIN) {
 		clear_bit(Rworksched, &m->wsched);
@@ -735,7 +735,7 @@ static void p9_read_work(struct work_struct *work)
 		if (test_and_clear_bit(Rpending, &m->wsched))
 			n = POLLIN;
 		else
-			n = p9_fd_poll(m->trans, NULL);
+			n = p9_fd_poll(m->client, NULL);
 
 		if (n & POLLIN) {
 			P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m);
@@ -819,7 +819,7 @@ static struct p9_req *p9_send_request(struct p9_conn *m,
 	if (test_and_clear_bit(Wpending, &m->wsched))
 		n = POLLOUT;
 	else
-		n = p9_fd_poll(m->trans, NULL);
+		n = p9_fd_poll(m->client, NULL);
 
 	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
 		queue_work(p9_mux_wq, &m->wq);
@@ -933,16 +933,16 @@ p9_conn_rpc_cb(struct p9_req *req, void *a)
 /**
  * p9_fd_rpc- sends 9P request and waits until a response is available.
  *	The function can be interrupted.
- * @t: transport data
+ * @client: client instance
  * @tc: request to be sent
  * @rc: pointer where a pointer to the response is stored
  *
  */
 
 int
-p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
+p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
 {
-	struct p9_trans_fd *p = t->priv;
+	struct p9_trans_fd *p = client->trans;
 	struct p9_conn *m = p->conn;
 	int err, sigpending;
 	unsigned long flags;
@@ -975,7 +975,7 @@ p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
 	if (r.err < 0)
 		err = r.err;
 
-	if (err == -ERESTARTSYS && m->trans->status == Connected
+	if (err == -ERESTARTSYS && client->status == Connected
 							&& m->err == 0) {
 		if (p9_mux_flush_request(m, req)) {
 			/* wait until we get response of the flush message */
@@ -984,7 +984,7 @@ p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
 				err = wait_event_interruptible(r.wqueue,
 					r.rcall || r.err);
 			} while (!r.rcall && !r.err && err == -ERESTARTSYS &&
-				m->trans->status == Connected && !m->err);
+				client->status == Connected && !m->err);
 
 			err = -ERESTARTSYS;
 		}
@@ -1133,7 +1133,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 	return 0;
 }
 
-static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd)
+static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
 {
 	struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd),
 					   GFP_KERNEL);
@@ -1151,13 +1151,13 @@ static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd)
 		return -EIO;
 	}
 
-	trans->priv = ts;
-	trans->status = Connected;
+	client->trans = ts;
+	client->status = Connected;
 
 	return 0;
 }
 
-static int p9_socket_open(struct p9_trans *trans, struct socket *csocket)
+static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 {
 	int fd, ret;
 
@@ -1168,33 +1168,33 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket)
 		return fd;
 	}
 
-	ret = p9_fd_open(trans, fd, fd);
+	ret = p9_fd_open(client, fd, fd);
 	if (ret < 0) {
 		P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to open fd\n");
 		sockfd_put(csocket);
 		return ret;
 	}
 
-	((struct p9_trans_fd *)trans->priv)->rd->f_flags |= O_NONBLOCK;
+	((struct p9_trans_fd *)client->trans)->rd->f_flags |= O_NONBLOCK;
 
 	return 0;
 }
 
 /**
  * p9_fd_read- read from a fd
- * @trans: transport instance state
+ * @client: client instance
  * @v: buffer to receive data into
  * @len: size of receive buffer
  *
  */
 
-static int p9_fd_read(struct p9_trans *trans, void *v, int len)
+static int p9_fd_read(struct p9_client *client, void *v, int len)
 {
 	int ret;
 	struct p9_trans_fd *ts = NULL;
 
-	if (trans && trans->status != Disconnected)
-		ts = trans->priv;
+	if (client && client->status != Disconnected)
+		ts = client->trans;
 
 	if (!ts)
 		return -EREMOTEIO;
@@ -1204,26 +1204,26 @@ static int p9_fd_read(struct p9_trans *trans, void *v, int len)
 
 	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
 	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-		trans->status = Disconnected;
+		client->status = Disconnected;
 	return ret;
 }
 
 /**
  * p9_fd_write - write to a socket
- * @trans: transport instance state
+ * @client: client instance
  * @v: buffer to send data from
  * @len: size of send buffer
  *
  */
 
-static int p9_fd_write(struct p9_trans *trans, void *v, int len)
+static int p9_fd_write(struct p9_client *client, void *v, int len)
 {
 	int ret;
 	mm_segment_t oldfs;
 	struct p9_trans_fd *ts = NULL;
 
-	if (trans && trans->status != Disconnected)
-		ts = trans->priv;
+	if (client && client->status != Disconnected)
+		ts = client->trans;
 
 	if (!ts)
 		return -EREMOTEIO;
@@ -1238,18 +1238,18 @@ static int p9_fd_write(struct p9_trans *trans, void *v, int len)
 	set_fs(oldfs);
 
 	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-		trans->status = Disconnected;
+		client->status = Disconnected;
 	return ret;
 }
 
 static unsigned int
-p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt)
+p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
 {
 	int ret, n;
 	struct p9_trans_fd *ts = NULL;
 
-	if (trans && trans->status == Connected)
-		ts = trans->priv;
+	if (client && client->status == Connected)
+		ts = client->trans;
 
 	if (!ts)
 		return -EREMOTEIO;
@@ -1275,30 +1275,31 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt)
 }
 
 /**
- * p9_fd_close - shutdown socket
- * @trans: private socket structure
+ * p9_fd_close - shutdown file descriptor transport
+ * @client: client instance
  *
  */
 
-static void p9_fd_close(struct p9_trans *trans)
+static void p9_fd_close(struct p9_client *client)
 {
 	struct p9_trans_fd *ts;
 
-	if (!trans)
+	if (!client)
 		return;
 
-	ts = xchg(&trans->priv, NULL);
-
+	ts = client->trans;
 	if (!ts)
 		return;
 
+	client->status = Disconnected;
+
 	p9_conn_destroy(ts->conn);
 
-	trans->status = Disconnected;
 	if (ts->rd)
 		fput(ts->rd);
 	if (ts->wr)
 		fput(ts->wr);
+
 	kfree(ts);
 }
 
@@ -1319,31 +1320,23 @@ static inline int valid_ipaddr4(const char *buf)
 	return 0;
 }
 
-static struct p9_trans *
-p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu)
+static int
+p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 {
 	int err;
-	struct p9_trans *trans;
 	struct socket *csocket;
 	struct sockaddr_in sin_server;
 	struct p9_fd_opts opts;
-	struct p9_trans_fd *p;
+	struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */
 
 	err = parse_opts(args, &opts);
 	if (err < 0)
-		return ERR_PTR(err);
+		return err;
 
 	if (valid_ipaddr4(addr) < 0)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	csocket = NULL;
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans)
-		return ERR_PTR(-ENOMEM);
-	trans->msize = msize;
-	trans->extended = dotu;
-	trans->rpc = p9_fd_rpc;
-	trans->close = p9_fd_close;
 
 	sin_server.sin_family = AF_INET;
 	sin_server.sin_addr.s_addr = in_aton(addr);
@@ -1366,45 +1359,38 @@ p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu)
 		goto error;
 	}
 
-	err = p9_socket_open(trans, csocket);
+	err = p9_socket_open(client, csocket);
 	if (err < 0)
 		goto error;
 
-	p = (struct p9_trans_fd *) trans->priv;
-	p->conn = p9_conn_create(trans);
+	p = (struct p9_trans_fd *) client->trans;
+	p->conn = p9_conn_create(client);
 	if (IS_ERR(p->conn)) {
 		err = PTR_ERR(p->conn);
 		p->conn = NULL;
 		goto error;
 	}
 
-	return trans;
+	return 0;
 
 error:
 	if (csocket)
 		sock_release(csocket);
 
-	kfree(trans);
-	return ERR_PTR(err);
+	kfree(p);
+
+	return err;
 }
 
-static struct p9_trans *
-p9_trans_create_unix(const char *addr, char *args, int msize,
-							unsigned char dotu)
+static int
+p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
 {
 	int err;
 	struct socket *csocket;
 	struct sockaddr_un sun_server;
-	struct p9_trans *trans;
-	struct p9_trans_fd *p;
+	struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */
 
 	csocket = NULL;
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans)
-		return ERR_PTR(-ENOMEM);
-
-	trans->rpc = p9_fd_rpc;
-	trans->close = p9_fd_close;
 
 	if (strlen(addr) > UNIX_PATH_MAX) {
 		P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n",
@@ -1425,79 +1411,68 @@ p9_trans_create_unix(const char *addr, char *args, int msize,
 		goto error;
 	}
 
-	err = p9_socket_open(trans, csocket);
+	err = p9_socket_open(client, csocket);
 	if (err < 0)
 		goto error;
 
-	trans->msize = msize;
-	trans->extended = dotu;
-	p = (struct p9_trans_fd *) trans->priv;
-	p->conn = p9_conn_create(trans);
+	p = (struct p9_trans_fd *) client->trans;
+	p->conn = p9_conn_create(client);
 	if (IS_ERR(p->conn)) {
 		err = PTR_ERR(p->conn);
 		p->conn = NULL;
 		goto error;
 	}
 
-	return trans;
+	return 0;
 
 error:
 	if (csocket)
 		sock_release(csocket);
 
-	kfree(trans);
-	return ERR_PTR(err);
+	kfree(p);
+	return err;
 }
 
-static struct p9_trans *
-p9_trans_create_fd(const char *name, char *args, int msize,
-							unsigned char extended)
+static int
+p9_fd_create(struct p9_client *client, const char *addr, char *args)
 {
 	int err;
-	struct p9_trans *trans;
 	struct p9_fd_opts opts;
-	struct p9_trans_fd *p;
+	struct p9_trans_fd *p = NULL; /* this get allocated in p9_fd_open */
 
 	parse_opts(args, &opts);
 
 	if (opts.rfd == ~0 || opts.wfd == ~0) {
 		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
-		return ERR_PTR(-ENOPROTOOPT);
+		return -ENOPROTOOPT;
 	}
 
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans)
-		return ERR_PTR(-ENOMEM);
-
-	trans->rpc = p9_fd_rpc;
-	trans->close = p9_fd_close;
-
-	err = p9_fd_open(trans, opts.rfd, opts.wfd);
+	err = p9_fd_open(client, opts.rfd, opts.wfd);
 	if (err < 0)
 		goto error;
 
-	trans->msize = msize;
-	trans->extended = extended;
-	p = (struct p9_trans_fd *) trans->priv;
-	p->conn = p9_conn_create(trans);
+	p = (struct p9_trans_fd *) client->trans;
+	p->conn = p9_conn_create(client);
 	if (IS_ERR(p->conn)) {
 		err = PTR_ERR(p->conn);
 		p->conn = NULL;
 		goto error;
 	}
 
-	return trans;
+	return 0;
 
 error:
-	kfree(trans);
-	return ERR_PTR(err);
+	kfree(p);
+	return err;
 }
 
 static struct p9_trans_module p9_tcp_trans = {
 	.name = "tcp",
 	.maxsize = MAX_SOCK_BUF,
 	.def = 1,
-	.create = p9_trans_create_tcp,
+	.create = p9_fd_create_tcp,
+	.close = p9_fd_close,
+	.rpc = p9_fd_rpc,
 	.owner = THIS_MODULE,
 };
 
@@ -1505,7 +1480,9 @@ static struct p9_trans_module p9_unix_trans = {
 	.name = "unix",
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
-	.create = p9_trans_create_unix,
+	.create = p9_fd_create_unix,
+	.close = p9_fd_close,
+	.rpc = p9_fd_rpc,
 	.owner = THIS_MODULE,
 };
 
@@ -1513,7 +1490,9 @@ static struct p9_trans_module p9_fd_trans = {
 	.name = "fd",
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
-	.create = p9_trans_create_fd,
+	.create = p9_fd_create,
+	.close = p9_fd_close,
+	.rpc = p9_fd_rpc,
 	.owner = THIS_MODULE,
 };
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 94912e077a55..72493f04a76d 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -41,6 +41,7 @@
 #include <linux/file.h>
 #include <net/9p/9p.h>
 #include <linux/parser.h>
+#include <net/9p/client.h>
 #include <net/9p/transport.h>
 #include <linux/scatterlist.h>
 #include <linux/virtio.h>
@@ -55,7 +56,6 @@ static int chan_index;
 
 #define P9_INIT_MAXTAG	16
 
-
 /**
  * enum p9_req_status_t - virtio request status
  * @REQ_STATUS_IDLE: request slot unused
@@ -197,9 +197,9 @@ static unsigned int rest_of_page(void *data)
  *
  */
 
-static void p9_virtio_close(struct p9_trans *trans)
+static void p9_virtio_close(struct p9_client *client)
 {
-	struct virtio_chan *chan = trans->priv;
+	struct virtio_chan *chan = client->trans;
 	int count;
 	unsigned long flags;
 
@@ -215,7 +215,7 @@ static void p9_virtio_close(struct p9_trans *trans)
 	chan->inuse = false;
 	mutex_unlock(&virtio_9p_lock);
 
-	kfree(trans);
+	client->trans = NULL;
 }
 
 /**
@@ -292,17 +292,17 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
  */
 
 static int
-p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
+p9_virtio_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc)
 {
 	int in, out;
 	int n, err, size;
-	struct virtio_chan *chan = t->priv;
+	struct virtio_chan *chan = c->trans;
 	char *rdata;
 	struct p9_req_t *req;
 	unsigned long flags;
 
 	if (*rc == NULL) {
-		*rc = kmalloc(sizeof(struct p9_fcall) + t->msize, GFP_KERNEL);
+		*rc = kmalloc(sizeof(struct p9_fcall) + c->msize, GFP_KERNEL);
 		if (!*rc)
 			return -ENOMEM;
 	}
@@ -325,7 +325,7 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
 	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc tag %d\n", n);
 
 	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, tc->sdata, tc->size);
-	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, t->msize);
+	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, c->msize);
 
 	req->status = REQ_STATUS_SENT;
 
@@ -341,7 +341,7 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
 
 	size = le32_to_cpu(*(__le32 *) rdata);
 
-	err = p9_deserialize_fcall(rdata, size, *rc, t->extended);
+	err = p9_deserialize_fcall(rdata, size, *rc, c->dotu);
 	if (err < 0) {
 		P9_DPRINTK(P9_DEBUG_TRANS,
 			"9p debug: virtio rpc deserialize returned %d\n", err);
@@ -352,8 +352,8 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
 	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
 		char buf[150];
 
-		p9_printfcall(buf, sizeof(buf), *rc, t->extended);
-		printk(KERN_NOTICE ">>> %p %s\n", t, buf);
+		p9_printfcall(buf, sizeof(buf), *rc, c->dotu);
+		printk(KERN_NOTICE ">>> %p %s\n", c, buf);
 	}
 #endif
 
@@ -422,10 +422,9 @@ fail:
 
 /**
  * p9_virtio_create - allocate a new virtio channel
+ * @client: client instance invoking this transport
  * @devname: string identifying the channel to connect to (unused)
  * @args: args passed from sys_mount() for per-transport options (unused)
- * @msize: requested maximum packet size
- * @extended: 9p2000.u enabled flag
  *
  * This sets up a transport channel for 9p communication.  Right now
  * we only match the first available channel, but eventually we couldlook up
@@ -441,11 +440,9 @@ fail:
  *
  */
 
-static struct p9_trans *
-p9_virtio_create(const char *devname, char *args, int msize,
-							unsigned char extended)
+static int
+p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 {
-	struct p9_trans *trans;
 	struct virtio_chan *chan = channels;
 	int index = 0;
 
@@ -463,30 +460,21 @@ p9_virtio_create(const char *devname, char *args, int msize,
 
 	if (index >= MAX_9P_CHAN) {
 		printk(KERN_ERR "9p: no channels available\n");
-		return ERR_PTR(-ENODEV);
+		return -ENODEV;
 	}
 
 	chan->tagpool = p9_idpool_create();
 	if (IS_ERR(chan->tagpool)) {
 		printk(KERN_ERR "9p: couldn't allocate tagpool\n");
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	}
 	p9_idpool_get(chan->tagpool); /* reserve tag 0 */
 	chan->max_tag = 0;
 	chan->reqs = NULL;
 
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans) {
-		printk(KERN_ERR "9p: couldn't allocate transport\n");
-		return ERR_PTR(-ENOMEM);
-	}
-	trans->extended = extended;
-	trans->msize = msize;
-	trans->close = p9_virtio_close;
-	trans->rpc = p9_virtio_rpc;
-	trans->priv = chan;
+	client->trans = (void *)chan;
 
-	return trans;
+	return 0;
 }
 
 /**
@@ -526,6 +514,8 @@ static struct virtio_driver p9_virtio_drv = {
 static struct p9_trans_module p9_virtio_trans = {
 	.name = "virtio",
 	.create = p9_virtio_create,
+	.close = p9_virtio_close,
+	.rpc = p9_virtio_rpc,
 	.maxsize = PAGE_SIZE*16,
 	.def = 0,
 	.owner = THIS_MODULE,
-- 
GitLab


From bead27f0a87f4055f6a0fd69ded9eacced485618 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@ericvh-desktop.austin.ibm.com>
Date: Mon, 13 Oct 2008 18:45:24 -0500
Subject: [PATCH 546/895] 9p: remove duplicate client state

Now that we are passing client state into the transport modules, remove
duplicate state which is present in transport private structures.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index d09389f08382..bc5b6965981b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -145,8 +145,6 @@ struct p9_poll_wait {
  * struct p9_conn - fd mux connection state information
  * @lock: protects mux_list (?)
  * @mux_list: list link for mux to manage multiple connections (?)
- * @msize: maximum size for connection (dup)
- * @extended: 9p2000.u flag (dup)
  * @client: reference to client instance for this connection
  * @tagpool: id accounting for transactions
  * @err: error state
@@ -170,8 +168,6 @@ struct p9_poll_wait {
 struct p9_conn {
 	spinlock_t lock; /* protect lock structure */
 	struct list_head mux_list;
-	int msize;
-	unsigned char extended;
 	struct p9_client *client;
 	struct p9_idpool *tagpool;
 	int err;
@@ -289,8 +285,6 @@ static struct p9_conn *p9_conn_create(struct p9_client *client)
 
 	spin_lock_init(&m->lock);
 	INIT_LIST_HEAD(&m->mux_list);
-	m->msize = client->msize;
-	m->extended = client->dotu;
 	m->client = client;
 	m->tagpool = p9_idpool_create();
 	if (IS_ERR(m->tagpool)) {
@@ -584,7 +578,7 @@ static void process_request(struct p9_conn *m, struct p9_req *req)
 		P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len,
 								ename->str);
 
-		if (m->extended)
+		if (m->client->dotu)
 			req->err = -ecode;
 
 		if (!req->err) {
@@ -629,7 +623,8 @@ static void p9_read_work(struct work_struct *work)
 
 	if (!m->rcall) {
 		m->rcall =
-		    kmalloc(sizeof(struct p9_fcall) + m->msize, GFP_KERNEL);
+		    kmalloc(sizeof(struct p9_fcall) + m->client->msize,
+								GFP_KERNEL);
 		if (!m->rcall) {
 			err = -ENOMEM;
 			goto error;
@@ -640,7 +635,8 @@ static void p9_read_work(struct work_struct *work)
 	}
 
 	clear_bit(Rpending, &m->wsched);
-	err = p9_fd_read(m->client, m->rbuf + m->rpos, m->msize - m->rpos);
+	err = p9_fd_read(m->client, m->rbuf + m->rpos,
+						m->client->msize - m->rpos);
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err);
 	if (err == -EAGAIN) {
 		clear_bit(Rworksched, &m->wsched);
@@ -653,7 +649,7 @@ static void p9_read_work(struct work_struct *work)
 	m->rpos += err;
 	while (m->rpos > 4) {
 		n = le32_to_cpu(*(__le32 *) m->rbuf);
-		if (n >= m->msize) {
+		if (n >= m->client->msize) {
 			P9_DPRINTK(P9_DEBUG_ERROR,
 				"requested packet size too big: %d\n", n);
 			err = -EIO;
@@ -664,7 +660,7 @@ static void p9_read_work(struct work_struct *work)
 			break;
 
 		err =
-		    p9_deserialize_fcall(m->rbuf, n, m->rcall, m->extended);
+		    p9_deserialize_fcall(m->rbuf, n, m->rcall, m->client->dotu);
 		if (err < 0)
 			goto error;
 
@@ -673,7 +669,7 @@ static void p9_read_work(struct work_struct *work)
 			char buf[150];
 
 			p9_printfcall(buf, sizeof(buf), m->rcall,
-				m->extended);
+				m->client->dotu);
 			printk(KERN_NOTICE ">>> %p %s\n", m, buf);
 		}
 #endif
@@ -681,8 +677,8 @@ static void p9_read_work(struct work_struct *work)
 		rcall = m->rcall;
 		rbuf = m->rbuf;
 		if (m->rpos > n) {
-			m->rcall = kmalloc(sizeof(struct p9_fcall) + m->msize,
-					   GFP_KERNEL);
+			m->rcall = kmalloc(sizeof(struct p9_fcall) +
+						m->client->msize, GFP_KERNEL);
 			if (!m->rcall) {
 				err = -ENOMEM;
 				goto error;
@@ -798,7 +794,7 @@ static struct p9_req *p9_send_request(struct p9_conn *m,
 	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
 		char buf[150];
 
-		p9_printfcall(buf, sizeof(buf), tc, m->extended);
+		p9_printfcall(buf, sizeof(buf), tc, m->client->dotu);
 		printk(KERN_NOTICE "<<< %p %s\n", m, buf);
 	}
 #endif
-- 
GitLab


From 5503ac565998837350f3ee1cc344c36143ea2386 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@ericvh-desktop.austin.ibm.com>
Date: Mon, 13 Oct 2008 18:45:24 -0500
Subject: [PATCH 547/895] 9p: remove unnecessary prototypes

Cleanup files by reordering functions in order to remove need for
unnecessary function prototypes.

There are no code changes here, just functions being moved around and
prototypes being eliminated.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c   | 183 +++++-----
 net/9p/trans_fd.c | 896 ++++++++++++++++++++++------------------------
 2 files changed, 513 insertions(+), 566 deletions(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index f1a52a7ed724..712d4f336adc 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -36,10 +36,6 @@
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
 
-static struct p9_fid *p9_fid_create(struct p9_client *clnt);
-static void p9_fid_destroy(struct p9_fid *fid);
-static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu);
-
 /*
   * Client Option Parsing (code inspired by NFS code)
   *  - a little lazy - parse all client options
@@ -124,6 +120,55 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	return ret;
 }
 
+static struct p9_fid *p9_fid_create(struct p9_client *clnt)
+{
+	int err;
+	struct p9_fid *fid;
+
+	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
+	fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
+	if (!fid)
+		return ERR_PTR(-ENOMEM);
+
+	fid->fid = p9_idpool_get(clnt->fidpool);
+	if (fid->fid < 0) {
+		err = -ENOSPC;
+		goto error;
+	}
+
+	memset(&fid->qid, 0, sizeof(struct p9_qid));
+	fid->mode = -1;
+	fid->rdir_fpos = 0;
+	fid->rdir_pos = 0;
+	fid->rdir_fcall = NULL;
+	fid->uid = current->fsuid;
+	fid->clnt = clnt;
+	fid->aux = NULL;
+
+	spin_lock(&clnt->lock);
+	list_add(&fid->flist, &clnt->fidlist);
+	spin_unlock(&clnt->lock);
+
+	return fid;
+
+error:
+	kfree(fid);
+	return ERR_PTR(err);
+}
+
+static void p9_fid_destroy(struct p9_fid *fid)
+{
+	struct p9_client *clnt;
+
+	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
+	clnt = fid->clnt;
+	p9_idpool_put(fid->fid, clnt->fidpool);
+	spin_lock(&clnt->lock);
+	list_del(&fid->flist);
+	spin_unlock(&clnt->lock);
+	kfree(fid->rdir_fcall);
+	kfree(fid);
+}
 
 /**
  * p9_client_rpc - sends 9P request and waits until a response is available.
@@ -815,6 +860,46 @@ int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count)
 }
 EXPORT_SYMBOL(p9_client_readn);
 
+static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu)
+{
+	int n;
+	char *p;
+	struct p9_stat *ret;
+
+	n = sizeof(struct p9_stat) + st->name.len + st->uid.len + st->gid.len +
+		st->muid.len;
+
+	if (dotu)
+		n += st->extension.len;
+
+	ret = kmalloc(n, GFP_KERNEL);
+	if (!ret)
+		return ERR_PTR(-ENOMEM);
+
+	memmove(ret, st, sizeof(struct p9_stat));
+	p = ((char *) ret) + sizeof(struct p9_stat);
+	memmove(p, st->name.str, st->name.len);
+	ret->name.str = p;
+	p += st->name.len;
+	memmove(p, st->uid.str, st->uid.len);
+	ret->uid.str = p;
+	p += st->uid.len;
+	memmove(p, st->gid.str, st->gid.len);
+	ret->gid.str = p;
+	p += st->gid.len;
+	memmove(p, st->muid.str, st->muid.len);
+	ret->muid.str = p;
+	p += st->muid.len;
+
+	if (dotu) {
+		memmove(p, st->extension.str, st->extension.len);
+		ret->extension.str = p;
+		p += st->extension.len;
+	}
+
+	return ret;
+}
+
 struct p9_stat *p9_client_stat(struct p9_fid *fid)
 {
 	int err;
@@ -986,93 +1071,3 @@ error:
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_dirread);
-
-static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu)
-{
-	int n;
-	char *p;
-	struct p9_stat *ret;
-
-	n = sizeof(struct p9_stat) + st->name.len + st->uid.len + st->gid.len +
-		st->muid.len;
-
-	if (dotu)
-		n += st->extension.len;
-
-	ret = kmalloc(n, GFP_KERNEL);
-	if (!ret)
-		return ERR_PTR(-ENOMEM);
-
-	memmove(ret, st, sizeof(struct p9_stat));
-	p = ((char *) ret) + sizeof(struct p9_stat);
-	memmove(p, st->name.str, st->name.len);
-	ret->name.str = p;
-	p += st->name.len;
-	memmove(p, st->uid.str, st->uid.len);
-	ret->uid.str = p;
-	p += st->uid.len;
-	memmove(p, st->gid.str, st->gid.len);
-	ret->gid.str = p;
-	p += st->gid.len;
-	memmove(p, st->muid.str, st->muid.len);
-	ret->muid.str = p;
-	p += st->muid.len;
-
-	if (dotu) {
-		memmove(p, st->extension.str, st->extension.len);
-		ret->extension.str = p;
-		p += st->extension.len;
-	}
-
-	return ret;
-}
-
-static struct p9_fid *p9_fid_create(struct p9_client *clnt)
-{
-	int err;
-	struct p9_fid *fid;
-
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
-	fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
-	if (!fid)
-		return ERR_PTR(-ENOMEM);
-
-	fid->fid = p9_idpool_get(clnt->fidpool);
-	if (fid->fid < 0) {
-		err = -ENOSPC;
-		goto error;
-	}
-
-	memset(&fid->qid, 0, sizeof(struct p9_qid));
-	fid->mode = -1;
-	fid->rdir_fpos = 0;
-	fid->rdir_pos = 0;
-	fid->rdir_fcall = NULL;
-	fid->uid = current->fsuid;
-	fid->clnt = clnt;
-	fid->aux = NULL;
-
-	spin_lock(&clnt->lock);
-	list_add(&fid->flist, &clnt->fidlist);
-	spin_unlock(&clnt->lock);
-
-	return fid;
-
-error:
-	kfree(fid);
-	return ERR_PTR(err);
-}
-
-static void p9_fid_destroy(struct p9_fid *fid)
-{
-	struct p9_client *clnt;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
-	clnt = fid->clnt;
-	p9_idpool_put(fid->fid, clnt->fidpool);
-	spin_lock(&clnt->lock);
-	list_del(&fid->flist);
-	spin_unlock(&clnt->lock);
-	kfree(fid->rdir_fcall);
-	kfree(fid);
-}
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index bc5b6965981b..334d39cc5ba3 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -61,7 +61,6 @@ struct p9_fd_opts {
 	u16 port;
 };
 
-
 /**
  * struct p9_trans_fd - transport state
  * @rd: reference to file to read from
@@ -206,30 +205,11 @@ struct p9_mux_rpc {
 	wait_queue_head_t wqueue;
 };
 
-static int p9_poll_proc(void *);
-static void p9_read_work(struct work_struct *work);
-static void p9_write_work(struct work_struct *work);
-static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address,
-								poll_table *p);
-static int p9_fd_write(struct p9_client *client, void *v, int len);
-static int p9_fd_read(struct p9_client *client, void *v, int len);
-
 static DEFINE_SPINLOCK(p9_poll_lock);
 static LIST_HEAD(p9_poll_pending_list);
 static struct workqueue_struct *p9_mux_wq;
 static struct task_struct *p9_poll_task;
 
-static void p9_conn_destroy(struct p9_conn *);
-static unsigned int p9_fd_poll(struct p9_client *client,
-						struct poll_table_struct *pt);
-
-#ifdef P9_NONBLOCK
-static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
-	p9_conn_req_callback cb, void *a);
-#endif /* P9_NONBLOCK */
-
-static void p9_conn_cancel(struct p9_conn *m, int err);
-
 static u16 p9_mux_get_tag(struct p9_conn *m)
 {
 	int tag;
@@ -267,222 +247,314 @@ static void p9_mux_poll_stop(struct p9_conn *m)
 }
 
 /**
- * p9_conn_create - allocate and initialize the per-session mux data
- * @client: client instance
+ * p9_conn_cancel - cancel all pending requests with error
+ * @m: mux data
+ * @err: error code
  *
- * Note: Creates the polling task if this is the first session.
  */
 
-static struct p9_conn *p9_conn_create(struct p9_client *client)
+void p9_conn_cancel(struct p9_conn *m, int err)
 {
-	int i, n;
-	struct p9_conn *m;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "client %p msize %d\n", client, client->msize);
-	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
-	if (!m)
-		return ERR_PTR(-ENOMEM);
+	struct p9_req *req, *rtmp;
+	LIST_HEAD(cancel_list);
 
-	spin_lock_init(&m->lock);
-	INIT_LIST_HEAD(&m->mux_list);
-	m->client = client;
-	m->tagpool = p9_idpool_create();
-	if (IS_ERR(m->tagpool)) {
-		kfree(m);
-		return ERR_PTR(-ENOMEM);
+	P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
+	m->err = err;
+	spin_lock(&m->lock);
+	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
+		list_move(&req->req_list, &cancel_list);
+	}
+	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
+		list_move(&req->req_list, &cancel_list);
 	}
+	spin_unlock(&m->lock);
 
-	INIT_LIST_HEAD(&m->req_list);
-	INIT_LIST_HEAD(&m->unsent_req_list);
-	INIT_WORK(&m->rq, p9_read_work);
-	INIT_WORK(&m->wq, p9_write_work);
-	INIT_LIST_HEAD(&m->poll_pending_link);
-	init_poll_funcptr(&m->pt, p9_pollwait);
+	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
+		list_del(&req->req_list);
+		if (!req->err)
+			req->err = err;
 
-	n = p9_fd_poll(client, &m->pt);
-	if (n & POLLIN) {
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m);
-		set_bit(Rpending, &m->wsched);
+		if (req->cb)
+			(*req->cb) (req, req->cba);
+		else
+			kfree(req->rcall);
 	}
+}
 
-	if (n & POLLOUT) {
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m);
-		set_bit(Wpending, &m->wsched);
-	}
+static void process_request(struct p9_conn *m, struct p9_req *req)
+{
+	int ecode;
+	struct p9_str *ename;
 
-	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
-		if (IS_ERR(m->poll_wait[i].wait_addr)) {
-			p9_mux_poll_stop(m);
-			kfree(m);
-			/* return the error code */
-			return (void *)m->poll_wait[i].wait_addr;
+	if (!req->err && req->rcall->id == P9_RERROR) {
+		ecode = req->rcall->params.rerror.errno;
+		ename = &req->rcall->params.rerror.error;
+
+		P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len,
+								ename->str);
+
+		if (m->client->dotu)
+			req->err = -ecode;
+
+		if (!req->err) {
+			req->err = p9_errstr2errno(ename->str, ename->len);
+
+			/* string match failed */
+			if (!req->err) {
+				PRINT_FCALL_ERROR("unknown error", req->rcall);
+				req->err = -ESERVERFAULT;
+			}
 		}
+	} else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
+		P9_DPRINTK(P9_DEBUG_ERROR,
+				"fcall mismatch: expected %d, got %d\n",
+				req->tcall->id + 1, req->rcall->id);
+		if (!req->err)
+			req->err = -EIO;
 	}
-
-	return m;
 }
 
-/**
- * p9_mux_destroy - cancels all pending requests and frees mux resources
- * @m: mux to destroy
- *
- */
-
-static void p9_conn_destroy(struct p9_conn *m)
+static unsigned int
+p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
 {
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m,
-		m->mux_list.prev, m->mux_list.next);
+	int ret, n;
+	struct p9_trans_fd *ts = NULL;
 
-	p9_mux_poll_stop(m);
-	cancel_work_sync(&m->rq);
-	cancel_work_sync(&m->wq);
+	if (client && client->status == Connected)
+		ts = client->trans;
 
-	p9_conn_cancel(m, -ECONNRESET);
+	if (!ts)
+		return -EREMOTEIO;
 
-	m->client = NULL;
-	p9_idpool_destroy(m->tagpool);
-	kfree(m);
-}
+	if (!ts->rd->f_op || !ts->rd->f_op->poll)
+		return -EIO;
 
-static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
-{
-	struct p9_poll_wait *pwait =
-		container_of(wait, struct p9_poll_wait, wait);
-	struct p9_conn *m = pwait->conn;
-	unsigned long flags;
-	DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
+	if (!ts->wr->f_op || !ts->wr->f_op->poll)
+		return -EIO;
 
-	spin_lock_irqsave(&p9_poll_lock, flags);
-	if (list_empty(&m->poll_pending_link))
-		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
-	spin_unlock_irqrestore(&p9_poll_lock, flags);
+	ret = ts->rd->f_op->poll(ts->rd, pt);
+	if (ret < 0)
+		return ret;
 
-	/* perform the default wake up operation */
-	return default_wake_function(&dummy_wait, mode, sync, key);
+	if (ts->rd != ts->wr) {
+		n = ts->wr->f_op->poll(ts->wr, pt);
+		if (n < 0)
+			return n;
+		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
+	}
+
+	return ret;
 }
 
 /**
- * p9_pollwait - add poll task to the wait queue
- * @filp: file pointer being polled
- * @wait_address: wait_q to block on
- * @p: poll state
+ * p9_fd_read- read from a fd
+ * @client: client instance
+ * @v: buffer to receive data into
+ * @len: size of receive buffer
  *
- * called by files poll operation to add v9fs-poll task to files wait queue
  */
 
-static void
-p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
+static int p9_fd_read(struct p9_client *client, void *v, int len)
 {
-	struct p9_conn *m = container_of(p, struct p9_conn, pt);
-	struct p9_poll_wait *pwait = NULL;
-	int i;
+	int ret;
+	struct p9_trans_fd *ts = NULL;
 
-	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
-		if (m->poll_wait[i].wait_addr == NULL) {
-			pwait = &m->poll_wait[i];
-			break;
-		}
-	}
+	if (client && client->status != Disconnected)
+		ts = client->trans;
 
-	if (!pwait) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n");
-		return;
-	}
+	if (!ts)
+		return -EREMOTEIO;
 
-	if (!wait_address) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n");
-		pwait->wait_addr = ERR_PTR(-EIO);
-		return;
-	}
+	if (!(ts->rd->f_flags & O_NONBLOCK))
+		P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n");
 
-	pwait->conn = m;
-	pwait->wait_addr = wait_address;
-	init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
-	add_wait_queue(wait_address, &pwait->wait);
+	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
+	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
+		client->status = Disconnected;
+	return ret;
 }
 
 /**
- * p9_poll_mux - polls a mux and schedules read or write works if necessary
- * @m: connection to poll
+ * p9_read_work - called when there is some data to be read from a transport
+ * @work: container of work to be done
  *
  */
 
-static void p9_poll_mux(struct p9_conn *m)
+static void p9_read_work(struct work_struct *work)
 {
-	int n;
+	int n, err;
+	struct p9_conn *m;
+	struct p9_req *req, *rptr, *rreq;
+	struct p9_fcall *rcall;
+	char *rbuf;
+
+	m = container_of(work, struct p9_conn, rq);
 
 	if (m->err < 0)
 		return;
 
-	n = p9_fd_poll(m->client, NULL);
-	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
-		P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n);
-		if (n >= 0)
-			n = -ECONNRESET;
-		p9_conn_cancel(m, n);
-	}
+	rcall = NULL;
+	P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
 
-	if (n & POLLIN) {
-		set_bit(Rpending, &m->wsched);
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m);
-		if (!test_and_set_bit(Rworksched, &m->wsched)) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m);
-			queue_work(p9_mux_wq, &m->rq);
+	if (!m->rcall) {
+		m->rcall =
+		    kmalloc(sizeof(struct p9_fcall) + m->client->msize,
+								GFP_KERNEL);
+		if (!m->rcall) {
+			err = -ENOMEM;
+			goto error;
 		}
+
+		m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall);
+		m->rpos = 0;
 	}
 
-	if (n & POLLOUT) {
-		set_bit(Wpending, &m->wsched);
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m);
-		if ((m->wsize || !list_empty(&m->unsent_req_list))
-		    && !test_and_set_bit(Wworksched, &m->wsched)) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m);
-			queue_work(p9_mux_wq, &m->wq);
-		}
+	clear_bit(Rpending, &m->wsched);
+	err = p9_fd_read(m->client, m->rbuf + m->rpos,
+						m->client->msize - m->rpos);
+	P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err);
+	if (err == -EAGAIN) {
+		clear_bit(Rworksched, &m->wsched);
+		return;
 	}
-}
 
-/**
- * p9_poll_proc - poll worker thread
- * @a: thread state and arguments
- *
- * polls all v9fs transports for new events and queues the appropriate
- * work to the work queue
+	if (err <= 0)
+		goto error;
+
+	m->rpos += err;
+	while (m->rpos > 4) {
+		n = le32_to_cpu(*(__le32 *) m->rbuf);
+		if (n >= m->client->msize) {
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				"requested packet size too big: %d\n", n);
+			err = -EIO;
+			goto error;
+		}
+
+		if (m->rpos < n)
+			break;
+
+		err =
+		    p9_deserialize_fcall(m->rbuf, n, m->rcall, m->client->dotu);
+		if (err < 0)
+			goto error;
+
+#ifdef CONFIG_NET_9P_DEBUG
+		if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
+			char buf[150];
+
+			p9_printfcall(buf, sizeof(buf), m->rcall,
+				m->client->dotu);
+			printk(KERN_NOTICE ">>> %p %s\n", m, buf);
+		}
+#endif
+
+		rcall = m->rcall;
+		rbuf = m->rbuf;
+		if (m->rpos > n) {
+			m->rcall = kmalloc(sizeof(struct p9_fcall) +
+						m->client->msize, GFP_KERNEL);
+			if (!m->rcall) {
+				err = -ENOMEM;
+				goto error;
+			}
+
+			m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall);
+			memmove(m->rbuf, rbuf + n, m->rpos - n);
+			m->rpos -= n;
+		} else {
+			m->rcall = NULL;
+			m->rbuf = NULL;
+			m->rpos = 0;
+		}
+
+		P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m,
+							rcall->id, rcall->tag);
+
+		req = NULL;
+		spin_lock(&m->lock);
+		list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
+			if (rreq->tag == rcall->tag) {
+				req = rreq;
+				if (req->flush != Flushing)
+					list_del(&req->req_list);
+				break;
+			}
+		}
+		spin_unlock(&m->lock);
+
+		if (req) {
+			req->rcall = rcall;
+			process_request(m, req);
+
+			if (req->flush != Flushing) {
+				if (req->cb)
+					(*req->cb) (req, req->cba);
+				else
+					kfree(req->rcall);
+			}
+		} else {
+			if (err >= 0 && rcall->id != P9_RFLUSH)
+				P9_DPRINTK(P9_DEBUG_ERROR,
+				  "unexpected response mux %p id %d tag %d\n",
+				  m, rcall->id, rcall->tag);
+			kfree(rcall);
+		}
+	}
+
+	if (!list_empty(&m->req_list)) {
+		if (test_and_clear_bit(Rpending, &m->wsched))
+			n = POLLIN;
+		else
+			n = p9_fd_poll(m->client, NULL);
+
+		if (n & POLLIN) {
+			P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m);
+			queue_work(p9_mux_wq, &m->rq);
+		} else
+			clear_bit(Rworksched, &m->wsched);
+	} else
+		clear_bit(Rworksched, &m->wsched);
+
+	return;
+
+error:
+	p9_conn_cancel(m, err);
+	clear_bit(Rworksched, &m->wsched);
+}
+
+/**
+ * p9_fd_write - write to a socket
+ * @client: client instance
+ * @v: buffer to send data from
+ * @len: size of send buffer
  *
  */
 
-static int p9_poll_proc(void *a)
+static int p9_fd_write(struct p9_client *client, void *v, int len)
 {
-	unsigned long flags;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "start %p\n", current);
- repeat:
-	spin_lock_irqsave(&p9_poll_lock, flags);
-	while (!list_empty(&p9_poll_pending_list)) {
-		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
-							struct p9_conn,
-							poll_pending_link);
-		list_del_init(&conn->poll_pending_link);
-		spin_unlock_irqrestore(&p9_poll_lock, flags);
+	int ret;
+	mm_segment_t oldfs;
+	struct p9_trans_fd *ts = NULL;
 
-		p9_poll_mux(conn);
+	if (client && client->status != Disconnected)
+		ts = client->trans;
 
-		spin_lock_irqsave(&p9_poll_lock, flags);
-	}
-	spin_unlock_irqrestore(&p9_poll_lock, flags);
+	if (!ts)
+		return -EREMOTEIO;
 
-	set_current_state(TASK_INTERRUPTIBLE);
-	if (list_empty(&p9_poll_pending_list)) {
-		P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n");
-		schedule();
-	}
-	__set_current_state(TASK_RUNNING);
+	if (!(ts->wr->f_flags & O_NONBLOCK))
+		P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n");
 
-	if (!kthread_should_stop())
-		goto repeat;
+	oldfs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
+	set_fs(oldfs);
 
-	P9_DPRINTK(P9_DEBUG_MUX, "finish\n");
-	return 0;
+	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
+		client->status = Disconnected;
+	return ret;
 }
 
 /**
@@ -566,186 +638,158 @@ error:
 	clear_bit(Wworksched, &m->wsched);
 }
 
-static void process_request(struct p9_conn *m, struct p9_req *req)
+static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
-	int ecode;
-	struct p9_str *ename;
-
-	if (!req->err && req->rcall->id == P9_RERROR) {
-		ecode = req->rcall->params.rerror.errno;
-		ename = &req->rcall->params.rerror.error;
-
-		P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len,
-								ename->str);
-
-		if (m->client->dotu)
-			req->err = -ecode;
+	struct p9_poll_wait *pwait =
+		container_of(wait, struct p9_poll_wait, wait);
+	struct p9_conn *m = pwait->conn;
+	unsigned long flags;
+	DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
 
-		if (!req->err) {
-			req->err = p9_errstr2errno(ename->str, ename->len);
+	spin_lock_irqsave(&p9_poll_lock, flags);
+	if (list_empty(&m->poll_pending_link))
+		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
+	spin_unlock_irqrestore(&p9_poll_lock, flags);
 
-			/* string match failed */
-			if (!req->err) {
-				PRINT_FCALL_ERROR("unknown error", req->rcall);
-				req->err = -ESERVERFAULT;
-			}
-		}
-	} else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
-		P9_DPRINTK(P9_DEBUG_ERROR,
-				"fcall mismatch: expected %d, got %d\n",
-				req->tcall->id + 1, req->rcall->id);
-		if (!req->err)
-			req->err = -EIO;
-	}
+	/* perform the default wake up operation */
+	return default_wake_function(&dummy_wait, mode, sync, key);
 }
 
 /**
- * p9_read_work - called when there is some data to be read from a transport
- * @work: container of work to be done
+ * p9_pollwait - add poll task to the wait queue
+ * @filp: file pointer being polled
+ * @wait_address: wait_q to block on
+ * @p: poll state
  *
+ * called by files poll operation to add v9fs-poll task to files wait queue
  */
 
-static void p9_read_work(struct work_struct *work)
+static void
+p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 {
-	int n, err;
-	struct p9_conn *m;
-	struct p9_req *req, *rptr, *rreq;
-	struct p9_fcall *rcall;
-	char *rbuf;
-
-	m = container_of(work, struct p9_conn, rq);
-
-	if (m->err < 0)
-		return;
-
-	rcall = NULL;
-	P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
+	struct p9_conn *m = container_of(p, struct p9_conn, pt);
+	struct p9_poll_wait *pwait = NULL;
+	int i;
 
-	if (!m->rcall) {
-		m->rcall =
-		    kmalloc(sizeof(struct p9_fcall) + m->client->msize,
-								GFP_KERNEL);
-		if (!m->rcall) {
-			err = -ENOMEM;
-			goto error;
+	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+		if (m->poll_wait[i].wait_addr == NULL) {
+			pwait = &m->poll_wait[i];
+			break;
 		}
-
-		m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall);
-		m->rpos = 0;
 	}
 
-	clear_bit(Rpending, &m->wsched);
-	err = p9_fd_read(m->client, m->rbuf + m->rpos,
-						m->client->msize - m->rpos);
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err);
-	if (err == -EAGAIN) {
-		clear_bit(Rworksched, &m->wsched);
+	if (!pwait) {
+		P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n");
 		return;
 	}
 
-	if (err <= 0)
-		goto error;
-
-	m->rpos += err;
-	while (m->rpos > 4) {
-		n = le32_to_cpu(*(__le32 *) m->rbuf);
-		if (n >= m->client->msize) {
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				"requested packet size too big: %d\n", n);
-			err = -EIO;
-			goto error;
-		}
-
-		if (m->rpos < n)
-			break;
-
-		err =
-		    p9_deserialize_fcall(m->rbuf, n, m->rcall, m->client->dotu);
-		if (err < 0)
-			goto error;
+	if (!wait_address) {
+		P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n");
+		pwait->wait_addr = ERR_PTR(-EIO);
+		return;
+	}
 
-#ifdef CONFIG_NET_9P_DEBUG
-		if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
-			char buf[150];
+	pwait->conn = m;
+	pwait->wait_addr = wait_address;
+	init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
+	add_wait_queue(wait_address, &pwait->wait);
+}
 
-			p9_printfcall(buf, sizeof(buf), m->rcall,
-				m->client->dotu);
-			printk(KERN_NOTICE ">>> %p %s\n", m, buf);
-		}
-#endif
+/**
+ * p9_conn_create - allocate and initialize the per-session mux data
+ * @client: client instance
+ *
+ * Note: Creates the polling task if this is the first session.
+ */
 
-		rcall = m->rcall;
-		rbuf = m->rbuf;
-		if (m->rpos > n) {
-			m->rcall = kmalloc(sizeof(struct p9_fcall) +
-						m->client->msize, GFP_KERNEL);
-			if (!m->rcall) {
-				err = -ENOMEM;
-				goto error;
-			}
+static struct p9_conn *p9_conn_create(struct p9_client *client)
+{
+	int i, n;
+	struct p9_conn *m;
 
-			m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall);
-			memmove(m->rbuf, rbuf + n, m->rpos - n);
-			m->rpos -= n;
-		} else {
-			m->rcall = NULL;
-			m->rbuf = NULL;
-			m->rpos = 0;
-		}
+	P9_DPRINTK(P9_DEBUG_MUX, "client %p msize %d\n", client, client->msize);
+	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
+	if (!m)
+		return ERR_PTR(-ENOMEM);
 
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m,
-							rcall->id, rcall->tag);
+	spin_lock_init(&m->lock);
+	INIT_LIST_HEAD(&m->mux_list);
+	m->client = client;
+	m->tagpool = p9_idpool_create();
+	if (IS_ERR(m->tagpool)) {
+		kfree(m);
+		return ERR_PTR(-ENOMEM);
+	}
 
-		req = NULL;
-		spin_lock(&m->lock);
-		list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-			if (rreq->tag == rcall->tag) {
-				req = rreq;
-				if (req->flush != Flushing)
-					list_del(&req->req_list);
-				break;
-			}
-		}
-		spin_unlock(&m->lock);
+	INIT_LIST_HEAD(&m->req_list);
+	INIT_LIST_HEAD(&m->unsent_req_list);
+	INIT_WORK(&m->rq, p9_read_work);
+	INIT_WORK(&m->wq, p9_write_work);
+	INIT_LIST_HEAD(&m->poll_pending_link);
+	init_poll_funcptr(&m->pt, p9_pollwait);
 
-		if (req) {
-			req->rcall = rcall;
-			process_request(m, req);
+	n = p9_fd_poll(client, &m->pt);
+	if (n & POLLIN) {
+		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m);
+		set_bit(Rpending, &m->wsched);
+	}
 
-			if (req->flush != Flushing) {
-				if (req->cb)
-					(*req->cb) (req, req->cba);
-				else
-					kfree(req->rcall);
-			}
-		} else {
-			if (err >= 0 && rcall->id != P9_RFLUSH)
-				P9_DPRINTK(P9_DEBUG_ERROR,
-				  "unexpected response mux %p id %d tag %d\n",
-				  m, rcall->id, rcall->tag);
-			kfree(rcall);
+	if (n & POLLOUT) {
+		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m);
+		set_bit(Wpending, &m->wsched);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+		if (IS_ERR(m->poll_wait[i].wait_addr)) {
+			p9_mux_poll_stop(m);
+			kfree(m);
+			/* return the error code */
+			return (void *)m->poll_wait[i].wait_addr;
 		}
 	}
 
-	if (!list_empty(&m->req_list)) {
-		if (test_and_clear_bit(Rpending, &m->wsched))
-			n = POLLIN;
-		else
-			n = p9_fd_poll(m->client, NULL);
+	return m;
+}
 
-		if (n & POLLIN) {
+/**
+ * p9_poll_mux - polls a mux and schedules read or write works if necessary
+ * @m: connection to poll
+ *
+ */
+
+static void p9_poll_mux(struct p9_conn *m)
+{
+	int n;
+
+	if (m->err < 0)
+		return;
+
+	n = p9_fd_poll(m->client, NULL);
+	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
+		P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n);
+		if (n >= 0)
+			n = -ECONNRESET;
+		p9_conn_cancel(m, n);
+	}
+
+	if (n & POLLIN) {
+		set_bit(Rpending, &m->wsched);
+		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m);
+		if (!test_and_set_bit(Rworksched, &m->wsched)) {
 			P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m);
 			queue_work(p9_mux_wq, &m->rq);
-		} else
-			clear_bit(Rworksched, &m->wsched);
-	} else
-		clear_bit(Rworksched, &m->wsched);
-
-	return;
+		}
+	}
 
-error:
-	p9_conn_cancel(m, err);
-	clear_bit(Rworksched, &m->wsched);
+	if (n & POLLOUT) {
+		set_bit(Wpending, &m->wsched);
+		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m);
+		if ((m->wsize || !list_empty(&m->unsent_req_list))
+		    && !test_and_set_bit(Wworksched, &m->wsched)) {
+			P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m);
+			queue_work(p9_mux_wq, &m->wq);
+		}
+	}
 }
 
 /**
@@ -1005,69 +1049,6 @@ p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
 	return err;
 }
 
-#ifdef P9_NONBLOCK
-/**
- * p9_conn_rpcnb - sends 9P request without waiting for response.
- * @m: mux data
- * @tc: request to be sent
- * @cb: callback function to be called when response arrives
- * @a: value to pass to the callback function
- *
- */
-
-int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
-		   p9_conn_req_callback cb, void *a)
-{
-	int err;
-	struct p9_req *req;
-
-	req = p9_send_request(m, tc, cb, a);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err);
-		return PTR_ERR(req);
-	}
-
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
-	return 0;
-}
-#endif /* P9_NONBLOCK */
-
-/**
- * p9_conn_cancel - cancel all pending requests with error
- * @m: mux data
- * @err: error code
- *
- */
-
-void p9_conn_cancel(struct p9_conn *m, int err)
-{
-	struct p9_req *req, *rtmp;
-	LIST_HEAD(cancel_list);
-
-	P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
-	m->err = err;
-	spin_lock(&m->lock);
-	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
-		list_move(&req->req_list, &cancel_list);
-	}
-	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
-		list_move(&req->req_list, &cancel_list);
-	}
-	spin_unlock(&m->lock);
-
-	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
-		list_del(&req->req_list);
-		if (!req->err)
-			req->err = err;
-
-		if (req->cb)
-			(*req->cb) (req, req->cba);
-		else
-			kfree(req->rcall);
-	}
-}
-
 /**
  * parse_options - parse mount options into session structure
  * @options: options string passed from mount
@@ -1177,97 +1158,25 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 }
 
 /**
- * p9_fd_read- read from a fd
- * @client: client instance
- * @v: buffer to receive data into
- * @len: size of receive buffer
- *
- */
-
-static int p9_fd_read(struct p9_client *client, void *v, int len)
-{
-	int ret;
-	struct p9_trans_fd *ts = NULL;
-
-	if (client && client->status != Disconnected)
-		ts = client->trans;
-
-	if (!ts)
-		return -EREMOTEIO;
-
-	if (!(ts->rd->f_flags & O_NONBLOCK))
-		P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n");
-
-	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
-	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-		client->status = Disconnected;
-	return ret;
-}
-
-/**
- * p9_fd_write - write to a socket
- * @client: client instance
- * @v: buffer to send data from
- * @len: size of send buffer
+ * p9_mux_destroy - cancels all pending requests and frees mux resources
+ * @m: mux to destroy
  *
  */
 
-static int p9_fd_write(struct p9_client *client, void *v, int len)
-{
-	int ret;
-	mm_segment_t oldfs;
-	struct p9_trans_fd *ts = NULL;
-
-	if (client && client->status != Disconnected)
-		ts = client->trans;
-
-	if (!ts)
-		return -EREMOTEIO;
-
-	if (!(ts->wr->f_flags & O_NONBLOCK))
-		P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n");
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-	/* The cast to a user pointer is valid due to the set_fs() */
-	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
-	set_fs(oldfs);
-
-	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-		client->status = Disconnected;
-	return ret;
-}
-
-static unsigned int
-p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
+static void p9_conn_destroy(struct p9_conn *m)
 {
-	int ret, n;
-	struct p9_trans_fd *ts = NULL;
-
-	if (client && client->status == Connected)
-		ts = client->trans;
-
-	if (!ts)
-		return -EREMOTEIO;
-
-	if (!ts->rd->f_op || !ts->rd->f_op->poll)
-		return -EIO;
-
-	if (!ts->wr->f_op || !ts->wr->f_op->poll)
-		return -EIO;
+	P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m,
+		m->mux_list.prev, m->mux_list.next);
 
-	ret = ts->rd->f_op->poll(ts->rd, pt);
-	if (ret < 0)
-		return ret;
+	p9_mux_poll_stop(m);
+	cancel_work_sync(&m->rq);
+	cancel_work_sync(&m->wq);
 
-	if (ts->rd != ts->wr) {
-		n = ts->wr->f_op->poll(ts->wr, pt);
-		if (n < 0)
-			return n;
-		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
-	}
+	p9_conn_cancel(m, -ECONNRESET);
 
-	return ret;
+	m->client = NULL;
+	p9_idpool_destroy(m->tagpool);
+	kfree(m);
 }
 
 /**
@@ -1492,6 +1401,49 @@ static struct p9_trans_module p9_fd_trans = {
 	.owner = THIS_MODULE,
 };
 
+/**
+ * p9_poll_proc - poll worker thread
+ * @a: thread state and arguments
+ *
+ * polls all v9fs transports for new events and queues the appropriate
+ * work to the work queue
+ *
+ */
+
+static int p9_poll_proc(void *a)
+{
+	unsigned long flags;
+
+	P9_DPRINTK(P9_DEBUG_MUX, "start %p\n", current);
+ repeat:
+	spin_lock_irqsave(&p9_poll_lock, flags);
+	while (!list_empty(&p9_poll_pending_list)) {
+		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
+							struct p9_conn,
+							poll_pending_link);
+		list_del_init(&conn->poll_pending_link);
+		spin_unlock_irqrestore(&p9_poll_lock, flags);
+
+		p9_poll_mux(conn);
+
+		spin_lock_irqsave(&p9_poll_lock, flags);
+	}
+	spin_unlock_irqrestore(&p9_poll_lock, flags);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (list_empty(&p9_poll_pending_list)) {
+		P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n");
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	if (!kthread_should_stop())
+		goto repeat;
+
+	P9_DPRINTK(P9_DEBUG_MUX, "finish\n");
+	return 0;
+}
+
 int p9_trans_fd_init(void)
 {
 	p9_mux_wq = create_workqueue("v9fs");
-- 
GitLab


From 21c003687e2d1c589cf177a3ba17fd439af94850 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 18:45:24 -0500
Subject: [PATCH 548/895] 9p: consolidate mux_rpc and request structure

Currently, trans_fd has two structures (p9_req and p9_mux-rpc)
which contain mostly duplicate data.

This patch consolidates these two structures and removes p9_mux_rpc.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 68 +++++++++++++++--------------------------------
 1 file changed, 22 insertions(+), 46 deletions(-)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 334d39cc5ba3..dbb057d7fa5f 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -119,6 +119,8 @@ typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a);
  * @cba: argument to pass to callback
  * @flush: flag to indicate RPC has been flushed
  * @req_list: list link for higher level objects to chain requests
+ * @m: connection this request was issued on
+ * @wqueue: wait queue that client is blocked on for this rpc
  *
  */
 
@@ -132,6 +134,8 @@ struct p9_req {
 	void *cba;
 	int flush;
 	struct list_head req_list;
+	struct p9_conn *m;
+	wait_queue_head_t wqueue;
 };
 
 struct p9_poll_wait {
@@ -186,25 +190,6 @@ struct p9_conn {
 	unsigned long wsched;
 };
 
-/**
- * struct p9_mux_rpc - fd mux rpc accounting structure
- * @m: connection this request was issued on
- * @err: error state
- * @tcall: request &p9_fcall
- * @rcall: response &p9_fcall
- * @wqueue: wait queue that client is blocked on for this rpc
- *
- * Bug: isn't this information duplicated elsewhere like &p9_req
- */
-
-struct p9_mux_rpc {
-	struct p9_conn *m;
-	int err;
-	struct p9_fcall *tcall;
-	struct p9_fcall *rcall;
-	wait_queue_head_t wqueue;
-};
-
 static DEFINE_SPINLOCK(p9_poll_lock);
 static LIST_HEAD(p9_poll_pending_list);
 static struct workqueue_struct *p9_mux_wq;
@@ -844,6 +829,8 @@ static struct p9_req *p9_send_request(struct p9_conn *m,
 #endif
 
 	spin_lock_init(&req->lock);
+	req->m = m;
+	init_waitqueue_head(&req->wqueue);
 	req->tag = n;
 	req->tcall = tc;
 	req->rcall = NULL;
@@ -954,20 +941,14 @@ p9_mux_flush_request(struct p9_conn *m, struct p9_req *req)
 	return 1;
 }
 
-static void
-p9_conn_rpc_cb(struct p9_req *req, void *a)
+static void p9_conn_rpc_cb(struct p9_req *req, void *a)
 {
-	struct p9_mux_rpc *r;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "req %p r %p\n", req, a);
-	r = a;
-	r->rcall = req->rcall;
-	r->err = req->err;
+	P9_DPRINTK(P9_DEBUG_MUX, "req %p arg %p\n", req, a);
 
 	if (req->flush != None && !req->err)
-		r->err = -ERESTARTSYS;
+		req->err = -ERESTARTSYS;
 
-	wake_up(&r->wqueue);
+	wake_up(&req->wqueue);
 }
 
 /**
@@ -987,13 +968,6 @@ p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
 	int err, sigpending;
 	unsigned long flags;
 	struct p9_req *req;
-	struct p9_mux_rpc r;
-
-	r.err = 0;
-	r.tcall = tc;
-	r.rcall = NULL;
-	r.m = m;
-	init_waitqueue_head(&r.wqueue);
 
 	if (rc)
 		*rc = NULL;
@@ -1004,16 +978,17 @@ p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
 		clear_thread_flag(TIF_SIGPENDING);
 	}
 
-	req = p9_send_request(m, tc, p9_conn_rpc_cb, &r);
+	req = p9_send_request(m, tc, p9_conn_rpc_cb, NULL);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err);
 		return err;
 	}
 
-	err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
-	if (r.err < 0)
-		err = r.err;
+	err = wait_event_interruptible(req->wqueue, req->rcall != NULL ||
+								req->err < 0);
+	if (req->err < 0)
+		err = req->err;
 
 	if (err == -ERESTARTSYS && client->status == Connected
 							&& m->err == 0) {
@@ -1021,10 +996,11 @@ p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
 			/* wait until we get response of the flush message */
 			do {
 				clear_thread_flag(TIF_SIGPENDING);
-				err = wait_event_interruptible(r.wqueue,
-					r.rcall || r.err);
-			} while (!r.rcall && !r.err && err == -ERESTARTSYS &&
-				client->status == Connected && !m->err);
+				err = wait_event_interruptible(req->wqueue,
+					req->rcall || req->err);
+			} while (!req->rcall && !req->err &&
+					err == -ERESTARTSYS &&
+					client->status == Connected && !m->err);
 
 			err = -ERESTARTSYS;
 		}
@@ -1038,9 +1014,9 @@ p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
 	}
 
 	if (rc)
-		*rc = r.rcall;
+		*rc = req->rcall;
 	else
-		kfree(r.rcall);
+		kfree(req->rcall);
 
 	p9_mux_free_request(m, req);
 	if (err > 0)
-- 
GitLab


From 044c7768841f1ef39f951972d3c1e6537a535030 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 18:45:23 -0500
Subject: [PATCH 549/895] 9p: eliminate callback complexity

The current trans_fd rpc mechanisms use a dynamic callback mechanism which
introduces a lot of complexity which only accomodates a single special case.
This patch removes much of that complexity in favor of a simple exception
mechanism to deal with flushes.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 149 ++++++++++++++++++++--------------------------
 1 file changed, 66 insertions(+), 83 deletions(-)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index dbb057d7fa5f..180163b3e8f9 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -105,9 +105,6 @@ enum {
 	Flushed,
 };
 
-struct p9_req;
-typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a);
-
 /**
  * struct p9_req - fd mux encoding of an rpc transaction
  * @lock: protects req_list
@@ -115,8 +112,6 @@ typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a);
  * @tcall: request &p9_fcall structure
  * @rcall: response &p9_fcall structure
  * @err: error state
- * @cb: callback for when response is received
- * @cba: argument to pass to callback
  * @flush: flag to indicate RPC has been flushed
  * @req_list: list link for higher level objects to chain requests
  * @m: connection this request was issued on
@@ -130,8 +125,6 @@ struct p9_req {
 	struct p9_fcall *tcall;
 	struct p9_fcall *rcall;
 	int err;
-	p9_conn_req_callback cb;
-	void *cba;
 	int flush;
 	struct list_head req_list;
 	struct p9_conn *m;
@@ -231,6 +224,65 @@ static void p9_mux_poll_stop(struct p9_conn *m)
 	spin_unlock_irqrestore(&p9_poll_lock, flags);
 }
 
+static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req)
+{
+	p9_mux_put_tag(m, req->tag);
+	kfree(req);
+}
+
+static void p9_conn_rpc_cb(struct p9_req *req);
+
+static void p9_mux_flush_cb(struct p9_req *freq)
+{
+	int tag;
+	struct p9_conn *m = freq->m;
+	struct p9_req *req, *rreq, *rptr;
+
+	P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
+		freq->tcall, freq->rcall, freq->err,
+		freq->tcall->params.tflush.oldtag);
+
+	spin_lock(&m->lock);
+	tag = freq->tcall->params.tflush.oldtag;
+	req = NULL;
+	list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
+		if (rreq->tag == tag) {
+			req = rreq;
+			list_del(&req->req_list);
+			break;
+		}
+	}
+	spin_unlock(&m->lock);
+
+	if (req) {
+		spin_lock(&req->lock);
+		req->flush = Flushed;
+		spin_unlock(&req->lock);
+
+		p9_conn_rpc_cb(req);
+	}
+
+	kfree(freq->tcall);
+	kfree(freq->rcall);
+	p9_mux_free_request(m, freq);
+}
+
+static void p9_conn_rpc_cb(struct p9_req *req)
+{
+	P9_DPRINTK(P9_DEBUG_MUX, "req %p\n", req);
+
+	if (req->tcall->id == P9_TFLUSH) { /* flush callback */
+		P9_DPRINTK(P9_DEBUG_MUX, "flush req %p\n", req);
+		p9_mux_flush_cb(req);
+	} else {			/* normal wakeup path */
+		P9_DPRINTK(P9_DEBUG_MUX, "normal req %p\n", req);
+		if (req->flush != None && !req->err)
+			req->err = -ERESTARTSYS;
+
+		wake_up(&req->wqueue);
+	}
+}
+
 /**
  * p9_conn_cancel - cancel all pending requests with error
  * @m: mux data
@@ -259,10 +311,7 @@ void p9_conn_cancel(struct p9_conn *m, int err)
 		if (!req->err)
 			req->err = err;
 
-		if (req->cb)
-			(*req->cb) (req, req->cba);
-		else
-			kfree(req->rcall);
+		p9_conn_rpc_cb(req);
 	}
 }
 
@@ -472,12 +521,8 @@ static void p9_read_work(struct work_struct *work)
 			req->rcall = rcall;
 			process_request(m, req);
 
-			if (req->flush != Flushing) {
-				if (req->cb)
-					(*req->cb) (req, req->cba);
-				else
-					kfree(req->rcall);
-			}
+			if (req->flush != Flushing)
+				p9_conn_rpc_cb(req);
 		} else {
 			if (err >= 0 && rcall->id != P9_RFLUSH)
 				P9_DPRINTK(P9_DEBUG_ERROR,
@@ -786,14 +831,10 @@ static void p9_poll_mux(struct p9_conn *m)
  *
  * @m: mux data
  * @tc: request to be sent
- * @cb: callback function to call when response is received
- * @cba: parameter to pass to the callback function
  *
  */
 
-static struct p9_req *p9_send_request(struct p9_conn *m,
-					  struct p9_fcall *tc,
-					  p9_conn_req_callback cb, void *cba)
+static struct p9_req *p9_send_request(struct p9_conn *m, struct p9_fcall *tc)
 {
 	int n;
 	struct p9_req *req;
@@ -835,8 +876,6 @@ static struct p9_req *p9_send_request(struct p9_conn *m,
 	req->tcall = tc;
 	req->rcall = NULL;
 	req->err = 0;
-	req->cb = cb;
-	req->cba = cba;
 	req->flush = None;
 
 	spin_lock(&m->lock);
@@ -854,51 +893,6 @@ static struct p9_req *p9_send_request(struct p9_conn *m,
 	return req;
 }
 
-static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req)
-{
-	p9_mux_put_tag(m, req->tag);
-	kfree(req);
-}
-
-static void p9_mux_flush_cb(struct p9_req *freq, void *a)
-{
-	int tag;
-	struct p9_conn *m;
-	struct p9_req *req, *rreq, *rptr;
-
-	m = a;
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
-		freq->tcall, freq->rcall, freq->err,
-		freq->tcall->params.tflush.oldtag);
-
-	spin_lock(&m->lock);
-	tag = freq->tcall->params.tflush.oldtag;
-	req = NULL;
-	list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-		if (rreq->tag == tag) {
-			req = rreq;
-			list_del(&req->req_list);
-			break;
-		}
-	}
-	spin_unlock(&m->lock);
-
-	if (req) {
-		spin_lock(&req->lock);
-		req->flush = Flushed;
-		spin_unlock(&req->lock);
-
-		if (req->cb)
-			(*req->cb) (req, req->cba);
-		else
-			kfree(req->rcall);
-	}
-
-	kfree(freq->tcall);
-	kfree(freq->rcall);
-	p9_mux_free_request(m, freq);
-}
-
 static int
 p9_mux_flush_request(struct p9_conn *m, struct p9_req *req)
 {
@@ -928,8 +922,7 @@ p9_mux_flush_request(struct p9_conn *m, struct p9_req *req)
 			list_del(&rreq->req_list);
 			req->flush = Flushed;
 			spin_unlock(&m->lock);
-			if (req->cb)
-				(*req->cb) (req, req->cba);
+			p9_conn_rpc_cb(req);
 			return 0;
 		}
 	}
@@ -937,20 +930,10 @@ p9_mux_flush_request(struct p9_conn *m, struct p9_req *req)
 
 	clear_thread_flag(TIF_SIGPENDING);
 	fc = p9_create_tflush(req->tag);
-	p9_send_request(m, fc, p9_mux_flush_cb, m);
+	p9_send_request(m, fc);
 	return 1;
 }
 
-static void p9_conn_rpc_cb(struct p9_req *req, void *a)
-{
-	P9_DPRINTK(P9_DEBUG_MUX, "req %p arg %p\n", req, a);
-
-	if (req->flush != None && !req->err)
-		req->err = -ERESTARTSYS;
-
-	wake_up(&req->wqueue);
-}
-
 /**
  * p9_fd_rpc- sends 9P request and waits until a response is available.
  *	The function can be interrupted.
@@ -978,7 +961,7 @@ p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
 		clear_thread_flag(TIF_SIGPENDING);
 	}
 
-	req = p9_send_request(m, tc, p9_conn_rpc_cb, NULL);
+	req = p9_send_request(m, tc);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err);
-- 
GitLab


From fea511a644fb0fb938309c6ab286725ac31b87e2 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 18:45:23 -0500
Subject: [PATCH 550/895] 9p: move request management to client code

The virtio transport uses a simplified request management system
that I want to use for all transports.  This patch adapts and moves the
exisiting code for managing requests to the client common code.
Later patches will apply these mechanisms to the other transports.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/client.h |  77 ++++++++++++++++++++
 net/9p/client.c         | 152 ++++++++++++++++++++++++++++++++++++++++
 net/9p/trans_virtio.c   | 136 +++--------------------------------
 3 files changed, 238 insertions(+), 127 deletions(-)

diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index c35fb548e7cf..140cf1d58452 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -26,6 +26,9 @@
 #ifndef NET_9P_CLIENT_H
 #define NET_9P_CLIENT_H
 
+/* Number of requests per row */
+#define P9_ROW_MAXTAG 255
+
 /**
  * enum p9_trans_status - different states of underlying transports
  * @Connected: transport is connected and healthy
@@ -42,6 +45,62 @@ enum p9_trans_status {
 	Hung,
 };
 
+/**
+ * enum p9_req_status_t - virtio request status
+ * @REQ_STATUS_IDLE: request slot unused
+ * @REQ_STATUS_ALLOC: request has been allocated but not sent
+ * @REQ_STATUS_SENT: request sent to server
+ * @REQ_STATUS_FLSH: a flush has been sent for this request
+ * @REQ_STATUS_RCVD: response received from server
+ * @REQ_STATUS_FLSHD: request has been flushed
+ * @REQ_STATUS_ERR: request encountered an error on the client side
+ *
+ * The @REQ_STATUS_IDLE state is used to mark a request slot as unused
+ * but use is actually tracked by the idpool structure which handles tag
+ * id allocation.
+ *
+ */
+
+enum p9_req_status_t {
+	REQ_STATUS_IDLE,
+	REQ_STATUS_ALLOC,
+	REQ_STATUS_SENT,
+	REQ_STATUS_FLSH,
+	REQ_STATUS_RCVD,
+	REQ_STATUS_FLSHD,
+	REQ_STATUS_ERROR,
+};
+
+/**
+ * struct p9_req_t - request slots
+ * @status: status of this request slot
+ * @t_err: transport error
+ * @wq: wait_queue for the client to block on for this request
+ * @tc: the request fcall structure
+ * @rc: the response fcall structure
+ * @aux: transport specific data (provided for trans_fd migration)
+ *
+ * Transport use an array to track outstanding requests
+ * instead of a list.  While this may incurr overhead during initial
+ * allocation or expansion, it makes request lookup much easier as the
+ * tag id is a index into an array.  (We use tag+1 so that we can accomodate
+ * the -1 tag for the T_VERSION request).
+ * This also has the nice effect of only having to allocate wait_queues
+ * once, instead of constantly allocating and freeing them.  Its possible
+ * other resources could benefit from this scheme as well.
+ *
+ */
+
+struct p9_req_t {
+	int status;
+	int t_err;
+	wait_queue_head_t *wq;
+	struct p9_fcall *tc;
+	struct p9_fcall *rc;
+	u16 flush_tag;
+	void *aux;
+};
+
 /**
  * struct p9_client - per client instance state
  * @lock: protect @fidlist
@@ -52,9 +111,20 @@ enum p9_trans_status {
  * @conn: connection state information used by trans_fd
  * @fidpool: fid handle accounting for session
  * @fidlist: List of active fid handles
+ * @tagpool - transaction id accounting for session
+ * @reqs - 2D array of requests
+ * @max_tag - current maximum tag id allocated
  *
  * The client structure is used to keep track of various per-client
  * state that has been instantiated.
+ * In order to minimize per-transaction overhead we use a
+ * simple array to lookup requests instead of a hash table
+ * or linked list.  In order to support larger number of
+ * transactions, we make this a 2D array, allocating new rows
+ * when we need to grow the total number of the transactions.
+ *
+ * Each row is 256 requests and we'll support up to 256 rows for
+ * a total of 64k concurrent requests per session.
  *
  * Bugs: duplicated data and potentially unnecessary elements.
  */
@@ -70,6 +140,10 @@ struct p9_client {
 
 	struct p9_idpool *fidpool;
 	struct list_head fidlist;
+
+	struct p9_idpool *tagpool;
+	struct p9_req_t *reqs[P9_ROW_MAXTAG];
+	int max_tag;
 };
 
 /**
@@ -131,4 +205,7 @@ struct p9_stat *p9_client_stat(struct p9_fid *fid);
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst);
 struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset);
 
+struct p9_req_t *p9_tag_alloc(struct p9_client *, u16);
+struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
+
 #endif /* NET_9P_CLIENT_H */
diff --git a/net/9p/client.c b/net/9p/client.c
index 712d4f336adc..867031934f75 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -120,6 +120,154 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	return ret;
 }
 
+/**
+ * p9_tag_alloc - lookup/allocate a request by tag
+ * @c: client session to lookup tag within
+ * @tag: numeric id for transaction
+ *
+ * this is a simple array lookup, but will grow the
+ * request_slots as necessary to accomodate transaction
+ * ids which did not previously have a slot.
+ *
+ * this code relies on the client spinlock to manage locks, its
+ * possible we should switch to something else, but I'd rather
+ * stick with something low-overhead for the common case.
+ *
+ */
+
+struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
+{
+	unsigned long flags;
+	int row, col;
+
+	/* This looks up the original request by tag so we know which
+	 * buffer to read the data into */
+	tag++;
+
+	if (tag >= c->max_tag) {
+		spin_lock_irqsave(&c->lock, flags);
+		/* check again since original check was outside of lock */
+		while (tag >= c->max_tag) {
+			row = (tag / P9_ROW_MAXTAG);
+			c->reqs[row] = kcalloc(P9_ROW_MAXTAG,
+					sizeof(struct p9_req_t), GFP_ATOMIC);
+
+			if (!c->reqs[row]) {
+				printk(KERN_ERR "Couldn't grow tag array\n");
+				BUG();
+			}
+			for (col = 0; col < P9_ROW_MAXTAG; col++) {
+				c->reqs[row][col].status = REQ_STATUS_IDLE;
+				c->reqs[row][col].flush_tag = P9_NOTAG;
+				c->reqs[row][col].wq = kmalloc(
+					sizeof(wait_queue_head_t), GFP_ATOMIC);
+				if (!c->reqs[row][col].wq) {
+					printk(KERN_ERR
+						"Couldn't grow tag array\n");
+					BUG();
+				}
+				init_waitqueue_head(c->reqs[row][col].wq);
+			}
+			c->max_tag += P9_ROW_MAXTAG;
+		}
+		spin_unlock_irqrestore(&c->lock, flags);
+	}
+	row = tag / P9_ROW_MAXTAG;
+	col = tag % P9_ROW_MAXTAG;
+
+	c->reqs[row][col].status = REQ_STATUS_ALLOC;
+	c->reqs[row][col].flush_tag = P9_NOTAG;
+
+	return &c->reqs[row][col];
+}
+EXPORT_SYMBOL(p9_tag_alloc);
+
+/**
+ * p9_tag_lookup - lookup a request by tag
+ * @c: client session to lookup tag within
+ * @tag: numeric id for transaction
+ *
+ */
+
+struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
+{
+	int row, col;
+
+	/* This looks up the original request by tag so we know which
+	 * buffer to read the data into */
+	tag++;
+
+	BUG_ON(tag >= c->max_tag);
+
+	row = tag / P9_ROW_MAXTAG;
+	col = tag % P9_ROW_MAXTAG;
+
+	return &c->reqs[row][col];
+}
+EXPORT_SYMBOL(p9_tag_lookup);
+
+/**
+ * p9_tag_init - setup tags structure and contents
+ * @tags: tags structure from the client struct
+ *
+ * This initializes the tags structure for each client instance.
+ *
+ */
+
+static int p9_tag_init(struct p9_client *c)
+{
+	int err = 0;
+
+	c->tagpool = p9_idpool_create();
+	if (IS_ERR(c->tagpool)) {
+		err = PTR_ERR(c->tagpool);
+		c->tagpool = NULL;
+		goto error;
+	}
+
+	p9_idpool_get(c->tagpool); /* reserve tag 0 */
+
+	c->max_tag = 0;
+error:
+	return err;
+}
+
+/**
+ * p9_tag_cleanup - cleans up tags structure and reclaims resources
+ * @tags: tags structure from the client struct
+ *
+ * This frees resources associated with the tags structure
+ *
+ */
+static void p9_tag_cleanup(struct p9_client *c)
+{
+	int row, col;
+
+	/* check to insure all requests are idle */
+	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
+		for (col = 0; col < P9_ROW_MAXTAG; col++) {
+			if (c->reqs[row][col].status != REQ_STATUS_IDLE) {
+				P9_DPRINTK(P9_DEBUG_MUX,
+				  "Attempting to cleanup non-free tag %d,%d\n",
+				  row, col);
+				/* TODO: delay execution of cleanup */
+				return;
+			}
+		}
+	}
+
+	if (c->tagpool)
+		p9_idpool_destroy(c->tagpool);
+
+	/* free requests associated with tags */
+	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
+		for (col = 0; col < P9_ROW_MAXTAG; col++)
+			kfree(c->reqs[row][col].wq);
+		kfree(c->reqs[row]);
+	}
+	c->max_tag = 0;
+}
+
 static struct p9_fid *p9_fid_create(struct p9_client *clnt)
 {
 	int err;
@@ -209,6 +357,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		goto error;
 	}
 
+	p9_tag_init(clnt);
+
 	err = parse_opts(options, clnt);
 	if (err < 0)
 		goto error;
@@ -285,6 +435,8 @@ void p9_client_destroy(struct p9_client *clnt)
 	if (clnt->fidpool)
 		p9_idpool_destroy(clnt->fidpool);
 
+	p9_tag_cleanup(clnt);
+
 	kfree(clnt);
 }
 EXPORT_SYMBOL(p9_client_destroy);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 72493f04a76d..36bce45e4e44 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -1,12 +1,10 @@
 /*
- * The Guest 9p transport driver
+ * The Virtio 9p transport driver
  *
  * This is a block based transport driver based on the lguest block driver
  * code.
  *
- */
-/*
- *  Copyright (C) 2007 Eric Van Hensbergen, IBM Corporation
+ *  Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation
  *
  *  Based on virtio console driver
  *  Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
@@ -54,49 +52,6 @@ static DEFINE_MUTEX(virtio_9p_lock);
 /* global which tracks highest initialized channel */
 static int chan_index;
 
-#define P9_INIT_MAXTAG	16
-
-/**
- * enum p9_req_status_t - virtio request status
- * @REQ_STATUS_IDLE: request slot unused
- * @REQ_STATUS_SENT: request sent to server
- * @REQ_STATUS_RCVD: response received from server
- * @REQ_STATUS_FLSH: request has been flushed
- *
- * The @REQ_STATUS_IDLE state is used to mark a request slot as unused
- * but use is actually tracked by the idpool structure which handles tag
- * id allocation.
- *
- */
-
-enum p9_req_status_t {
-	REQ_STATUS_IDLE,
-	REQ_STATUS_SENT,
-	REQ_STATUS_RCVD,
-	REQ_STATUS_FLSH,
-};
-
-/**
- * struct p9_req_t - virtio request slots
- * @status: status of this request slot
- * @wq: wait_queue for the client to block on for this request
- *
- * The virtio transport uses an array to track outstanding requests
- * instead of a list.  While this may incurr overhead during initial
- * allocation or expansion, it makes request lookup much easier as the
- * tag id is a index into an array.  (We use tag+1 so that we can accomodate
- * the -1 tag for the T_VERSION request).
- * This also has the nice effect of only having to allocate wait_queues
- * once, instead of constantly allocating and freeing them.  Its possible
- * other resources could benefit from this scheme as well.
- *
- */
-
-struct p9_req_t {
-	int status;
-	wait_queue_head_t *wq;
-};
-
 /**
  * struct virtio_chan - per-instance transport information
  * @initialized: whether the channel is initialized
@@ -121,67 +76,14 @@ static struct virtio_chan {
 
 	spinlock_t lock;
 
+	struct p9_client *client;
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
 
-	struct p9_idpool *tagpool;
-	struct p9_req_t *reqs;
-	int max_tag;
-
 	/* Scatterlist: can be too big for stack. */
 	struct scatterlist sg[VIRTQUEUE_NUM];
 } channels[MAX_9P_CHAN];
 
-/**
- * p9_lookup_tag - Lookup requests by tag
- * @c: virtio channel to lookup tag within
- * @tag: numeric id for transaction
- *
- * this is a simple array lookup, but will grow the
- * request_slots as necessary to accomodate transaction
- * ids which did not previously have a slot.
- *
- * Bugs: there is currently no upper limit on request slots set
- * here, but that should be constrained by the id accounting.
- */
-
-static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag)
-{
-	/* This looks up the original request by tag so we know which
-	 * buffer to read the data into */
-	tag++;
-
-	while (tag >= c->max_tag) {
-		int old_max = c->max_tag;
-		int count;
-
-		if (c->max_tag)
-			c->max_tag *= 2;
-		else
-			c->max_tag = P9_INIT_MAXTAG;
-
-		c->reqs = krealloc(c->reqs, sizeof(struct p9_req_t)*c->max_tag,
-								GFP_ATOMIC);
-		if (!c->reqs) {
-			printk(KERN_ERR "Couldn't grow tag array\n");
-			BUG();
-		}
-		for (count = old_max; count < c->max_tag; count++) {
-			c->reqs[count].status = REQ_STATUS_IDLE;
-			c->reqs[count].wq = kmalloc(sizeof(wait_queue_head_t),
-								GFP_ATOMIC);
-			if (!c->reqs[count].wq) {
-				printk(KERN_ERR "Couldn't grow tag array\n");
-				BUG();
-			}
-			init_waitqueue_head(c->reqs[count].wq);
-		}
-	}
-
-	return &c->reqs[tag];
-}
-
-
 /* How many bytes left in this page. */
 static unsigned int rest_of_page(void *data)
 {
@@ -200,22 +102,10 @@ static unsigned int rest_of_page(void *data)
 static void p9_virtio_close(struct p9_client *client)
 {
 	struct virtio_chan *chan = client->trans;
-	int count;
-	unsigned long flags;
-
-	spin_lock_irqsave(&chan->lock, flags);
-	p9_idpool_destroy(chan->tagpool);
-	for (count = 0; count < chan->max_tag; count++)
-		kfree(chan->reqs[count].wq);
-	kfree(chan->reqs);
-	chan->max_tag = 0;
-	spin_unlock_irqrestore(&chan->lock, flags);
 
 	mutex_lock(&virtio_9p_lock);
 	chan->inuse = false;
 	mutex_unlock(&virtio_9p_lock);
-
-	client->trans = NULL;
 }
 
 /**
@@ -241,7 +131,7 @@ static void req_done(struct virtqueue *vq)
 
 	spin_lock_irqsave(&chan->lock, flags);
 	while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) {
-		req = p9_lookup_tag(chan, rc->tag);
+		req = p9_tag_lookup(chan->client, rc->tag);
 		req->status = REQ_STATUS_RCVD;
 		wake_up(req->wq);
 	}
@@ -311,13 +201,13 @@ p9_virtio_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc)
 
 	n = P9_NOTAG;
 	if (tc->id != P9_TVERSION) {
-		n = p9_idpool_get(chan->tagpool);
+		n = p9_idpool_get(c->tagpool);
 		if (n < 0)
 			return -ENOMEM;
 	}
 
 	spin_lock_irqsave(&chan->lock, flags);
-	req = p9_lookup_tag(chan, n);
+	req = p9_tag_alloc(c, n);
 	spin_unlock_irqrestore(&chan->lock, flags);
 
 	p9_set_tag(tc, n);
@@ -357,8 +247,8 @@ p9_virtio_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc)
 	}
 #endif
 
-	if (n != P9_NOTAG && p9_idpool_check(n, chan->tagpool))
-		p9_idpool_put(n, chan->tagpool);
+	if (n != P9_NOTAG && p9_idpool_check(n, c->tagpool))
+		p9_idpool_put(n, c->tagpool);
 
 	req->status = REQ_STATUS_IDLE;
 
@@ -463,16 +353,8 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 		return -ENODEV;
 	}
 
-	chan->tagpool = p9_idpool_create();
-	if (IS_ERR(chan->tagpool)) {
-		printk(KERN_ERR "9p: couldn't allocate tagpool\n");
-		return -ENOMEM;
-	}
-	p9_idpool_get(chan->tagpool); /* reserve tag 0 */
-	chan->max_tag = 0;
-	chan->reqs = NULL;
-
 	client->trans = (void *)chan;
+	chan->client = client;
 
 	return 0;
 }
-- 
GitLab


From ff683452f7bc52d7fd653cf5e67b1134555734c7 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 18:45:22 -0500
Subject: [PATCH 551/895] 9p: apply common tagpool handling to trans_fd

Simplify trans_fd by using new common client tagpool structure.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 44 ++++++++++----------------------------------
 1 file changed, 10 insertions(+), 34 deletions(-)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 180163b3e8f9..6243093934b2 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -142,7 +142,6 @@ struct p9_poll_wait {
  * @lock: protects mux_list (?)
  * @mux_list: list link for mux to manage multiple connections (?)
  * @client: reference to client instance for this connection
- * @tagpool: id accounting for transactions
  * @err: error state
  * @req_list: accounting for requests which have been sent
  * @unsent_req_list: accounting for requests that haven't been sent
@@ -165,7 +164,6 @@ struct p9_conn {
 	spinlock_t lock; /* protect lock structure */
 	struct list_head mux_list;
 	struct p9_client *client;
-	struct p9_idpool *tagpool;
 	int err;
 	struct list_head req_list;
 	struct list_head unsent_req_list;
@@ -188,23 +186,6 @@ static LIST_HEAD(p9_poll_pending_list);
 static struct workqueue_struct *p9_mux_wq;
 static struct task_struct *p9_poll_task;
 
-static u16 p9_mux_get_tag(struct p9_conn *m)
-{
-	int tag;
-
-	tag = p9_idpool_get(m->tagpool);
-	if (tag < 0)
-		return P9_NOTAG;
-	else
-		return (u16) tag;
-}
-
-static void p9_mux_put_tag(struct p9_conn *m, u16 tag)
-{
-	if (tag != P9_NOTAG && p9_idpool_check(tag, m->tagpool))
-		p9_idpool_put(tag, m->tagpool);
-}
-
 static void p9_mux_poll_stop(struct p9_conn *m)
 {
 	unsigned long flags;
@@ -226,7 +207,9 @@ static void p9_mux_poll_stop(struct p9_conn *m)
 
 static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req)
 {
-	p9_mux_put_tag(m, req->tag);
+	if (req->tag != P9_NOTAG &&
+	    p9_idpool_check(req->tag, m->client->tagpool))
+		p9_idpool_put(req->tag, m->client->tagpool);
 	kfree(req);
 }
 
@@ -745,11 +728,6 @@ static struct p9_conn *p9_conn_create(struct p9_client *client)
 	spin_lock_init(&m->lock);
 	INIT_LIST_HEAD(&m->mux_list);
 	m->client = client;
-	m->tagpool = p9_idpool_create();
-	if (IS_ERR(m->tagpool)) {
-		kfree(m);
-		return ERR_PTR(-ENOMEM);
-	}
 
 	INIT_LIST_HEAD(&m->req_list);
 	INIT_LIST_HEAD(&m->unsent_req_list);
@@ -848,14 +826,13 @@ static struct p9_req *p9_send_request(struct p9_conn *m, struct p9_fcall *tc)
 	if (!req)
 		return ERR_PTR(-ENOMEM);
 
-	if (tc->id == P9_TVERSION)
-		n = P9_NOTAG;
-	else
-		n = p9_mux_get_tag(m);
-
-	if (n < 0) {
-		kfree(req);
-		return ERR_PTR(-ENOMEM);
+	n = P9_NOTAG;
+	if (tc->id != P9_TVERSION) {
+		n = p9_idpool_get(m->client->tagpool);
+		if (n < 0) {
+			kfree(req);
+			return ERR_PTR(-ENOMEM);
+		}
 	}
 
 	p9_set_tag(tc, n);
@@ -1134,7 +1111,6 @@ static void p9_conn_destroy(struct p9_conn *m)
 	p9_conn_cancel(m, -ECONNRESET);
 
 	m->client = NULL;
-	p9_idpool_destroy(m->tagpool);
 	kfree(m);
 }
 
-- 
GitLab


From 673d62cdaac6ffbce980a349d3174b3929ceb9e5 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 18:45:22 -0500
Subject: [PATCH 552/895] 9p: apply common request code to trans_fd

Apply the now common p9_req_t structure to the fd transport.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/client.h |  10 +-
 net/9p/client.c         |  21 ++++
 net/9p/trans_fd.c       | 262 +++++++++++++++-------------------------
 net/9p/trans_virtio.c   |   5 +-
 4 files changed, 126 insertions(+), 172 deletions(-)

diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 140cf1d58452..4fecaabd17bd 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -49,11 +49,12 @@ enum p9_trans_status {
  * enum p9_req_status_t - virtio request status
  * @REQ_STATUS_IDLE: request slot unused
  * @REQ_STATUS_ALLOC: request has been allocated but not sent
+ * @REQ_STATUS_UNSENT: request waiting to be sent
  * @REQ_STATUS_SENT: request sent to server
  * @REQ_STATUS_FLSH: a flush has been sent for this request
  * @REQ_STATUS_RCVD: response received from server
  * @REQ_STATUS_FLSHD: request has been flushed
- * @REQ_STATUS_ERR: request encountered an error on the client side
+ * @REQ_STATUS_ERROR: request encountered an error on the client side
  *
  * The @REQ_STATUS_IDLE state is used to mark a request slot as unused
  * but use is actually tracked by the idpool structure which handles tag
@@ -64,6 +65,7 @@ enum p9_trans_status {
 enum p9_req_status_t {
 	REQ_STATUS_IDLE,
 	REQ_STATUS_ALLOC,
+	REQ_STATUS_UNSENT,
 	REQ_STATUS_SENT,
 	REQ_STATUS_FLSH,
 	REQ_STATUS_RCVD,
@@ -79,6 +81,8 @@ enum p9_req_status_t {
  * @tc: the request fcall structure
  * @rc: the response fcall structure
  * @aux: transport specific data (provided for trans_fd migration)
+ * @tag: tag on request (BUG: redundant)
+ * @req_list: link for higher level objects to chain requests
  *
  * Transport use an array to track outstanding requests
  * instead of a list.  While this may incurr overhead during initial
@@ -99,6 +103,9 @@ struct p9_req_t {
 	struct p9_fcall *rc;
 	u16 flush_tag;
 	void *aux;
+
+	int tag;
+	struct list_head req_list;
 };
 
 /**
@@ -207,5 +214,6 @@ struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset);
 
 struct p9_req_t *p9_tag_alloc(struct p9_client *, u16);
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
+void p9_free_req(struct p9_client *, struct p9_req_t *);
 
 #endif /* NET_9P_CLIENT_H */
diff --git a/net/9p/client.c b/net/9p/client.c
index 867031934f75..f2d07ef9e6a4 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -268,6 +268,27 @@ static void p9_tag_cleanup(struct p9_client *c)
 	c->max_tag = 0;
 }
 
+/**
+ * p9_free_req - free a request and clean-up as necessary
+ * c: client state
+ * r: request to release
+ *
+ */
+
+void p9_free_req(struct p9_client *c, struct p9_req_t *r)
+{
+	r->flush_tag = P9_NOTAG;
+	r->status = REQ_STATUS_IDLE;
+	if (r->tc->tag != P9_NOTAG && p9_idpool_check(r->tc->tag, c->tagpool))
+		p9_idpool_put(r->tc->tag, c->tagpool);
+
+	/* if this was a flush request we have to free response fcall */
+	if (r->tc->id == P9_TFLUSH) {
+		kfree(r->tc);
+		kfree(r->rc);
+	}
+}
+
 static struct p9_fid *p9_fid_create(struct p9_client *clnt)
 {
 	int err;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6243093934b2..cc9bc739e9d3 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -44,7 +44,6 @@
 
 #define P9_PORT 564
 #define MAX_SOCK_BUF (64*1024)
-#define ERREQFLUSH	1
 #define MAXPOLLWADDR	2
 
 /**
@@ -99,38 +98,6 @@ enum {
 	Wpending = 8,		/* can write */
 };
 
-enum {
-	None,
-	Flushing,
-	Flushed,
-};
-
-/**
- * struct p9_req - fd mux encoding of an rpc transaction
- * @lock: protects req_list
- * @tag: numeric tag for rpc transaction
- * @tcall: request &p9_fcall structure
- * @rcall: response &p9_fcall structure
- * @err: error state
- * @flush: flag to indicate RPC has been flushed
- * @req_list: list link for higher level objects to chain requests
- * @m: connection this request was issued on
- * @wqueue: wait queue that client is blocked on for this rpc
- *
- */
-
-struct p9_req {
-	spinlock_t lock;
-	int tag;
-	struct p9_fcall *tcall;
-	struct p9_fcall *rcall;
-	int err;
-	int flush;
-	struct list_head req_list;
-	struct p9_conn *m;
-	wait_queue_head_t wqueue;
-};
-
 struct p9_poll_wait {
 	struct p9_conn *conn;
 	wait_queue_t wait;
@@ -139,7 +106,6 @@ struct p9_poll_wait {
 
 /**
  * struct p9_conn - fd mux connection state information
- * @lock: protects mux_list (?)
  * @mux_list: list link for mux to manage multiple connections (?)
  * @client: reference to client instance for this connection
  * @err: error state
@@ -161,7 +127,6 @@ struct p9_poll_wait {
  */
 
 struct p9_conn {
-	spinlock_t lock; /* protect lock structure */
 	struct list_head mux_list;
 	struct p9_client *client;
 	int err;
@@ -205,64 +170,41 @@ static void p9_mux_poll_stop(struct p9_conn *m)
 	spin_unlock_irqrestore(&p9_poll_lock, flags);
 }
 
-static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req)
-{
-	if (req->tag != P9_NOTAG &&
-	    p9_idpool_check(req->tag, m->client->tagpool))
-		p9_idpool_put(req->tag, m->client->tagpool);
-	kfree(req);
-}
-
-static void p9_conn_rpc_cb(struct p9_req *req);
+static void p9_conn_rpc_cb(struct p9_client *, struct p9_req_t *);
 
-static void p9_mux_flush_cb(struct p9_req *freq)
+static void p9_mux_flush_cb(struct p9_client *client, struct p9_req_t *freq)
 {
-	int tag;
-	struct p9_conn *m = freq->m;
-	struct p9_req *req, *rreq, *rptr;
+	struct p9_conn *m = client->trans;
+	struct p9_req_t *req;
 
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
-		freq->tcall, freq->rcall, freq->err,
-		freq->tcall->params.tflush.oldtag);
-
-	spin_lock(&m->lock);
-	tag = freq->tcall->params.tflush.oldtag;
-	req = NULL;
-	list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-		if (rreq->tag == tag) {
-			req = rreq;
-			list_del(&req->req_list);
-			break;
-		}
-	}
-	spin_unlock(&m->lock);
+		freq->tc, freq->rc, freq->t_err,
+		freq->tc->params.tflush.oldtag);
 
+	req = p9_tag_lookup(client, freq->tc->params.tflush.oldtag);
 	if (req) {
-		spin_lock(&req->lock);
-		req->flush = Flushed;
-		spin_unlock(&req->lock);
-
-		p9_conn_rpc_cb(req);
+		req->status = REQ_STATUS_FLSHD;
+		list_del(&req->req_list);
+		p9_conn_rpc_cb(client, req);
 	}
 
-	kfree(freq->tcall);
-	kfree(freq->rcall);
-	p9_mux_free_request(m, freq);
+	p9_free_req(client, freq);
 }
 
-static void p9_conn_rpc_cb(struct p9_req *req)
+static void p9_conn_rpc_cb(struct p9_client *client, struct p9_req_t *req)
 {
 	P9_DPRINTK(P9_DEBUG_MUX, "req %p\n", req);
 
-	if (req->tcall->id == P9_TFLUSH) { /* flush callback */
+	if (req->tc->id == P9_TFLUSH) { /* flush callback */
 		P9_DPRINTK(P9_DEBUG_MUX, "flush req %p\n", req);
-		p9_mux_flush_cb(req);
+		p9_mux_flush_cb(client, req);
 	} else {			/* normal wakeup path */
 		P9_DPRINTK(P9_DEBUG_MUX, "normal req %p\n", req);
-		if (req->flush != None && !req->err)
-			req->err = -ERESTARTSYS;
+		if (!req->t_err && (req->status == REQ_STATUS_FLSHD ||
+				 req->status == REQ_STATUS_FLSH))
+			req->t_err = -ERESTARTSYS;
 
-		wake_up(&req->wqueue);
+		wake_up(req->wq);
 	}
 }
 
@@ -275,59 +217,62 @@ static void p9_conn_rpc_cb(struct p9_req *req)
 
 void p9_conn_cancel(struct p9_conn *m, int err)
 {
-	struct p9_req *req, *rtmp;
+	struct p9_req_t *req, *rtmp;
 	LIST_HEAD(cancel_list);
 
 	P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
 	m->err = err;
-	spin_lock(&m->lock);
+	spin_lock(&m->client->lock);
 	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
+		req->status = REQ_STATUS_ERROR;
+		if (!req->t_err)
+			req->t_err = err;
 		list_move(&req->req_list, &cancel_list);
 	}
 	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
+		req->status = REQ_STATUS_ERROR;
+		if (!req->t_err)
+			req->t_err = err;
 		list_move(&req->req_list, &cancel_list);
 	}
-	spin_unlock(&m->lock);
+	spin_unlock(&m->client->lock);
 
 	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
 		list_del(&req->req_list);
-		if (!req->err)
-			req->err = err;
-
-		p9_conn_rpc_cb(req);
+		p9_conn_rpc_cb(m->client, req);
 	}
 }
 
-static void process_request(struct p9_conn *m, struct p9_req *req)
+static void process_request(struct p9_conn *m, struct p9_req_t *req)
 {
 	int ecode;
 	struct p9_str *ename;
 
-	if (!req->err && req->rcall->id == P9_RERROR) {
-		ecode = req->rcall->params.rerror.errno;
-		ename = &req->rcall->params.rerror.error;
+	if (!req->t_err && req->rc->id == P9_RERROR) {
+		ecode = req->rc->params.rerror.errno;
+		ename = &req->rc->params.rerror.error;
 
 		P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len,
 								ename->str);
 
 		if (m->client->dotu)
-			req->err = -ecode;
+			req->t_err = -ecode;
 
-		if (!req->err) {
-			req->err = p9_errstr2errno(ename->str, ename->len);
+		if (!req->t_err) {
+			req->t_err = p9_errstr2errno(ename->str, ename->len);
 
 			/* string match failed */
-			if (!req->err) {
-				PRINT_FCALL_ERROR("unknown error", req->rcall);
-				req->err = -ESERVERFAULT;
+			if (!req->t_err) {
+				PRINT_FCALL_ERROR("unknown error", req->rc);
+				req->t_err = -ESERVERFAULT;
 			}
 		}
-	} else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
+	} else if (req->tc && req->rc->id != req->tc->id + 1) {
 		P9_DPRINTK(P9_DEBUG_ERROR,
 				"fcall mismatch: expected %d, got %d\n",
-				req->tcall->id + 1, req->rcall->id);
-		if (!req->err)
-			req->err = -EIO;
+				req->tc->id + 1, req->rc->id);
+		if (!req->t_err)
+			req->t_err = -EIO;
 	}
 }
 
@@ -401,7 +346,7 @@ static void p9_read_work(struct work_struct *work)
 {
 	int n, err;
 	struct p9_conn *m;
-	struct p9_req *req, *rptr, *rreq;
+	struct p9_req_t *req;
 	struct p9_fcall *rcall;
 	char *rbuf;
 
@@ -488,24 +433,19 @@ static void p9_read_work(struct work_struct *work)
 		P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m,
 							rcall->id, rcall->tag);
 
-		req = NULL;
-		spin_lock(&m->lock);
-		list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-			if (rreq->tag == rcall->tag) {
-				req = rreq;
-				if (req->flush != Flushing)
-					list_del(&req->req_list);
-				break;
-			}
-		}
-		spin_unlock(&m->lock);
+		req = p9_tag_lookup(m->client, rcall->tag);
 
 		if (req) {
-			req->rcall = rcall;
+			if (req->status != REQ_STATUS_FLSH) {
+				list_del(&req->req_list);
+				req->status = REQ_STATUS_RCVD;
+			}
+
+			req->rc = rcall;
 			process_request(m, req);
 
-			if (req->flush != Flushing)
-				p9_conn_rpc_cb(req);
+			if (req->status != REQ_STATUS_FLSH)
+				p9_conn_rpc_cb(m->client, req);
 		} else {
 			if (err >= 0 && rcall->id != P9_RFLUSH)
 				P9_DPRINTK(P9_DEBUG_ERROR,
@@ -580,7 +520,7 @@ static void p9_write_work(struct work_struct *work)
 {
 	int n, err;
 	struct p9_conn *m;
-	struct p9_req *req;
+	struct p9_req_t *req;
 
 	m = container_of(work, struct p9_conn, wq);
 
@@ -595,18 +535,16 @@ static void p9_write_work(struct work_struct *work)
 			return;
 		}
 
-		spin_lock(&m->lock);
-again:
-		req = list_entry(m->unsent_req_list.next, struct p9_req,
+		spin_lock(&m->client->lock);
+		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
 			       req_list);
+		req->status = REQ_STATUS_SENT;
 		list_move_tail(&req->req_list, &m->req_list);
-		if (req->err == ERREQFLUSH)
-			goto again;
 
-		m->wbuf = req->tcall->sdata;
-		m->wsize = req->tcall->size;
+		m->wbuf = req->tc->sdata;
+		m->wsize = req->tc->size;
 		m->wpos = 0;
-		spin_unlock(&m->lock);
+		spin_unlock(&m->client->lock);
 	}
 
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos,
@@ -725,7 +663,6 @@ static struct p9_conn *p9_conn_create(struct p9_client *client)
 	if (!m)
 		return ERR_PTR(-ENOMEM);
 
-	spin_lock_init(&m->lock);
 	INIT_LIST_HEAD(&m->mux_list);
 	m->client = client;
 
@@ -812,30 +749,27 @@ static void p9_poll_mux(struct p9_conn *m)
  *
  */
 
-static struct p9_req *p9_send_request(struct p9_conn *m, struct p9_fcall *tc)
+static struct p9_req_t *p9_send_request(struct p9_conn *m, struct p9_fcall *tc)
 {
+	int tag;
 	int n;
-	struct p9_req *req;
+	struct p9_req_t *req;
 
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
 		tc, tc->id);
 	if (m->err < 0)
 		return ERR_PTR(m->err);
 
-	req = kmalloc(sizeof(struct p9_req), GFP_KERNEL);
-	if (!req)
-		return ERR_PTR(-ENOMEM);
-
-	n = P9_NOTAG;
+	tag = P9_NOTAG;
 	if (tc->id != P9_TVERSION) {
-		n = p9_idpool_get(m->client->tagpool);
-		if (n < 0) {
-			kfree(req);
+		tag = p9_idpool_get(m->client->tagpool);
+		if (tag < 0)
 			return ERR_PTR(-ENOMEM);
-		}
 	}
 
-	p9_set_tag(tc, n);
+	p9_set_tag(tc, tag);
+
+	req = p9_tag_alloc(m->client, tag);
 
 #ifdef CONFIG_NET_9P_DEBUG
 	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
@@ -846,18 +780,15 @@ static struct p9_req *p9_send_request(struct p9_conn *m, struct p9_fcall *tc)
 	}
 #endif
 
-	spin_lock_init(&req->lock);
-	req->m = m;
-	init_waitqueue_head(&req->wqueue);
-	req->tag = n;
-	req->tcall = tc;
-	req->rcall = NULL;
-	req->err = 0;
-	req->flush = None;
+	req->tag = tag;
+	req->tc = tc;
+	req->rc = NULL;
+	req->t_err = 0;
+	req->status = REQ_STATUS_UNSENT;
 
-	spin_lock(&m->lock);
+	spin_lock(&m->client->lock);
 	list_add_tail(&req->req_list, &m->unsent_req_list);
-	spin_unlock(&m->lock);
+	spin_unlock(&m->client->lock);
 
 	if (test_and_clear_bit(Wpending, &m->wsched))
 		n = POLLOUT;
@@ -871,39 +802,36 @@ static struct p9_req *p9_send_request(struct p9_conn *m, struct p9_fcall *tc)
 }
 
 static int
-p9_mux_flush_request(struct p9_conn *m, struct p9_req *req)
+p9_mux_flush_request(struct p9_conn *m, struct p9_req_t *req)
 {
 	struct p9_fcall *fc;
-	struct p9_req *rreq, *rptr;
+	struct p9_req_t *rreq, *rptr;
 
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
 
 	/* if a response was received for a request, do nothing */
-	spin_lock(&req->lock);
-	if (req->rcall || req->err) {
-		spin_unlock(&req->lock);
+	if (req->rc || req->t_err) {
 		P9_DPRINTK(P9_DEBUG_MUX,
 			"mux %p req %p response already received\n", m, req);
 		return 0;
 	}
 
-	req->flush = Flushing;
-	spin_unlock(&req->lock);
+	req->status = REQ_STATUS_FLSH;
 
-	spin_lock(&m->lock);
+	spin_lock(&m->client->lock);
 	/* if the request is not sent yet, just remove it from the list */
 	list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) {
 		if (rreq->tag == req->tag) {
 			P9_DPRINTK(P9_DEBUG_MUX,
 			   "mux %p req %p request is not sent yet\n", m, req);
 			list_del(&rreq->req_list);
-			req->flush = Flushed;
-			spin_unlock(&m->lock);
-			p9_conn_rpc_cb(req);
+			req->status = REQ_STATUS_FLSHD;
+			spin_unlock(&m->client->lock);
+			p9_conn_rpc_cb(m->client, req);
 			return 0;
 		}
 	}
-	spin_unlock(&m->lock);
+	spin_unlock(&m->client->lock);
 
 	clear_thread_flag(TIF_SIGPENDING);
 	fc = p9_create_tflush(req->tag);
@@ -927,7 +855,7 @@ p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
 	struct p9_conn *m = p->conn;
 	int err, sigpending;
 	unsigned long flags;
-	struct p9_req *req;
+	struct p9_req_t *req;
 
 	if (rc)
 		*rc = NULL;
@@ -945,10 +873,10 @@ p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
 		return err;
 	}
 
-	err = wait_event_interruptible(req->wqueue, req->rcall != NULL ||
-								req->err < 0);
-	if (req->err < 0)
-		err = req->err;
+	err = wait_event_interruptible(*req->wq, req->rc != NULL ||
+								req->t_err < 0);
+	if (req->t_err < 0)
+		err = req->t_err;
 
 	if (err == -ERESTARTSYS && client->status == Connected
 							&& m->err == 0) {
@@ -956,9 +884,9 @@ p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
 			/* wait until we get response of the flush message */
 			do {
 				clear_thread_flag(TIF_SIGPENDING);
-				err = wait_event_interruptible(req->wqueue,
-					req->rcall || req->err);
-			} while (!req->rcall && !req->err &&
+				err = wait_event_interruptible(*req->wq,
+					req->rc || req->t_err);
+			} while (!req->rc && !req->t_err &&
 					err == -ERESTARTSYS &&
 					client->status == Connected && !m->err);
 
@@ -974,11 +902,11 @@ p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
 	}
 
 	if (rc)
-		*rc = req->rcall;
+		*rc = req->rc;
 	else
-		kfree(req->rcall);
+		kfree(req->rc);
 
-	p9_mux_free_request(m, req);
+	p9_free_req(client, req);
 	if (err > 0)
 		err = -EIO;
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 36bce45e4e44..e18de14c30d5 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -247,10 +247,7 @@ p9_virtio_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc)
 	}
 #endif
 
-	if (n != P9_NOTAG && p9_idpool_check(n, c->tagpool))
-		p9_idpool_put(n, c->tagpool);
-
-	req->status = REQ_STATUS_IDLE;
+	p9_free_req(c, req);
 
 	return 0;
 }
-- 
GitLab


From 1b0a763bdd5ed467d0e03b88e045000c749303fb Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 18:45:22 -0500
Subject: [PATCH 553/895] 9p: use the rcall structure passed in the request in
 trans_fd read_work

This patch reworks the read_work function to enable it to directly use a passed
in rcall structure.  This should help allow us to remove unnecessary copies
in the future.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 129 ++++++++++++++++++++++++----------------------
 1 file changed, 66 insertions(+), 63 deletions(-)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index cc9bc739e9d3..627e3f097fc5 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -111,7 +111,9 @@ struct p9_poll_wait {
  * @err: error state
  * @req_list: accounting for requests which have been sent
  * @unsent_req_list: accounting for requests that haven't been sent
- * @rcall: current response &p9_fcall structure
+ * @req: current request being processed (if any)
+ * @tmp_buf: temporary buffer to read in header
+ * @rsize: amount to read for current frame
  * @rpos: read position in current frame
  * @rbuf: current read buffer
  * @wpos: write position for current frame
@@ -132,7 +134,9 @@ struct p9_conn {
 	int err;
 	struct list_head req_list;
 	struct list_head unsent_req_list;
-	struct p9_fcall *rcall;
+	struct p9_req_t *req;
+	char tmp_buf[7];
+	int rsize;
 	int rpos;
 	char *rbuf;
 	int wpos;
@@ -346,34 +350,25 @@ static void p9_read_work(struct work_struct *work)
 {
 	int n, err;
 	struct p9_conn *m;
-	struct p9_req_t *req;
-	struct p9_fcall *rcall;
-	char *rbuf;
 
 	m = container_of(work, struct p9_conn, rq);
 
 	if (m->err < 0)
 		return;
 
-	rcall = NULL;
 	P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
 
-	if (!m->rcall) {
-		m->rcall =
-		    kmalloc(sizeof(struct p9_fcall) + m->client->msize,
-								GFP_KERNEL);
-		if (!m->rcall) {
-			err = -ENOMEM;
-			goto error;
-		}
-
-		m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall);
+	if (!m->rbuf) {
+		m->rbuf = m->tmp_buf;
 		m->rpos = 0;
+		m->rsize = 7; /* start by reading header */
 	}
 
 	clear_bit(Rpending, &m->wsched);
+	P9_DPRINTK(P9_DEBUG_MUX, "read mux %p pos %d size: %d = %d\n", m,
+					m->rpos, m->rsize, m->rsize-m->rpos);
 	err = p9_fd_read(m->client, m->rbuf + m->rpos,
-						m->client->msize - m->rpos);
+						m->rsize - m->rpos);
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err);
 	if (err == -EAGAIN) {
 		clear_bit(Rworksched, &m->wsched);
@@ -384,8 +379,12 @@ static void p9_read_work(struct work_struct *work)
 		goto error;
 
 	m->rpos += err;
-	while (m->rpos > 4) {
-		n = le32_to_cpu(*(__le32 *) m->rbuf);
+
+	if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
+		u16 tag;
+		P9_DPRINTK(P9_DEBUG_MUX, "got new header\n");
+
+		n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
 		if (n >= m->client->msize) {
 			P9_DPRINTK(P9_DEBUG_ERROR,
 				"requested packet size too big: %d\n", n);
@@ -393,66 +392,71 @@ static void p9_read_work(struct work_struct *work)
 			goto error;
 		}
 
-		if (m->rpos < n)
-			break;
+		tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
+		P9_DPRINTK(P9_DEBUG_MUX, "mux %p pkt: size: %d bytes tag: %d\n",
+								 m, n, tag);
 
-		err =
-		    p9_deserialize_fcall(m->rbuf, n, m->rcall, m->client->dotu);
-		if (err < 0)
+		m->req = p9_tag_lookup(m->client, tag);
+		if (!m->req) {
+			P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
+								 tag);
+			err = -EIO;
 			goto error;
+		}
+
+		if (m->req->rc == NULL) {
+			m->req->rc = kmalloc(sizeof(struct p9_fcall) +
+						m->client->msize, GFP_KERNEL);
+			if (!m->req->rc) {
+				m->req = NULL;
+				err = -ENOMEM;
+				goto error;
+			}
+		}
+		m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
+		memcpy(m->rbuf, m->tmp_buf, m->rsize);
+		m->rsize = n;
+	}
+
+	/* not an else because some packets (like clunk) have no payload */
+	if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
+		P9_DPRINTK(P9_DEBUG_MUX, "got new packet\n");
+		m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
+		err = p9_deserialize_fcall(m->rbuf, m->rsize, m->req->rc,
+							m->client->dotu);
+		if (err < 0) {
+			m->req = NULL;
+			goto error;
+		}
 
 #ifdef CONFIG_NET_9P_DEBUG
 		if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
 			char buf[150];
 
-			p9_printfcall(buf, sizeof(buf), m->rcall,
+			p9_printfcall(buf, sizeof(buf), m->req->rc,
 				m->client->dotu);
 			printk(KERN_NOTICE ">>> %p %s\n", m, buf);
 		}
 #endif
 
-		rcall = m->rcall;
-		rbuf = m->rbuf;
-		if (m->rpos > n) {
-			m->rcall = kmalloc(sizeof(struct p9_fcall) +
-						m->client->msize, GFP_KERNEL);
-			if (!m->rcall) {
-				err = -ENOMEM;
-				goto error;
-			}
+		P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m,
+					m->req->rc->id, m->req->rc->tag);
 
-			m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall);
-			memmove(m->rbuf, rbuf + n, m->rpos - n);
-			m->rpos -= n;
-		} else {
-			m->rcall = NULL;
-			m->rbuf = NULL;
-			m->rpos = 0;
-		}
+		m->rbuf = NULL;
+		m->rpos = 0;
+		m->rsize = 0;
 
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m,
-							rcall->id, rcall->tag);
+		if (m->req->status != REQ_STATUS_FLSH) {
+			list_del(&m->req->req_list);
+			m->req->status = REQ_STATUS_RCVD;
+		}
 
-		req = p9_tag_lookup(m->client, rcall->tag);
+		process_request(m, m->req);
 
-		if (req) {
-			if (req->status != REQ_STATUS_FLSH) {
-				list_del(&req->req_list);
-				req->status = REQ_STATUS_RCVD;
-			}
+		if (m->req->status != REQ_STATUS_FLSH)
+			p9_conn_rpc_cb(m->client, m->req);
 
-			req->rc = rcall;
-			process_request(m, req);
-
-			if (req->status != REQ_STATUS_FLSH)
-				p9_conn_rpc_cb(m->client, req);
-		} else {
-			if (err >= 0 && rcall->id != P9_RFLUSH)
-				P9_DPRINTK(P9_DEBUG_ERROR,
-				  "unexpected response mux %p id %d tag %d\n",
-				  m, rcall->id, rcall->tag);
-			kfree(rcall);
-		}
+		m->req = NULL;
 	}
 
 	if (!list_empty(&m->req_list)) {
@@ -470,7 +474,6 @@ static void p9_read_work(struct work_struct *work)
 		clear_bit(Rworksched, &m->wsched);
 
 	return;
-
 error:
 	p9_conn_cancel(m, err);
 	clear_bit(Rworksched, &m->wsched);
-- 
GitLab


From 91b8534fa8f5e01f249b1bf8df0a2540053549ad Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 18:45:21 -0500
Subject: [PATCH 554/895] 9p: make rpc code common and rework flush code

This code moves the rpc function to the common client base,
reorganizes the flush code to be more simple and stable, and
makes the necessary adjustments to the underlying transports
to adapt to the new structure.

This reduces the overall amount of code duplication between the
transports and should make adding new transports more straightforward.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/client.h    |   3 +-
 include/net/9p/transport.h |   8 +-
 net/9p/client.c            | 265 ++++++++++++++++++++++++++++++++++--
 net/9p/trans_fd.c          | 268 +++++--------------------------------
 net/9p/trans_virtio.c      |  85 ++++--------
 5 files changed, 316 insertions(+), 313 deletions(-)

diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 4fecaabd17bd..6a71d9067818 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -212,8 +212,7 @@ struct p9_stat *p9_client_stat(struct p9_fid *fid);
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst);
 struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset);
 
-struct p9_req_t *p9_tag_alloc(struct p9_client *, u16);
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
-void p9_free_req(struct p9_client *, struct p9_req_t *);
+void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
 
 #endif /* NET_9P_CLIENT_H */
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 3e0f2f6beba2..6d5886efb102 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -33,8 +33,8 @@
  * @maxsize: transport provided maximum packet size
  * @def: set if this transport should be considered the default
  * @create: member function to create a new connection on this transport
- * @close: member function to disconnect and close the transport
- * @rpc: member function to issue a request to the transport
+ * @request: member function to issue a request to the transport
+ * @cancel: member function to cancel a request (if it hasn't been sent)
  *
  * This is the basic API for a transport module which is registered by the
  * transport module with the 9P core network module and used by the client
@@ -51,8 +51,8 @@ struct p9_trans_module {
 	struct module *owner;
 	int (*create)(struct p9_client *, const char *, char *);
 	void (*close) (struct p9_client *);
-	int (*rpc) (struct p9_client *t, struct p9_fcall *tc,
-							struct p9_fcall **rc);
+	int (*request) (struct p9_client *, struct p9_req_t *req);
+	int (*cancel) (struct p9_client *, struct p9_req_t *req);
 };
 
 void v9fs_register_trans(struct p9_trans_module *m);
diff --git a/net/9p/client.c b/net/9p/client.c
index f2d07ef9e6a4..29934febecdb 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -55,6 +55,9 @@ static const match_table_t tokens = {
 	{Opt_err, NULL},
 };
 
+static int
+p9_client_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc);
+
 /**
  * v9fs_parse_options - parse mount options into session structure
  * @options: options string passed from mount
@@ -268,6 +271,36 @@ static void p9_tag_cleanup(struct p9_client *c)
 	c->max_tag = 0;
 }
 
+/**
+ * p9_client_flush - flush (cancel) a request
+ * c: client state
+ * req: request to cancel
+ *
+ * This sents a flush for a particular requests and links
+ * the flush request to the original request.  The current
+ * code only supports a single flush request although the protocol
+ * allows for multiple flush requests to be sent for a single request.
+ *
+ */
+
+static int p9_client_flush(struct p9_client *c, struct p9_req_t *req)
+{
+	struct p9_fcall *tc, *rc = NULL;
+	int err;
+
+	P9_DPRINTK(P9_DEBUG_9P, "client %p tag %d\n", c, req->tc->tag);
+
+	tc = p9_create_tflush(req->tc->tag);
+	if (IS_ERR(tc))
+		return PTR_ERR(tc);
+
+	err = p9_client_rpc(c, tc, &rc);
+
+	/* we don't free anything here because RPC isn't complete */
+
+	return err;
+}
+
 /**
  * p9_free_req - free a request and clean-up as necessary
  * c: client state
@@ -289,6 +322,224 @@ void p9_free_req(struct p9_client *c, struct p9_req_t *r)
 	}
 }
 
+/**
+ * p9_client_cb - call back from transport to client
+ * c: client state
+ * req: request received
+ *
+ */
+void p9_client_cb(struct p9_client *c, struct p9_req_t *req)
+{
+	struct p9_req_t *other_req;
+	unsigned long flags;
+
+	P9_DPRINTK(P9_DEBUG_MUX, ": %d\n", req->tc->tag);
+
+	if (req->status == REQ_STATUS_ERROR)
+		wake_up(req->wq);
+
+	if (req->tc->id == P9_TFLUSH) { /* flush receive path */
+		P9_DPRINTK(P9_DEBUG_MUX, "flush: %d\n", req->tc->tag);
+		spin_lock_irqsave(&c->lock, flags);
+		other_req = p9_tag_lookup(c, req->tc->params.tflush.oldtag);
+		if (other_req->flush_tag != req->tc->tag) /* stale flush */
+			spin_unlock_irqrestore(&c->lock, flags);
+		else {
+			BUG_ON(other_req->status != REQ_STATUS_FLSH);
+			other_req->status = REQ_STATUS_FLSHD;
+			spin_unlock_irqrestore(&c->lock, flags);
+			wake_up(other_req->wq);
+		}
+		p9_free_req(c, req);
+	} else { 				/* normal receive path */
+		P9_DPRINTK(P9_DEBUG_MUX, "normal: %d\n", req->tc->tag);
+		spin_lock_irqsave(&c->lock, flags);
+		if (req->status != REQ_STATUS_FLSHD)
+			req->status = REQ_STATUS_RCVD;
+		req->flush_tag = P9_NOTAG;
+		spin_unlock_irqrestore(&c->lock, flags);
+		wake_up(req->wq);
+		P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
+	}
+}
+EXPORT_SYMBOL(p9_client_cb);
+
+/**
+ * p9_client_rpc - issue a request and wait for a response
+ * @c: client session
+ * @tc: &p9_fcall request to transmit
+ * @rc: &p9_fcall to put reponse into
+ *
+ * Returns 0 on success, error code on failure
+ */
+
+static int
+p9_client_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc)
+{
+	int tag, err, size;
+	char *rdata;
+	struct p9_req_t *req;
+	unsigned long flags;
+	int sigpending;
+	int flushed = 0;
+
+	P9_DPRINTK(P9_DEBUG_9P, "client %p tc %p rc %p\n", c, tc, rc);
+
+	if (c->status != Connected)
+		return -EIO;
+
+	if (signal_pending(current)) {
+		sigpending = 1;
+		clear_thread_flag(TIF_SIGPENDING);
+	} else
+		sigpending = 0;
+
+	tag = P9_NOTAG;
+	if (tc->id != P9_TVERSION) {
+		tag = p9_idpool_get(c->tagpool);
+		if (tag < 0)
+			return -ENOMEM;
+	}
+
+	req = p9_tag_alloc(c, tag);
+
+	/* if this is a flush request, backlink flush request now to
+	 * avoid race conditions later. */
+	if (tc->id == P9_TFLUSH) {
+		struct p9_req_t *other_req =
+				p9_tag_lookup(c, tc->params.tflush.oldtag);
+		if (other_req->status == REQ_STATUS_FLSH)
+			other_req->flush_tag = tag;
+	}
+
+	p9_set_tag(tc, tag);
+
+	/*
+	 * if client passed in a pre-allocated response fcall struct
+	 * then we just use that, otherwise we allocate one.
+	 */
+
+	if (rc == NULL)
+		req->rc = NULL;
+	else
+		req->rc = *rc;
+	if (req->rc == NULL) {
+		req->rc = kmalloc(sizeof(struct p9_fcall) + c->msize,
+								GFP_KERNEL);
+		if (!req->rc) {
+			err = -ENOMEM;
+			p9_idpool_put(tag, c->tagpool);
+			p9_free_req(c, req);
+			goto reterr;
+		}
+		*rc = req->rc;
+	}
+
+	rdata = (char *)req->rc+sizeof(struct p9_fcall);
+
+	req->tc = tc;
+	P9_DPRINTK(P9_DEBUG_9P, "request: tc: %p rc: %p\n", req->tc, req->rc);
+
+	err = c->trans_mod->request(c, req);
+	if (err < 0) {
+		c->status = Disconnected;
+		goto reterr;
+	}
+
+	/* if it was a flush we just transmitted, return our tag */
+	if (tc->id == P9_TFLUSH)
+		return 0;
+again:
+	P9_DPRINTK(P9_DEBUG_9P, "wait %p tag: %d\n", req->wq, tag);
+	err = wait_event_interruptible(*req->wq,
+						req->status >= REQ_STATUS_RCVD);
+	P9_DPRINTK(P9_DEBUG_9P, "wait %p tag: %d returned %d (flushed=%d)\n",
+						req->wq, tag, err, flushed);
+
+	if (req->status == REQ_STATUS_ERROR) {
+		P9_DPRINTK(P9_DEBUG_9P, "req_status error %d\n", req->t_err);
+		err = req->t_err;
+	} else if (err == -ERESTARTSYS && flushed) {
+		P9_DPRINTK(P9_DEBUG_9P, "flushed - going again\n");
+		goto again;
+	} else if (req->status == REQ_STATUS_FLSHD) {
+		P9_DPRINTK(P9_DEBUG_9P, "flushed - erestartsys\n");
+		err = -ERESTARTSYS;
+	}
+
+	if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) {
+		P9_DPRINTK(P9_DEBUG_9P, "flushing\n");
+		spin_lock_irqsave(&c->lock, flags);
+		if (req->status == REQ_STATUS_SENT)
+			req->status = REQ_STATUS_FLSH;
+		spin_unlock_irqrestore(&c->lock, flags);
+		sigpending = 1;
+		flushed = 1;
+		clear_thread_flag(TIF_SIGPENDING);
+
+		if (c->trans_mod->cancel(c, req)) {
+			err = p9_client_flush(c, req);
+			if (err == 0)
+				goto again;
+		}
+	}
+
+	if (sigpending) {
+		spin_lock_irqsave(&current->sighand->siglock, flags);
+		recalc_sigpending();
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	}
+
+	if (err < 0)
+		goto reterr;
+
+	size = le32_to_cpu(*(__le32 *) rdata);
+
+	err = p9_deserialize_fcall(rdata, size, req->rc, c->dotu);
+	if (err < 0) {
+		P9_DPRINTK(P9_DEBUG_9P,
+			"9p debug: client rpc deserialize returned %d\n", err);
+		goto reterr;
+	}
+
+#ifdef CONFIG_NET_9P_DEBUG
+	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
+		char buf[150];
+
+		p9_printfcall(buf, sizeof(buf), req->rc, c->dotu);
+		printk(KERN_NOTICE ">>> %p %s\n", c, buf);
+	}
+#endif
+
+	if (req->rc->id == P9_RERROR) {
+		int ecode = req->rc->params.rerror.errno;
+		struct p9_str *ename = &req->rc->params.rerror.error;
+
+		P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len,
+								ename->str);
+
+		if (c->dotu)
+			err = -ecode;
+
+		if (!err) {
+			err = p9_errstr2errno(ename->str, ename->len);
+
+			/* string match failed */
+			if (!err) {
+				PRINT_FCALL_ERROR("unknown error", req->rc);
+				err = -ESERVERFAULT;
+			}
+		}
+	} else
+		err = 0;
+
+reterr:
+	p9_free_req(c, req);
+
+	P9_DPRINTK(P9_DEBUG_9P, "returning %d\n", err);
+	return err;
+}
+
 static struct p9_fid *p9_fid_create(struct p9_client *clnt)
 {
 	int err;
@@ -339,20 +590,6 @@ static void p9_fid_destroy(struct p9_fid *fid)
 	kfree(fid);
 }
 
-/**
- * p9_client_rpc - sends 9P request and waits until a response is available.
- *      The function can be interrupted.
- * @c: client data
- * @tc: request to be sent
- * @rc: pointer where a pointer to the response is stored
- */
-int
-p9_client_rpc(struct p9_client *c, struct p9_fcall *tc,
-	struct p9_fcall **rc)
-{
-	return c->trans_mod->rpc(c, tc, rc);
-}
-
 struct p9_client *p9_client_create(const char *dev_name, char *options)
 {
 	int err, n;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 627e3f097fc5..6bfc013f8b6f 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -174,44 +174,6 @@ static void p9_mux_poll_stop(struct p9_conn *m)
 	spin_unlock_irqrestore(&p9_poll_lock, flags);
 }
 
-static void p9_conn_rpc_cb(struct p9_client *, struct p9_req_t *);
-
-static void p9_mux_flush_cb(struct p9_client *client, struct p9_req_t *freq)
-{
-	struct p9_conn *m = client->trans;
-	struct p9_req_t *req;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
-		freq->tc, freq->rc, freq->t_err,
-		freq->tc->params.tflush.oldtag);
-
-	req = p9_tag_lookup(client, freq->tc->params.tflush.oldtag);
-	if (req) {
-		req->status = REQ_STATUS_FLSHD;
-		list_del(&req->req_list);
-		p9_conn_rpc_cb(client, req);
-	}
-
-	p9_free_req(client, freq);
-}
-
-static void p9_conn_rpc_cb(struct p9_client *client, struct p9_req_t *req)
-{
-	P9_DPRINTK(P9_DEBUG_MUX, "req %p\n", req);
-
-	if (req->tc->id == P9_TFLUSH) { /* flush callback */
-		P9_DPRINTK(P9_DEBUG_MUX, "flush req %p\n", req);
-		p9_mux_flush_cb(client, req);
-	} else {			/* normal wakeup path */
-		P9_DPRINTK(P9_DEBUG_MUX, "normal req %p\n", req);
-		if (!req->t_err && (req->status == REQ_STATUS_FLSHD ||
-				 req->status == REQ_STATUS_FLSH))
-			req->t_err = -ERESTARTSYS;
-
-		wake_up(req->wq);
-	}
-}
-
 /**
  * p9_conn_cancel - cancel all pending requests with error
  * @m: mux data
@@ -222,11 +184,12 @@ static void p9_conn_rpc_cb(struct p9_client *client, struct p9_req_t *req)
 void p9_conn_cancel(struct p9_conn *m, int err)
 {
 	struct p9_req_t *req, *rtmp;
+	unsigned long flags;
 	LIST_HEAD(cancel_list);
 
 	P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
 	m->err = err;
-	spin_lock(&m->client->lock);
+	spin_lock_irqsave(&m->client->lock, flags);
 	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
 		req->status = REQ_STATUS_ERROR;
 		if (!req->t_err)
@@ -239,44 +202,12 @@ void p9_conn_cancel(struct p9_conn *m, int err)
 			req->t_err = err;
 		list_move(&req->req_list, &cancel_list);
 	}
-	spin_unlock(&m->client->lock);
+	spin_unlock_irqrestore(&m->client->lock, flags);
 
 	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
 		list_del(&req->req_list);
-		p9_conn_rpc_cb(m->client, req);
-	}
-}
-
-static void process_request(struct p9_conn *m, struct p9_req_t *req)
-{
-	int ecode;
-	struct p9_str *ename;
-
-	if (!req->t_err && req->rc->id == P9_RERROR) {
-		ecode = req->rc->params.rerror.errno;
-		ename = &req->rc->params.rerror.error;
-
-		P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len,
-								ename->str);
-
-		if (m->client->dotu)
-			req->t_err = -ecode;
-
-		if (!req->t_err) {
-			req->t_err = p9_errstr2errno(ename->str, ename->len);
-
-			/* string match failed */
-			if (!req->t_err) {
-				PRINT_FCALL_ERROR("unknown error", req->rc);
-				req->t_err = -ESERVERFAULT;
-			}
-		}
-	} else if (req->tc && req->rc->id != req->tc->id + 1) {
-		P9_DPRINTK(P9_DEBUG_ERROR,
-				"fcall mismatch: expected %d, got %d\n",
-				req->tc->id + 1, req->rc->id);
-		if (!req->t_err)
-			req->t_err = -EIO;
+		P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req);
+		p9_client_cb(m->client, req);
 	}
 }
 
@@ -421,41 +352,13 @@ static void p9_read_work(struct work_struct *work)
 	/* not an else because some packets (like clunk) have no payload */
 	if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
 		P9_DPRINTK(P9_DEBUG_MUX, "got new packet\n");
-		m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
-		err = p9_deserialize_fcall(m->rbuf, m->rsize, m->req->rc,
-							m->client->dotu);
-		if (err < 0) {
-			m->req = NULL;
-			goto error;
-		}
-
-#ifdef CONFIG_NET_9P_DEBUG
-		if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
-			char buf[150];
-
-			p9_printfcall(buf, sizeof(buf), m->req->rc,
-				m->client->dotu);
-			printk(KERN_NOTICE ">>> %p %s\n", m, buf);
-		}
-#endif
 
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m,
-					m->req->rc->id, m->req->rc->tag);
+		list_del(&m->req->req_list);
+		p9_client_cb(m->client, m->req);
 
 		m->rbuf = NULL;
 		m->rpos = 0;
 		m->rsize = 0;
-
-		if (m->req->status != REQ_STATUS_FLSH) {
-			list_del(&m->req->req_list);
-			m->req->status = REQ_STATUS_RCVD;
-		}
-
-		process_request(m, m->req);
-
-		if (m->req->status != REQ_STATUS_FLSH)
-			p9_conn_rpc_cb(m->client, m->req);
-
 		m->req = NULL;
 	}
 
@@ -741,57 +644,41 @@ static void p9_poll_mux(struct p9_conn *m)
 }
 
 /**
- * p9_send_request - send 9P request
+ * p9_fd_request - send 9P request
  * The function can sleep until the request is scheduled for sending.
  * The function can be interrupted. Return from the function is not
- * a guarantee that the request is sent successfully. Can return errors
- * that can be retrieved by PTR_ERR macros.
+ * a guarantee that the request is sent successfully.
  *
- * @m: mux data
- * @tc: request to be sent
+ * @client: client instance
+ * @req: request to be sent
  *
  */
 
-static struct p9_req_t *p9_send_request(struct p9_conn *m, struct p9_fcall *tc)
+static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 {
-	int tag;
 	int n;
-	struct p9_req_t *req;
+	struct p9_trans_fd *ts = client->trans;
+	struct p9_conn *m = ts->conn;
 
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
-		tc, tc->id);
+		req->tc, req->tc->id);
 	if (m->err < 0)
-		return ERR_PTR(m->err);
-
-	tag = P9_NOTAG;
-	if (tc->id != P9_TVERSION) {
-		tag = p9_idpool_get(m->client->tagpool);
-		if (tag < 0)
-			return ERR_PTR(-ENOMEM);
-	}
-
-	p9_set_tag(tc, tag);
-
-	req = p9_tag_alloc(m->client, tag);
+		return m->err;
 
 #ifdef CONFIG_NET_9P_DEBUG
 	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
 		char buf[150];
 
-		p9_printfcall(buf, sizeof(buf), tc, m->client->dotu);
+		p9_printfcall(buf, sizeof(buf), req->tc, client->dotu);
 		printk(KERN_NOTICE "<<< %p %s\n", m, buf);
 	}
 #endif
 
-	req->tag = tag;
-	req->tc = tc;
-	req->rc = NULL;
-	req->t_err = 0;
 	req->status = REQ_STATUS_UNSENT;
 
-	spin_lock(&m->client->lock);
+	spin_lock(&client->lock);
 	list_add_tail(&req->req_list, &m->unsent_req_list);
-	spin_unlock(&m->client->lock);
+	spin_unlock(&client->lock);
 
 	if (test_and_clear_bit(Wpending, &m->wsched))
 		n = POLLOUT;
@@ -801,17 +688,20 @@ static struct p9_req_t *p9_send_request(struct p9_conn *m, struct p9_fcall *tc)
 	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
 		queue_work(p9_mux_wq, &m->wq);
 
-	return req;
+	return 0;
 }
 
-static int
-p9_mux_flush_request(struct p9_conn *m, struct p9_req_t *req)
+static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 {
-	struct p9_fcall *fc;
-	struct p9_req_t *rreq, *rptr;
+	struct p9_trans_fd *ts = client->trans;
+	struct p9_conn *m = ts->conn;
 
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
 
+	spin_lock(&client->lock);
+	list_del(&req->req_list);
+	spin_unlock(&client->lock);
+
 	/* if a response was received for a request, do nothing */
 	if (req->rc || req->t_err) {
 		P9_DPRINTK(P9_DEBUG_MUX,
@@ -819,103 +709,14 @@ p9_mux_flush_request(struct p9_conn *m, struct p9_req_t *req)
 		return 0;
 	}
 
-	req->status = REQ_STATUS_FLSH;
-
-	spin_lock(&m->client->lock);
-	/* if the request is not sent yet, just remove it from the list */
-	list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) {
-		if (rreq->tag == req->tag) {
-			P9_DPRINTK(P9_DEBUG_MUX,
-			   "mux %p req %p request is not sent yet\n", m, req);
-			list_del(&rreq->req_list);
-			req->status = REQ_STATUS_FLSHD;
-			spin_unlock(&m->client->lock);
-			p9_conn_rpc_cb(m->client, req);
-			return 0;
-		}
+	if (req->status == REQ_STATUS_UNSENT) {
+		req->status = REQ_STATUS_FLSHD;
+		return 0;
 	}
-	spin_unlock(&m->client->lock);
 
-	clear_thread_flag(TIF_SIGPENDING);
-	fc = p9_create_tflush(req->tag);
-	p9_send_request(m, fc);
 	return 1;
 }
 
-/**
- * p9_fd_rpc- sends 9P request and waits until a response is available.
- *	The function can be interrupted.
- * @client: client instance
- * @tc: request to be sent
- * @rc: pointer where a pointer to the response is stored
- *
- */
-
-int
-p9_fd_rpc(struct p9_client *client, struct p9_fcall *tc, struct p9_fcall **rc)
-{
-	struct p9_trans_fd *p = client->trans;
-	struct p9_conn *m = p->conn;
-	int err, sigpending;
-	unsigned long flags;
-	struct p9_req_t *req;
-
-	if (rc)
-		*rc = NULL;
-
-	sigpending = 0;
-	if (signal_pending(current)) {
-		sigpending = 1;
-		clear_thread_flag(TIF_SIGPENDING);
-	}
-
-	req = p9_send_request(m, tc);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err);
-		return err;
-	}
-
-	err = wait_event_interruptible(*req->wq, req->rc != NULL ||
-								req->t_err < 0);
-	if (req->t_err < 0)
-		err = req->t_err;
-
-	if (err == -ERESTARTSYS && client->status == Connected
-							&& m->err == 0) {
-		if (p9_mux_flush_request(m, req)) {
-			/* wait until we get response of the flush message */
-			do {
-				clear_thread_flag(TIF_SIGPENDING);
-				err = wait_event_interruptible(*req->wq,
-					req->rc || req->t_err);
-			} while (!req->rc && !req->t_err &&
-					err == -ERESTARTSYS &&
-					client->status == Connected && !m->err);
-
-			err = -ERESTARTSYS;
-		}
-		sigpending = 1;
-	}
-
-	if (sigpending) {
-		spin_lock_irqsave(&current->sighand->siglock, flags);
-		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
-	}
-
-	if (rc)
-		*rc = req->rc;
-	else
-		kfree(req->rc);
-
-	p9_free_req(client, req);
-	if (err > 0)
-		err = -EIO;
-
-	return err;
-}
-
 /**
  * parse_options - parse mount options into session structure
  * @options: options string passed from mount
@@ -1243,7 +1044,8 @@ static struct p9_trans_module p9_tcp_trans = {
 	.def = 1,
 	.create = p9_fd_create_tcp,
 	.close = p9_fd_close,
-	.rpc = p9_fd_rpc,
+	.request = p9_fd_request,
+	.cancel = p9_fd_cancel,
 	.owner = THIS_MODULE,
 };
 
@@ -1253,7 +1055,8 @@ static struct p9_trans_module p9_unix_trans = {
 	.def = 0,
 	.create = p9_fd_create_unix,
 	.close = p9_fd_close,
-	.rpc = p9_fd_rpc,
+	.request = p9_fd_request,
+	.cancel = p9_fd_cancel,
 	.owner = THIS_MODULE,
 };
 
@@ -1263,7 +1066,8 @@ static struct p9_trans_module p9_fd_trans = {
 	.def = 0,
 	.create = p9_fd_create,
 	.close = p9_fd_close,
-	.rpc = p9_fd_rpc,
+	.request = p9_fd_request,
+	.cancel = p9_fd_cancel,
 	.owner = THIS_MODULE,
 };
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e18de14c30d5..2d7781ec663b 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -126,17 +126,16 @@ static void req_done(struct virtqueue *vq)
 	struct virtio_chan *chan = vq->vdev->priv;
 	struct p9_fcall *rc;
 	unsigned int len;
-	unsigned long flags;
 	struct p9_req_t *req;
 
-	spin_lock_irqsave(&chan->lock, flags);
+	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
+
 	while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) {
+		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
+		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
 		req = p9_tag_lookup(chan->client, rc->tag);
-		req->status = REQ_STATUS_RCVD;
-		wake_up(req->wq);
+		p9_client_cb(chan->client, req);
 	}
-	/* In case queue is stopped waiting for more buffers. */
-	spin_unlock_irqrestore(&chan->lock, flags);
 }
 
 /**
@@ -173,8 +172,14 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
 	return index-start;
 }
 
+/* We don't currently allow canceling of virtio requests */
+static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
+{
+	return 1;
+}
+
 /**
- * p9_virtio_rpc - issue a request and wait for a response
+ * p9_virtio_request - issue a request
  * @t: transport state
  * @tc: &p9_fcall request to transmit
  * @rc: &p9_fcall to put reponse into
@@ -182,44 +187,22 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
  */
 
 static int
-p9_virtio_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc)
+p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
 	int in, out;
-	int n, err, size;
-	struct virtio_chan *chan = c->trans;
-	char *rdata;
-	struct p9_req_t *req;
-	unsigned long flags;
-
-	if (*rc == NULL) {
-		*rc = kmalloc(sizeof(struct p9_fcall) + c->msize, GFP_KERNEL);
-		if (!*rc)
-			return -ENOMEM;
-	}
-
-	rdata = (char *)*rc+sizeof(struct p9_fcall);
-
-	n = P9_NOTAG;
-	if (tc->id != P9_TVERSION) {
-		n = p9_idpool_get(c->tagpool);
-		if (n < 0)
-			return -ENOMEM;
-	}
-
-	spin_lock_irqsave(&chan->lock, flags);
-	req = p9_tag_alloc(c, n);
-	spin_unlock_irqrestore(&chan->lock, flags);
-
-	p9_set_tag(tc, n);
+	struct virtio_chan *chan = client->trans;
+	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc tag %d\n", n);
+	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
-	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, tc->sdata, tc->size);
-	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, c->msize);
+	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
+								req->tc->size);
+	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
+								client->msize);
 
 	req->status = REQ_STATUS_SENT;
 
-	if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, tc)) {
+	if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc)) {
 		P9_DPRINTK(P9_DEBUG_TRANS,
 			"9p debug: virtio rpc add_buf returned failure");
 		return -EIO;
@@ -227,28 +210,7 @@ p9_virtio_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc)
 
 	chan->vq->vq_ops->kick(chan->vq);
 
-	wait_event(*req->wq, req->status == REQ_STATUS_RCVD);
-
-	size = le32_to_cpu(*(__le32 *) rdata);
-
-	err = p9_deserialize_fcall(rdata, size, *rc, c->dotu);
-	if (err < 0) {
-		P9_DPRINTK(P9_DEBUG_TRANS,
-			"9p debug: virtio rpc deserialize returned %d\n", err);
-		return err;
-	}
-
-#ifdef CONFIG_NET_9P_DEBUG
-	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
-		char buf[150];
-
-		p9_printfcall(buf, sizeof(buf), *rc, c->dotu);
-		printk(KERN_NOTICE ">>> %p %s\n", c, buf);
-	}
-#endif
-
-	p9_free_req(c, req);
-
+	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
 	return 0;
 }
 
@@ -394,7 +356,8 @@ static struct p9_trans_module p9_virtio_trans = {
 	.name = "virtio",
 	.create = p9_virtio_create,
 	.close = p9_virtio_close,
-	.rpc = p9_virtio_rpc,
+	.request = p9_virtio_request,
+	.cancel = p9_virtio_cancel,
 	.maxsize = PAGE_SIZE*16,
 	.def = 0,
 	.owner = THIS_MODULE,
-- 
GitLab


From 95820a36516d12dcb49d066dd3d5b187a2557612 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 13 Oct 2008 18:45:20 -0500
Subject: [PATCH 555/895] 9p: drop broken unused error path from
 p9_conn_create()

Post p9_fd_poll() error path which checks m->poll_waddr[i] for PTR_ERR
value has the following problems.

* It's completely unused.  Error value is set iff NULL @wait_address
  has been specified to p9_pollwait() which is guaranteed not to
  happen.

* It dereferences @m after deallocating it (introduced by 571ffeaf and
  spotted by Raja R Harinath.

* It returned the wrong value on error.  It should return
  poll_waddr[i] but it returnes poll_waddr (introduced by 571ffeaf).

* p9_mux_poll_stop() doesn't handle PTR_ERR value.  It will try to
  operate on the PTR_ERR value as if it's a normal pointer and cause
  oops.

As the error path is bogus in the first place, there's no reason to
hold onto it.  Kill it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Raja R Harinath <harinath@hurrynot.org>
---
 net/9p/trans_fd.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6bfc013f8b6f..c07f2ab8d0f7 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -540,12 +540,6 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 		return;
 	}
 
-	if (!wait_address) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n");
-		pwait->wait_addr = ERR_PTR(-EIO);
-		return;
-	}
-
 	pwait->conn = m;
 	pwait->wait_addr = wait_address;
 	init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
@@ -561,7 +555,7 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 
 static struct p9_conn *p9_conn_create(struct p9_client *client)
 {
-	int i, n;
+	int n;
 	struct p9_conn *m;
 
 	P9_DPRINTK(P9_DEBUG_MUX, "client %p msize %d\n", client, client->msize);
@@ -590,15 +584,6 @@ static struct p9_conn *p9_conn_create(struct p9_client *client)
 		set_bit(Wpending, &m->wsched);
 	}
 
-	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
-		if (IS_ERR(m->poll_wait[i].wait_addr)) {
-			p9_mux_poll_stop(m);
-			kfree(m);
-			/* return the error code */
-			return (void *)m->poll_wait[i].wait_addr;
-		}
-	}
-
 	return m;
 }
 
-- 
GitLab


From 0fc9655ec67ec5d4dfd08e469e0e9f0a494bf5bc Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 20:36:17 -0500
Subject: [PATCH 556/895] 9p: consolidate read/write functions

Currently there are two separate versions of read and write.  One for
dealing with user buffers and the other for dealing with kernel buffers.
There is a tremendous amount of code duplication in the otherwise
identical versions of these functions.  This patch adds an additional
user buffer parameter to read and write and conditionalizes handling of
the buffer on whether the kernel buffer or the user buffer is populated.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_file.c        |   4 +-
 include/net/9p/client.h |  10 ++-
 net/9p/client.c         | 150 ++++++++--------------------------------
 3 files changed, 34 insertions(+), 130 deletions(-)

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 52944d2249a4..3819a195de8f 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -136,7 +136,7 @@ v9fs_file_read(struct file *filp, char __user * data, size_t count,
 
 	P9_DPRINTK(P9_DEBUG_VFS, "\n");
 	fid = filp->private_data;
-	ret = p9_client_uread(fid, data, *offset, count);
+	ret = p9_client_read(fid, NULL, data, *offset, count);
 	if (ret > 0)
 		*offset += ret;
 
@@ -164,7 +164,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
 		(int)count, (int)*offset);
 
 	fid = filp->private_data;
-	ret = p9_client_uwrite(fid, data, *offset, count);
+	ret = p9_client_write(fid, NULL, data, *offset, count);
 	if (ret > 0) {
 		invalidate_inode_pages2_range(inode->i_mapping, *offset,
 								*offset+ret);
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 6a71d9067818..c70a0f0b448d 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -201,13 +201,11 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 							char *extension);
 int p9_client_clunk(struct p9_fid *fid);
 int p9_client_remove(struct p9_fid *fid);
-int p9_client_read(struct p9_fid *fid, char *data, u64 offset, u32 count);
+int p9_client_read(struct p9_fid *fid, char *data, char __user *udata,
+							u64 offset, u32 count);
 int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count);
-int p9_client_write(struct p9_fid *fid, char *data, u64 offset, u32 count);
-int p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset,
-								u32 count);
-int p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset,
-								u32 count);
+int p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
+							u64 offset, u32 count);
 struct p9_stat *p9_client_stat(struct p9_fid *fid);
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst);
 struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset);
diff --git a/net/9p/client.c b/net/9p/client.c
index 29934febecdb..5fc3aa1eb39b 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1016,7 +1016,9 @@ done:
 }
 EXPORT_SYMBOL(p9_client_remove);
 
-int p9_client_read(struct p9_fid *fid, char *data, u64 offset, u32 count)
+int
+p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
+								u32 count)
 {
 	int err, n, rsize, total;
 	struct p9_fcall *tc, *rc;
@@ -1053,125 +1055,21 @@ int p9_client_read(struct p9_fid *fid, char *data, u64 offset, u32 count)
 		if (n > count)
 			n = count;
 
-		memmove(data, rc->params.rread.data, n);
-		count -= n;
-		data += n;
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0 && n == rsize);
-
-	return total;
-
-error:
-	kfree(tc);
-	kfree(rc);
-	return err;
-}
-EXPORT_SYMBOL(p9_client_read);
-
-int p9_client_write(struct p9_fid *fid, char *data, u64 offset, u32 count)
-{
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
-	struct p9_client *clnt;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
-					(long long unsigned) offset, count);
-	err = 0;
-	tc = NULL;
-	rc = NULL;
-	clnt = fid->clnt;
-	total = 0;
-
-	rsize = fid->iounit;
-	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
-		rsize = clnt->msize - P9_IOHDRSZ;
-
-	do {
-		if (count < rsize)
-			rsize = count;
-
-		tc = p9_create_twrite(fid->fid, offset, rsize, data);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
+		if (data) {
+			memmove(data, rc->params.rread.data, n);
+			data += n;
 		}
 
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
-
-		n = rc->params.rread.count;
-		count -= n;
-		data += n;
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0);
-
-	return total;
-
-error:
-	kfree(tc);
-	kfree(rc);
-	return err;
-}
-EXPORT_SYMBOL(p9_client_write);
-
-int
-p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count)
-{
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
-	struct p9_client *clnt;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
-					(long long unsigned) offset, count);
-	err = 0;
-	tc = NULL;
-	rc = NULL;
-	clnt = fid->clnt;
-	total = 0;
-
-	rsize = fid->iounit;
-	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
-		rsize = clnt->msize - P9_IOHDRSZ;
-
-	do {
-		if (count < rsize)
-			rsize = count;
-
-		tc = p9_create_tread(fid->fid, offset, rsize);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
-		}
-
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
-
-		n = rc->params.rread.count;
-		if (n > count)
-			n = count;
-
-		err = copy_to_user(data, rc->params.rread.data, n);
-		if (err) {
-			err = -EFAULT;
-			goto error;
+		if (udata) {
+			err = copy_to_user(udata, rc->params.rread.data, n);
+			if (err) {
+				err = -EFAULT;
+				goto error;
+			}
+			udata += n;
 		}
 
 		count -= n;
-		data += n;
 		offset += n;
 		total += n;
 		kfree(tc);
@@ -1187,11 +1085,11 @@ error:
 	kfree(rc);
 	return err;
 }
-EXPORT_SYMBOL(p9_client_uread);
+EXPORT_SYMBOL(p9_client_read);
 
 int
-p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset,
-								   u32 count)
+p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
+							u64 offset, u32 count)
 {
 	int err, n, rsize, total;
 	struct p9_fcall *tc, *rc;
@@ -1213,7 +1111,10 @@ p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset,
 		if (count < rsize)
 			rsize = count;
 
-		tc = p9_create_twrite_u(fid->fid, offset, rsize, data);
+		if (data)
+			tc = p9_create_twrite(fid->fid, offset, rsize, data);
+		else
+			tc = p9_create_twrite_u(fid->fid, offset, rsize, udata);
 		if (IS_ERR(tc)) {
 			err = PTR_ERR(tc);
 			tc = NULL;
@@ -1226,7 +1127,12 @@ p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset,
 
 		n = rc->params.rread.count;
 		count -= n;
-		data += n;
+
+		if (data)
+			data += n;
+		else
+			udata += n;
+
 		offset += n;
 		total += n;
 		kfree(tc);
@@ -1242,7 +1148,7 @@ error:
 	kfree(rc);
 	return err;
 }
-EXPORT_SYMBOL(p9_client_uwrite);
+EXPORT_SYMBOL(p9_client_write);
 
 int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count)
 {
@@ -1253,7 +1159,7 @@ int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count)
 	n = 0;
 	total = 0;
 	while (count) {
-		n = p9_client_read(fid, data, offset, count);
+		n = p9_client_read(fid, data, NULL, offset, count);
 		if (n <= 0)
 			break;
 
-- 
GitLab


From fbedadc16e5c888e4df9df3b1757de4993508d35 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 20:36:16 -0500
Subject: [PATCH 557/895] 9p: move readn meta-function from client to fs layer

There are a couple of methods in the client code which aren't actually
wire operations.  To keep things organized cleaner, these operations are
being moved to the fs layer.

This patch moves the readn meta-function (which executes multiple wire
reads until a buffer is full) to the fs layer.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/v9fs.c            |  2 +-
 fs/9p/v9fs_vfs.h        |  2 ++
 fs/9p/vfs_addr.c        |  5 +---
 fs/9p/vfs_file.c        | 57 ++++++++++++++++++++++++++++++++++++++---
 include/net/9p/client.h |  1 -
 net/9p/client.c         | 26 -------------------
 6 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index b6b85cf01e0d..24eb01087b6d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -234,7 +234,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 	if (!v9ses->clnt->dotu)
 		v9ses->flags &= ~V9FS_EXTENDED;
 
-	v9ses->maxdata = v9ses->clnt->msize;
+	v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
 
 	/* for legacy mode, fall back to V9FS_ACCESS_ANY */
 	if (!v9fs_extended(v9ses) &&
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 57997fa14e69..046cff377f10 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -52,3 +52,5 @@ int v9fs_file_open(struct inode *inode, struct file *file);
 void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat);
 void v9fs_dentry_release(struct dentry *);
 int v9fs_uflags2omode(int uflags, int extended);
+
+ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 97d3aed57983..6fcb1e7095cf 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -38,7 +38,6 @@
 
 #include "v9fs.h"
 #include "v9fs_vfs.h"
-#include "fid.h"
 
 /**
  * v9fs_vfs_readpage - read an entire page in from 9P
@@ -53,14 +52,12 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
 	int retval;
 	loff_t offset;
 	char *buffer;
-	struct p9_fid *fid;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "\n");
-	fid = filp->private_data;
 	buffer = kmap(page);
 	offset = page_offset(page);
 
-	retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE);
+	retval = v9fs_file_readn(filp, buffer, NULL, offset, PAGE_CACHE_SIZE);
 	if (retval < 0)
 		goto done;
 
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3819a195de8f..4d6d7657fb75 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -120,23 +120,72 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
 }
 
 /**
- * v9fs_file_read - read from a file
+ * v9fs_file_readn - read from a file
  * @filp: file pointer to read
  * @data: data buffer to read data into
+ * @udata: user data buffer to read data into
  * @count: size of buffer
  * @offset: offset at which to read data
  *
  */
+
+ssize_t
+v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
+	       u64 offset)
+{
+	int n, total;
+	struct p9_fid *fid = filp->private_data;
+
+	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
+					(long long unsigned) offset, count);
+
+	n = 0;
+	total = 0;
+	do {
+		n = p9_client_read(fid, data, udata, offset, count);
+		if (n <= 0)
+			break;
+
+		if (data)
+			data += n;
+		if (udata)
+			udata += n;
+
+		offset += n;
+		count -= n;
+		total += n;
+	} while (count > 0 && n == (fid->clnt->msize - P9_IOHDRSZ));
+
+	if (n < 0)
+		total = n;
+
+	return total;
+}
+
+/**
+ * v9fs_file_read - read from a file
+ * @filp: file pointer to read
+ * @udata: user data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+
 static ssize_t
-v9fs_file_read(struct file *filp, char __user * data, size_t count,
+v9fs_file_read(struct file *filp, char __user *udata, size_t count,
 	       loff_t * offset)
 {
 	int ret;
 	struct p9_fid *fid;
 
-	P9_DPRINTK(P9_DEBUG_VFS, "\n");
+	P9_DPRINTK(P9_DEBUG_VFS, "count %d offset %lld\n", count, *offset);
 	fid = filp->private_data;
-	ret = p9_client_read(fid, NULL, data, *offset, count);
+
+	if (count > (fid->clnt->msize - P9_IOHDRSZ))
+		ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
+	else
+		ret = p9_client_read(fid, NULL, udata, *offset, count);
+
 	if (ret > 0)
 		*offset += ret;
 
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index c70a0f0b448d..bb8b0ede132d 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -203,7 +203,6 @@ int p9_client_clunk(struct p9_fid *fid);
 int p9_client_remove(struct p9_fid *fid);
 int p9_client_read(struct p9_fid *fid, char *data, char __user *udata,
 							u64 offset, u32 count);
-int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count);
 int p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 							u64 offset, u32 count);
 struct p9_stat *p9_client_stat(struct p9_fid *fid);
diff --git a/net/9p/client.c b/net/9p/client.c
index 5fc3aa1eb39b..d5ea042eabb0 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1150,32 +1150,6 @@ error:
 }
 EXPORT_SYMBOL(p9_client_write);
 
-int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count)
-{
-	int n, total;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
-					(long long unsigned) offset, count);
-	n = 0;
-	total = 0;
-	while (count) {
-		n = p9_client_read(fid, data, NULL, offset, count);
-		if (n <= 0)
-			break;
-
-		data += n;
-		offset += n;
-		count -= n;
-		total += n;
-	}
-
-	if (n < 0)
-		total = n;
-
-	return total;
-}
-EXPORT_SYMBOL(p9_client_readn);
-
 static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu)
 {
 	int n;
-- 
GitLab


From dfb0ec2e13a906ff19a0bbfa9208caab50cfc2e3 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 20:36:16 -0500
Subject: [PATCH 558/895] 9p: adjust 9p vfs write operation

Currently, the 9p net wire operation ensures that all data is sent by sending
multiple packets if the data requested is larger than the msize.  This is
better handled in the vfs code so that we can simplify wire operations to
being concerned with only putting data onto and taking data off of the wire.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_file.c | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 4d6d7657fb75..3fd28bbafc87 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -205,19 +205,38 @@ static ssize_t
 v9fs_file_write(struct file *filp, const char __user * data,
 		size_t count, loff_t * offset)
 {
-	int ret;
+	int n, rsize, total = 0;
 	struct p9_fid *fid;
+	struct p9_client *clnt;
 	struct inode *inode = filp->f_path.dentry->d_inode;
+	int origin = *offset;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
 		(int)count, (int)*offset);
 
 	fid = filp->private_data;
-	ret = p9_client_write(fid, NULL, data, *offset, count);
-	if (ret > 0) {
-		invalidate_inode_pages2_range(inode->i_mapping, *offset,
-								*offset+ret);
-		*offset += ret;
+	clnt = fid->clnt;
+
+	rsize = fid->iounit;
+	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
+		rsize = clnt->msize - P9_IOHDRSZ;
+
+	do {
+		if (count < rsize)
+			rsize = count;
+
+		n = p9_client_write(fid, NULL, data+total, *offset+total,
+									rsize);
+		if (n <= 0)
+			break;
+		count -= n;
+		total += n;
+	} while (count > 0);
+
+	if (total > 0) {
+		invalidate_inode_pages2_range(inode->i_mapping, origin,
+								origin+total);
+		*offset += total;
 	}
 
 	if (*offset > inode->i_size) {
@@ -225,7 +244,10 @@ v9fs_file_write(struct file *filp, const char __user * data,
 		inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
 	}
 
-	return ret;
+	if (n < 0)
+		return n;
+
+	return total;
 }
 
 static const struct file_operations v9fs_cached_file_operations = {
-- 
GitLab


From 06b55b464ee5b305aca75cb7d9424b184bf07f68 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 20:36:15 -0500
Subject: [PATCH 559/895] 9p: move dirread to fs layer

Currently reading a directory is implemented in the client code.
This function is not actually a wire operation, but a meta operation
which calls read operations and processes the results.

This patch moves this functionality to the fs layer and calls component
wire operations instead of constructing their packets.  This provides a
cleaner separation and will help when we reorganize the client functions
and protocol processing methods.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_dir.c         |  54 ++++++++++++++-------
 include/net/9p/client.h |   5 --
 net/9p/client.c         | 103 ----------------------------------------
 3 files changed, 38 insertions(+), 124 deletions(-)

diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index e298fe194093..d7d0ac5a2ca3 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -69,32 +69,54 @@ static inline int dt_type(struct p9_stat *mistat)
 static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	int over;
+	struct p9_stat st;
+	int err;
 	struct p9_fid *fid;
-	struct v9fs_session_info *v9ses;
-	struct inode *inode;
-	struct p9_stat *st;
+	int buflen;
+	char *statbuf;
+	int n, i = 0;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
-	inode = filp->f_path.dentry->d_inode;
-	v9ses = v9fs_inode2v9ses(inode);
 	fid = filp->private_data;
-	while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) {
-		if (IS_ERR(st))
-			return PTR_ERR(st);
 
-		over = filldir(dirent, st->name.str, st->name.len, filp->f_pos,
-			v9fs_qid2ino(&st->qid), dt_type(st));
+	buflen = fid->clnt->msize - P9_IOHDRSZ;
+	statbuf = kmalloc(buflen, GFP_KERNEL);
+	if (!statbuf)
+		return -ENOMEM;
 
-		if (over)
+	while (1) {
+		err = v9fs_file_readn(filp, statbuf, NULL, fid->rdir_fpos,
+									buflen);
+		if (err <= 0)
 			break;
 
-		filp->f_pos += st->size;
-		kfree(st);
-		st = NULL;
+		n = err;
+		while (i < n) {
+			err = p9_deserialize_stat(statbuf + i, buflen-i, &st,
+							fid->clnt->dotu);
+			if (!err) {
+				err = -EIO;
+				goto free_and_exit;
+			}
+
+			i += err;
+			fid->rdir_fpos += err;
+
+			over = filldir(dirent, st.name.str, st.name.len,
+			    filp->f_pos, v9fs_qid2ino(&st.qid), dt_type(&st));
+
+			filp->f_pos += st.size;
+
+			if (over) {
+				err = 0;
+				goto free_and_exit;
+			}
+		}
 	}
 
-	kfree(st);
-	return 0;
+free_and_exit:
+	kfree(statbuf);
+	return err;
 }
 
 
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index bb8b0ede132d..eeb7d922816e 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -163,8 +163,6 @@ struct p9_client {
  * @uid: the numeric uid of the local user who owns this handle
  * @aux: transport specific information (unused?)
  * @rdir_fpos: tracks offset of file position when reading directory contents
- * @rdir_pos: (unused?)
- * @rdir_fcall: holds response of last directory read request
  * @flist: per-client-instance fid tracking
  * @dlist: per-dentry fid tracking
  *
@@ -181,8 +179,6 @@ struct p9_fid {
 	void *aux;
 
 	int rdir_fpos;
-	int rdir_pos;
-	struct p9_fcall *rdir_fcall;
 	struct list_head flist;
 	struct list_head dlist;	/* list of all fids attached to a dentry */
 };
@@ -207,7 +203,6 @@ int p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 							u64 offset, u32 count);
 struct p9_stat *p9_client_stat(struct p9_fid *fid);
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst);
-struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset);
 
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
diff --git a/net/9p/client.c b/net/9p/client.c
index d5ea042eabb0..90ee9efeede3 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -559,8 +559,6 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
 	memset(&fid->qid, 0, sizeof(struct p9_qid));
 	fid->mode = -1;
 	fid->rdir_fpos = 0;
-	fid->rdir_pos = 0;
-	fid->rdir_fcall = NULL;
 	fid->uid = current->fsuid;
 	fid->clnt = clnt;
 	fid->aux = NULL;
@@ -586,7 +584,6 @@ static void p9_fid_destroy(struct p9_fid *fid)
 	spin_lock(&clnt->lock);
 	list_del(&fid->flist);
 	spin_unlock(&clnt->lock);
-	kfree(fid->rdir_fcall);
 	kfree(fid);
 }
 
@@ -1261,103 +1258,3 @@ done:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_wstat);
-
-struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset)
-{
-	int err, n, m;
-	struct p9_fcall *tc, *rc;
-	struct p9_client *clnt;
-	struct p9_stat st, *ret;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu\n", fid->fid,
-						(long long unsigned) offset);
-	err = 0;
-	tc = NULL;
-	rc = NULL;
-	ret = NULL;
-	clnt = fid->clnt;
-
-	/* if the offset is below or above the current response, free it */
-	if (offset < fid->rdir_fpos || (fid->rdir_fcall &&
-		offset >= fid->rdir_fpos+fid->rdir_fcall->params.rread.count)) {
-		fid->rdir_pos = 0;
-		if (fid->rdir_fcall)
-			fid->rdir_fpos += fid->rdir_fcall->params.rread.count;
-
-		kfree(fid->rdir_fcall);
-		fid->rdir_fcall = NULL;
-		if (offset < fid->rdir_fpos)
-			fid->rdir_fpos = 0;
-	}
-
-	if (!fid->rdir_fcall) {
-		n = fid->iounit;
-		if (!n || n > clnt->msize-P9_IOHDRSZ)
-			n = clnt->msize - P9_IOHDRSZ;
-
-		while (1) {
-			if (fid->rdir_fcall) {
-				fid->rdir_fpos +=
-					fid->rdir_fcall->params.rread.count;
-				kfree(fid->rdir_fcall);
-				fid->rdir_fcall = NULL;
-			}
-
-			tc = p9_create_tread(fid->fid, fid->rdir_fpos, n);
-			if (IS_ERR(tc)) {
-				err = PTR_ERR(tc);
-				tc = NULL;
-				goto error;
-			}
-
-			err = p9_client_rpc(clnt, tc, &rc);
-			if (err)
-				goto error;
-
-			n = rc->params.rread.count;
-			if (n == 0)
-				goto done;
-
-			fid->rdir_fcall = rc;
-			rc = NULL;
-			if (offset >= fid->rdir_fpos &&
-						offset < fid->rdir_fpos+n)
-				break;
-		}
-
-		fid->rdir_pos = 0;
-	}
-
-	m = offset - fid->rdir_fpos;
-	if (m < 0)
-		goto done;
-
-	n = p9_deserialize_stat(fid->rdir_fcall->params.rread.data + m,
-		fid->rdir_fcall->params.rread.count - m, &st, clnt->dotu);
-
-	if (!n) {
-		err = -EIO;
-		goto error;
-	}
-
-	fid->rdir_pos += n;
-	st.size = n;
-	ret = p9_clone_stat(&st, clnt->dotu);
-	if (IS_ERR(ret)) {
-		err = PTR_ERR(ret);
-		ret = NULL;
-		goto error;
-	}
-
-done:
-	kfree(tc);
-	kfree(rc);
-	return ret;
-
-error:
-	kfree(tc);
-	kfree(rc);
-	kfree(ret);
-	return ERR_PTR(err);
-}
-EXPORT_SYMBOL(p9_client_dirread);
-- 
GitLab


From 6936bf60d2c407449c09e3f28ec0301e1f937106 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 20:36:14 -0500
Subject: [PATCH 560/895] 9p: encapsulate version function

Alsmot all 9P client wire functions have their own (set of) functions.
Tversion is an exception as its encapsulated into the client_create code.

This patch moves the protocol specifics of this to a function to match the
rest of the code.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c | 74 +++++++++++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 30 deletions(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index 90ee9efeede3..a9982df00a3a 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -587,16 +587,56 @@ static void p9_fid_destroy(struct p9_fid *fid)
 	kfree(fid);
 }
 
-struct p9_client *p9_client_create(const char *dev_name, char *options)
+static int p9_client_version(struct p9_client *clnt)
 {
-	int err, n;
-	struct p9_client *clnt;
+	int err = 0;
 	struct p9_fcall *tc, *rc;
 	struct p9_str *version;
 
+	P9_DPRINTK(P9_DEBUG_9P, "%p\n", clnt);
 	err = 0;
 	tc = NULL;
 	rc = NULL;
+
+	tc = p9_create_tversion(clnt->msize,
+					clnt->dotu ? "9P2000.u" : "9P2000");
+	if (IS_ERR(tc)) {
+		err = PTR_ERR(tc);
+		tc = NULL;
+		goto error;
+	}
+
+	err = p9_client_rpc(clnt, tc, &rc);
+	if (err)
+		goto error;
+
+	version = &rc->params.rversion.version;
+	if (version->len == 8 && !memcmp(version->str, "9P2000.u", 8))
+		clnt->dotu = 1;
+	else if (version->len == 6 && !memcmp(version->str, "9P2000", 6))
+		clnt->dotu = 0;
+	else {
+		err = -EREMOTEIO;
+		goto error;
+	}
+
+	if (rc->params.rversion.msize < clnt->msize)
+		clnt->msize = rc->params.rversion.msize;
+
+error:
+	kfree(tc);
+	kfree(rc);
+
+	return err;
+}
+EXPORT_SYMBOL(p9_client_auth);
+
+struct p9_client *p9_client_create(const char *dev_name, char *options)
+{
+	int err;
+	struct p9_client *clnt;
+
+	err = 0;
 	clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
 	if (!clnt)
 		return ERR_PTR(-ENOMEM);
@@ -628,7 +668,6 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 	P9_DPRINTK(P9_DEBUG_9P, "clnt %p trans %p msize %d dotu %d\n",
 		clnt, clnt->trans_mod, clnt->msize, clnt->dotu);
 
-
 	err = clnt->trans_mod->create(clnt, dev_name, options);
 	if (err)
 		goto error;
@@ -636,38 +675,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 	if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize)
 		clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ;
 
-	tc = p9_create_tversion(clnt->msize, clnt->dotu?"9P2000.u":"9P2000");
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto error;
-	}
-
-	err = p9_client_rpc(clnt, tc, &rc);
+	err = p9_client_version(clnt);
 	if (err)
 		goto error;
 
-	version = &rc->params.rversion.version;
-	if (version->len == 8 && !memcmp(version->str, "9P2000.u", 8))
-		clnt->dotu = 1;
-	else if (version->len == 6 && !memcmp(version->str, "9P2000", 6))
-		clnt->dotu = 0;
-	else {
-		err = -EREMOTEIO;
-		goto error;
-	}
-
-	n = rc->params.rversion.msize;
-	if (n < clnt->msize)
-		clnt->msize = n;
-
-	kfree(tc);
-	kfree(rc);
 	return clnt;
 
 error:
-	kfree(tc);
-	kfree(rc);
 	p9_client_destroy(clnt);
 	return ERR_PTR(err);
 }
-- 
GitLab


From ace51c4dd2f968f427c4627023759ae7e3786cba Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 13 Oct 2008 20:40:27 -0500
Subject: [PATCH 561/895] 9p: add new protocol support code

This adds a new protocol processing support code based on Anthony Liguori's
9p library code.  This code performs protocol marshalling/unmarshalling using
printf like strings to represent protocol elements.  It is my intent to use
them to replace the current functions in conv.c as well as the
p9_create_* functions.

This should make the client implementation much more clear, and also make it
much easier to add new protocol extensions by limiting the number of places
in which changes need to be made.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/9p.h |   8 +-
 net/9p/Makefile     |   1 +
 net/9p/protocol.c   | 457 ++++++++++++++++++++++++++++++++++++++++++++
 net/9p/protocol.h   |  31 +++
 4 files changed, 496 insertions(+), 1 deletion(-)
 create mode 100644 net/9p/protocol.c
 create mode 100644 net/9p/protocol.h

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index fb163e2e0de6..f9e25268b70f 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -509,6 +509,8 @@ struct p9_rwstat {
  * @size: prefixed length of the structure
  * @id: protocol operating identifier of type &p9_msg_t
  * @tag: transaction id of the request
+ * @offset: used by marshalling routines to track currentposition in buffer
+ * @capacity: used by marshalling routines to track total capacity
  * @sdata: payload
  * @params: per-operation parameters
  *
@@ -523,7 +525,11 @@ struct p9_fcall {
 	u32 size;
 	u8 id;
 	u16 tag;
-	void *sdata;
+
+	size_t offset;
+	size_t capacity;
+
+	uint8_t *sdata;
 
 	union {
 		struct p9_tversion tversion;
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 519219480db1..84c23499a293 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
 	error.o \
 	fcprint.o \
 	util.o \
+	protocol.o \
 	trans_fd.o \
 
 9pnet_virtio-objs := \
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
new file mode 100644
index 000000000000..43e98220e9a4
--- /dev/null
+++ b/net/9p/protocol.c
@@ -0,0 +1,457 @@
+/*
+ * net/9p/protocol.c
+ *
+ * 9P Protocol Support Code
+ *
+ *  Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  Base on code from Anthony Liguori <aliguori@us.ibm.com>
+ *  Copyright (C) 2008 by IBM, Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include "protocol.h"
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#ifndef offset_of
+#define offset_of(type, memb) \
+	((unsigned long)(&((type *)0)->memb))
+#endif
+#ifndef container_of
+#define container_of(obj, type, memb) \
+	((type *)(((char *)obj) - offset_of(type, memb)))
+#endif
+
+static int
+p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...);
+
+void p9stat_free(struct p9_wstat *stbuf)
+{
+	kfree(stbuf->name);
+	kfree(stbuf->uid);
+	kfree(stbuf->gid);
+	kfree(stbuf->muid);
+	kfree(stbuf->extension);
+}
+EXPORT_SYMBOL(p9stat_free);
+
+static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
+{
+	size_t len = MIN(pdu->size - pdu->offset, size);
+	memcpy(data, &pdu->sdata[pdu->offset], len);
+	pdu->offset += len;
+	return size - len;
+}
+
+static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
+{
+	size_t len = MIN(pdu->capacity - pdu->size, size);
+	memcpy(&pdu->sdata[pdu->size], data, len);
+	pdu->size += len;
+	return size - len;
+}
+
+/*
+	b - int8_t
+	w - int16_t
+	d - int32_t
+	q - int64_t
+	s - string
+	S - stat
+	Q - qid
+	D - data blob (int32_t size followed by void *, results are not freed)
+	T - array of strings (int16_t count, followed by strings)
+	R - array of qids (int16_t count, followed by qids)
+	? - if optional = 1, continue parsing
+*/
+
+static int
+p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
+{
+	const char *ptr;
+	int errcode = 0;
+
+	for (ptr = fmt; *ptr; ptr++) {
+		switch (*ptr) {
+		case 'b':{
+				int8_t *val = va_arg(ap, int8_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+			}
+			break;
+		case 'w':{
+				int16_t *val = va_arg(ap, int16_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+				*val = cpu_to_le16(*val);
+			}
+			break;
+		case 'd':{
+				int32_t *val = va_arg(ap, int32_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+				*val = cpu_to_le32(*val);
+			}
+			break;
+		case 'q':{
+				int64_t *val = va_arg(ap, int64_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+				*val = cpu_to_le64(*val);
+			}
+			break;
+		case 's':{
+				char **ptr = va_arg(ap, char **);
+				int16_t len;
+				int size;
+
+				errcode = p9pdu_readf(pdu, optional, "w", &len);
+				if (errcode)
+					break;
+
+				size = MAX(len, 0);
+
+				*ptr = kmalloc(size + 1, GFP_KERNEL);
+				if (*ptr == NULL) {
+					errcode = -EFAULT;
+					break;
+				}
+				if (pdu_read(pdu, *ptr, size)) {
+					errcode = -EFAULT;
+					kfree(*ptr);
+					*ptr = NULL;
+				} else
+					(*ptr)[size] = 0;
+			}
+			break;
+		case 'Q':{
+				struct p9_qid *qid =
+				    va_arg(ap, struct p9_qid *);
+
+				errcode = p9pdu_readf(pdu, optional, "bdq",
+						      &qid->type, &qid->version,
+						      &qid->path);
+			}
+			break;
+		case 'S':{
+				struct p9_wstat *stbuf =
+				    va_arg(ap, struct p9_wstat *);
+
+				stbuf->extension = NULL;
+				stbuf->n_uid = stbuf->n_gid = stbuf->n_muid =
+				    -1;
+
+				errcode =
+				    p9pdu_readf(pdu, optional,
+						"wwdQdddqssss?sddd",
+						&stbuf->size, &stbuf->type,
+						&stbuf->dev, &stbuf->qid,
+						&stbuf->mode, &stbuf->atime,
+						&stbuf->mtime, &stbuf->length,
+						&stbuf->name, &stbuf->uid,
+						&stbuf->gid, &stbuf->muid,
+						&stbuf->extension,
+						&stbuf->n_uid, &stbuf->n_gid,
+						&stbuf->n_muid);
+				if (errcode)
+					p9stat_free(stbuf);
+			}
+			break;
+		case 'D':{
+				int32_t *count = va_arg(ap, int32_t *);
+				void **data = va_arg(ap, void **);
+
+				errcode =
+				    p9pdu_readf(pdu, optional, "d", count);
+				if (!errcode) {
+					*count =
+					    MIN(*count,
+						pdu->size - pdu->offset);
+					*data = &pdu->sdata[pdu->offset];
+				}
+			}
+			break;
+		case 'T':{
+				int16_t *nwname = va_arg(ap, int16_t *);
+				char ***wnames = va_arg(ap, char ***);
+
+				errcode =
+				    p9pdu_readf(pdu, optional, "w", nwname);
+				if (!errcode) {
+					*wnames =
+					    kmalloc(sizeof(char *) * *nwname,
+						    GFP_KERNEL);
+					if (!*wnames)
+						errcode = -ENOMEM;
+				}
+
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < *nwname; i++) {
+						errcode =
+						    p9pdu_readf(pdu, optional,
+								"s",
+								&(*wnames)[i]);
+						if (errcode)
+							break;
+					}
+				}
+
+				if (errcode) {
+					if (*wnames) {
+						int i;
+
+						for (i = 0; i < *nwname; i++)
+							kfree((*wnames)[i]);
+					}
+					kfree(*wnames);
+					*wnames = NULL;
+				}
+			}
+			break;
+		case 'R':{
+				int16_t *nwqid = va_arg(ap, int16_t *);
+				struct p9_qid **wqids =
+				    va_arg(ap, struct p9_qid **);
+
+				*wqids = NULL;
+
+				errcode =
+				    p9pdu_readf(pdu, optional, "w", nwqid);
+				if (!errcode) {
+					*wqids =
+					    kmalloc(*nwqid *
+						    sizeof(struct p9_qid),
+						    GFP_KERNEL);
+					if (*wqids == NULL)
+						errcode = -ENOMEM;
+				}
+
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < *nwqid; i++) {
+						errcode =
+						    p9pdu_readf(pdu, optional,
+								"Q",
+								&(*wqids)[i]);
+						if (errcode)
+							break;
+					}
+				}
+
+				if (errcode) {
+					kfree(*wqids);
+					*wqids = NULL;
+				}
+			}
+			break;
+		case '?':
+			if (!optional)
+				return 0;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		if (errcode)
+			break;
+	}
+
+	return errcode;
+}
+
+int
+p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
+{
+	const char *ptr;
+	int errcode = 0;
+
+	for (ptr = fmt; *ptr; ptr++) {
+		switch (*ptr) {
+		case 'b':{
+				int8_t val = va_arg(ap, int);
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'w':{
+				int16_t val = va_arg(ap, int);
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'd':{
+				int32_t val = va_arg(ap, int32_t);
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'q':{
+				int64_t val = va_arg(ap, int64_t);
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 's':{
+				const char *ptr = va_arg(ap, const char *);
+				int16_t len = 0;
+
+				if (ptr)
+					len = MIN(strlen(ptr), USHORT_MAX);
+
+				errcode = p9pdu_writef(pdu, optional, "w", len);
+				if (!errcode && pdu_write(pdu, ptr, len))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'Q':{
+				const struct p9_qid *qid =
+				    va_arg(ap, const struct p9_qid *);
+				errcode =
+				    p9pdu_writef(pdu, optional, "bdq",
+						 qid->type, qid->version,
+						 qid->path);
+			} break;
+		case 'S':{
+				const struct p9_wstat *stbuf =
+				    va_arg(ap, const struct p9_wstat *);
+				errcode =
+				    p9pdu_writef(pdu, optional,
+						 "wwdQdddqssss?sddd",
+						 stbuf->size, stbuf->type,
+						 stbuf->dev, stbuf->qid,
+						 stbuf->mode, stbuf->atime,
+						 stbuf->mtime, stbuf->length,
+						 stbuf->name, stbuf->uid,
+						 stbuf->gid, stbuf->muid,
+						 stbuf->extension, stbuf->n_uid,
+						 stbuf->n_gid, stbuf->n_muid);
+			} break;
+		case 'D':{
+				int32_t count = va_arg(ap, int32_t);
+				const void *data = va_arg(ap, const void *);
+
+				errcode =
+				    p9pdu_writef(pdu, optional, "d", count);
+				if (!errcode && pdu_write(pdu, data, count))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'T':{
+				int16_t nwname = va_arg(ap, int);
+				const char **wnames = va_arg(ap, const char **);
+
+				errcode =
+				    p9pdu_writef(pdu, optional, "w", nwname);
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < nwname; i++) {
+						errcode =
+						    p9pdu_writef(pdu, optional,
+								 "s",
+								 wnames[i]);
+						if (errcode)
+							break;
+					}
+				}
+			}
+			break;
+		case 'R':{
+				int16_t nwqid = va_arg(ap, int);
+				struct p9_qid *wqids =
+				    va_arg(ap, struct p9_qid *);
+
+				errcode =
+				    p9pdu_writef(pdu, optional, "w", nwqid);
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < nwqid; i++) {
+						errcode =
+						    p9pdu_writef(pdu, optional,
+								 "Q",
+								 &wqids[i]);
+						if (errcode)
+							break;
+					}
+				}
+			}
+			break;
+		case '?':
+			if (!optional)
+				return 0;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		if (errcode)
+			break;
+	}
+
+	return errcode;
+}
+
+int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, fmt);
+	ret = p9pdu_vreadf(pdu, optional, fmt, ap);
+	va_end(ap);
+
+	return ret;
+}
+
+static int
+p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, fmt);
+	ret = p9pdu_vwritef(pdu, optional, fmt, ap);
+	va_end(ap);
+
+	return ret;
+}
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
new file mode 100644
index 000000000000..596ee10d506f
--- /dev/null
+++ b/net/9p/protocol.h
@@ -0,0 +1,31 @@
+/*
+ * net/9p/protocol.h
+ *
+ * 9P Protocol Support Code
+ *
+ *  Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  Base on code from Anthony Liguori <aliguori@us.ibm.com>
+ *  Copyright (C) 2008 by IBM, Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+int
+p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap);
+
+int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...);
-- 
GitLab


From 51d71f9f7a639c8a39401de1ec5ce9b0b6476c99 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Thu, 16 Oct 2008 08:29:31 -0500
Subject: [PATCH 562/895] 9p: remove 9p fcall debug prints

One of the current debug options allows users to get a verbose dump of fcalls.
This isn't really necessary as correctly parsed protocol frames can be printed
as part of the code in the client functions.  The consolidated printfcalls
structure would require new entries to be added for every extension.  This
patch removes the debug print methods and their use.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/9p.h |   1 -
 net/9p/Makefile     |   1 -
 net/9p/client.c     |   9 --
 net/9p/fcprint.c    | 366 --------------------------------------------
 net/9p/trans_fd.c   |   9 --
 5 files changed, 386 deletions(-)
 delete mode 100644 net/9p/fcprint.c

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index f9e25268b70f..46d0b8f8e5ec 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -590,7 +590,6 @@ struct p9_fcall *p9_create_tstat(u32 fid);
 struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat,
 	int dotu);
 
-int p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int dotu);
 int p9_errstr2errno(char *errstr, int len);
 
 struct p9_idpool *p9_idpool_create(void);
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 84c23499a293..c3302d88b808 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -6,7 +6,6 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
 	client.o \
 	conv.o \
 	error.o \
-	fcprint.o \
 	util.o \
 	protocol.o \
 	trans_fd.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index a9982df00a3a..6004fded6682 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -502,15 +502,6 @@ again:
 		goto reterr;
 	}
 
-#ifdef CONFIG_NET_9P_DEBUG
-	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
-		char buf[150];
-
-		p9_printfcall(buf, sizeof(buf), req->rc, c->dotu);
-		printk(KERN_NOTICE ">>> %p %s\n", c, buf);
-	}
-#endif
-
 	if (req->rc->id == P9_RERROR) {
 		int ecode = req->rc->params.rerror.errno;
 		struct p9_str *ename = &req->rc->params.rerror.error;
diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c
deleted file mode 100644
index 53dd8e28dd8a..000000000000
--- a/net/9p/fcprint.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- *  net/9p/fcprint.c
- *
- *  Print 9P call.
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/idr.h>
-#include <net/9p/9p.h>
-
-#ifdef CONFIG_NET_9P_DEBUG
-
-static int
-p9_printqid(char *buf, int buflen, struct p9_qid *q)
-{
-	int n;
-	char b[10];
-
-	n = 0;
-	if (q->type & P9_QTDIR)
-		b[n++] = 'd';
-	if (q->type & P9_QTAPPEND)
-		b[n++] = 'a';
-	if (q->type & P9_QTAUTH)
-		b[n++] = 'A';
-	if (q->type & P9_QTEXCL)
-		b[n++] = 'l';
-	if (q->type & P9_QTTMP)
-		b[n++] = 't';
-	if (q->type & P9_QTSYMLINK)
-		b[n++] = 'L';
-	b[n] = '\0';
-
-	return scnprintf(buf, buflen, "(%.16llx %x %s)",
-					(long long int) q->path, q->version, b);
-}
-
-static int
-p9_printperm(char *buf, int buflen, int perm)
-{
-	int n;
-	char b[15];
-
-	n = 0;
-	if (perm & P9_DMDIR)
-		b[n++] = 'd';
-	if (perm & P9_DMAPPEND)
-		b[n++] = 'a';
-	if (perm & P9_DMAUTH)
-		b[n++] = 'A';
-	if (perm & P9_DMEXCL)
-		b[n++] = 'l';
-	if (perm & P9_DMTMP)
-		b[n++] = 't';
-	if (perm & P9_DMDEVICE)
-		b[n++] = 'D';
-	if (perm & P9_DMSOCKET)
-		b[n++] = 'S';
-	if (perm & P9_DMNAMEDPIPE)
-		b[n++] = 'P';
-	if (perm & P9_DMSYMLINK)
-		b[n++] = 'L';
-	b[n] = '\0';
-
-	return scnprintf(buf, buflen, "%s%03o", b, perm&077);
-}
-
-static int
-p9_printstat(char *buf, int buflen, struct p9_stat *st, int extended)
-{
-	int n;
-
-	n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len,
-		st->name.str, st->uid.len, st->uid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid);
-
-	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid);
-
-	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid);
-
-	n += scnprintf(buf+n, buflen-n, " q ");
-	n += p9_printqid(buf+n, buflen-n, &st->qid);
-	n += scnprintf(buf+n, buflen-n, " m ");
-	n += p9_printperm(buf+n, buflen-n, st->mode);
-	n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld",
-		st->atime, st->mtime, (long long int) st->length);
-
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, " ext '%.*s'",
-			st->extension.len, st->extension.str);
-
-	return n;
-}
-
-static int
-p9_dumpdata(char *buf, int buflen, u8 *data, int datalen)
-{
-	int i, n;
-
-	i = n = 0;
-	while (i < datalen) {
-		n += scnprintf(buf + n, buflen - n, "%02x", data[i]);
-		if (i%4 == 3)
-			n += scnprintf(buf + n, buflen - n, " ");
-		if (i%32 == 31)
-			n += scnprintf(buf + n, buflen - n, "\n");
-
-		i++;
-	}
-	n += scnprintf(buf + n, buflen - n, "\n");
-
-	return n;
-}
-
-static int
-p9_printdata(char *buf, int buflen, u8 *data, int datalen)
-{
-	return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16);
-}
-
-/**
- * p9_printfcall - decode and print a protocol structure into a buffer
- * @buf: buffer to deposit decoded structure into
- * @buflen: available space in buffer
- * @fc: protocol rpc structure of type &p9_fcall
- * @extended: whether or not session is operating with extended protocol
- */
-
-int
-p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended)
-{
-	int i, ret, type, tag;
-
-	if (!fc)
-		return scnprintf(buf, buflen, "<NULL>");
-
-	type = fc->id;
-	tag = fc->tag;
-
-	ret = 0;
-	switch (type) {
-	case P9_TVERSION:
-		ret += scnprintf(buf+ret, buflen-ret,
-				"Tversion tag %u msize %u version '%.*s'", tag,
-				fc->params.tversion.msize,
-				fc->params.tversion.version.len,
-				fc->params.tversion.version.str);
-		break;
-
-	case P9_RVERSION:
-		ret += scnprintf(buf+ret, buflen-ret,
-				"Rversion tag %u msize %u version '%.*s'", tag,
-				fc->params.rversion.msize,
-				fc->params.rversion.version.len,
-				fc->params.rversion.version.str);
-		break;
-
-	case P9_TAUTH:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag,
-			fc->params.tauth.afid, fc->params.tauth.uname.len,
-			fc->params.tauth.uname.str, fc->params.tauth.aname.len,
-			fc->params.tauth.aname.str);
-		break;
-
-	case P9_RAUTH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag);
-		p9_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid);
-		break;
-
-	case P9_TATTACH:
-		ret += scnprintf(buf+ret, buflen-ret,
-		 "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'", tag,
-		 fc->params.tattach.fid, fc->params.tattach.afid,
-		 fc->params.tattach.uname.len, fc->params.tattach.uname.str,
-		 fc->params.tattach.aname.len, fc->params.tattach.aname.str);
-		break;
-
-	case P9_RATTACH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ",
-									tag);
-		p9_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid);
-		break;
-
-	case P9_RERROR:
-		ret += scnprintf(buf+ret, buflen-ret,
-				"Rerror tag %u ename '%.*s'", tag,
-				fc->params.rerror.error.len,
-				fc->params.rerror.error.str);
-		if (extended)
-			ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n",
-				fc->params.rerror.errno);
-		break;
-
-	case P9_TFLUSH:
-		ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u",
-			tag, fc->params.tflush.oldtag);
-		break;
-
-	case P9_RFLUSH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag);
-		break;
-
-	case P9_TWALK:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Twalk tag %u fid %d newfid %d nwname %d", tag,
-			fc->params.twalk.fid, fc->params.twalk.newfid,
-			fc->params.twalk.nwname);
-		for (i = 0; i < fc->params.twalk.nwname; i++)
-			ret += scnprintf(buf+ret, buflen-ret, " '%.*s'",
-				fc->params.twalk.wnames[i].len,
-				fc->params.twalk.wnames[i].str);
-		break;
-
-	case P9_RWALK:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d",
-			tag, fc->params.rwalk.nwqid);
-		for (i = 0; i < fc->params.rwalk.nwqid; i++)
-			ret += p9_printqid(buf+ret, buflen-ret,
-				&fc->params.rwalk.wqids[i]);
-		break;
-
-	case P9_TOPEN:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Topen tag %u fid %d mode %d", tag,
-			fc->params.topen.fid, fc->params.topen.mode);
-		break;
-
-	case P9_ROPEN:
-		ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag);
-		ret += p9_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid);
-		ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
-			fc->params.ropen.iounit);
-		break;
-
-	case P9_TCREATE:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tcreate tag %u fid %d name '%.*s' perm ", tag,
-			fc->params.tcreate.fid, fc->params.tcreate.name.len,
-			fc->params.tcreate.name.str);
-
-		ret += p9_printperm(buf+ret, buflen-ret,
-						fc->params.tcreate.perm);
-		ret += scnprintf(buf+ret, buflen-ret, " mode %d",
-			fc->params.tcreate.mode);
-		break;
-
-	case P9_RCREATE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag);
-		ret += p9_printqid(buf+ret, buflen-ret,
-						&fc->params.rcreate.qid);
-		ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
-			fc->params.rcreate.iounit);
-		break;
-
-	case P9_TREAD:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tread tag %u fid %d offset %lld count %u", tag,
-			fc->params.tread.fid,
-			(long long int) fc->params.tread.offset,
-			fc->params.tread.count);
-		break;
-
-	case P9_RREAD:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Rread tag %u count %u data ", tag,
-			fc->params.rread.count);
-		ret += p9_printdata(buf+ret, buflen-ret, fc->params.rread.data,
-			fc->params.rread.count);
-		break;
-
-	case P9_TWRITE:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Twrite tag %u fid %d offset %lld count %u data ",
-			tag, fc->params.twrite.fid,
-			(long long int) fc->params.twrite.offset,
-			fc->params.twrite.count);
-		ret += p9_printdata(buf+ret, buflen-ret, fc->params.twrite.data,
-			fc->params.twrite.count);
-		break;
-
-	case P9_RWRITE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u",
-			tag, fc->params.rwrite.count);
-		break;
-
-	case P9_TCLUNK:
-		ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d",
-			tag, fc->params.tclunk.fid);
-		break;
-
-	case P9_RCLUNK:
-		ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag);
-		break;
-
-	case P9_TREMOVE:
-		ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d",
-			tag, fc->params.tremove.fid);
-		break;
-
-	case P9_RREMOVE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag);
-		break;
-
-	case P9_TSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d",
-			tag, fc->params.tstat.fid);
-		break;
-
-	case P9_RSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag);
-		ret += p9_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat,
-			extended);
-		break;
-
-	case P9_TWSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ",
-			tag, fc->params.twstat.fid);
-		ret += p9_printstat(buf+ret, buflen-ret,
-					&fc->params.twstat.stat, extended);
-		break;
-
-	case P9_RWSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag);
-		break;
-
-	default:
-		ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type);
-		break;
-	}
-
-	return ret;
-}
-#else
-int
-p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended)
-{
-	return 0;
-}
-#endif /* CONFIG_NET_9P_DEBUG */
-EXPORT_SYMBOL(p9_printfcall);
-
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index c07f2ab8d0f7..95c9b949d1a5 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -650,15 +650,6 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 	if (m->err < 0)
 		return m->err;
 
-#ifdef CONFIG_NET_9P_DEBUG
-	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
-		char buf[150];
-
-		p9_printfcall(buf, sizeof(buf), req->tc, client->dotu);
-		printk(KERN_NOTICE "<<< %p %s\n", m, buf);
-	}
-#endif
-
 	req->status = REQ_STATUS_UNSENT;
 
 	spin_lock(&client->lock);
-- 
GitLab


From cb198131b0e7aba755ac164744536d461e86ab82 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Thu, 16 Oct 2008 08:29:31 -0500
Subject: [PATCH 563/895] 9p: remove unnecessary tag field from p9_req_t
 structure

This removes the vestigial tag field from the p9_req_t structure.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/client.h | 2 --
 net/9p/trans_fd.c       | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index eeb7d922816e..475ef5cf1644 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -81,7 +81,6 @@ enum p9_req_status_t {
  * @tc: the request fcall structure
  * @rc: the response fcall structure
  * @aux: transport specific data (provided for trans_fd migration)
- * @tag: tag on request (BUG: redundant)
  * @req_list: link for higher level objects to chain requests
  *
  * Transport use an array to track outstanding requests
@@ -104,7 +103,6 @@ struct p9_req_t {
 	u16 flush_tag;
 	void *aux;
 
-	int tag;
 	struct list_head req_list;
 };
 
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 95c9b949d1a5..e147ec539585 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -672,7 +672,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 	struct p9_trans_fd *ts = client->trans;
 	struct p9_conn *m = ts->conn;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
+	P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p\n", m, req);
 
 	spin_lock(&client->lock);
 	list_del(&req->req_list);
-- 
GitLab


From 51a87c552dfd428e304c865e24ecbe091556f226 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Thu, 16 Oct 2008 08:30:07 -0500
Subject: [PATCH 564/895] 9p: rework client code to use new protocol support
 functions

Now that the new protocol functions are in place, this patch switches
the client code to using the new support code.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/v9fs_vfs.h        |   4 +-
 fs/9p/vfs_dir.c         |   4 +-
 fs/9p/vfs_file.c        |   2 +-
 fs/9p/vfs_inode.c       |  38 +-
 fs/9p/vfs_super.c       |   6 +-
 include/net/9p/9p.h     |   7 +-
 include/net/9p/client.h |   7 +-
 net/9p/client.c         | 931 ++++++++++++++++++++--------------------
 net/9p/protocol.c       |  85 +++-
 net/9p/protocol.h       |   5 +-
 net/9p/trans_fd.c       |  62 ++-
 net/9p/util.c           |   4 +
 12 files changed, 611 insertions(+), 544 deletions(-)

diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 046cff377f10..c295ba786edd 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -46,10 +46,10 @@ extern struct dentry_operations v9fs_cached_dentry_operations;
 
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
-void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *);
+void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
 int v9fs_file_open(struct inode *inode, struct file *file);
-void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat);
+void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
 void v9fs_dentry_release(struct dentry *);
 int v9fs_uflags2omode(int uflags, int extended);
 
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index d7d0ac5a2ca3..276aed625929 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -85,8 +85,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		return -ENOMEM;
 
 	while (1) {
-		err = v9fs_file_readn(filp, statbuf, NULL, fid->rdir_fpos,
-									buflen);
+		err = v9fs_file_readn(filp, statbuf, NULL, buflen,
+								fid->rdir_fpos);
 		if (err <= 0)
 			break;
 
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3fd28bbafc87..041c52692284 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -136,7 +136,7 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
 	int n, total;
 	struct p9_fid *fid = filp->private_data;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
+	P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
 					(long long unsigned) offset, count);
 
 	n = 0;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e83aa5ebe861..e96d84aafbe2 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -334,7 +334,7 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 {
 	int err, umode;
 	struct inode *ret;
-	struct p9_stat *st;
+	struct p9_wstat *st;
 
 	ret = NULL;
 	st = p9_client_stat(fid);
@@ -417,6 +417,8 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	struct p9_fid *dfid, *ofid, *fid;
 	struct inode *inode;
 
+	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+
 	err = 0;
 	ofid = NULL;
 	fid = NULL;
@@ -424,6 +426,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	dfid = v9fs_fid_clone(dentry->d_parent);
 	if (IS_ERR(dfid)) {
 		err = PTR_ERR(dfid);
+		P9_DPRINTK(P9_DEBUG_VFS, "fid clone failed %d\n", err);
 		dfid = NULL;
 		goto error;
 	}
@@ -432,18 +435,22 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	ofid = p9_client_walk(dfid, 0, NULL, 1);
 	if (IS_ERR(ofid)) {
 		err = PTR_ERR(ofid);
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
 		ofid = NULL;
 		goto error;
 	}
 
 	err = p9_client_fcreate(ofid, name, perm, mode, extension);
-	if (err < 0)
+	if (err < 0) {
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err);
 		goto error;
+	}
 
 	/* now walk from the parent so we can get unopened fid */
 	fid = p9_client_walk(dfid, 1, &name, 0);
 	if (IS_ERR(fid)) {
 		err = PTR_ERR(fid);
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
 		fid = NULL;
 		goto error;
 	} else
@@ -453,6 +460,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
+		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
 
@@ -734,7 +742,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	int err;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid;
-	struct p9_stat *st;
+	struct p9_wstat *st;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
 	err = -EPERM;
@@ -815,10 +823,9 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
  */
 
 void
-v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
+v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 	struct super_block *sb)
 {
-	int n;
 	char ext[32];
 	struct v9fs_session_info *v9ses = sb->s_fs_info;
 
@@ -842,11 +849,7 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
 		int major = -1;
 		int minor = -1;
 
-		n = stat->extension.len;
-		if (n > sizeof(ext)-1)
-			n = sizeof(ext)-1;
-		memmove(ext, stat->extension.str, n);
-		ext[n] = 0;
+		strncpy(ext, stat->extension, sizeof(ext));
 		sscanf(ext, "%c %u %u", &type, &major, &minor);
 		switch (type) {
 		case 'c':
@@ -857,8 +860,8 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
 			break;
 		default:
 			P9_DPRINTK(P9_DEBUG_ERROR,
-				"Unknown special type %c (%.*s)\n", type,
-				stat->extension.len, stat->extension.str);
+				"Unknown special type %c %s\n", type,
+				stat->extension);
 		};
 		inode->i_rdev = MKDEV(major, minor);
 	} else
@@ -904,7 +907,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid;
-	struct p9_stat *st;
+	struct p9_wstat *st;
 
 	P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
 	retval = -EPERM;
@@ -926,15 +929,10 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 	}
 
 	/* copy extension buffer into buffer */
-	if (st->extension.len < buflen)
-		buflen = st->extension.len + 1;
-
-	memmove(buffer, st->extension.str, buflen - 1);
-	buffer[buflen-1] = 0;
+	strncpy(buffer, st->extension, buflen);
 
 	P9_DPRINTK(P9_DEBUG_VFS,
-		"%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len,
-		st->extension.str, buffer);
+		"%s -> %s (%s)\n", dentry->d_name.name, st->extension, buffer);
 
 	retval = buflen;
 
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index bf59c3960494..d6cb1a0ca724 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -111,7 +111,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	struct inode *inode = NULL;
 	struct dentry *root = NULL;
 	struct v9fs_session_info *v9ses = NULL;
-	struct p9_stat *st = NULL;
+	struct p9_wstat *st = NULL;
 	int mode = S_IRWXUGO | S_ISVTX;
 	uid_t uid = current->fsuid;
 	gid_t gid = current->fsgid;
@@ -161,10 +161,14 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 
 	sb->s_root = root;
 	root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
+
 	v9fs_stat2inode(st, root->d_inode, sb);
+
 	v9fs_fid_add(root, fid);
+	p9stat_free(st);
 	kfree(st);
 
+P9_DPRINTK(P9_DEBUG_VFS, " return simple set mount\n");
 	return simple_set_mnt(mnt, sb);
 
 release_sb:
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 46d0b8f8e5ec..56c15aee6e61 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -39,6 +39,7 @@
  * @P9_DEBUG_TRANS: transport tracing
  * @P9_DEBUG_SLABS: memory management tracing
  * @P9_DEBUG_FCALL: verbose dump of protocol messages
+ * @P9_DEBUG_FID: fid allocation/deallocation tracking
  *
  * These flags are passed at mount time to turn on various levels of
  * verbosity and tracing which will be output to the system logs.
@@ -53,13 +54,17 @@ enum p9_debug_flags {
 	P9_DEBUG_TRANS =	(1<<6),
 	P9_DEBUG_SLABS =      	(1<<7),
 	P9_DEBUG_FCALL =	(1<<8),
+	P9_DEBUG_FID =		(1<<9),
 };
 
 extern unsigned int p9_debug_level;
 
 #define P9_DPRINTK(level, format, arg...) \
 do {  \
-	if ((p9_debug_level & level) == level) \
+	if (level == P9_DEBUG_9P) \
+		printk(KERN_NOTICE "(%8.8d) " \
+		format , task_pid_nr(current) , ## arg); \
+	else if ((p9_debug_level & level) == level) \
 		printk(KERN_NOTICE "-- %s (%d): " \
 		format , __func__, task_pid_nr(current) , ## arg); \
 } while (0)
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 475ef5cf1644..1e49b4d1030b 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -77,6 +77,7 @@ enum p9_req_status_t {
  * struct p9_req_t - request slots
  * @status: status of this request slot
  * @t_err: transport error
+ * @flush_tag: tag of request being flushed (for flush requests)
  * @wq: wait_queue for the client to block on for this request
  * @tc: the request fcall structure
  * @rc: the response fcall structure
@@ -97,10 +98,10 @@ enum p9_req_status_t {
 struct p9_req_t {
 	int status;
 	int t_err;
+	u16 flush_tag;
 	wait_queue_head_t *wq;
 	struct p9_fcall *tc;
 	struct p9_fcall *rc;
-	u16 flush_tag;
 	void *aux;
 
 	struct list_head req_list;
@@ -199,10 +200,12 @@ int p9_client_read(struct p9_fid *fid, char *data, char __user *udata,
 							u64 offset, u32 count);
 int p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 							u64 offset, u32 count);
-struct p9_stat *p9_client_stat(struct p9_fid *fid);
+struct p9_wstat *p9_client_stat(struct p9_fid *fid);
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst);
 
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
 
+void p9stat_free(struct p9_wstat *);
+
 #endif /* NET_9P_CLIENT_H */
diff --git a/net/9p/client.c b/net/9p/client.c
index 6004fded6682..2a166bfb95a3 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -35,6 +35,7 @@
 #include <linux/parser.h>
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
+#include "protocol.h"
 
 /*
   * Client Option Parsing (code inspired by NFS code)
@@ -55,8 +56,8 @@ static const match_table_t tokens = {
 	{Opt_err, NULL},
 };
 
-static int
-p9_client_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc);
+static struct p9_req_t *
+p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
 
 /**
  * v9fs_parse_options - parse mount options into session structure
@@ -138,10 +139,11 @@ static int parse_opts(char *opts, struct p9_client *clnt)
  *
  */
 
-struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
+static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 {
 	unsigned long flags;
 	int row, col;
+	struct p9_req_t *req;
 
 	/* This looks up the original request by tag so we know which
 	 * buffer to read the data into */
@@ -157,19 +159,11 @@ struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 
 			if (!c->reqs[row]) {
 				printk(KERN_ERR "Couldn't grow tag array\n");
-				BUG();
+				return ERR_PTR(-ENOMEM);
 			}
 			for (col = 0; col < P9_ROW_MAXTAG; col++) {
 				c->reqs[row][col].status = REQ_STATUS_IDLE;
-				c->reqs[row][col].flush_tag = P9_NOTAG;
-				c->reqs[row][col].wq = kmalloc(
-					sizeof(wait_queue_head_t), GFP_ATOMIC);
-				if (!c->reqs[row][col].wq) {
-					printk(KERN_ERR
-						"Couldn't grow tag array\n");
-					BUG();
-				}
-				init_waitqueue_head(c->reqs[row][col].wq);
+				c->reqs[row][col].tc = NULL;
 			}
 			c->max_tag += P9_ROW_MAXTAG;
 		}
@@ -178,12 +172,39 @@ struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 	row = tag / P9_ROW_MAXTAG;
 	col = tag % P9_ROW_MAXTAG;
 
-	c->reqs[row][col].status = REQ_STATUS_ALLOC;
-	c->reqs[row][col].flush_tag = P9_NOTAG;
+	req = &c->reqs[row][col];
+	if (!req->tc) {
+		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
+		if (!req->wq) {
+			printk(KERN_ERR "Couldn't grow tag array\n");
+			return ERR_PTR(-ENOMEM);
+		}
+		init_waitqueue_head(req->wq);
+		req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+								GFP_KERNEL);
+		req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+								GFP_KERNEL);
+		if ((!req->tc) || (!req->rc)) {
+			printk(KERN_ERR "Couldn't grow tag array\n");
+			kfree(req->tc);
+			kfree(req->rc);
+			return ERR_PTR(-ENOMEM);
+		}
+		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
+		req->tc->capacity = c->msize;
+		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
+		req->rc->capacity = c->msize;
+	}
+
+	p9pdu_reset(req->tc);
+	p9pdu_reset(req->rc);
+
+	req->flush_tag = 0;
+	req->tc->tag = tag-1;
+	req->status = REQ_STATUS_ALLOC;
 
 	return &c->reqs[row][col];
 }
-EXPORT_SYMBOL(p9_tag_alloc);
 
 /**
  * p9_tag_lookup - lookup a request by tag
@@ -264,43 +285,16 @@ static void p9_tag_cleanup(struct p9_client *c)
 
 	/* free requests associated with tags */
 	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
-		for (col = 0; col < P9_ROW_MAXTAG; col++)
+		for (col = 0; col < P9_ROW_MAXTAG; col++) {
 			kfree(c->reqs[row][col].wq);
+			kfree(c->reqs[row][col].tc);
+			kfree(c->reqs[row][col].rc);
+		}
 		kfree(c->reqs[row]);
 	}
 	c->max_tag = 0;
 }
 
-/**
- * p9_client_flush - flush (cancel) a request
- * c: client state
- * req: request to cancel
- *
- * This sents a flush for a particular requests and links
- * the flush request to the original request.  The current
- * code only supports a single flush request although the protocol
- * allows for multiple flush requests to be sent for a single request.
- *
- */
-
-static int p9_client_flush(struct p9_client *c, struct p9_req_t *req)
-{
-	struct p9_fcall *tc, *rc = NULL;
-	int err;
-
-	P9_DPRINTK(P9_DEBUG_9P, "client %p tag %d\n", c, req->tc->tag);
-
-	tc = p9_create_tflush(req->tc->tag);
-	if (IS_ERR(tc))
-		return PTR_ERR(tc);
-
-	err = p9_client_rpc(c, tc, &rc);
-
-	/* we don't free anything here because RPC isn't complete */
-
-	return err;
-}
-
 /**
  * p9_free_req - free a request and clean-up as necessary
  * c: client state
@@ -308,15 +302,17 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *req)
  *
  */
 
-void p9_free_req(struct p9_client *c, struct p9_req_t *r)
+static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
 {
-	r->flush_tag = P9_NOTAG;
+	int tag = r->tc->tag;
+	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
+
 	r->status = REQ_STATUS_IDLE;
-	if (r->tc->tag != P9_NOTAG && p9_idpool_check(r->tc->tag, c->tagpool))
-		p9_idpool_put(r->tc->tag, c->tagpool);
+	if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool))
+		p9_idpool_put(tag, c->tagpool);
 
 	/* if this was a flush request we have to free response fcall */
-	if (r->tc->id == P9_TFLUSH) {
+	if (r->rc->id == P9_RFLUSH) {
 		kfree(r->tc);
 		kfree(r->rc);
 	}
@@ -333,30 +329,28 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req)
 	struct p9_req_t *other_req;
 	unsigned long flags;
 
-	P9_DPRINTK(P9_DEBUG_MUX, ": %d\n", req->tc->tag);
+	P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
 
 	if (req->status == REQ_STATUS_ERROR)
 		wake_up(req->wq);
 
-	if (req->tc->id == P9_TFLUSH) { /* flush receive path */
-		P9_DPRINTK(P9_DEBUG_MUX, "flush: %d\n", req->tc->tag);
+	if (req->flush_tag) { 			/* flush receive path */
+		P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag);
 		spin_lock_irqsave(&c->lock, flags);
-		other_req = p9_tag_lookup(c, req->tc->params.tflush.oldtag);
-		if (other_req->flush_tag != req->tc->tag) /* stale flush */
+		other_req = p9_tag_lookup(c, req->flush_tag);
+		if (other_req->status != REQ_STATUS_FLSH) /* stale flush */
 			spin_unlock_irqrestore(&c->lock, flags);
 		else {
-			BUG_ON(other_req->status != REQ_STATUS_FLSH);
 			other_req->status = REQ_STATUS_FLSHD;
 			spin_unlock_irqrestore(&c->lock, flags);
 			wake_up(other_req->wq);
 		}
 		p9_free_req(c, req);
 	} else { 				/* normal receive path */
-		P9_DPRINTK(P9_DEBUG_MUX, "normal: %d\n", req->tc->tag);
+		P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag);
 		spin_lock_irqsave(&c->lock, flags);
 		if (req->status != REQ_STATUS_FLSHD)
 			req->status = REQ_STATUS_RCVD;
-		req->flush_tag = P9_NOTAG;
 		spin_unlock_irqrestore(&c->lock, flags);
 		wake_up(req->wq);
 		P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
@@ -364,29 +358,165 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req)
 }
 EXPORT_SYMBOL(p9_client_cb);
 
+/**
+ * p9_parse_header - parse header arguments out of a packet
+ * @pdu: packet to parse
+ * @size: size of packet
+ * @type: type of request
+ * @tag: tag of packet
+ * @rewind: set if we need to rewind offset afterwards
+ */
+
+int
+p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag,
+								int rewind)
+{
+	int8_t r_type;
+	int16_t r_tag;
+	int32_t r_size;
+	int offset = pdu->offset;
+	int err;
+
+	pdu->offset = 0;
+	if (pdu->size == 0)
+		pdu->size = 7;
+
+	err = p9pdu_readf(pdu, 0, "dbw", &r_size, &r_type, &r_tag);
+	if (err)
+		goto rewind_and_exit;
+
+	pdu->size = r_size;
+	pdu->id = r_type;
+	pdu->tag = r_tag;
+
+	P9_DPRINTK(P9_DEBUG_MUX, "pdu: type: %d tag: %d size=%d offset=%d\n",
+				pdu->id, pdu->tag, pdu->size, pdu->offset);
+
+	if (type)
+		*type = r_type;
+	if (tag)
+		*tag = r_tag;
+	if (size)
+		*size = r_size;
+
+
+rewind_and_exit:
+	if (rewind)
+		pdu->offset = offset;
+	return err;
+}
+EXPORT_SYMBOL(p9_parse_header);
+
+/**
+ * p9_check_errors - check 9p packet for error return and process it
+ * @c: current client instance
+ * @req: request to parse and check for error conditions
+ *
+ * returns error code if one is discovered, otherwise returns 0
+ *
+ * this will have to be more complicated if we have multiple
+ * error packet types
+ */
+
+static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
+{
+	int8_t type;
+	int err;
+
+	err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+	if (err) {
+		P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
+		return err;
+	}
+
+	if (type == P9_RERROR) {
+		int ecode;
+		char *ename;
+
+		err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode);
+		if (err) {
+			P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n",
+									err);
+			return err;
+		}
+
+		if (c->dotu)
+			err = -ecode;
+
+		if (!err) {
+			err = p9_errstr2errno(ename, strlen(ename));
+
+			/* string match failed */
+			if (!err)
+				err = -ESERVERFAULT;
+		}
+
+		P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+
+		kfree(ename);
+	} else
+		err = 0;
+
+	return err;
+}
+
+/**
+ * p9_client_flush - flush (cancel) a request
+ * c: client state
+ * req: request to cancel
+ *
+ * This sents a flush for a particular requests and links
+ * the flush request to the original request.  The current
+ * code only supports a single flush request although the protocol
+ * allows for multiple flush requests to be sent for a single request.
+ *
+ */
+
+static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
+{
+	struct p9_req_t *req;
+	int16_t oldtag;
+	int err;
+
+	err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1);
+	if (err)
+		return err;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag);
+
+	req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->flush_tag = oldtag;
+
+	/* we don't free anything here because RPC isn't complete */
+	return 0;
+}
+
 /**
  * p9_client_rpc - issue a request and wait for a response
  * @c: client session
- * @tc: &p9_fcall request to transmit
- * @rc: &p9_fcall to put reponse into
+ * @type: type of request
+ * @fmt: protocol format string (see protocol.c)
  *
- * Returns 0 on success, error code on failure
+ * Returns request structure (which client must free using p9_free_req)
  */
 
-static int
-p9_client_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc)
+static struct p9_req_t *
+p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 {
-	int tag, err, size;
-	char *rdata;
+	va_list ap;
+	int tag, err;
 	struct p9_req_t *req;
 	unsigned long flags;
 	int sigpending;
 	int flushed = 0;
 
-	P9_DPRINTK(P9_DEBUG_9P, "client %p tc %p rc %p\n", c, tc, rc);
+	P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type);
 
 	if (c->status != Connected)
-		return -EIO;
+		return ERR_PTR(-EIO);
 
 	if (signal_pending(current)) {
 		sigpending = 1;
@@ -395,50 +525,22 @@ p9_client_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc)
 		sigpending = 0;
 
 	tag = P9_NOTAG;
-	if (tc->id != P9_TVERSION) {
+	if (type != P9_TVERSION) {
 		tag = p9_idpool_get(c->tagpool);
 		if (tag < 0)
-			return -ENOMEM;
+			return ERR_PTR(-ENOMEM);
 	}
 
 	req = p9_tag_alloc(c, tag);
+	if (IS_ERR(req))
+		return req;
 
-	/* if this is a flush request, backlink flush request now to
-	 * avoid race conditions later. */
-	if (tc->id == P9_TFLUSH) {
-		struct p9_req_t *other_req =
-				p9_tag_lookup(c, tc->params.tflush.oldtag);
-		if (other_req->status == REQ_STATUS_FLSH)
-			other_req->flush_tag = tag;
-	}
-
-	p9_set_tag(tc, tag);
-
-	/*
-	 * if client passed in a pre-allocated response fcall struct
-	 * then we just use that, otherwise we allocate one.
-	 */
-
-	if (rc == NULL)
-		req->rc = NULL;
-	else
-		req->rc = *rc;
-	if (req->rc == NULL) {
-		req->rc = kmalloc(sizeof(struct p9_fcall) + c->msize,
-								GFP_KERNEL);
-		if (!req->rc) {
-			err = -ENOMEM;
-			p9_idpool_put(tag, c->tagpool);
-			p9_free_req(c, req);
-			goto reterr;
-		}
-		*rc = req->rc;
-	}
-
-	rdata = (char *)req->rc+sizeof(struct p9_fcall);
-
-	req->tc = tc;
-	P9_DPRINTK(P9_DEBUG_9P, "request: tc: %p rc: %p\n", req->tc, req->rc);
+	/* marshall the data */
+	p9pdu_prepare(req->tc, tag, type);
+	va_start(ap, fmt);
+	err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap);
+	va_end(ap);
+	p9pdu_finalize(req->tc);
 
 	err = c->trans_mod->request(c, req);
 	if (err < 0) {
@@ -447,28 +549,28 @@ p9_client_rpc(struct p9_client *c, struct p9_fcall *tc, struct p9_fcall **rc)
 	}
 
 	/* if it was a flush we just transmitted, return our tag */
-	if (tc->id == P9_TFLUSH)
-		return 0;
+	if (type == P9_TFLUSH)
+		return req;
 again:
-	P9_DPRINTK(P9_DEBUG_9P, "wait %p tag: %d\n", req->wq, tag);
+	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag);
 	err = wait_event_interruptible(*req->wq,
 						req->status >= REQ_STATUS_RCVD);
-	P9_DPRINTK(P9_DEBUG_9P, "wait %p tag: %d returned %d (flushed=%d)\n",
+	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n",
 						req->wq, tag, err, flushed);
 
 	if (req->status == REQ_STATUS_ERROR) {
-		P9_DPRINTK(P9_DEBUG_9P, "req_status error %d\n", req->t_err);
+		P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
 		err = req->t_err;
 	} else if (err == -ERESTARTSYS && flushed) {
-		P9_DPRINTK(P9_DEBUG_9P, "flushed - going again\n");
+		P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n");
 		goto again;
 	} else if (req->status == REQ_STATUS_FLSHD) {
-		P9_DPRINTK(P9_DEBUG_9P, "flushed - erestartsys\n");
+		P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n");
 		err = -ERESTARTSYS;
 	}
 
 	if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) {
-		P9_DPRINTK(P9_DEBUG_9P, "flushing\n");
+		P9_DPRINTK(P9_DEBUG_MUX, "flushing\n");
 		spin_lock_irqsave(&c->lock, flags);
 		if (req->status == REQ_STATUS_SENT)
 			req->status = REQ_STATUS_FLSH;
@@ -493,42 +595,17 @@ again:
 	if (err < 0)
 		goto reterr;
 
-	size = le32_to_cpu(*(__le32 *) rdata);
-
-	err = p9_deserialize_fcall(rdata, size, req->rc, c->dotu);
-	if (err < 0) {
-		P9_DPRINTK(P9_DEBUG_9P,
-			"9p debug: client rpc deserialize returned %d\n", err);
-		goto reterr;
+	err = p9_check_errors(c, req);
+	if (!err) {
+		P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type);
+		return req;
 	}
 
-	if (req->rc->id == P9_RERROR) {
-		int ecode = req->rc->params.rerror.errno;
-		struct p9_str *ename = &req->rc->params.rerror.error;
-
-		P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len,
-								ename->str);
-
-		if (c->dotu)
-			err = -ecode;
-
-		if (!err) {
-			err = p9_errstr2errno(ename->str, ename->len);
-
-			/* string match failed */
-			if (!err) {
-				PRINT_FCALL_ERROR("unknown error", req->rc);
-				err = -ESERVERFAULT;
-			}
-		}
-	} else
-		err = 0;
-
 reterr:
+	P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type,
+									err);
 	p9_free_req(c, req);
-
-	P9_DPRINTK(P9_DEBUG_9P, "returning %d\n", err);
-	return err;
+	return ERR_PTR(err);
 }
 
 static struct p9_fid *p9_fid_create(struct p9_client *clnt)
@@ -536,7 +613,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
 	int err;
 	struct p9_fid *fid;
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
+	P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt);
 	fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
 	if (!fid)
 		return ERR_PTR(-ENOMEM);
@@ -569,7 +646,7 @@ static void p9_fid_destroy(struct p9_fid *fid)
 {
 	struct p9_client *clnt;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid);
 	clnt = fid->clnt;
 	p9_idpool_put(fid->fid, clnt->fidpool);
 	spin_lock(&clnt->lock);
@@ -578,49 +655,46 @@ static void p9_fid_destroy(struct p9_fid *fid)
 	kfree(fid);
 }
 
-static int p9_client_version(struct p9_client *clnt)
+int p9_client_version(struct p9_client *c)
 {
 	int err = 0;
-	struct p9_fcall *tc, *rc;
-	struct p9_str *version;
+	struct p9_req_t *req;
+	char *version;
+	int msize;
 
-	P9_DPRINTK(P9_DEBUG_9P, "%p\n", clnt);
-	err = 0;
-	tc = NULL;
-	rc = NULL;
-
-	tc = p9_create_tversion(clnt->msize,
-					clnt->dotu ? "9P2000.u" : "9P2000");
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto error;
-	}
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n",
+							c->msize, c->dotu);
+	req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize,
+				c->dotu ? "9P2000.u" : "9P2000");
+	if (IS_ERR(req))
+		return PTR_ERR(req);
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
+	err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version);
+	if (err) {
+		P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err);
 		goto error;
+	}
 
-	version = &rc->params.rversion.version;
-	if (version->len == 8 && !memcmp(version->str, "9P2000.u", 8))
-		clnt->dotu = 1;
-	else if (version->len == 6 && !memcmp(version->str, "9P2000", 6))
-		clnt->dotu = 0;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);
+	if (!memcmp(version, "9P2000.u", 8))
+		c->dotu = 1;
+	else if (!memcmp(version, "9P2000", 6))
+		c->dotu = 0;
 	else {
 		err = -EREMOTEIO;
 		goto error;
 	}
 
-	if (rc->params.rversion.msize < clnt->msize)
-		clnt->msize = rc->params.rversion.msize;
+	if (msize < c->msize)
+		c->msize = msize;
 
 error:
-	kfree(tc);
-	kfree(rc);
+	kfree(version);
+	p9_free_req(c, req);
 
 	return err;
 }
-EXPORT_SYMBOL(p9_client_auth);
+EXPORT_SYMBOL(p9_client_version);
 
 struct p9_client *p9_client_create(const char *dev_name, char *options)
 {
@@ -656,7 +730,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p trans %p msize %d dotu %d\n",
+	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n",
 		clnt, clnt->trans_mod, clnt->msize, clnt->dotu);
 
 	err = clnt->trans_mod->create(clnt, dev_name, options);
@@ -682,7 +756,7 @@ void p9_client_destroy(struct p9_client *clnt)
 {
 	struct p9_fid *fid, *fidptr;
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
+	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p\n", clnt);
 
 	if (clnt->trans_mod)
 		clnt->trans_mod->close(clnt);
@@ -712,14 +786,13 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
 	char *uname, u32 n_uname, char *aname)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
+	struct p9_req_t *req;
 	struct p9_fid *fid;
+	struct p9_qid qid;
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p afid %d uname %s aname %s\n",
-		clnt, afid?afid->fid:-1, uname, aname);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n",
+					afid ? afid->fid : -1, uname, aname);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 
 	fid = p9_fid_create(clnt);
 	if (IS_ERR(fid)) {
@@ -728,73 +801,75 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
 		goto error;
 	}
 
-	tc = p9_create_tattach(fid->fid, afid?afid->fid:P9_NOFID, uname, aname,
-		n_uname, clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+	req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid,
+			afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
+	err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
+	if (err) {
+		p9_free_req(clnt, req);
 		goto error;
+	}
 
-	memmove(&fid->qid, &rc->params.rattach.qid, sizeof(struct p9_qid));
-	kfree(tc);
-	kfree(rc);
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n",
+					qid.type, qid.path, qid.version);
+
+	memmove(&fid->qid, &qid, sizeof(struct p9_qid));
+
+	p9_free_req(clnt, req);
 	return fid;
 
 error:
-	kfree(tc);
-	kfree(rc);
 	if (fid)
 		p9_fid_destroy(fid);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_attach);
 
-struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname,
-	u32 n_uname, char *aname)
+struct p9_fid *
+p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
-	struct p9_fid *fid;
+	struct p9_req_t *req;
+	struct p9_qid qid;
+	struct p9_fid *afid;
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p uname %s aname %s\n", clnt, uname,
-									aname);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TAUTH uname %s aname %s\n", uname, aname);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 
-	fid = p9_fid_create(clnt);
-	if (IS_ERR(fid)) {
-		err = PTR_ERR(fid);
-		fid = NULL;
+	afid = p9_fid_create(clnt);
+	if (IS_ERR(afid)) {
+		err = PTR_ERR(afid);
+		afid = NULL;
 		goto error;
 	}
 
-	tc = p9_create_tauth(fid->fid, uname, aname, n_uname, clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+	req = p9_client_rpc(clnt, P9_TAUTH, "dss?d",
+			afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
+	err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
+	if (err) {
+		p9_free_req(clnt, req);
 		goto error;
+	}
 
-	memmove(&fid->qid, &rc->params.rauth.qid, sizeof(struct p9_qid));
-	kfree(tc);
-	kfree(rc);
-	return fid;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n",
+					qid.type, qid.path, qid.version);
+
+	memmove(&afid->qid, &qid, sizeof(struct p9_qid));
+	p9_free_req(clnt, req);
+	return afid;
 
 error:
-	kfree(tc);
-	kfree(rc);
-	if (fid)
-		p9_fid_destroy(fid);
+	if (afid)
+		p9_fid_destroy(afid);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_auth);
@@ -803,15 +878,13 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 	int clone)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
 	struct p9_fid *fid;
+	struct p9_qid *wqids;
+	struct p9_req_t *req;
+	int16_t nwqids, count;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d nwname %d wname[0] %s\n",
-		oldfid->fid, nwname, wnames?wnames[0]:NULL);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = oldfid->clnt;
 	if (clone) {
 		fid = p9_fid_create(clnt);
@@ -825,53 +898,46 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 	} else
 		fid = oldfid;
 
-	tc = p9_create_twalk(oldfid->fid, fid->fid, nwname, wnames);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n",
+		oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
+
+	req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
+								nwname, wnames);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err) {
-		if (rc && rc->id == P9_RWALK)
-			goto clunk_fid;
-		else
-			goto error;
-	}
+	err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids);
+	p9_free_req(clnt, req);
+	if (err)
+		goto clunk_fid;
 
-	if (rc->params.rwalk.nwqid != nwname) {
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
+
+	if (nwqids != nwname) {
 		err = -ENOENT;
 		goto clunk_fid;
 	}
 
+	for (count = 0; count < nwqids; count++)
+		P9_DPRINTK(P9_DEBUG_9P, "<<<     [%d] %x.%llx.%x\n",
+			count, wqids[count].type, wqids[count].path,
+			wqids[count].version);
+
 	if (nwname)
-		memmove(&fid->qid,
-			&rc->params.rwalk.wqids[rc->params.rwalk.nwqid - 1],
-			sizeof(struct p9_qid));
+		memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid));
 	else
 		fid->qid = oldfid->qid;
 
-	kfree(tc);
-	kfree(rc);
 	return fid;
 
 clunk_fid:
-	kfree(tc);
-	kfree(rc);
-	rc = NULL;
-	tc = p9_create_tclunk(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto error;
-	}
-
-	p9_client_rpc(clnt, tc, &rc);
+	p9_client_clunk(fid);
+	fid = NULL;
 
 error:
-	kfree(tc);
-	kfree(rc);
 	if (fid && (fid != oldfid))
 		p9_fid_destroy(fid);
 
@@ -882,35 +948,36 @@ EXPORT_SYMBOL(p9_client_walk);
 int p9_client_open(struct p9_fid *fid, int mode)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
+	struct p9_qid qid;
+	int iounit;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d mode %d\n", fid->fid, mode);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TOPEN fid %d mode %d\n", fid->fid, mode);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
 	if (fid->mode != -1)
 		return -EINVAL;
 
-	tc = p9_create_topen(fid->fid, mode);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
+	err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
+	p9_free_req(clnt, req);
 	if (err)
-		goto done;
+		goto error;
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n",
+				qid.type, qid.path, qid.version, iounit);
 
 	fid->mode = mode;
-	fid->iounit = rc->params.ropen.iounit;
+	fid->iounit = iounit;
 
-done:
-	kfree(tc);
-	kfree(rc);
+error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_open);
@@ -919,37 +986,38 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 		     char *extension)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
+	struct p9_qid qid;
+	int iounit;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d name %s perm %d mode %d\n", fid->fid,
-		name, perm, mode);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
+						fid->fid, name, perm, mode);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
 	if (fid->mode != -1)
 		return -EINVAL;
 
-	tc = p9_create_tcreate(fid->fid, name, perm, mode, extension,
-							       clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm,
+				mode, extension);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
+	err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
+	p9_free_req(clnt, req);
 	if (err)
-		goto done;
+		goto error;
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
+				qid.type, qid.path, qid.version, iounit);
 
 	fid->mode = mode;
-	fid->iounit = rc->params.ropen.iounit;
+	fid->iounit = iounit;
 
-done:
-	kfree(tc);
-	kfree(rc);
+error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_fcreate);
@@ -957,31 +1025,25 @@ EXPORT_SYMBOL(p9_client_fcreate);
 int p9_client_clunk(struct p9_fid *fid)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_tclunk(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto done;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);
 
+	p9_free_req(clnt, req);
 	p9_fid_destroy(fid);
 
-done:
-	kfree(tc);
-	kfree(rc);
+error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_clunk);
@@ -989,31 +1051,25 @@ EXPORT_SYMBOL(p9_client_clunk);
 int p9_client_remove(struct p9_fid *fid)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_tremove(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto done;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
 
+	p9_free_req(clnt, req);
 	p9_fid_destroy(fid);
 
-done:
-	kfree(tc);
-	kfree(rc);
+error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_remove);
@@ -1022,15 +1078,14 @@ int
 p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 								u32 count)
 {
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
+	int err, rsize, total;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
+	char *dataptr;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu %d\n", fid->fid,
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid,
 					(long long unsigned) offset, count);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 	total = 0;
 
@@ -1038,53 +1093,40 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
 		rsize = clnt->msize - P9_IOHDRSZ;
 
-	do {
-		if (count < rsize)
-			rsize = count;
+	if (count < rsize)
+		rsize = count;
 
-		tc = p9_create_tread(fid->fid, offset, rsize);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
-		}
+	req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
 
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
+	err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr);
+	if (err)
+		goto free_and_error;
 
-		n = rc->params.rread.count;
-		if (n > count)
-			n = count;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
 
-		if (data) {
-			memmove(data, rc->params.rread.data, n);
-			data += n;
-		}
+	if (data) {
+		memmove(data, dataptr, count);
+		data += count;
+	}
 
-		if (udata) {
-			err = copy_to_user(udata, rc->params.rread.data, n);
-			if (err) {
-				err = -EFAULT;
-				goto error;
-			}
-			udata += n;
+	if (udata) {
+		err = copy_to_user(udata, dataptr, count);
+		if (err) {
+			err = -EFAULT;
+			goto free_and_error;
 		}
+	}
 
-		count -= n;
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0 && n == rsize);
-
-	return total;
+	p9_free_req(clnt, req);
+	return count;
 
+free_and_error:
+	p9_free_req(clnt, req);
 error:
-	kfree(tc);
-	kfree(rc);
 	return err;
 }
 EXPORT_SYMBOL(p9_client_read);
@@ -1093,15 +1135,13 @@ int
 p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 							u64 offset, u32 count)
 {
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
+	int err, rsize, total;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
-					(long long unsigned) offset, count);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n",
+				fid->fid, (long long unsigned) offset, count);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 	total = 0;
 
@@ -1109,129 +1149,70 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
 		rsize = clnt->msize - P9_IOHDRSZ;
 
-	do {
-		if (count < rsize)
-			rsize = count;
-
-		if (data)
-			tc = p9_create_twrite(fid->fid, offset, rsize, data);
-		else
-			tc = p9_create_twrite_u(fid->fid, offset, rsize, udata);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
-		}
-
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
-
-		n = rc->params.rread.count;
-		count -= n;
-
-		if (data)
-			data += n;
-		else
-			udata += n;
+	if (count < rsize)
+		rsize = count;
+	if (data)
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset,
+								rsize, data);
+	else
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset,
+								rsize, udata);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
 
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0);
+	err = p9pdu_readf(req->rc, clnt->dotu, "d", &count);
+	if (err)
+		goto free_and_error;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
 
-	return total;
+	p9_free_req(clnt, req);
+	return count;
 
+free_and_error:
+	p9_free_req(clnt, req);
 error:
-	kfree(tc);
-	kfree(rc);
 	return err;
 }
 EXPORT_SYMBOL(p9_client_write);
 
-static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu)
+struct p9_wstat *p9_client_stat(struct p9_fid *fid)
 {
-	int n;
-	char *p;
-	struct p9_stat *ret;
-
-	n = sizeof(struct p9_stat) + st->name.len + st->uid.len + st->gid.len +
-		st->muid.len;
+	int err;
+	struct p9_client *clnt;
+	struct p9_wstat *ret = kmalloc(sizeof(struct p9_wstat), GFP_KERNEL);
+	struct p9_req_t *req;
+	u16 ignored;
 
-	if (dotu)
-		n += st->extension.len;
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid);
 
-	ret = kmalloc(n, GFP_KERNEL);
 	if (!ret)
 		return ERR_PTR(-ENOMEM);
 
-	memmove(ret, st, sizeof(struct p9_stat));
-	p = ((char *) ret) + sizeof(struct p9_stat);
-	memmove(p, st->name.str, st->name.len);
-	ret->name.str = p;
-	p += st->name.len;
-	memmove(p, st->uid.str, st->uid.len);
-	ret->uid.str = p;
-	p += st->uid.len;
-	memmove(p, st->gid.str, st->gid.len);
-	ret->gid.str = p;
-	p += st->gid.len;
-	memmove(p, st->muid.str, st->muid.len);
-	ret->muid.str = p;
-	p += st->muid.len;
-
-	if (dotu) {
-		memmove(p, st->extension.str, st->extension.len);
-		ret->extension.str = p;
-		p += st->extension.len;
-	}
-
-	return ret;
-}
-
-struct p9_stat *p9_client_stat(struct p9_fid *fid)
-{
-	int err;
-	struct p9_fcall *tc, *rc;
-	struct p9_client *clnt;
-	struct p9_stat *ret;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
-	ret = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_tstat(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+	req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
+	err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret);
+	p9_free_req(clnt, req);
 	if (err)
 		goto error;
 
-	ret = p9_clone_stat(&rc->params.rstat.stat, clnt->dotu);
-	if (IS_ERR(ret)) {
-		err = PTR_ERR(ret);
-		ret = NULL;
-		goto error;
-	}
+	P9_DPRINTK(P9_DEBUG_9P,
+		"<<< RSTAT sz=%x type=%x dev=%x qid=%2.2x %4.4x %8.8llx"
+		" mode=%8.8x uid=%d gid=%d size=%lld %s\n",
+		ret->size, ret->type, ret->dev, ret->qid.type,
+		ret->qid.version, ret->qid.path, ret->mode,
+		ret->n_uid, ret->n_gid, ret->length, ret->name);
 
-	kfree(tc);
-	kfree(rc);
 	return ret;
-
 error:
-	kfree(tc);
-	kfree(rc);
-	kfree(ret);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_stat);
@@ -1239,27 +1220,23 @@ EXPORT_SYMBOL(p9_client_stat);
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
+	struct p9_req_t *req;
 	struct p9_client *clnt;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_twstat(fid->fid, wst, clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);
 
-done:
-	kfree(tc);
-	kfree(rc);
+	p9_free_req(clnt, req);
+error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_wstat);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 43e98220e9a4..4ebeffd21d3d 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -27,6 +27,7 @@
 
 #include <linux/module.h>
 #include <linux/errno.h>
+#include <linux/uaccess.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include "protocol.h"
@@ -51,6 +52,38 @@
 static int
 p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...);
 
+#define PACKET_DEBUG 0
+
+void
+p9pdu_dump(int way, struct p9_fcall *pdu)
+{
+	int i, n;
+	u8 *data = pdu->sdata;
+	int datalen = pdu->size;
+	char buf[255];
+	int buflen = 255;
+
+	i = n = 0;
+	if (datalen > (buflen-16))
+		datalen = buflen-16;
+	while (i < datalen) {
+		n += scnprintf(buf + n, buflen - n, "%02x ", data[i]);
+		if (i%4 == 3)
+			n += scnprintf(buf + n, buflen - n, " ");
+		if (i%32 == 31)
+			n += scnprintf(buf + n, buflen - n, "\n");
+
+		i++;
+	}
+	n += scnprintf(buf + n, buflen - n, "\n");
+
+	if (way)
+		printk(KERN_NOTICE "[[(%d)[ %s\n", datalen, buf);
+	else
+		printk(KERN_NOTICE "]](%d)] %s\n", datalen, buf);
+}
+EXPORT_SYMBOL(p9pdu_dump);
+
 void p9stat_free(struct p9_wstat *stbuf)
 {
 	kfree(stbuf->name);
@@ -77,6 +110,18 @@ static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
 	return size - len;
 }
 
+static size_t
+pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
+{
+	size_t len = MIN(pdu->capacity - pdu->size, size);
+	int err = copy_from_user(&pdu->sdata[pdu->size], udata, len);
+	if (err)
+		printk(KERN_WARNING "pdu_write_u returning: %d\n", err);
+
+	pdu->size += len;
+	return size - len;
+}
+
 /*
 	b - int8_t
 	w - int16_t
@@ -174,7 +219,6 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
 				stbuf->extension = NULL;
 				stbuf->n_uid = stbuf->n_gid = stbuf->n_muid =
 				    -1;
-
 				errcode =
 				    p9pdu_readf(pdu, optional,
 						"wwdQdddqssss?sddd",
@@ -332,7 +376,6 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
 		case 's':{
 				const char *ptr = va_arg(ap, const char *);
 				int16_t len = 0;
-
 				if (ptr)
 					len = MIN(strlen(ptr), USHORT_MAX);
 
@@ -356,7 +399,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
 				    p9pdu_writef(pdu, optional,
 						 "wwdQdddqssss?sddd",
 						 stbuf->size, stbuf->type,
-						 stbuf->dev, stbuf->qid,
+						 stbuf->dev, &stbuf->qid,
 						 stbuf->mode, stbuf->atime,
 						 stbuf->mtime, stbuf->length,
 						 stbuf->name, stbuf->uid,
@@ -374,6 +417,16 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
 					errcode = -EFAULT;
 			}
 			break;
+		case 'U':{
+				int32_t count = va_arg(ap, int32_t);
+				const char __user *udata =
+						va_arg(ap, const void *);
+				errcode =
+				    p9pdu_writef(pdu, optional, "d", count);
+				if (!errcode && pdu_write_u(pdu, udata, count))
+					errcode = -EFAULT;
+			}
+			break;
 		case 'T':{
 				int16_t nwname = va_arg(ap, int);
 				const char **wnames = va_arg(ap, const char **);
@@ -455,3 +508,29 @@ p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...)
 
 	return ret;
 }
+
+int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
+{
+	return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
+}
+
+int p9pdu_finalize(struct p9_fcall *pdu)
+{
+	int size = pdu->size;
+	int err;
+
+	pdu->size = 0;
+	err = p9pdu_writef(pdu, 0, "d", size);
+	pdu->size = size;
+
+	if (PACKET_DEBUG)
+		p9pdu_dump(0, pdu);
+
+	return err;
+}
+
+void p9pdu_reset(struct p9_fcall *pdu)
+{
+	pdu->offset = 0;
+	pdu->size = 0;
+}
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
index 596ee10d506f..ccde462e7ac5 100644
--- a/net/9p/protocol.h
+++ b/net/9p/protocol.h
@@ -27,5 +27,8 @@
 
 int
 p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap);
-
 int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...);
+int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
+int p9pdu_finalize(struct p9_fcall *pdu);
+void p9pdu_dump(int, struct p9_fcall *);
+void p9pdu_reset(struct p9_fcall *pdu);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index e147ec539585..e8ebe2cb7e8b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -181,7 +181,7 @@ static void p9_mux_poll_stop(struct p9_conn *m)
  *
  */
 
-void p9_conn_cancel(struct p9_conn *m, int err)
+static void p9_conn_cancel(struct p9_conn *m, int err)
 {
 	struct p9_req_t *req, *rtmp;
 	unsigned long flags;
@@ -287,7 +287,7 @@ static void p9_read_work(struct work_struct *work)
 	if (m->err < 0)
 		return;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
+	P9_DPRINTK(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
 
 	if (!m->rbuf) {
 		m->rbuf = m->tmp_buf;
@@ -296,11 +296,11 @@ static void p9_read_work(struct work_struct *work)
 	}
 
 	clear_bit(Rpending, &m->wsched);
-	P9_DPRINTK(P9_DEBUG_MUX, "read mux %p pos %d size: %d = %d\n", m,
+	P9_DPRINTK(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", m,
 					m->rpos, m->rsize, m->rsize-m->rpos);
 	err = p9_fd_read(m->client, m->rbuf + m->rpos,
 						m->rsize - m->rpos);
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err);
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
 	if (err == -EAGAIN) {
 		clear_bit(Rworksched, &m->wsched);
 		return;
@@ -313,7 +313,7 @@ static void p9_read_work(struct work_struct *work)
 
 	if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
 		u16 tag;
-		P9_DPRINTK(P9_DEBUG_MUX, "got new header\n");
+		P9_DPRINTK(P9_DEBUG_TRANS, "got new header\n");
 
 		n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
 		if (n >= m->client->msize) {
@@ -324,8 +324,8 @@ static void p9_read_work(struct work_struct *work)
 		}
 
 		tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p pkt: size: %d bytes tag: %d\n",
-								 m, n, tag);
+		P9_DPRINTK(P9_DEBUG_TRANS,
+			"mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
 
 		m->req = p9_tag_lookup(m->client, tag);
 		if (!m->req) {
@@ -351,7 +351,7 @@ static void p9_read_work(struct work_struct *work)
 
 	/* not an else because some packets (like clunk) have no payload */
 	if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
-		P9_DPRINTK(P9_DEBUG_MUX, "got new packet\n");
+		P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n");
 
 		list_del(&m->req->req_list);
 		p9_client_cb(m->client, m->req);
@@ -369,7 +369,7 @@ static void p9_read_work(struct work_struct *work)
 			n = p9_fd_poll(m->client, NULL);
 
 		if (n & POLLIN) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m);
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
 			queue_work(p9_mux_wq, &m->rq);
 		} else
 			clear_bit(Rworksched, &m->wsched);
@@ -453,11 +453,11 @@ static void p9_write_work(struct work_struct *work)
 		spin_unlock(&m->client->lock);
 	}
 
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos,
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos,
 								m->wsize);
 	clear_bit(Wpending, &m->wsched);
 	err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
 	if (err == -EAGAIN) {
 		clear_bit(Wworksched, &m->wsched);
 		return;
@@ -481,7 +481,7 @@ static void p9_write_work(struct work_struct *work)
 			n = p9_fd_poll(m->client, NULL);
 
 		if (n & POLLOUT) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m);
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
 			queue_work(p9_mux_wq, &m->wq);
 		} else
 			clear_bit(Wworksched, &m->wsched);
@@ -558,7 +558,8 @@ static struct p9_conn *p9_conn_create(struct p9_client *client)
 	int n;
 	struct p9_conn *m;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "client %p msize %d\n", client, client->msize);
+	P9_DPRINTK(P9_DEBUG_TRANS, "client %p msize %d\n", client,
+								client->msize);
 	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
 	if (!m)
 		return ERR_PTR(-ENOMEM);
@@ -575,12 +576,12 @@ static struct p9_conn *p9_conn_create(struct p9_client *client)
 
 	n = p9_fd_poll(client, &m->pt);
 	if (n & POLLIN) {
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m);
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
 		set_bit(Rpending, &m->wsched);
 	}
 
 	if (n & POLLOUT) {
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m);
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m);
 		set_bit(Wpending, &m->wsched);
 	}
 
@@ -602,7 +603,7 @@ static void p9_poll_mux(struct p9_conn *m)
 
 	n = p9_fd_poll(m->client, NULL);
 	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
-		P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n);
+		P9_DPRINTK(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
 		if (n >= 0)
 			n = -ECONNRESET;
 		p9_conn_cancel(m, n);
@@ -610,19 +611,19 @@ static void p9_poll_mux(struct p9_conn *m)
 
 	if (n & POLLIN) {
 		set_bit(Rpending, &m->wsched);
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m);
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
 		if (!test_and_set_bit(Rworksched, &m->wsched)) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m);
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
 			queue_work(p9_mux_wq, &m->rq);
 		}
 	}
 
 	if (n & POLLOUT) {
 		set_bit(Wpending, &m->wsched);
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m);
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m);
 		if ((m->wsize || !list_empty(&m->unsent_req_list))
 		    && !test_and_set_bit(Wworksched, &m->wsched)) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m);
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
 			queue_work(p9_mux_wq, &m->wq);
 		}
 	}
@@ -645,8 +646,8 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 	struct p9_trans_fd *ts = client->trans;
 	struct p9_conn *m = ts->conn;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
-		req->tc, req->tc->id);
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m,
+						current, req->tc, req->tc->id);
 	if (m->err < 0)
 		return m->err;
 
@@ -672,19 +673,12 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 	struct p9_trans_fd *ts = client->trans;
 	struct p9_conn *m = ts->conn;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p\n", m, req);
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p req %p\n", m, req);
 
 	spin_lock(&client->lock);
 	list_del(&req->req_list);
 	spin_unlock(&client->lock);
 
-	/* if a response was received for a request, do nothing */
-	if (req->rc || req->t_err) {
-		P9_DPRINTK(P9_DEBUG_MUX,
-			"mux %p req %p response already received\n", m, req);
-		return 0;
-	}
-
 	if (req->status == REQ_STATUS_UNSENT) {
 		req->status = REQ_STATUS_FLSHD;
 		return 0;
@@ -809,7 +803,7 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 
 static void p9_conn_destroy(struct p9_conn *m)
 {
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m,
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m,
 		m->mux_list.prev, m->mux_list.next);
 
 	p9_mux_poll_stop(m);
@@ -1060,7 +1054,7 @@ static int p9_poll_proc(void *a)
 {
 	unsigned long flags;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "start %p\n", current);
+	P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current);
  repeat:
 	spin_lock_irqsave(&p9_poll_lock, flags);
 	while (!list_empty(&p9_poll_pending_list)) {
@@ -1078,7 +1072,7 @@ static int p9_poll_proc(void *a)
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	if (list_empty(&p9_poll_pending_list)) {
-		P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n");
+		P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n");
 		schedule();
 	}
 	__set_current_state(TASK_RUNNING);
@@ -1086,7 +1080,7 @@ static int p9_poll_proc(void *a)
 	if (!kthread_should_stop())
 		goto repeat;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "finish\n");
+	P9_DPRINTK(P9_DEBUG_TRANS, "finish\n");
 	return 0;
 }
 
diff --git a/net/9p/util.c b/net/9p/util.c
index 958fc58cd1ff..dc4ec05ad93d 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -105,6 +105,7 @@ retry:
 	else if (error)
 		return -1;
 
+	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p);
 	return i;
 }
 EXPORT_SYMBOL(p9_idpool_get);
@@ -121,6 +122,9 @@ EXPORT_SYMBOL(p9_idpool_get);
 void p9_idpool_put(int id, struct p9_idpool *p)
 {
 	unsigned long flags;
+
+	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", id, p);
+
 	spin_lock_irqsave(&p->lock, flags);
 	idr_remove(&p->pool, id);
 	spin_unlock_irqrestore(&p->lock, flags);
-- 
GitLab


From 02da398b950c5d079c20afaa23f322383e96070a Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Thu, 16 Oct 2008 08:29:30 -0500
Subject: [PATCH 565/895] 9p: eliminate depricated conv functions

Remove depricated conv functions which have been replaced with new
protocol routines.

This patch also reworks the one instance of the file-system code which
directly calls conversion routines (to accomplish unpacking dirreads).

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_dir.c         |   20 +-
 include/net/9p/9p.h     |  107 +---
 include/net/9p/client.h |    2 +
 net/9p/Makefile         |    1 -
 net/9p/conv.c           | 1054 ---------------------------------------
 net/9p/protocol.c       |   13 +
 6 files changed, 37 insertions(+), 1160 deletions(-)
 delete mode 100644 net/9p/conv.c

diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 276aed625929..873cd31baa47 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -45,7 +45,7 @@
  *
  */
 
-static inline int dt_type(struct p9_stat *mistat)
+static inline int dt_type(struct p9_wstat *mistat)
 {
 	unsigned long perm = mistat->mode;
 	int rettype = DT_REG;
@@ -69,7 +69,7 @@ static inline int dt_type(struct p9_stat *mistat)
 static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	int over;
-	struct p9_stat st;
+	struct p9_wstat st;
 	int err;
 	struct p9_fid *fid;
 	int buflen;
@@ -92,20 +92,24 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 		n = err;
 		while (i < n) {
-			err = p9_deserialize_stat(statbuf + i, buflen-i, &st,
+			err = p9stat_read(statbuf + i, buflen-i, &st,
 							fid->clnt->dotu);
-			if (!err) {
+			if (err) {
+				P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
 				err = -EIO;
+				p9stat_free(&st);
 				goto free_and_exit;
 			}
 
-			i += err;
-			fid->rdir_fpos += err;
+			i += st.size+2;
+			fid->rdir_fpos += st.size+2;
 
-			over = filldir(dirent, st.name.str, st.name.len,
+			over = filldir(dirent, st.name, strlen(st.name),
 			    filp->f_pos, v9fs_qid2ino(&st.qid), dt_type(&st));
 
-			filp->f_pos += st.size;
+			filp->f_pos += st.size+2;
+
+			p9stat_free(&st);
 
 			if (over) {
 				err = 0;
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 56c15aee6e61..cb5bc731e885 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -61,21 +61,18 @@ extern unsigned int p9_debug_level;
 
 #define P9_DPRINTK(level, format, arg...) \
 do {  \
-	if (level == P9_DEBUG_9P) \
-		printk(KERN_NOTICE "(%8.8d) " \
-		format , task_pid_nr(current) , ## arg); \
-	else if ((p9_debug_level & level) == level) \
-		printk(KERN_NOTICE "-- %s (%d): " \
-		format , __func__, task_pid_nr(current) , ## arg); \
+	if ((p9_debug_level & level) == level) {\
+		if (level == P9_DEBUG_9P) \
+			printk(KERN_NOTICE "(%8.8d) " \
+			format , task_pid_nr(current) , ## arg); \
+		else \
+			printk(KERN_NOTICE "-- %s (%d): " \
+			format , __func__, task_pid_nr(current) , ## arg); \
+	} \
 } while (0)
 
-#define PRINT_FCALL_ERROR(s, fcall) P9_DPRINTK(P9_DEBUG_ERROR,   \
-	"%s: %.*s\n", s, fcall?fcall->params.rerror.error.len:0, \
-	fcall?fcall->params.rerror.error.str:"");
-
 #else
 #define P9_DPRINTK(level, format, arg...)  do { } while (0)
-#define PRINT_FCALL_ERROR(s, fcall) do { } while (0)
 #endif
 
 #define P9_EPRINTK(level, format, arg...) \
@@ -330,33 +327,6 @@ struct p9_qid {
  * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat
  */
 
-struct p9_stat {
-	u16 size;
-	u16 type;
-	u32 dev;
-	struct p9_qid qid;
-	u32 mode;
-	u32 atime;
-	u32 mtime;
-	u64 length;
-	struct p9_str name;
-	struct p9_str uid;
-	struct p9_str gid;
-	struct p9_str muid;
-	struct p9_str extension;	/* 9p2000.u extensions */
-	u32 n_uid;			/* 9p2000.u extensions */
-	u32 n_gid;			/* 9p2000.u extensions */
-	u32 n_muid;			/* 9p2000.u extensions */
-};
-
-/*
- * file metadata (stat) structure used to create Twstat message
- * The is identical to &p9_stat, but the strings don't point to
- * the same memory block and should be freed separately
- *
- * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat
- */
-
 struct p9_wstat {
 	u16 size;
 	u16 type;
@@ -498,12 +468,12 @@ struct p9_tstat {
 };
 
 struct p9_rstat {
-	struct p9_stat stat;
+	struct p9_wstat stat;
 };
 
 struct p9_twstat {
 	u32 fid;
-	struct p9_stat stat;
+	struct p9_wstat stat;
 };
 
 struct p9_rwstat {
@@ -517,7 +487,6 @@ struct p9_rwstat {
  * @offset: used by marshalling routines to track currentposition in buffer
  * @capacity: used by marshalling routines to track total capacity
  * @sdata: payload
- * @params: per-operation parameters
  *
  * &p9_fcall represents the structure for all 9P RPC
  * transactions.  Requests are packaged into fcalls, and reponses
@@ -535,66 +504,10 @@ struct p9_fcall {
 	size_t capacity;
 
 	uint8_t *sdata;
-
-	union {
-		struct p9_tversion tversion;
-		struct p9_rversion rversion;
-		struct p9_tauth tauth;
-		struct p9_rauth rauth;
-		struct p9_rerror rerror;
-		struct p9_tflush tflush;
-		struct p9_rflush rflush;
-		struct p9_tattach tattach;
-		struct p9_rattach rattach;
-		struct p9_twalk twalk;
-		struct p9_rwalk rwalk;
-		struct p9_topen topen;
-		struct p9_ropen ropen;
-		struct p9_tcreate tcreate;
-		struct p9_rcreate rcreate;
-		struct p9_tread tread;
-		struct p9_rread rread;
-		struct p9_twrite twrite;
-		struct p9_rwrite rwrite;
-		struct p9_tclunk tclunk;
-		struct p9_rclunk rclunk;
-		struct p9_tremove tremove;
-		struct p9_rremove rremove;
-		struct p9_tstat tstat;
-		struct p9_rstat rstat;
-		struct p9_twstat twstat;
-		struct p9_rwstat rwstat;
-	} params;
 };
 
 struct p9_idpool;
 
-int p9_deserialize_stat(void *buf, u32 buflen, struct p9_stat *stat,
-	int dotu);
-int p9_deserialize_fcall(void *buf, u32 buflen, struct p9_fcall *fc, int dotu);
-void p9_set_tag(struct p9_fcall *fc, u16 tag);
-struct p9_fcall *p9_create_tversion(u32 msize, char *version);
-struct p9_fcall *p9_create_tattach(u32 fid, u32 afid, char *uname,
-	char *aname, u32 n_uname, int dotu);
-struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname,
-	u32 n_uname, int dotu);
-struct p9_fcall *p9_create_tflush(u16 oldtag);
-struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname,
-	char **wnames);
-struct p9_fcall *p9_create_topen(u32 fid, u8 mode);
-struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
-	char *extension, int dotu);
-struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count);
-struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count,
-	const char *data);
-struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count,
-	const char __user *data);
-struct p9_fcall *p9_create_tclunk(u32 fid);
-struct p9_fcall *p9_create_tremove(u32 fid);
-struct p9_fcall *p9_create_tstat(u32 fid);
-struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat,
-	int dotu);
-
 int p9_errstr2errno(char *errstr, int len);
 
 struct p9_idpool *p9_idpool_create(void);
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 1e49b4d1030b..1f17f3d93727 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -206,6 +206,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst);
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
 
+int p9stat_read(char *, int, struct p9_wstat *, int);
 void p9stat_free(struct p9_wstat *);
 
+
 #endif /* NET_9P_CLIENT_H */
diff --git a/net/9p/Makefile b/net/9p/Makefile
index c3302d88b808..1041b7bd12e2 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -4,7 +4,6 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
 9pnet-objs := \
 	mod.o \
 	client.o \
-	conv.o \
 	error.o \
 	util.o \
 	protocol.o \
diff --git a/net/9p/conv.c b/net/9p/conv.c
deleted file mode 100644
index 5ad3a3bd73b2..000000000000
--- a/net/9p/conv.c
+++ /dev/null
@@ -1,1054 +0,0 @@
-/*
- * net/9p/conv.c
- *
- * 9P protocol conversion functions
- *
- *  Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/idr.h>
-#include <linux/uaccess.h>
-#include <net/9p/9p.h>
-
-/*
- * Buffer to help with string parsing
- */
-struct cbuf {
-	unsigned char *sp;
-	unsigned char *p;
-	unsigned char *ep;
-};
-
-static inline void buf_init(struct cbuf *buf, void *data, int datalen)
-{
-	buf->sp = buf->p = data;
-	buf->ep = data + datalen;
-}
-
-static inline int buf_check_overflow(struct cbuf *buf)
-{
-	return buf->p > buf->ep;
-}
-
-static int buf_check_size(struct cbuf *buf, int len)
-{
-	if (buf->p + len > buf->ep) {
-		if (buf->p < buf->ep) {
-			P9_EPRINTK(KERN_ERR,
-				"buffer overflow: want %d has %d\n", len,
-				(int)(buf->ep - buf->p));
-			dump_stack();
-			buf->p = buf->ep + 1;
-		}
-
-		return 0;
-	}
-
-	return 1;
-}
-
-static void *buf_alloc(struct cbuf *buf, int len)
-{
-	void *ret = NULL;
-
-	if (buf_check_size(buf, len)) {
-		ret = buf->p;
-		buf->p += len;
-	}
-
-	return ret;
-}
-
-static void buf_put_int8(struct cbuf *buf, u8 val)
-{
-	if (buf_check_size(buf, 1)) {
-		buf->p[0] = val;
-		buf->p++;
-	}
-}
-
-static void buf_put_int16(struct cbuf *buf, u16 val)
-{
-	if (buf_check_size(buf, 2)) {
-		*(__le16 *) buf->p = cpu_to_le16(val);
-		buf->p += 2;
-	}
-}
-
-static void buf_put_int32(struct cbuf *buf, u32 val)
-{
-	if (buf_check_size(buf, 4)) {
-		*(__le32 *)buf->p = cpu_to_le32(val);
-		buf->p += 4;
-	}
-}
-
-static void buf_put_int64(struct cbuf *buf, u64 val)
-{
-	if (buf_check_size(buf, 8)) {
-		*(__le64 *)buf->p = cpu_to_le64(val);
-		buf->p += 8;
-	}
-}
-
-static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
-{
-	char *ret;
-
-	ret = NULL;
-	if (buf_check_size(buf, slen + 2)) {
-		buf_put_int16(buf, slen);
-		ret = buf->p;
-		memcpy(buf->p, s, slen);
-		buf->p += slen;
-	}
-
-	return ret;
-}
-
-static u8 buf_get_int8(struct cbuf *buf)
-{
-	u8 ret = 0;
-
-	if (buf_check_size(buf, 1)) {
-		ret = buf->p[0];
-		buf->p++;
-	}
-
-	return ret;
-}
-
-static u16 buf_get_int16(struct cbuf *buf)
-{
-	u16 ret = 0;
-
-	if (buf_check_size(buf, 2)) {
-		ret = le16_to_cpu(*(__le16 *)buf->p);
-		buf->p += 2;
-	}
-
-	return ret;
-}
-
-static u32 buf_get_int32(struct cbuf *buf)
-{
-	u32 ret = 0;
-
-	if (buf_check_size(buf, 4)) {
-		ret = le32_to_cpu(*(__le32 *)buf->p);
-		buf->p += 4;
-	}
-
-	return ret;
-}
-
-static u64 buf_get_int64(struct cbuf *buf)
-{
-	u64 ret = 0;
-
-	if (buf_check_size(buf, 8)) {
-		ret = le64_to_cpu(*(__le64 *)buf->p);
-		buf->p += 8;
-	}
-
-	return ret;
-}
-
-static void buf_get_str(struct cbuf *buf, struct p9_str *vstr)
-{
-	vstr->len = buf_get_int16(buf);
-	if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
-		vstr->str = buf->p;
-		buf->p += vstr->len;
-	} else {
-		vstr->len = 0;
-		vstr->str = NULL;
-	}
-}
-
-static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid)
-{
-	qid->type = buf_get_int8(bufp);
-	qid->version = buf_get_int32(bufp);
-	qid->path = buf_get_int64(bufp);
-}
-
-/**
- * p9_size_wstat - calculate the size of a variable length stat struct
- * @wstat: metadata (stat) structure
- * @dotu: non-zero if 9P2000.u
- *
- */
-
-static int p9_size_wstat(struct p9_wstat *wstat, int dotu)
-{
-	int size = 0;
-
-	if (wstat == NULL) {
-		P9_EPRINTK(KERN_ERR, "p9_size_stat: got a NULL stat pointer\n");
-		return 0;
-	}
-
-	size =			/* 2 + *//* size[2] */
-	    2 +			/* type[2] */
-	    4 +			/* dev[4] */
-	    1 +			/* qid.type[1] */
-	    4 +			/* qid.vers[4] */
-	    8 +			/* qid.path[8] */
-	    4 +			/* mode[4] */
-	    4 +			/* atime[4] */
-	    4 +			/* mtime[4] */
-	    8 +			/* length[8] */
-	    8;			/* minimum sum of string lengths */
-
-	if (wstat->name)
-		size += strlen(wstat->name);
-	if (wstat->uid)
-		size += strlen(wstat->uid);
-	if (wstat->gid)
-		size += strlen(wstat->gid);
-	if (wstat->muid)
-		size += strlen(wstat->muid);
-
-	if (dotu) {
-		size += 4 +	/* n_uid[4] */
-		    4 +		/* n_gid[4] */
-		    4 +		/* n_muid[4] */
-		    2;		/* string length of extension[4] */
-		if (wstat->extension)
-			size += strlen(wstat->extension);
-	}
-
-	return size;
-}
-
-/**
- * buf_get_stat - safely decode a recieved metadata (stat) structure
- * @bufp: buffer to deserialize
- * @stat: metadata (stat) structure
- * @dotu: non-zero if 9P2000.u
- *
- */
-
-static void
-buf_get_stat(struct cbuf *bufp, struct p9_stat *stat, int dotu)
-{
-	stat->size = buf_get_int16(bufp);
-	stat->type = buf_get_int16(bufp);
-	stat->dev = buf_get_int32(bufp);
-	stat->qid.type = buf_get_int8(bufp);
-	stat->qid.version = buf_get_int32(bufp);
-	stat->qid.path = buf_get_int64(bufp);
-	stat->mode = buf_get_int32(bufp);
-	stat->atime = buf_get_int32(bufp);
-	stat->mtime = buf_get_int32(bufp);
-	stat->length = buf_get_int64(bufp);
-	buf_get_str(bufp, &stat->name);
-	buf_get_str(bufp, &stat->uid);
-	buf_get_str(bufp, &stat->gid);
-	buf_get_str(bufp, &stat->muid);
-
-	if (dotu) {
-		buf_get_str(bufp, &stat->extension);
-		stat->n_uid = buf_get_int32(bufp);
-		stat->n_gid = buf_get_int32(bufp);
-		stat->n_muid = buf_get_int32(bufp);
-	}
-}
-
-/**
- * p9_deserialize_stat - decode a received metadata structure
- * @buf: buffer to deserialize
- * @buflen: length of received buffer
- * @stat: metadata structure to decode into
- * @dotu: non-zero if 9P2000.u
- *
- * Note: stat will point to the buf region.
- */
-
-int
-p9_deserialize_stat(void *buf, u32 buflen, struct p9_stat *stat,
-		int dotu)
-{
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-	unsigned char *p;
-
-	buf_init(bufp, buf, buflen);
-	p = bufp->p;
-	buf_get_stat(bufp, stat, dotu);
-
-	if (buf_check_overflow(bufp))
-		return 0;
-	else
-		return bufp->p - p;
-}
-EXPORT_SYMBOL(p9_deserialize_stat);
-
-/**
- * deserialize_fcall - unmarshal a response
- * @buf: recieved buffer
- * @buflen: length of received buffer
- * @rcall: fcall structure to populate
- * @rcalllen: length of fcall structure to populate
- * @dotu: non-zero if 9P2000.u
- *
- */
-
-int
-p9_deserialize_fcall(void *buf, u32 buflen, struct p9_fcall *rcall,
-		       int dotu)
-{
-
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-	int i = 0;
-
-	buf_init(bufp, buf, buflen);
-
-	rcall->size = buf_get_int32(bufp);
-	rcall->id = buf_get_int8(bufp);
-	rcall->tag = buf_get_int16(bufp);
-
-	P9_DPRINTK(P9_DEBUG_CONV, "size %d id %d tag %d\n", rcall->size,
-							rcall->id, rcall->tag);
-
-	switch (rcall->id) {
-	default:
-		P9_EPRINTK(KERN_ERR, "unknown message type: %d\n", rcall->id);
-		return -EPROTO;
-	case P9_RVERSION:
-		rcall->params.rversion.msize = buf_get_int32(bufp);
-		buf_get_str(bufp, &rcall->params.rversion.version);
-		break;
-	case P9_RFLUSH:
-		break;
-	case P9_RATTACH:
-		rcall->params.rattach.qid.type = buf_get_int8(bufp);
-		rcall->params.rattach.qid.version = buf_get_int32(bufp);
-		rcall->params.rattach.qid.path = buf_get_int64(bufp);
-		break;
-	case P9_RWALK:
-		rcall->params.rwalk.nwqid = buf_get_int16(bufp);
-		if (rcall->params.rwalk.nwqid > P9_MAXWELEM) {
-			P9_EPRINTK(KERN_ERR,
-					"Rwalk with more than %d qids: %d\n",
-					P9_MAXWELEM, rcall->params.rwalk.nwqid);
-			return -EPROTO;
-		}
-
-		for (i = 0; i < rcall->params.rwalk.nwqid; i++)
-			buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
-		break;
-	case P9_ROPEN:
-		buf_get_qid(bufp, &rcall->params.ropen.qid);
-		rcall->params.ropen.iounit = buf_get_int32(bufp);
-		break;
-	case P9_RCREATE:
-		buf_get_qid(bufp, &rcall->params.rcreate.qid);
-		rcall->params.rcreate.iounit = buf_get_int32(bufp);
-		break;
-	case P9_RREAD:
-		rcall->params.rread.count = buf_get_int32(bufp);
-		rcall->params.rread.data = bufp->p;
-		buf_check_size(bufp, rcall->params.rread.count);
-		break;
-	case P9_RWRITE:
-		rcall->params.rwrite.count = buf_get_int32(bufp);
-		break;
-	case P9_RCLUNK:
-		break;
-	case P9_RREMOVE:
-		break;
-	case P9_RSTAT:
-		buf_get_int16(bufp);
-		buf_get_stat(bufp, &rcall->params.rstat.stat, dotu);
-		break;
-	case P9_RWSTAT:
-		break;
-	case P9_RERROR:
-		buf_get_str(bufp, &rcall->params.rerror.error);
-		if (dotu)
-			rcall->params.rerror.errno = buf_get_int16(bufp);
-		break;
-	}
-
-	if (buf_check_overflow(bufp)) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "buffer overflow\n");
-		return -EIO;
-	}
-
-	return bufp->p - bufp->sp;
-}
-EXPORT_SYMBOL(p9_deserialize_fcall);
-
-static inline void p9_put_int8(struct cbuf *bufp, u8 val, u8 * p)
-{
-	*p = val;
-	buf_put_int8(bufp, val);
-}
-
-static inline void p9_put_int16(struct cbuf *bufp, u16 val, u16 * p)
-{
-	*p = val;
-	buf_put_int16(bufp, val);
-}
-
-static inline void p9_put_int32(struct cbuf *bufp, u32 val, u32 * p)
-{
-	*p = val;
-	buf_put_int32(bufp, val);
-}
-
-static inline void p9_put_int64(struct cbuf *bufp, u64 val, u64 * p)
-{
-	*p = val;
-	buf_put_int64(bufp, val);
-}
-
-static void
-p9_put_str(struct cbuf *bufp, char *data, struct p9_str *str)
-{
-	int len;
-	char *s;
-
-	if (data)
-		len = strlen(data);
-	else
-		len = 0;
-
-	s = buf_put_stringn(bufp, data, len);
-	if (str) {
-		str->len = len;
-		str->str = s;
-	}
-}
-
-static int
-p9_put_data(struct cbuf *bufp, const char *data, int count,
-		   unsigned char **pdata)
-{
-	*pdata = buf_alloc(bufp, count);
-	if (*pdata == NULL)
-		return -ENOMEM;
-	memmove(*pdata, data, count);
-	return 0;
-}
-
-static int
-p9_put_user_data(struct cbuf *bufp, const char __user *data, int count,
-		   unsigned char **pdata)
-{
-	*pdata = buf_alloc(bufp, count);
-	if (*pdata == NULL)
-		return -ENOMEM;
-	return copy_from_user(*pdata, data, count);
-}
-
-static void
-p9_put_wstat(struct cbuf *bufp, struct p9_wstat *wstat,
-	       struct p9_stat *stat, int statsz, int dotu)
-{
-	p9_put_int16(bufp, statsz, &stat->size);
-	p9_put_int16(bufp, wstat->type, &stat->type);
-	p9_put_int32(bufp, wstat->dev, &stat->dev);
-	p9_put_int8(bufp, wstat->qid.type, &stat->qid.type);
-	p9_put_int32(bufp, wstat->qid.version, &stat->qid.version);
-	p9_put_int64(bufp, wstat->qid.path, &stat->qid.path);
-	p9_put_int32(bufp, wstat->mode, &stat->mode);
-	p9_put_int32(bufp, wstat->atime, &stat->atime);
-	p9_put_int32(bufp, wstat->mtime, &stat->mtime);
-	p9_put_int64(bufp, wstat->length, &stat->length);
-
-	p9_put_str(bufp, wstat->name, &stat->name);
-	p9_put_str(bufp, wstat->uid, &stat->uid);
-	p9_put_str(bufp, wstat->gid, &stat->gid);
-	p9_put_str(bufp, wstat->muid, &stat->muid);
-
-	if (dotu) {
-		p9_put_str(bufp, wstat->extension, &stat->extension);
-		p9_put_int32(bufp, wstat->n_uid, &stat->n_uid);
-		p9_put_int32(bufp, wstat->n_gid, &stat->n_gid);
-		p9_put_int32(bufp, wstat->n_muid, &stat->n_muid);
-	}
-}
-
-static struct p9_fcall *
-p9_create_common(struct cbuf *bufp, u32 size, u8 id)
-{
-	struct p9_fcall *fc;
-
-	size += 4 + 1 + 2;	/* size[4] id[1] tag[2] */
-	fc = kmalloc(sizeof(struct p9_fcall) + size, GFP_KERNEL);
-	if (!fc)
-		return ERR_PTR(-ENOMEM);
-
-	fc->sdata = (char *)fc + sizeof(*fc);
-
-	buf_init(bufp, (char *)fc->sdata, size);
-	p9_put_int32(bufp, size, &fc->size);
-	p9_put_int8(bufp, id, &fc->id);
-	p9_put_int16(bufp, P9_NOTAG, &fc->tag);
-
-	return fc;
-}
-
-/**
- * p9_set_tag - set the tag field of an &p9_fcall structure
- * @fc: fcall structure to set tag within
- * @tag: tag id to set
- */
-
-void p9_set_tag(struct p9_fcall *fc, u16 tag)
-{
-	fc->tag = tag;
-	*(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag);
-}
-EXPORT_SYMBOL(p9_set_tag);
-
-/**
- * p9_create_tversion - allocates and creates a T_VERSION request
- * @msize: requested maximum data size
- * @version: version string to negotiate
- *
- */
-struct p9_fcall *p9_create_tversion(u32 msize, char *version)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 2 + strlen(version);	/* msize[4] version[s] */
-	fc = p9_create_common(bufp, size, P9_TVERSION);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, msize, &fc->params.tversion.msize);
-	p9_put_str(bufp, version, &fc->params.tversion.version);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tversion);
-
-/**
- * p9_create_tauth - allocates and creates a T_AUTH request
- * @afid: handle to use for authentication protocol
- * @uname: user name attempting to authenticate
- * @aname: mount specifier for remote server
- * @n_uname: numeric id for user attempting to authneticate
- * @dotu: 9P2000.u extension flag
- *
- */
-
-struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname,
-	u32 n_uname, int dotu)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* afid[4] uname[s] aname[s] */
-	size = 4 + 2 + 2;
-	if (uname)
-		size += strlen(uname);
-
-	if (aname)
-		size += strlen(aname);
-
-	if (dotu)
-		size += 4;	/* n_uname */
-
-	fc = p9_create_common(bufp, size, P9_TAUTH);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, afid, &fc->params.tauth.afid);
-	p9_put_str(bufp, uname, &fc->params.tauth.uname);
-	p9_put_str(bufp, aname, &fc->params.tauth.aname);
-	if (dotu)
-		p9_put_int32(bufp, n_uname, &fc->params.tauth.n_uname);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tauth);
-
-/**
- * p9_create_tattach - allocates and creates a T_ATTACH request
- * @fid: handle to use for the new mount point
- * @afid: handle to use for authentication protocol
- * @uname: user name attempting to attach
- * @aname: mount specifier for remote server
- * @n_uname: numeric id for user attempting to attach
- * @n_uname: numeric id for user attempting to attach
- * @dotu: 9P2000.u extension flag
- *
- */
-
-struct p9_fcall *
-p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname,
-	u32 n_uname, int dotu)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] afid[4] uname[s] aname[s] */
-	size = 4 + 4 + 2 + 2;
-	if (uname)
-		size += strlen(uname);
-
-	if (aname)
-		size += strlen(aname);
-
-	if (dotu)
-		size += 4;	/* n_uname */
-
-	fc = p9_create_common(bufp, size, P9_TATTACH);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tattach.fid);
-	p9_put_int32(bufp, afid, &fc->params.tattach.afid);
-	p9_put_str(bufp, uname, &fc->params.tattach.uname);
-	p9_put_str(bufp, aname, &fc->params.tattach.aname);
-	if (dotu)
-		p9_put_int32(bufp, n_uname, &fc->params.tattach.n_uname);
-
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tattach);
-
-/**
- * p9_create_tflush - allocates and creates a T_FLUSH request
- * @oldtag: tag id for the transaction we are attempting to cancel
- *
- */
-
-struct p9_fcall *p9_create_tflush(u16 oldtag)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 2;		/* oldtag[2] */
-	fc = p9_create_common(bufp, size, P9_TFLUSH);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int16(bufp, oldtag, &fc->params.tflush.oldtag);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tflush);
-
-/**
- * p9_create_twalk - allocates and creates a T_FLUSH request
- * @fid: handle we are traversing from
- * @newfid: a new handle for this transaction
- * @nwname: number of path elements to traverse
- * @wnames: array of path elements
- *
- */
-
-struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname,
-				     char **wnames)
-{
-	int i, size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	if (nwname > P9_MAXWELEM) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "nwname > %d\n", P9_MAXWELEM);
-		return NULL;
-	}
-
-	size = 4 + 4 + 2;	/* fid[4] newfid[4] nwname[2] ... */
-	for (i = 0; i < nwname; i++) {
-		size += 2 + strlen(wnames[i]);	/* wname[s] */
-	}
-
-	fc = p9_create_common(bufp, size, P9_TWALK);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twalk.fid);
-	p9_put_int32(bufp, newfid, &fc->params.twalk.newfid);
-	p9_put_int16(bufp, nwname, &fc->params.twalk.nwname);
-	for (i = 0; i < nwname; i++) {
-		p9_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
-	}
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twalk);
-
-/**
- * p9_create_topen - allocates and creates a T_OPEN request
- * @fid: handle we are trying to open
- * @mode: what mode we are trying to open the file in
- *
- */
-
-struct p9_fcall *p9_create_topen(u32 fid, u8 mode)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 1;		/* fid[4] mode[1] */
-	fc = p9_create_common(bufp, size, P9_TOPEN);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.topen.fid);
-	p9_put_int8(bufp, mode, &fc->params.topen.mode);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_topen);
-
-/**
- * p9_create_tcreate - allocates and creates a T_CREATE request
- * @fid: handle of directory we are trying to create in
- * @name: name of the file we are trying to create
- * @perm: permissions for the file we are trying to create
- * @mode: what mode we are trying to open the file in
- * @extension: 9p2000.u extension string (for special files)
- * @dotu: 9p2000.u enabled flag
- *
- * Note: Plan 9 create semantics include opening the resulting file
- * which is why mode is included.
- */
-
-struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
-	char *extension, int dotu)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] name[s] perm[4] mode[1] */
-	size = 4 + 2 + strlen(name) + 4 + 1;
-	if (dotu) {
-		size += 2 +			/* extension[s] */
-		    (extension == NULL ? 0 : strlen(extension));
-	}
-
-	fc = p9_create_common(bufp, size, P9_TCREATE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tcreate.fid);
-	p9_put_str(bufp, name, &fc->params.tcreate.name);
-	p9_put_int32(bufp, perm, &fc->params.tcreate.perm);
-	p9_put_int8(bufp, mode, &fc->params.tcreate.mode);
-	if (dotu)
-		p9_put_str(bufp, extension, &fc->params.tcreate.extension);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tcreate);
-
-/**
- * p9_create_tread - allocates and creates a T_READ request
- * @fid: handle of the file we are trying to read
- * @offset: offset to start reading from
- * @count: how many bytes to read
- */
-
-struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 8 + 4;	/* fid[4] offset[8] count[4] */
-	fc = p9_create_common(bufp, size, P9_TREAD);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tread.fid);
-	p9_put_int64(bufp, offset, &fc->params.tread.offset);
-	p9_put_int32(bufp, count, &fc->params.tread.count);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tread);
-
-/**
- * p9_create_twrite - allocates and creates a T_WRITE request from the kernel
- * @fid: handle of the file we are trying to write
- * @offset: offset to start writing at
- * @count: how many bytes to write
- * @data: data to write
- *
- * This function will create a requst with data buffers from the kernel
- * such as the page cache.
- */
-
-struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count,
-				      const char *data)
-{
-	int size, err;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] offset[8] count[4] data[count] */
-	size = 4 + 8 + 4 + count;
-	fc = p9_create_common(bufp, size, P9_TWRITE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twrite.fid);
-	p9_put_int64(bufp, offset, &fc->params.twrite.offset);
-	p9_put_int32(bufp, count, &fc->params.twrite.count);
-	err = p9_put_data(bufp, data, count, &fc->params.twrite.data);
-	if (err) {
-		kfree(fc);
-		fc = ERR_PTR(err);
-		goto error;
-	}
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twrite);
-
-/**
- * p9_create_twrite_u - allocates and creates a T_WRITE request from userspace
- * @fid: handle of the file we are trying to write
- * @offset: offset to start writing at
- * @count: how many bytes to write
- * @data: data to write
- *
- * This function will create a request with data buffers from userspace
- */
-
-struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count,
-				      const char __user *data)
-{
-	int size, err;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] offset[8] count[4] data[count] */
-	size = 4 + 8 + 4 + count;
-	fc = p9_create_common(bufp, size, P9_TWRITE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twrite.fid);
-	p9_put_int64(bufp, offset, &fc->params.twrite.offset);
-	p9_put_int32(bufp, count, &fc->params.twrite.count);
-	err = p9_put_user_data(bufp, data, count, &fc->params.twrite.data);
-	if (err) {
-		kfree(fc);
-		fc = ERR_PTR(err);
-		goto error;
-	}
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twrite_u);
-
-/**
- * p9_create_tclunk - allocate a request to forget about a file handle
- * @fid: handle of the file we closing or forgetting about
- *
- * clunk is used both to close open files and to discard transient handles
- * which may be created during meta-data operations and hierarchy traversal.
- */
-
-struct p9_fcall *p9_create_tclunk(u32 fid)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = p9_create_common(bufp, size, P9_TCLUNK);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tclunk.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tclunk);
-
-/**
- * p9_create_tremove - allocate and create a request to remove a file
- * @fid: handle of the file or directory we are removing
- *
- */
-
-struct p9_fcall *p9_create_tremove(u32 fid)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = p9_create_common(bufp, size, P9_TREMOVE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tremove.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tremove);
-
-/**
- * p9_create_tstat - allocate and populate a request for attributes
- * @fid: handle of the file or directory we are trying to get the attributes of
- *
- */
-
-struct p9_fcall *p9_create_tstat(u32 fid)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = p9_create_common(bufp, size, P9_TSTAT);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tstat.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tstat);
-
-/**
- * p9_create_tstat - allocate and populate a request to change attributes
- * @fid: handle of the file or directory we are trying to change
- * @wstat: &p9_stat structure with attributes we wish to set
- * @dotu: 9p2000.u enabled flag
- *
- */
-
-struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat,
-				      int dotu)
-{
-	int size, statsz;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	statsz = p9_size_wstat(wstat, dotu);
-	size = 4 + 2 + 2 + statsz;	/* fid[4] stat[n] */
-	fc = p9_create_common(bufp, size, P9_TWSTAT);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twstat.fid);
-	buf_put_int16(bufp, statsz + 2);
-	p9_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, dotu);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twstat);
-
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 4ebeffd21d3d..92cb60bb191b 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -509,6 +509,19 @@ p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...)
 	return ret;
 }
 
+int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu)
+{
+	struct p9_fcall fake_pdu;
+
+	fake_pdu.size = len;
+	fake_pdu.capacity = len;
+	fake_pdu.sdata = buf;
+	fake_pdu.offset = 0;
+
+	return p9pdu_readf(&fake_pdu, dotu, "S", st);
+}
+EXPORT_SYMBOL(p9stat_read);
+
 int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
 {
 	return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
-- 
GitLab


From 0a70c7f67a24b45e105ad10ac1d7e73fe50ec765 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:09 +0200
Subject: [PATCH 566/895] ide-disk: fix IDE_DFLAG_LBA48 handling on resume

Some code in idedisk_setup() should be in idedisk_capacity() instead.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 3853bde8eedc..93ff15e6a9c4 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -403,6 +403,26 @@ static void init_idedisk_capacity(ide_drive_t *drive)
 		if (ata_id_hpa_enabled(id))
 			idedisk_check_hpa(drive);
 	}
+
+	/* limit drive capacity to 137GB if LBA48 cannot be used */
+	if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
+	    drive->capacity64 > 1ULL << 28) {
+		printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
+		       "%llu sectors (%llu MB)\n",
+		       drive->name, (unsigned long long)drive->capacity64,
+		       sectors_to_MB(drive->capacity64));
+		drive->capacity64 = 1ULL << 28;
+	}
+
+	if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
+	    (drive->dev_flags & IDE_DFLAG_LBA48)) {
+		if (drive->capacity64 > 1ULL << 28) {
+			printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
+					 " will be used for accessing sectors "
+					 "> %u\n", drive->name, 1 << 28);
+		} else
+			drive->dev_flags &= ~IDE_DFLAG_LBA48;
+	}
 }
 
 sector_t ide_disk_capacity(ide_drive_t *drive)
@@ -654,26 +674,6 @@ static void idedisk_setup(ide_drive_t *drive)
 	/* calculate drive capacity, and select LBA if possible */
 	init_idedisk_capacity(drive);
 
-	/* limit drive capacity to 137GB if LBA48 cannot be used */
-	if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
-	    drive->capacity64 > 1ULL << 28) {
-		printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
-		       "%llu sectors (%llu MB)\n",
-		       drive->name, (unsigned long long)drive->capacity64,
-		       sectors_to_MB(drive->capacity64));
-		drive->capacity64 = 1ULL << 28;
-	}
-
-	if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
-	    (drive->dev_flags & IDE_DFLAG_LBA48)) {
-		if (drive->capacity64 > 1ULL << 28) {
-			printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
-					 " will be used for accessing sectors "
-					 "> %u\n", drive->name, 1 << 28);
-		} else
-			drive->dev_flags &= ~IDE_DFLAG_LBA48;
-	}
-
 	/*
 	 * if possible, give fdisk access to more of the drive,
 	 * by correcting bios_cyls:
-- 
GitLab


From 099ed4c2f5d54a5e1e490250805fb9727d622c0c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:09 +0200
Subject: [PATCH 567/895] ide-disk: lock media before checking for media change

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 93ff15e6a9c4..2c48bd81f537 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -842,7 +842,6 @@ static int idedisk_open(struct inode *inode, struct file *filp)
 	idkp->openers++;
 
 	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-		check_disk_change(inode->i_bdev);
 		/*
 		 * Ignore the return code from door_lock,
 		 * since the open() has already succeeded,
@@ -851,6 +850,7 @@ static int idedisk_open(struct inode *inode, struct file *filp)
 		if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
 		    idedisk_set_doorlock(drive, 1))
 			drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
+		check_disk_change(inode->i_bdev);
 	}
 	return 0;
 }
-- 
GitLab


From 9c3ba7692bc1166c6a5d06fb7c59984f9f988a00 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:09 +0200
Subject: [PATCH 568/895] ide-floppy: use alloc_disk_node()

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index cf0aa25470ee..73458e46bf1e 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -788,7 +788,7 @@ static int ide_floppy_probe(ide_drive_t *drive)
 		goto failed;
 	}
 
-	g = alloc_disk(1 << PARTN_BITS);
+	g = alloc_disk_node(1 << PARTN_BITS, hwif_to_node(drive->hwif));
 	if (!g)
 		goto out_free_floppy;
 
-- 
GitLab


From d7e747596829c1c11833ca0a1f5e64f400d20bf2 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:09 +0200
Subject: [PATCH 569/895] ide-disk: use to_ide_drv() and ide_drv_g()

There should be no functional changes caused by this patch.

Cc: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c       | 14 ++++++--------
 drivers/ide/ide-disk.h       |  3 ---
 drivers/ide/ide-disk_ioctl.c |  2 +-
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 2c48bd81f537..6c9c898aff62 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -49,8 +49,6 @@
 
 static DEFINE_MUTEX(idedisk_ref_mutex);
 
-#define to_ide_disk(obj) container_of(obj, struct ide_disk_obj, kref)
-
 static void ide_disk_release(struct kref *);
 
 static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
@@ -58,7 +56,7 @@ static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
 	struct ide_disk_obj *idkp = NULL;
 
 	mutex_lock(&idedisk_ref_mutex);
-	idkp = ide_disk_g(disk);
+	idkp = ide_drv_g(disk, ide_disk_obj);
 	if (idkp) {
 		if (ide_device_get(idkp->drive))
 			idkp = NULL;
@@ -746,7 +744,7 @@ static void ide_disk_remove(ide_drive_t *drive)
 
 static void ide_disk_release(struct kref *kref)
 {
-	struct ide_disk_obj *idkp = to_ide_disk(kref);
+	struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 	struct gendisk *g = idkp->disk;
 
@@ -858,7 +856,7 @@ static int idedisk_open(struct inode *inode, struct file *filp)
 static int idedisk_release(struct inode *inode, struct file *filp)
 {
 	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_disk_obj *idkp = ide_disk_g(disk);
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 
 	if (idkp->openers == 1)
@@ -879,7 +877,7 @@ static int idedisk_release(struct inode *inode, struct file *filp)
 
 static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-	struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
+	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 
 	geo->heads = drive->bios_head;
@@ -890,7 +888,7 @@ static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 
 static int idedisk_media_changed(struct gendisk *disk)
 {
-	struct ide_disk_obj *idkp = ide_disk_g(disk);
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 
 	/* do not scan partitions twice if this is a removable device */
@@ -905,7 +903,7 @@ static int idedisk_media_changed(struct gendisk *disk)
 
 static int idedisk_revalidate_disk(struct gendisk *disk)
 {
-	struct ide_disk_obj *idkp = ide_disk_g(disk);
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	set_capacity(disk, ide_disk_capacity(idkp->drive));
 	return 0;
 }
diff --git a/drivers/ide/ide-disk.h b/drivers/ide/ide-disk.h
index a82fa4355665..50d674b91c8b 100644
--- a/drivers/ide/ide-disk.h
+++ b/drivers/ide/ide-disk.h
@@ -9,9 +9,6 @@ struct ide_disk_obj {
 	unsigned int	openers;	/* protected by BKL for now */
 };
 
-#define ide_disk_g(disk) \
-	container_of((disk)->private_data, struct ide_disk_obj, driver)
-
 /* ide-disk.c */
 sector_t ide_disk_capacity(ide_drive_t *);
 ide_decl_devset(address);
diff --git a/drivers/ide/ide-disk_ioctl.c b/drivers/ide/ide-disk_ioctl.c
index a6cf1a03a806..e6624eda9e69 100644
--- a/drivers/ide/ide-disk_ioctl.c
+++ b/drivers/ide/ide-disk_ioctl.c
@@ -17,7 +17,7 @@ int ide_disk_ioctl(struct inode *inode, struct file *file,
 		   unsigned int cmd, unsigned long arg)
 {
 	struct block_device *bdev = inode->i_bdev;
-	struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
+	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 	int err;
 
-- 
GitLab


From 81ee1bb51fff76aaa738668b92406b5117f125ed Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:10 +0200
Subject: [PATCH 570/895] ide-disk: move IDE_DFLAG_DOORLOCKING flag handling to
 idedisk_set_doorlock()

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 6c9c898aff62..289a533afbd6 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -817,12 +817,21 @@ static ide_driver_t idedisk_driver = {
 static int idedisk_set_doorlock(ide_drive_t *drive, int on)
 {
 	ide_task_t task;
+	int ret;
+
+	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
+		return 0;
 
 	memset(&task, 0, sizeof(task));
 	task.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK;
 	task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
 
-	return ide_no_data_taskfile(drive, &task);
+	ret = ide_no_data_taskfile(drive, &task);
+
+	if (ret)
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
+
+	return ret;
 }
 
 static int idedisk_open(struct inode *inode, struct file *filp)
@@ -845,9 +854,7 @@ static int idedisk_open(struct inode *inode, struct file *filp)
 		 * since the open() has already succeeded,
 		 * and the door_lock is irrelevant at this point.
 		 */
-		if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
-		    idedisk_set_doorlock(drive, 1))
-			drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
+		idedisk_set_doorlock(drive, 1);
 		check_disk_change(inode->i_bdev);
 	}
 	return 0;
@@ -862,11 +869,8 @@ static int idedisk_release(struct inode *inode, struct file *filp)
 	if (idkp->openers == 1)
 		ide_cacheflush_p(drive);
 
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-		if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
-		    idedisk_set_doorlock(drive, 0))
-			drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
-	}
+	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1)
+		idedisk_set_doorlock(drive, 0);
 
 	idkp->openers--;
 
-- 
GitLab


From ae9f9f073963c56dcc4601ed9a0921eda1e8fa9d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:10 +0200
Subject: [PATCH 571/895] ide-{disk,floppy}: set IDE_DFLAG_ATTACH in *_setup()

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c   | 18 ++++++++++--------
 drivers/ide/ide-floppy.c |  3 ++-
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 289a533afbd6..70b75f23a70e 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -716,6 +716,14 @@ static void idedisk_setup(ide_drive_t *drive)
 		drive->dev_flags |= IDE_DFLAG_WCACHE;
 
 	set_wcache(drive, 1);
+
+	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
+	    (drive->head == 0 || drive->head > 16)) {
+		printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n",
+			drive->name, drive->head);
+		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
+	} else
+		drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
 static void ide_cacheflush_p(ide_drive_t *drive)
@@ -957,20 +965,14 @@ static int ide_disk_probe(ide_drive_t *drive)
 	drive->driver_data = idkp;
 
 	idedisk_setup(drive);
-	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
-	    (drive->head == 0 || drive->head > 16)) {
-		printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n",
-			drive->name, drive->head);
-		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-	} else
-		drive->dev_flags |= IDE_DFLAG_ATTACH;
+
+	set_capacity(g, ide_disk_capacity(drive));
 
 	g->minors = IDE_DISK_MINORS;
 	g->driverfs_dev = &drive->gendev;
 	g->flags |= GENHD_FL_EXT_DEVT;
 	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
 		g->flags = GENHD_FL_REMOVABLE;
-	set_capacity(g, ide_disk_capacity(drive));
 	g->fops = &idedisk_ops;
 	add_disk(g);
 	return 0;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 73458e46bf1e..bcbd980f7a48 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -598,6 +598,8 @@ static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
 	(void) ide_floppy_get_capacity(drive);
 
 	ide_proc_register_driver(drive, floppy->driver);
+
+	drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
 static void ide_floppy_remove(ide_drive_t *drive)
@@ -807,7 +809,6 @@ static int ide_floppy_probe(ide_drive_t *drive)
 	drive->debug_mask = debug_mask;
 
 	idefloppy_setup(drive, floppy);
-	drive->dev_flags |= IDE_DFLAG_ATTACH;
 
 	g->minors = 1 << PARTN_BITS;
 	g->driverfs_dev = &drive->gendev;
-- 
GitLab


From 8f29cd9f12e97d46e601f59810ea6aadd938945d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:10 +0200
Subject: [PATCH 572/895] ide-floppy: drop 'floppy' argument from
 idefloppy_setup()

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index bcbd980f7a48..4a3d7e08b65a 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -560,8 +560,9 @@ sector_t ide_floppy_capacity(ide_drive_t *drive)
 	return capacity;
 }
 
-static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
+static void idefloppy_setup(ide_drive_t *drive)
 {
+	struct ide_floppy_obj *floppy = drive->driver_data;
 	u16 *id = drive->id;
 
 	drive->pc_callback	 = ide_floppy_callback;
@@ -808,7 +809,7 @@ static int ide_floppy_probe(ide_drive_t *drive)
 
 	drive->debug_mask = debug_mask;
 
-	idefloppy_setup(drive, floppy);
+	idefloppy_setup(drive);
 
 	g->minors = 1 << PARTN_BITS;
 	g->driverfs_dev = &drive->gendev;
-- 
GitLab


From 6f84083bbb7d206c8555e5834a2c9b887452fd54 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:10 +0200
Subject: [PATCH 573/895] ide-floppy: use drive->capacity64 for caching current
 capacity

* Use drive->capacity64 for caching current capacity.

* Switch ide_floppy_capacity() to use drive->capacity64.

* Call set_capacity() in idefloppy_open() and ide_floppy_probe()
  instead of ide_floppy_get_capacity().

There should be no functional changes caused by this patch.

Cc: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 4a3d7e08b65a..3271e56d091c 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -445,7 +445,9 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
 			drive->name, lba_capacity, capacity);
 		floppy->blocks = floppy->block_size ?
 			capacity / floppy->block_size : 0;
+		drive->capacity64 = floppy->blocks * floppy->bs_factor;
 	}
+
 	return 0;
 }
 
@@ -466,7 +468,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 	drive->bios_head = drive->bios_sect = 0;
 	floppy->blocks = 0;
 	floppy->bs_factor = 1;
-	set_capacity(floppy->disk, 0);
+	drive->capacity64 = 0;
 
 	ide_floppy_create_read_capacity_cmd(&pc);
 	if (ide_queue_pc_tail(drive, disk, &pc)) {
@@ -523,6 +525,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 					       "non 512 bytes block size not "
 					       "fully supported\n",
 					       drive->name);
+				drive->capacity64 =
+					floppy->blocks * floppy->bs_factor;
 				rc = 0;
 			}
 			break;
@@ -547,17 +551,12 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 	if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE))
 		(void) ide_floppy_get_flexible_disk_page(drive);
 
-	set_capacity(disk, floppy->blocks * floppy->bs_factor);
-
 	return rc;
 }
 
 sector_t ide_floppy_capacity(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
-	unsigned long capacity = floppy->blocks * floppy->bs_factor;
-
-	return capacity;
+	return drive->capacity64;
 }
 
 static void idefloppy_setup(ide_drive_t *drive)
@@ -671,14 +670,16 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 		if (ide_do_test_unit_ready(drive, disk))
 			ide_do_start_stop(drive, disk, 1);
 
-		if (ide_floppy_get_capacity(drive)
-		   && (filp->f_flags & O_NDELAY) == 0
+		ret = ide_floppy_get_capacity(drive);
+
+		set_capacity(disk, ide_floppy_capacity(drive));
+
+		if (ret && (filp->f_flags & O_NDELAY) == 0) {
 		    /*
 		     * Allow O_NDELAY to open a drive without a disk, or with an
 		     * unreadable disk, so that we can get the format capacity
 		     * of the drive or begin the format - Sam
 		     */
-		    ) {
 			ret = -EIO;
 			goto out_put_floppy;
 		}
@@ -811,6 +812,8 @@ static int ide_floppy_probe(ide_drive_t *drive)
 
 	idefloppy_setup(drive);
 
+	set_capacity(g, ide_floppy_capacity(drive));
+
 	g->minors = 1 << PARTN_BITS;
 	g->driverfs_dev = &drive->gendev;
 	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-- 
GitLab


From fe11edfaabf1787c05d782a7b33e6497d1118b1d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:11 +0200
Subject: [PATCH 574/895] ide: IDE_AFLAG_MEDIA_CHANGED ->
 IDE_DFLAG_MEDIA_CHANGED

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c       | 6 +++---
 drivers/ide/ide-cd_ioctl.c | 4 ++--
 drivers/ide/ide-floppy.c   | 6 +++---
 include/linux/ide.h        | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 3308b1cd3a33..7dc1a17a4dd8 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -99,7 +99,7 @@ static void ide_cd_put(struct cdrom_info *cd)
 /* Mark that we've seen a media change and invalidate our internal buffers. */
 static void cdrom_saw_media_change(ide_drive_t *drive)
 {
-	drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED;
+	drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 	drive->atapi_flags &= ~IDE_AFLAG_TOC_VALID;
 }
 
@@ -1986,8 +1986,8 @@ static int ide_cdrom_setup(ide_drive_t *drive)
 	if (!drive->queue->unplug_delay)
 		drive->queue->unplug_delay = 1;
 
-	drive->atapi_flags = IDE_AFLAG_MEDIA_CHANGED | IDE_AFLAG_NO_EJECT |
-		       ide_cd_flags(id);
+	drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
+	drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id);
 
 	if ((drive->atapi_flags & IDE_AFLAG_VERTOS_300_SSD) &&
 	    fw_rev[4] == '1' && fw_rev[6] <= '2')
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 74231b41f611..37d89ead13dd 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -86,8 +86,8 @@ int ide_cdrom_check_media_change_real(struct cdrom_device_info *cdi,
 
 	if (slot_nr == CDSL_CURRENT) {
 		(void) cdrom_check_status(drive, NULL);
-		retval = (drive->atapi_flags & IDE_AFLAG_MEDIA_CHANGED) ? 1 : 0;
-		drive->atapi_flags &= ~IDE_AFLAG_MEDIA_CHANGED;
+		retval = (drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED) ? 1 : 0;
+		drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
 		return retval;
 	} else {
 		return -EINVAL;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 3271e56d091c..df410c7191ac 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -689,8 +689,8 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 			goto out_put_floppy;
 		}
 
-		drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED;
 		ide_set_media_lock(drive, disk, 1);
+		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 		check_disk_change(inode->i_bdev);
 	} else if (drive->atapi_flags & IDE_AFLAG_FORMAT_IN_PROGRESS) {
 		ret = -EBUSY;
@@ -747,8 +747,8 @@ static int idefloppy_media_changed(struct gendisk *disk)
 		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
 		return 0;
 	}
-	ret = !!(drive->atapi_flags & IDE_AFLAG_MEDIA_CHANGED);
-	drive->atapi_flags &= ~IDE_AFLAG_MEDIA_CHANGED;
+	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
+	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
 	return ret;
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index c47e371554c1..155a57f55c60 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -464,7 +464,6 @@ struct ide_acpi_hwif_link;
 /* ATAPI device flags */
 enum {
 	IDE_AFLAG_DRQ_INTERRUPT		= (1 << 0),
-	IDE_AFLAG_MEDIA_CHANGED		= (1 << 1),
 	/* Drive cannot lock the door. */
 	IDE_AFLAG_NO_DOORLOCK		= (1 << 2),
 
@@ -578,7 +577,8 @@ enum {
 	/* don't unload heads */
 	IDE_DFLAG_NO_UNLOAD		= (1 << 27),
 	/* heads unloaded, please don't reset port */
-	IDE_DFLAG_PARKED		= (1 << 28)
+	IDE_DFLAG_PARKED		= (1 << 28),
+	IDE_DFLAG_MEDIA_CHANGED		= (1 << 29),
 };
 
 struct ide_drive_s {
-- 
GitLab


From da167876bd0f71f1c646e5dd98997544d8d90e8e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:11 +0200
Subject: [PATCH 575/895] ide: IDE_AFLAG_WP -> IDE_DFLAG_WP

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c | 8 ++++----
 include/linux/ide.h      | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index df410c7191ac..8078e0826cd3 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -410,11 +410,11 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
 	}
 
 	if (pc.buf[3] & 0x80)
-		drive->atapi_flags |= IDE_AFLAG_WP;
+		drive->dev_flags |= IDE_DFLAG_WP;
 	else
-		drive->atapi_flags &= ~IDE_AFLAG_WP;
+		drive->dev_flags &= ~IDE_DFLAG_WP;
 
-	set_disk_ro(disk, !!(drive->atapi_flags & IDE_AFLAG_WP));
+	set_disk_ro(disk, !!(drive->dev_flags & IDE_DFLAG_WP));
 
 	page = &pc.buf[8];
 
@@ -684,7 +684,7 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 			goto out_put_floppy;
 		}
 
-		if ((drive->atapi_flags & IDE_AFLAG_WP) && (filp->f_mode & 2)) {
+		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
 			ret = -EROFS;
 			goto out_put_floppy;
 		}
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 155a57f55c60..bd0a4d36b6d3 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -503,8 +503,6 @@ enum {
 	IDE_AFLAG_CLIK_DRIVE		= (1 << 19),
 	/* Requires BH algorithm for packets */
 	IDE_AFLAG_ZIP_DRIVE		= (1 << 20),
-	/* Write protect */
-	IDE_AFLAG_WP			= (1 << 21),
 	/* Supports format progress report */
 	IDE_AFLAG_SRFP			= (1 << 22),
 
@@ -579,6 +577,8 @@ enum {
 	/* heads unloaded, please don't reset port */
 	IDE_DFLAG_PARKED		= (1 << 28),
 	IDE_DFLAG_MEDIA_CHANGED		= (1 << 29),
+	/* write protect */
+	IDE_DFLAG_WP			= (1 << 30),
 };
 
 struct ide_drive_s {
-- 
GitLab


From e01286282eef85e4783b06fb2e0ed84fc111eb32 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:11 +0200
Subject: [PATCH 576/895] ide: IDE_AFLAG_FORMAT_IN_PROGRESS ->
 IDE_DFLAG_FORMAT_IN_PROGRESS

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c       | 6 +++---
 drivers/ide/ide-floppy_ioctl.c | 6 +++---
 include/linux/ide.h            | 3 +--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 8078e0826cd3..2cf98b531fd9 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -664,7 +664,7 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 	floppy->openers++;
 
 	if (floppy->openers == 1) {
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 		/* Just in case */
 
 		if (ide_do_test_unit_ready(drive, disk))
@@ -692,7 +692,7 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 		ide_set_media_lock(drive, disk, 1);
 		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 		check_disk_change(inode->i_bdev);
-	} else if (drive->atapi_flags & IDE_AFLAG_FORMAT_IN_PROGRESS) {
+	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
 		ret = -EBUSY;
 		goto out_put_floppy;
 	}
@@ -714,7 +714,7 @@ static int idefloppy_release(struct inode *inode, struct file *filp)
 
 	if (floppy->openers == 1) {
 		ide_set_media_lock(drive, disk, 0);
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 	}
 
 	floppy->openers--;
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index a3a7a0809e2b..b1f391df6cca 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -138,11 +138,11 @@ static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
 
 	if (floppy->openers > 1) {
 		/* Don't format if someone is using the disk */
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 		return -EBUSY;
 	}
 
-	drive->atapi_flags |= IDE_AFLAG_FORMAT_IN_PROGRESS;
+	drive->dev_flags |= IDE_DFLAG_FORMAT_IN_PROGRESS;
 
 	/*
 	 * Send ATAPI_FORMAT_UNIT to the drive.
@@ -174,7 +174,7 @@ static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
 
 out:
 	if (err)
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 	return err;
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index bd0a4d36b6d3..d111c3ebbbae 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -497,8 +497,6 @@ enum {
 	IDE_AFLAG_LE_SPEED_FIELDS	= (1 << 17),
 
 	/* ide-floppy */
-	/* Format in progress */
-	IDE_AFLAG_FORMAT_IN_PROGRESS	= (1 << 18),
 	/* Avoid commands not supported in Clik drive */
 	IDE_AFLAG_CLIK_DRIVE		= (1 << 19),
 	/* Requires BH algorithm for packets */
@@ -579,6 +577,7 @@ enum {
 	IDE_DFLAG_MEDIA_CHANGED		= (1 << 29),
 	/* write protect */
 	IDE_DFLAG_WP			= (1 << 30),
+	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 31),
 };
 
 struct ide_drive_s {
-- 
GitLab


From 42619d35c7af2f88cad56425fe3981f1f65ff0bd Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:11 +0200
Subject: [PATCH 577/895] ide: remove IDE_AFLAG_NO_DOORLOCKING

Just use IDE_DFLAG_DOORLOCKING instead.

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-atapi.c    | 2 +-
 drivers/ide/ide-cd.c       | 2 +-
 drivers/ide/ide-cd_ioctl.c | 4 ++--
 drivers/ide/ide-floppy.c   | 2 +-
 drivers/ide/ide-probe.c    | 1 +
 drivers/ide/ide-tape.c     | 2 +-
 include/linux/ide.h        | 2 --
 7 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 2e305714c209..4e58b9e7a58a 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -191,7 +191,7 @@ int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on)
 {
 	struct ide_atapi_pc pc;
 
-	if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK)
+	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
 		return 0;
 
 	ide_init_pc(&pc);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 7dc1a17a4dd8..2f4cc10391e5 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1732,7 +1732,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
 		return 0;
 
 	if ((buf[8 + 6] & 0x01) == 0)
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 	if (buf[8 + 6] & 0x08)
 		drive->atapi_flags &= ~IDE_AFLAG_NO_EJECT;
 	if (buf[8 + 3] & 0x01)
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 37d89ead13dd..df3df0041eb6 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -136,7 +136,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
 		sense = &my_sense;
 
 	/* If the drive cannot lock the door, just pretend. */
-	if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK) {
+	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0) {
 		stat = 0;
 	} else {
 		unsigned char cmd[BLK_MAX_CDB];
@@ -157,7 +157,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
 	    (sense->asc == 0x24 || sense->asc == 0x20)) {
 		printk(KERN_ERR "%s: door locking not supported\n",
 			drive->name);
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 		stat = 0;
 	}
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 2cf98b531fd9..791a9d6f371c 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -592,7 +592,7 @@ static void idefloppy_setup(ide_drive_t *drive)
 		blk_queue_max_sectors(drive->queue, 64);
 		drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE;
 		/* IOMEGA Clik! drives do not support lock/unlock commands */
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 	}
 
 	(void) ide_floppy_get_capacity(drive);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 19f8c7770a25..1649ea54f76c 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -208,6 +208,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
 		drive->ready_stat = 0;
 		if (ata_id_cdb_intr(id))
 			drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT;
+		drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
 		/* we don't do head unloading on ATAPI devices */
 		drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
 		return;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index d879c7797cde..a99e28f45156 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2108,7 +2108,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
 
 	/* device lacks locking support according to capabilities page */
 	if ((caps[6] & 1) == 0)
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 
 	if (caps[7] & 0x02)
 		tape->blk_size = 512;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index d111c3ebbbae..ba51a93fa547 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -464,8 +464,6 @@ struct ide_acpi_hwif_link;
 /* ATAPI device flags */
 enum {
 	IDE_AFLAG_DRQ_INTERRUPT		= (1 << 0),
-	/* Drive cannot lock the door. */
-	IDE_AFLAG_NO_DOORLOCK		= (1 << 2),
 
 	/* ide-cd */
 	/* Drive cannot eject the disc. */
-- 
GitLab


From 5fef0e5c0283949f95a7891c9424a9f84448116b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:12 +0200
Subject: [PATCH 578/895] ide-disk: factor out generic disk handling code to
 ide-gd.c

While at it:
- IDEDISK_VERSION -> IDE_GD_VERSION
- ide_cacheflush_p() -> ide_disk_flush()
- init_idedisk_capacity() -> ide_disk_init_capacity()
- idedisk_set_doorlock() -> ide_disk_set_doorlock()
- idedisk_setup() -> ide_disk_setup()

- ide_disk_capacity() -> ide_gd_capacity()
- ide_disk_remove() -> ide_gd_remove()
- ide_disk_probe() -> ide_gd_probe()
- ide_disk_resume() -> ide_gd_resume()
- ide_device_shutdown() -> ide_gd_shutdown()
- idedisk_driver -> ide_gd_driver
- idedisk_open() -> ide_gd_open()
- idedisk_release() -> ide_gd_release()
- idedisk_getgeo() -> ide_gd_getgeo()
- idedisk_media_changed() -> ide_gd_media_changed()
- idedisk_revalidate_disk() -> ide_gd_revalidate_disk()
- idedisk_ops -> ide_gd_ops
- idedisk_init() -> ide_gd_init()
- idedisk_exit() -> ide_gd_exit()

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Makefile        |   2 +-
 drivers/ide/ide-disk.c      | 305 ++----------------------------------
 drivers/ide/ide-disk.h      |   8 +-
 drivers/ide/ide-disk_proc.c |   2 +-
 drivers/ide/ide-gd.c        | 296 ++++++++++++++++++++++++++++++++++
 5 files changed, 314 insertions(+), 299 deletions(-)
 create mode 100644 drivers/ide/ide-gd.c

diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index ceaf779054ea..8aad9395a427 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -37,7 +37,7 @@ obj-$(CONFIG_IDE_H8300)			+= h8300/
 obj-$(CONFIG_IDE_GENERIC)		+= ide-generic.o
 obj-$(CONFIG_BLK_DEV_IDEPNP)		+= ide-pnp.o
 
-ide-disk_mod-y += ide-disk.o ide-disk_ioctl.o
+ide-disk_mod-y += ide-gd.o ide-disk.o ide-disk_ioctl.o
 ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o
 ide-floppy_mod-y += ide-floppy.o ide-floppy_ioctl.o
 
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 70b75f23a70e..751be7af22c2 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -14,9 +14,6 @@
  * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
  */
 
-#define IDEDISK_VERSION	"1.18"
-
-#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
@@ -39,44 +36,8 @@
 #include <asm/io.h>
 #include <asm/div64.h>
 
-#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
-#define IDE_DISK_MINORS		(1 << PARTN_BITS)
-#else
-#define IDE_DISK_MINORS		0
-#endif
-
 #include "ide-disk.h"
 
-static DEFINE_MUTEX(idedisk_ref_mutex);
-
-static void ide_disk_release(struct kref *);
-
-static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
-{
-	struct ide_disk_obj *idkp = NULL;
-
-	mutex_lock(&idedisk_ref_mutex);
-	idkp = ide_drv_g(disk, ide_disk_obj);
-	if (idkp) {
-		if (ide_device_get(idkp->drive))
-			idkp = NULL;
-		else
-			kref_get(&idkp->kref);
-	}
-	mutex_unlock(&idedisk_ref_mutex);
-	return idkp;
-}
-
-static void ide_disk_put(struct ide_disk_obj *idkp)
-{
-	ide_drive_t *drive = idkp->drive;
-
-	mutex_lock(&idedisk_ref_mutex);
-	kref_put(&idkp->kref, ide_disk_release);
-	ide_device_put(drive);
-	mutex_unlock(&idedisk_ref_mutex);
-}
-
 static const u8 ide_rw_cmds[] = {
 	ATA_CMD_READ_MULTI,
 	ATA_CMD_WRITE_MULTI,
@@ -223,8 +184,8 @@ static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
  * 1073741822 == 549756 MB or 48bit addressing fake drive
  */
 
-static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
-				      sector_t block)
+ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
+			       sector_t block)
 {
 	ide_hwif_t *hwif = HWIF(drive);
 
@@ -372,7 +333,7 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 	}
 }
 
-static void init_idedisk_capacity(ide_drive_t *drive)
+void ide_disk_init_capacity(ide_drive_t *drive)
 {
 	u16 *id = drive->id;
 	int lba;
@@ -423,11 +384,6 @@ static void init_idedisk_capacity(ide_drive_t *drive)
 	}
 }
 
-sector_t ide_disk_capacity(ide_drive_t *drive)
-{
-	return drive->capacity64;
-}
-
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 {
 	ide_drive_t *drive = q->queuedata;
@@ -526,7 +482,7 @@ static void update_ordered(ide_drive_t *drive)
 		 * time we have trimmed the drive capacity if LBA48 is
 		 * not available so we don't need to recheck that.
 		 */
-		capacity = ide_disk_capacity(drive);
+		capacity = ide_gd_capacity(drive);
 		barrier = ata_id_flush_enabled(id) &&
 			(drive->dev_flags & IDE_DFLAG_NOFLUSH) == 0 &&
 			((drive->dev_flags & IDE_DFLAG_LBA48) == 0 ||
@@ -634,7 +590,7 @@ ide_ext_devset_rw(wcache, wcache);
 
 ide_ext_devset_rw_sync(nowerr, nowerr);
 
-static void idedisk_setup(ide_drive_t *drive)
+void ide_disk_setup(ide_drive_t *drive)
 {
 	struct ide_disk_obj *idkp = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
@@ -670,13 +626,13 @@ static void idedisk_setup(ide_drive_t *drive)
 			 drive->queue->max_sectors / 2);
 
 	/* calculate drive capacity, and select LBA if possible */
-	init_idedisk_capacity(drive);
+	ide_disk_init_capacity(drive);
 
 	/*
 	 * if possible, give fdisk access to more of the drive,
 	 * by correcting bios_cyls:
 	 */
-	capacity = ide_disk_capacity(drive);
+	capacity = ide_gd_capacity(drive);
 
 	if ((drive->dev_flags & IDE_DFLAG_FORCED_GEOM) == 0) {
 		if (ata_id_lba48_enabled(drive->id)) {
@@ -726,7 +682,7 @@ static void idedisk_setup(ide_drive_t *drive)
 		drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
-static void ide_cacheflush_p(ide_drive_t *drive)
+void ide_disk_flush(ide_drive_t *drive)
 {
 	if (ata_id_flush_enabled(drive->id) == 0 ||
 	    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0)
@@ -736,93 +692,7 @@ static void ide_cacheflush_p(ide_drive_t *drive)
 		printk(KERN_INFO "%s: wcache flush failed!\n", drive->name);
 }
 
-static void ide_disk_remove(ide_drive_t *drive)
-{
-	struct ide_disk_obj *idkp = drive->driver_data;
-	struct gendisk *g = idkp->disk;
-
-	ide_proc_unregister_driver(drive, idkp->driver);
-
-	del_gendisk(g);
-
-	ide_cacheflush_p(drive);
-
-	ide_disk_put(idkp);
-}
-
-static void ide_disk_release(struct kref *kref)
-{
-	struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-	struct gendisk *g = idkp->disk;
-
-	drive->driver_data = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-	kfree(idkp);
-}
-
-static int ide_disk_probe(ide_drive_t *drive);
-
-/*
- * On HPA drives the capacity needs to be
- * reinitilized on resume otherwise the disk
- * can not be used and a hard reset is required
- */
-static void ide_disk_resume(ide_drive_t *drive)
-{
-	if (ata_id_hpa_enabled(drive->id))
-		init_idedisk_capacity(drive);
-}
-
-static void ide_device_shutdown(ide_drive_t *drive)
-{
-#ifdef	CONFIG_ALPHA
-	/* On Alpha, halt(8) doesn't actually turn the machine off,
-	   it puts you into the sort of firmware monitor. Typically,
-	   it's used to boot another kernel image, so it's not much
-	   different from reboot(8). Therefore, we don't need to
-	   spin down the disk in this case, especially since Alpha
-	   firmware doesn't handle disks in standby mode properly.
-	   On the other hand, it's reasonably safe to turn the power
-	   off when the shutdown process reaches the firmware prompt,
-	   as the firmware initialization takes rather long time -
-	   at least 10 seconds, which should be sufficient for
-	   the disk to expire its write cache. */
-	if (system_state != SYSTEM_POWER_OFF) {
-#else
-	if (system_state == SYSTEM_RESTART) {
-#endif
-		ide_cacheflush_p(drive);
-		return;
-	}
-
-	printk(KERN_INFO "Shutdown: %s\n", drive->name);
-
-	drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
-}
-
-static ide_driver_t idedisk_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-disk",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_disk_probe,
-	.remove			= ide_disk_remove,
-	.resume			= ide_disk_resume,
-	.shutdown		= ide_device_shutdown,
-	.version		= IDEDISK_VERSION,
-	.do_request		= ide_do_rw_disk,
-	.end_request		= ide_end_request,
-	.error			= __ide_error,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc			= ide_disk_proc,
-	.settings		= ide_disk_settings,
-#endif
-};
-
-static int idedisk_set_doorlock(ide_drive_t *drive, int on)
+int ide_disk_set_doorlock(ide_drive_t *drive, int on)
 {
 	ide_task_t task;
 	int ret;
@@ -841,160 +711,3 @@ static int idedisk_set_doorlock(ide_drive_t *drive, int on)
 
 	return ret;
 }
-
-static int idedisk_open(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_disk_obj *idkp;
-	ide_drive_t *drive;
-
-	idkp = ide_disk_get(disk);
-	if (idkp == NULL)
-		return -ENXIO;
-
-	drive = idkp->drive;
-
-	idkp->openers++;
-
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-		/*
-		 * Ignore the return code from door_lock,
-		 * since the open() has already succeeded,
-		 * and the door_lock is irrelevant at this point.
-		 */
-		idedisk_set_doorlock(drive, 1);
-		check_disk_change(inode->i_bdev);
-	}
-	return 0;
-}
-
-static int idedisk_release(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	if (idkp->openers == 1)
-		ide_cacheflush_p(drive);
-
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1)
-		idedisk_set_doorlock(drive, 0);
-
-	idkp->openers--;
-
-	ide_disk_put(idkp);
-
-	return 0;
-}
-
-static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	geo->heads = drive->bios_head;
-	geo->sectors = drive->bios_sect;
-	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
-	return 0;
-}
-
-static int idedisk_media_changed(struct gendisk *disk)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	/* do not scan partitions twice if this is a removable device */
-	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
-		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-		return 0;
-	}
-
-	/* if removable, always assume it was changed */
-	return !!(drive->dev_flags & IDE_DFLAG_REMOVABLE);
-}
-
-static int idedisk_revalidate_disk(struct gendisk *disk)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
-	set_capacity(disk, ide_disk_capacity(idkp->drive));
-	return 0;
-}
-
-static struct block_device_operations idedisk_ops = {
-	.owner			= THIS_MODULE,
-	.open			= idedisk_open,
-	.release		= idedisk_release,
-	.ioctl			= ide_disk_ioctl,
-	.getgeo			= idedisk_getgeo,
-	.media_changed		= idedisk_media_changed,
-	.revalidate_disk	= idedisk_revalidate_disk
-};
-
-MODULE_DESCRIPTION("ATA DISK Driver");
-
-static int ide_disk_probe(ide_drive_t *drive)
-{
-	struct ide_disk_obj *idkp;
-	struct gendisk *g;
-
-	/* strstr("foo", "") is non-NULL */
-	if (!strstr("ide-disk", drive->driver_req))
-		goto failed;
-
-	if (drive->media != ide_disk)
-		goto failed;
-
-	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
-	if (!idkp)
-		goto failed;
-
-	g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
-	if (!g)
-		goto out_free_idkp;
-
-	ide_init_disk(g, drive);
-
-	kref_init(&idkp->kref);
-
-	idkp->drive = drive;
-	idkp->driver = &idedisk_driver;
-	idkp->disk = g;
-
-	g->private_data = &idkp->driver;
-
-	drive->driver_data = idkp;
-
-	idedisk_setup(drive);
-
-	set_capacity(g, ide_disk_capacity(drive));
-
-	g->minors = IDE_DISK_MINORS;
-	g->driverfs_dev = &drive->gendev;
-	g->flags |= GENHD_FL_EXT_DEVT;
-	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-		g->flags = GENHD_FL_REMOVABLE;
-	g->fops = &idedisk_ops;
-	add_disk(g);
-	return 0;
-
-out_free_idkp:
-	kfree(idkp);
-failed:
-	return -ENODEV;
-}
-
-static void __exit idedisk_exit(void)
-{
-	driver_unregister(&idedisk_driver.gen_driver);
-}
-
-static int __init idedisk_init(void)
-{
-	return driver_register(&idedisk_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-disk*");
-MODULE_ALIAS("ide-disk");
-module_init(idedisk_init);
-module_exit(idedisk_exit);
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ide-disk.h b/drivers/ide/ide-disk.h
index 50d674b91c8b..104ad71288a5 100644
--- a/drivers/ide/ide-disk.h
+++ b/drivers/ide/ide-disk.h
@@ -9,8 +9,14 @@ struct ide_disk_obj {
 	unsigned int	openers;	/* protected by BKL for now */
 };
 
+sector_t ide_gd_capacity(ide_drive_t *);
+
 /* ide-disk.c */
-sector_t ide_disk_capacity(ide_drive_t *);
+void ide_disk_init_capacity(ide_drive_t *);
+void ide_disk_setup(ide_drive_t *);
+void ide_disk_flush(ide_drive_t *);
+int ide_disk_set_doorlock(ide_drive_t *, int);
+ide_startstop_t ide_do_rw_disk(ide_drive_t *, struct request *, sector_t);
 ide_decl_devset(address);
 ide_decl_devset(multcount);
 ide_decl_devset(nowerr);
diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c
index 4724976afe71..1146f4204c6e 100644
--- a/drivers/ide/ide-disk_proc.c
+++ b/drivers/ide/ide-disk_proc.c
@@ -56,7 +56,7 @@ static int proc_idedisk_read_capacity
 	ide_drive_t*drive = (ide_drive_t *)data;
 	int len;
 
-	len = sprintf(page, "%llu\n", (long long)ide_disk_capacity(drive));
+	len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive));
 
 	PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
 }
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
new file mode 100644
index 000000000000..84bbcfee9233
--- /dev/null
+++ b/drivers/ide/ide-gd.c
@@ -0,0 +1,296 @@
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/genhd.h>
+#include <linux/mutex.h>
+#include <linux/ide.h>
+#include <linux/hdreg.h>
+
+#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
+#define IDE_DISK_MINORS		(1 << PARTN_BITS)
+#else
+#define IDE_DISK_MINORS		0
+#endif
+
+#include "ide-disk.h"
+
+#define IDE_GD_VERSION	"1.18"
+
+static DEFINE_MUTEX(ide_disk_ref_mutex);
+
+static void ide_disk_release(struct kref *);
+
+static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
+{
+	struct ide_disk_obj *idkp = NULL;
+
+	mutex_lock(&ide_disk_ref_mutex);
+	idkp = ide_drv_g(disk, ide_disk_obj);
+	if (idkp) {
+		if (ide_device_get(idkp->drive))
+			idkp = NULL;
+		else
+			kref_get(&idkp->kref);
+	}
+	mutex_unlock(&ide_disk_ref_mutex);
+	return idkp;
+}
+
+static void ide_disk_put(struct ide_disk_obj *idkp)
+{
+	ide_drive_t *drive = idkp->drive;
+
+	mutex_lock(&ide_disk_ref_mutex);
+	kref_put(&idkp->kref, ide_disk_release);
+	ide_device_put(drive);
+	mutex_unlock(&ide_disk_ref_mutex);
+}
+
+sector_t ide_gd_capacity(ide_drive_t *drive)
+{
+	return drive->capacity64;
+}
+
+static int ide_gd_probe(ide_drive_t *);
+
+static void ide_gd_remove(ide_drive_t *drive)
+{
+	struct ide_disk_obj *idkp = drive->driver_data;
+	struct gendisk *g = idkp->disk;
+
+	ide_proc_unregister_driver(drive, idkp->driver);
+
+	del_gendisk(g);
+
+	ide_disk_flush(drive);
+
+	ide_disk_put(idkp);
+}
+
+static void ide_disk_release(struct kref *kref)
+{
+	struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+	struct gendisk *g = idkp->disk;
+
+	drive->driver_data = NULL;
+	g->private_data = NULL;
+	put_disk(g);
+	kfree(idkp);
+}
+
+/*
+ * On HPA drives the capacity needs to be
+ * reinitilized on resume otherwise the disk
+ * can not be used and a hard reset is required
+ */
+static void ide_gd_resume(ide_drive_t *drive)
+{
+	if (ata_id_hpa_enabled(drive->id))
+		ide_disk_init_capacity(drive);
+}
+
+static void ide_gd_shutdown(ide_drive_t *drive)
+{
+#ifdef	CONFIG_ALPHA
+	/* On Alpha, halt(8) doesn't actually turn the machine off,
+	   it puts you into the sort of firmware monitor. Typically,
+	   it's used to boot another kernel image, so it's not much
+	   different from reboot(8). Therefore, we don't need to
+	   spin down the disk in this case, especially since Alpha
+	   firmware doesn't handle disks in standby mode properly.
+	   On the other hand, it's reasonably safe to turn the power
+	   off when the shutdown process reaches the firmware prompt,
+	   as the firmware initialization takes rather long time -
+	   at least 10 seconds, which should be sufficient for
+	   the disk to expire its write cache. */
+	if (system_state != SYSTEM_POWER_OFF) {
+#else
+	if (system_state == SYSTEM_RESTART) {
+#endif
+		ide_disk_flush(drive);
+		return;
+	}
+
+	printk(KERN_INFO "Shutdown: %s\n", drive->name);
+
+	drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
+}
+
+static ide_driver_t ide_gd_driver = {
+	.gen_driver = {
+		.owner		= THIS_MODULE,
+		.name		= "ide-disk",
+		.bus		= &ide_bus_type,
+	},
+	.probe			= ide_gd_probe,
+	.remove			= ide_gd_remove,
+	.resume			= ide_gd_resume,
+	.shutdown		= ide_gd_shutdown,
+	.version		= IDE_GD_VERSION,
+	.do_request		= ide_do_rw_disk,
+	.end_request		= ide_end_request,
+	.error			= __ide_error,
+#ifdef CONFIG_IDE_PROC_FS
+	.proc			= ide_disk_proc,
+	.settings		= ide_disk_settings,
+#endif
+};
+
+static int ide_gd_open(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	struct ide_disk_obj *idkp;
+	ide_drive_t *drive;
+
+	idkp = ide_disk_get(disk);
+	if (idkp == NULL)
+		return -ENXIO;
+
+	drive = idkp->drive;
+
+	idkp->openers++;
+
+	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
+		/*
+		 * Ignore the return code from door_lock,
+		 * since the open() has already succeeded,
+		 * and the door_lock is irrelevant at this point.
+		 */
+		ide_disk_set_doorlock(drive, 1);
+		check_disk_change(inode->i_bdev);
+	}
+	return 0;
+}
+
+static int ide_gd_release(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+
+	if (idkp->openers == 1)
+		ide_disk_flush(drive);
+
+	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1)
+		ide_disk_set_doorlock(drive, 0);
+
+	idkp->openers--;
+
+	ide_disk_put(idkp);
+
+	return 0;
+}
+
+static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+
+	geo->heads = drive->bios_head;
+	geo->sectors = drive->bios_sect;
+	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
+	return 0;
+}
+
+static int ide_gd_media_changed(struct gendisk *disk)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+
+	/* do not scan partitions twice if this is a removable device */
+	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
+		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
+		return 0;
+	}
+
+	/* if removable, always assume it was changed */
+	return !!(drive->dev_flags & IDE_DFLAG_REMOVABLE);
+}
+
+static int ide_gd_revalidate_disk(struct gendisk *disk)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	set_capacity(disk, ide_gd_capacity(idkp->drive));
+	return 0;
+}
+
+static struct block_device_operations ide_gd_ops = {
+	.owner			= THIS_MODULE,
+	.open			= ide_gd_open,
+	.release		= ide_gd_release,
+	.ioctl			= ide_disk_ioctl,
+	.getgeo			= ide_gd_getgeo,
+	.media_changed		= ide_gd_media_changed,
+	.revalidate_disk	= ide_gd_revalidate_disk
+};
+
+static int ide_gd_probe(ide_drive_t *drive)
+{
+	struct ide_disk_obj *idkp;
+	struct gendisk *g;
+
+	/* strstr("foo", "") is non-NULL */
+	if (!strstr("ide-disk", drive->driver_req))
+		goto failed;
+
+	if (drive->media != ide_disk)
+		goto failed;
+
+	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
+	if (!idkp)
+		goto failed;
+
+	g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
+	if (!g)
+		goto out_free_idkp;
+
+	ide_init_disk(g, drive);
+
+	kref_init(&idkp->kref);
+
+	idkp->drive = drive;
+	idkp->driver = &ide_gd_driver;
+	idkp->disk = g;
+
+	g->private_data = &idkp->driver;
+
+	drive->driver_data = idkp;
+
+	ide_disk_setup(drive);
+
+	set_capacity(g, ide_gd_capacity(drive));
+
+	g->minors = IDE_DISK_MINORS;
+	g->driverfs_dev = &drive->gendev;
+	g->flags |= GENHD_FL_EXT_DEVT;
+	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
+		g->flags = GENHD_FL_REMOVABLE;
+	g->fops = &ide_gd_ops;
+	add_disk(g);
+	return 0;
+
+out_free_idkp:
+	kfree(idkp);
+failed:
+	return -ENODEV;
+}
+
+static int __init ide_gd_init(void)
+{
+	return driver_register(&ide_gd_driver.gen_driver);
+}
+
+static void __exit ide_gd_exit(void)
+{
+	driver_unregister(&ide_gd_driver.gen_driver);
+}
+
+MODULE_ALIAS("ide:*m-disk*");
+MODULE_ALIAS("ide-disk");
+module_init(ide_gd_init);
+module_exit(ide_gd_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ATA DISK Driver");
-- 
GitLab


From cedd120cac61fa149ba215eabc57b2578068be00 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:12 +0200
Subject: [PATCH 579/895] ide-disk: use IDE_DFLAG_MEDIA_CHANGED

Set IDE_DFLAG_MEDIA_CHANGED in ide_gd_open() to signalize
ide_gd_media_changed() that that media has changed (instead
of relying on IDE_DFLAG_REMOVABLE).

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-gd.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 84bbcfee9233..c08500270b9d 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -160,6 +160,7 @@ static int ide_gd_open(struct inode *inode, struct file *filp)
 		 * and the door_lock is irrelevant at this point.
 		 */
 		ide_disk_set_doorlock(drive, 1);
+		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 		check_disk_change(inode->i_bdev);
 	}
 	return 0;
@@ -199,6 +200,7 @@ static int ide_gd_media_changed(struct gendisk *disk)
 {
 	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
+	int ret;
 
 	/* do not scan partitions twice if this is a removable device */
 	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
@@ -206,8 +208,10 @@ static int ide_gd_media_changed(struct gendisk *disk)
 		return 0;
 	}
 
-	/* if removable, always assume it was changed */
-	return !!(drive->dev_flags & IDE_DFLAG_REMOVABLE);
+	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
+	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
+
+	return ret;
 }
 
 static int ide_gd_revalidate_disk(struct gendisk *disk)
-- 
GitLab


From c84d9bbe7c77aea7e1194da056d44a2ed982e72b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:13 +0200
Subject: [PATCH 580/895] ide-floppy: factor out generic disk handling code to
 ide-gd-floppy.c

While at it:
- idefloppy_do_request() -> ide_floppy_do_request()
- idefloppy_end_request() -> ide_floppy_end_request()
- idefloppy_setup() -> ide_floppy_setup()

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Makefile        |   2 +-
 drivers/ide/ide-floppy.c    | 319 ++----------------------------------
 drivers/ide/ide-floppy.h    |  19 ++-
 drivers/ide/ide-gd-floppy.c | 298 +++++++++++++++++++++++++++++++++
 4 files changed, 327 insertions(+), 311 deletions(-)
 create mode 100644 drivers/ide/ide-gd-floppy.c

diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 8aad9395a427..7eeeab597959 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_BLK_DEV_IDEPNP)		+= ide-pnp.o
 
 ide-disk_mod-y += ide-gd.o ide-disk.o ide-disk_ioctl.o
 ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o
-ide-floppy_mod-y += ide-floppy.o ide-floppy_ioctl.o
+ide-floppy_mod-y += ide-gd-floppy.o ide-floppy.o ide-floppy_ioctl.o
 
 ifeq ($(CONFIG_IDE_PROC_FS), y)
 	ide-disk_mod-y += ide-disk_proc.o
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 791a9d6f371c..802e0968e32f 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -15,12 +15,6 @@
  * Documentation/ide/ChangeLog.ide-floppy.1996-2002
  */
 
-#define DRV_NAME "ide-floppy"
-#define PFX DRV_NAME ": "
-
-#define IDEFLOPPY_VERSION "1.00"
-
-#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
@@ -49,19 +43,6 @@
 
 #include "ide-floppy.h"
 
-/* module parameters */
-static unsigned long debug_mask;
-module_param(debug_mask, ulong, 0644);
-
-/* define to see debug info */
-#define IDEFLOPPY_DEBUG_LOG	0
-
-#if IDEFLOPPY_DEBUG_LOG
-#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
-#else
-#define ide_debug_log(lvl, fmt, args...) do {} while (0)
-#endif
-
 /*
  * After each failed packet command we issue a request sense command and retry
  * the packet command IDEFLOPPY_MAX_PC_RETRIES times.
@@ -83,41 +64,11 @@ module_param(debug_mask, ulong, 0644);
 /* Error code returned in rq->errors to the higher part of the driver. */
 #define	IDEFLOPPY_ERROR_GENERAL		101
 
-static DEFINE_MUTEX(idefloppy_ref_mutex);
-
-static void idefloppy_cleanup_obj(struct kref *);
-
-static struct ide_floppy_obj *ide_floppy_get(struct gendisk *disk)
-{
-	struct ide_floppy_obj *floppy = NULL;
-
-	mutex_lock(&idefloppy_ref_mutex);
-	floppy = ide_drv_g(disk, ide_floppy_obj);
-	if (floppy) {
-		if (ide_device_get(floppy->drive))
-			floppy = NULL;
-		else
-			kref_get(&floppy->kref);
-	}
-	mutex_unlock(&idefloppy_ref_mutex);
-	return floppy;
-}
-
-static void ide_floppy_put(struct ide_floppy_obj *floppy)
-{
-	ide_drive_t *drive = floppy->drive;
-
-	mutex_lock(&idefloppy_ref_mutex);
-	kref_put(&floppy->kref, idefloppy_cleanup_obj);
-	ide_device_put(drive);
-	mutex_unlock(&idefloppy_ref_mutex);
-}
-
 /*
  * Used to finish servicing a request. For read/write requests, we will call
  * ide_end_request to pass to the next buffer.
  */
-static int idefloppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
+int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	struct request *rq = HWGROUP(drive)->rq;
@@ -161,7 +112,7 @@ static void idefloppy_update_buffers(ide_drive_t *drive,
 	struct bio *bio = rq->bio;
 
 	while ((bio = rq->bio) != NULL)
-		idefloppy_end_request(drive, 1, 0);
+		ide_floppy_end_request(drive, 1, 0);
 }
 
 static void ide_floppy_callback(ide_drive_t *drive, int dsc)
@@ -200,7 +151,7 @@ static void ide_floppy_callback(ide_drive_t *drive, int dsc)
 			       "Aborting request!\n");
 	}
 
-	idefloppy_end_request(drive, uptodate, 0);
+	ide_floppy_end_request(drive, uptodate, 0);
 }
 
 static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
@@ -329,8 +280,8 @@ static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
 	pc->req_xfer = pc->buf_size = rq->data_len;
 }
 
-static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
-		struct request *rq, sector_t block_s)
+ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, struct request *rq,
+				      sector_t block_s)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
@@ -353,7 +304,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
 		else
 			printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
 
-		idefloppy_end_request(drive, 0, 0);
+		ide_floppy_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
 	if (blk_fs_request(rq)) {
@@ -361,7 +312,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
 		    (rq->nr_sectors % floppy->bs_factor)) {
 			printk(KERN_ERR PFX "%s: unsupported r/w rq size\n",
 				drive->name);
-			idefloppy_end_request(drive, 0, 0);
+			ide_floppy_end_request(drive, 0, 0);
 			return ide_stopped;
 		}
 		pc = &floppy->queued_pc;
@@ -373,7 +324,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
 		idefloppy_blockpc_cmd(floppy, pc, rq);
 	} else {
 		blk_dump_rq_flags(rq, PFX "unsupported command in queue");
-		idefloppy_end_request(drive, 0, 0);
+		ide_floppy_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
 
@@ -455,7 +406,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
  * Determine if a media is present in the floppy drive, and if so, its LBA
  * capacity.
  */
-static int ide_floppy_get_capacity(ide_drive_t *drive)
+int ide_floppy_get_capacity(ide_drive_t *drive)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
@@ -554,12 +505,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 	return rc;
 }
 
-sector_t ide_floppy_capacity(ide_drive_t *drive)
-{
-	return drive->capacity64;
-}
-
-static void idefloppy_setup(ide_drive_t *drive)
+void ide_floppy_setup(ide_drive_t *drive)
 {
 	struct ide_floppy_obj *floppy = drive->driver_data;
 	u16 *id = drive->id;
@@ -601,248 +547,3 @@ static void idefloppy_setup(ide_drive_t *drive)
 
 	drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
-
-static void ide_floppy_remove(ide_drive_t *drive)
-{
-	idefloppy_floppy_t *floppy = drive->driver_data;
-	struct gendisk *g = floppy->disk;
-
-	ide_proc_unregister_driver(drive, floppy->driver);
-
-	del_gendisk(g);
-
-	ide_floppy_put(floppy);
-}
-
-static void idefloppy_cleanup_obj(struct kref *kref)
-{
-	struct ide_floppy_obj *floppy = to_ide_drv(kref, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-	struct gendisk *g = floppy->disk;
-
-	drive->driver_data = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-	kfree(floppy);
-}
-
-static int ide_floppy_probe(ide_drive_t *);
-
-static ide_driver_t idefloppy_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-floppy",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_floppy_probe,
-	.remove			= ide_floppy_remove,
-	.version		= IDEFLOPPY_VERSION,
-	.do_request		= idefloppy_do_request,
-	.end_request		= idefloppy_end_request,
-	.error			= __ide_error,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc			= ide_floppy_proc,
-	.settings		= ide_floppy_settings,
-#endif
-};
-
-static int idefloppy_open(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *floppy;
-	ide_drive_t *drive;
-	int ret = 0;
-
-	floppy = ide_floppy_get(disk);
-	if (!floppy)
-		return -ENXIO;
-
-	drive = floppy->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	floppy->openers++;
-
-	if (floppy->openers == 1) {
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-		/* Just in case */
-
-		if (ide_do_test_unit_ready(drive, disk))
-			ide_do_start_stop(drive, disk, 1);
-
-		ret = ide_floppy_get_capacity(drive);
-
-		set_capacity(disk, ide_floppy_capacity(drive));
-
-		if (ret && (filp->f_flags & O_NDELAY) == 0) {
-		    /*
-		     * Allow O_NDELAY to open a drive without a disk, or with an
-		     * unreadable disk, so that we can get the format capacity
-		     * of the drive or begin the format - Sam
-		     */
-			ret = -EIO;
-			goto out_put_floppy;
-		}
-
-		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
-			ret = -EROFS;
-			goto out_put_floppy;
-		}
-
-		ide_set_media_lock(drive, disk, 1);
-		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
-		check_disk_change(inode->i_bdev);
-	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
-		ret = -EBUSY;
-		goto out_put_floppy;
-	}
-	return 0;
-
-out_put_floppy:
-	floppy->openers--;
-	ide_floppy_put(floppy);
-	return ret;
-}
-
-static int idefloppy_release(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	if (floppy->openers == 1) {
-		ide_set_media_lock(drive, disk, 0);
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-	}
-
-	floppy->openers--;
-
-	ide_floppy_put(floppy);
-
-	return 0;
-}
-
-static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
-						     ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-
-	geo->heads = drive->bios_head;
-	geo->sectors = drive->bios_sect;
-	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
-	return 0;
-}
-
-static int idefloppy_media_changed(struct gendisk *disk)
-{
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-	int ret;
-
-	/* do not scan partitions twice if this is a removable device */
-	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
-		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-		return 0;
-	}
-	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
-	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
-	return ret;
-}
-
-static int idefloppy_revalidate_disk(struct gendisk *disk)
-{
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	set_capacity(disk, ide_floppy_capacity(floppy->drive));
-	return 0;
-}
-
-static struct block_device_operations idefloppy_ops = {
-	.owner			= THIS_MODULE,
-	.open			= idefloppy_open,
-	.release		= idefloppy_release,
-	.ioctl			= ide_floppy_ioctl,
-	.getgeo			= idefloppy_getgeo,
-	.media_changed		= idefloppy_media_changed,
-	.revalidate_disk	= idefloppy_revalidate_disk
-};
-
-static int ide_floppy_probe(ide_drive_t *drive)
-{
-	idefloppy_floppy_t *floppy;
-	struct gendisk *g;
-
-	if (!strstr("ide-floppy", drive->driver_req))
-		goto failed;
-
-	if (drive->media != ide_floppy)
-		goto failed;
-
-	if (!ide_check_atapi_device(drive, DRV_NAME)) {
-		printk(KERN_ERR PFX "%s: not supported by this version of "
-		       DRV_NAME "\n", drive->name);
-		goto failed;
-	}
-	floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL);
-	if (!floppy) {
-		printk(KERN_ERR PFX "%s: Can't allocate a floppy structure\n",
-		       drive->name);
-		goto failed;
-	}
-
-	g = alloc_disk_node(1 << PARTN_BITS, hwif_to_node(drive->hwif));
-	if (!g)
-		goto out_free_floppy;
-
-	ide_init_disk(g, drive);
-
-	kref_init(&floppy->kref);
-
-	floppy->drive = drive;
-	floppy->driver = &idefloppy_driver;
-	floppy->disk = g;
-
-	g->private_data = &floppy->driver;
-
-	drive->driver_data = floppy;
-
-	drive->debug_mask = debug_mask;
-
-	idefloppy_setup(drive);
-
-	set_capacity(g, ide_floppy_capacity(drive));
-
-	g->minors = 1 << PARTN_BITS;
-	g->driverfs_dev = &drive->gendev;
-	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-		g->flags = GENHD_FL_REMOVABLE;
-	g->fops = &idefloppy_ops;
-	add_disk(g);
-	return 0;
-
-out_free_floppy:
-	kfree(floppy);
-failed:
-	return -ENODEV;
-}
-
-static void __exit idefloppy_exit(void)
-{
-	driver_unregister(&idefloppy_driver.gen_driver);
-}
-
-static int __init idefloppy_init(void)
-{
-	printk(KERN_INFO DRV_NAME " driver " IDEFLOPPY_VERSION "\n");
-	return driver_register(&idefloppy_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-floppy*");
-MODULE_ALIAS("ide-floppy");
-module_init(idefloppy_init);
-module_exit(idefloppy_exit);
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("ATAPI FLOPPY Driver");
-
diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
index 17cf865e583d..836a70e6e3ef 100644
--- a/drivers/ide/ide-floppy.h
+++ b/drivers/ide/ide-floppy.h
@@ -1,6 +1,18 @@
 #ifndef __IDE_FLOPPY_H
 #define __IDE_FLOPPY_H
 
+#define DRV_NAME "ide-floppy"
+#define PFX DRV_NAME ": "
+
+/* define to see debug info */
+#define IDEFLOPPY_DEBUG_LOG	0
+
+#if IDEFLOPPY_DEBUG_LOG
+#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
+#else
+#define ide_debug_log(lvl, fmt, args...) do {} while (0)
+#endif
+
 /*
  * Most of our global data which we need to save even as we leave the driver
  * due to an interrupt or a timer event is stored in a variable of type
@@ -45,10 +57,15 @@ typedef struct ide_floppy_obj {
 #define	IDEFLOPPY_IOCTL_FORMAT_START		0x4602
 #define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS	0x4603
 
+sector_t ide_floppy_capacity(ide_drive_t *);
+
 /* ide-floppy.c */
 void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8);
 void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *);
-sector_t ide_floppy_capacity(ide_drive_t *);
+int ide_floppy_get_capacity(ide_drive_t *);
+void ide_floppy_setup(ide_drive_t *);
+ide_startstop_t ide_floppy_do_request(ide_drive_t *, struct request *, sector_t);
+int ide_floppy_end_request(ide_drive_t *, int, int);
 
 /* ide-floppy_ioctl.c */
 int ide_floppy_ioctl(struct inode *, struct file *, unsigned, unsigned long);
diff --git a/drivers/ide/ide-gd-floppy.c b/drivers/ide/ide-gd-floppy.c
new file mode 100644
index 000000000000..7afd013b4c55
--- /dev/null
+++ b/drivers/ide/ide-gd-floppy.c
@@ -0,0 +1,298 @@
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/genhd.h>
+#include <linux/mutex.h>
+#include <linux/ide.h>
+#include <linux/hdreg.h>
+
+#include "ide-floppy.h"
+
+#define IDEFLOPPY_VERSION "1.00"
+
+/* module parameters */
+static unsigned long debug_mask;
+module_param(debug_mask, ulong, 0644);
+
+static DEFINE_MUTEX(idefloppy_ref_mutex);
+
+static void idefloppy_cleanup_obj(struct kref *);
+
+static struct ide_floppy_obj *ide_floppy_get(struct gendisk *disk)
+{
+	struct ide_floppy_obj *floppy = NULL;
+
+	mutex_lock(&idefloppy_ref_mutex);
+	floppy = ide_drv_g(disk, ide_floppy_obj);
+	if (floppy) {
+		if (ide_device_get(floppy->drive))
+			floppy = NULL;
+		else
+			kref_get(&floppy->kref);
+	}
+	mutex_unlock(&idefloppy_ref_mutex);
+	return floppy;
+}
+
+static void ide_floppy_put(struct ide_floppy_obj *floppy)
+{
+	ide_drive_t *drive = floppy->drive;
+
+	mutex_lock(&idefloppy_ref_mutex);
+	kref_put(&floppy->kref, idefloppy_cleanup_obj);
+	ide_device_put(drive);
+	mutex_unlock(&idefloppy_ref_mutex);
+}
+
+sector_t ide_floppy_capacity(ide_drive_t *drive)
+{
+	return drive->capacity64;
+}
+
+static void ide_floppy_remove(ide_drive_t *drive)
+{
+	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct gendisk *g = floppy->disk;
+
+	ide_proc_unregister_driver(drive, floppy->driver);
+
+	del_gendisk(g);
+
+	ide_floppy_put(floppy);
+}
+
+static void idefloppy_cleanup_obj(struct kref *kref)
+{
+	struct ide_floppy_obj *floppy = to_ide_drv(kref, ide_floppy_obj);
+	ide_drive_t *drive = floppy->drive;
+	struct gendisk *g = floppy->disk;
+
+	drive->driver_data = NULL;
+	g->private_data = NULL;
+	put_disk(g);
+	kfree(floppy);
+}
+
+static int ide_floppy_probe(ide_drive_t *);
+
+static ide_driver_t idefloppy_driver = {
+	.gen_driver = {
+		.owner		= THIS_MODULE,
+		.name		= "ide-floppy",
+		.bus		= &ide_bus_type,
+	},
+	.probe			= ide_floppy_probe,
+	.remove			= ide_floppy_remove,
+	.version		= IDEFLOPPY_VERSION,
+	.do_request		= ide_floppy_do_request,
+	.end_request		= ide_floppy_end_request,
+	.error			= __ide_error,
+#ifdef CONFIG_IDE_PROC_FS
+	.proc			= ide_floppy_proc,
+	.settings		= ide_floppy_settings,
+#endif
+};
+
+static int idefloppy_open(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	struct ide_floppy_obj *floppy;
+	ide_drive_t *drive;
+	int ret = 0;
+
+	floppy = ide_floppy_get(disk);
+	if (!floppy)
+		return -ENXIO;
+
+	drive = floppy->drive;
+
+	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
+	floppy->openers++;
+
+	if (floppy->openers == 1) {
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+		/* Just in case */
+
+		if (ide_do_test_unit_ready(drive, disk))
+			ide_do_start_stop(drive, disk, 1);
+
+		ret = ide_floppy_get_capacity(drive);
+
+		set_capacity(disk, ide_floppy_capacity(drive));
+
+		if (ret && (filp->f_flags & O_NDELAY) == 0) {
+		    /*
+		     * Allow O_NDELAY to open a drive without a disk, or with an
+		     * unreadable disk, so that we can get the format capacity
+		     * of the drive or begin the format - Sam
+		     */
+			ret = -EIO;
+			goto out_put_floppy;
+		}
+
+		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
+			ret = -EROFS;
+			goto out_put_floppy;
+		}
+
+		ide_set_media_lock(drive, disk, 1);
+		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
+		check_disk_change(inode->i_bdev);
+	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
+		ret = -EBUSY;
+		goto out_put_floppy;
+	}
+	return 0;
+
+out_put_floppy:
+	floppy->openers--;
+	ide_floppy_put(floppy);
+	return ret;
+}
+
+static int idefloppy_release(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
+	ide_drive_t *drive = floppy->drive;
+
+	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
+	if (floppy->openers == 1) {
+		ide_set_media_lock(drive, disk, 0);
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+	}
+
+	floppy->openers--;
+
+	ide_floppy_put(floppy);
+
+	return 0;
+}
+
+static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
+						     ide_floppy_obj);
+	ide_drive_t *drive = floppy->drive;
+
+	geo->heads = drive->bios_head;
+	geo->sectors = drive->bios_sect;
+	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
+	return 0;
+}
+
+static int idefloppy_media_changed(struct gendisk *disk)
+{
+	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
+	ide_drive_t *drive = floppy->drive;
+	int ret;
+
+	/* do not scan partitions twice if this is a removable device */
+	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
+		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
+		return 0;
+	}
+
+	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
+	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
+
+	return ret;
+}
+
+static int idefloppy_revalidate_disk(struct gendisk *disk)
+{
+	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
+	set_capacity(disk, ide_floppy_capacity(floppy->drive));
+	return 0;
+}
+
+static struct block_device_operations idefloppy_ops = {
+	.owner			= THIS_MODULE,
+	.open			= idefloppy_open,
+	.release		= idefloppy_release,
+	.ioctl			= ide_floppy_ioctl,
+	.getgeo			= idefloppy_getgeo,
+	.media_changed		= idefloppy_media_changed,
+	.revalidate_disk	= idefloppy_revalidate_disk
+};
+
+static int ide_floppy_probe(ide_drive_t *drive)
+{
+	idefloppy_floppy_t *floppy;
+	struct gendisk *g;
+
+	if (!strstr("ide-floppy", drive->driver_req))
+		goto failed;
+
+	if (drive->media != ide_floppy)
+		goto failed;
+
+	if (!ide_check_atapi_device(drive, DRV_NAME)) {
+		printk(KERN_ERR PFX "%s: not supported by this version of "
+		       DRV_NAME "\n", drive->name);
+		goto failed;
+	}
+	floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL);
+	if (!floppy) {
+		printk(KERN_ERR PFX "%s: Can't allocate a floppy structure\n",
+		       drive->name);
+		goto failed;
+	}
+
+	g = alloc_disk_node(1 << PARTN_BITS, hwif_to_node(drive->hwif));
+	if (!g)
+		goto out_free_floppy;
+
+	ide_init_disk(g, drive);
+
+	kref_init(&floppy->kref);
+
+	floppy->drive = drive;
+	floppy->driver = &idefloppy_driver;
+	floppy->disk = g;
+
+	g->private_data = &floppy->driver;
+
+	drive->driver_data = floppy;
+
+	drive->debug_mask = debug_mask;
+
+	ide_floppy_setup(drive);
+
+	set_capacity(g, ide_floppy_capacity(drive));
+
+	g->minors = 1 << PARTN_BITS;
+	g->driverfs_dev = &drive->gendev;
+	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
+		g->flags = GENHD_FL_REMOVABLE;
+	g->fops = &idefloppy_ops;
+	add_disk(g);
+	return 0;
+
+out_free_floppy:
+	kfree(floppy);
+failed:
+	return -ENODEV;
+}
+
+static int __init idefloppy_init(void)
+{
+	printk(KERN_INFO DRV_NAME " driver " IDEFLOPPY_VERSION "\n");
+	return driver_register(&idefloppy_driver.gen_driver);
+}
+
+static void __exit idefloppy_exit(void)
+{
+	driver_unregister(&idefloppy_driver.gen_driver);
+}
+
+MODULE_ALIAS("ide:*m-floppy*");
+MODULE_ALIAS("ide-floppy");
+module_init(idefloppy_init);
+module_exit(idefloppy_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ATAPI FLOPPY Driver");
-- 
GitLab


From 9a6eb74d07f9152dd0e0ea551e878e869b8d2fc1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:13 +0200
Subject: [PATCH 581/895] ide: prepare for merging ide-gd-floppy.c with
 ide-gd.c

- idefloppy_ref_mutex -> ide_disk_ref_mutex
- idefloppy_cleanup_obj() -> ide_disk_release()
- ide_floppy_get() -> ide_disk_get()
- ide_floppy_put() -> ide_disk_put()
- ide_floppy_capacity() -> ide_gd_capacity()
- ide_floppy_remove() -> ide_gd_remove()
- ide_floppy_probe() -> ide_gd_probe()
- idefloppy_driver -> ide_gd_driver
- idefloppy_open() -> ide_gd_open()
- idefloppy_release() -> ide_gd_release()
- idefloppy_getgeo() -> ide_gd_getgeo()
- idefloppy_media_changed() -> ide_gd_media_changed()
- idefloppy_revalidate_disk() -> ide_gd_revalidate_disk()
- idefloppy_ops -> ide_gd_ops
- idefloppy_init() -> ide_gd_init()
- idefloppy_exit() -> ide_gd_exit()

- 'floppy' -> 'idkp' in ide_disk_*() and ide_gd_*()
- idefloppy_floppy_t -> struct ide_floppy_obj

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.h      |   2 +-
 drivers/ide/ide-floppy_proc.c |   2 +-
 drivers/ide/ide-gd-floppy.c   | 179 +++++++++++++++++-----------------
 3 files changed, 91 insertions(+), 92 deletions(-)

diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
index 836a70e6e3ef..b965da2f41ce 100644
--- a/drivers/ide/ide-floppy.h
+++ b/drivers/ide/ide-floppy.h
@@ -57,7 +57,7 @@ typedef struct ide_floppy_obj {
 #define	IDEFLOPPY_IOCTL_FORMAT_START		0x4602
 #define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS	0x4603
 
-sector_t ide_floppy_capacity(ide_drive_t *);
+sector_t ide_gd_capacity(ide_drive_t *);
 
 /* ide-floppy.c */
 void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8);
diff --git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c
index 76f0c6c4eca3..3ec762cb60ab 100644
--- a/drivers/ide/ide-floppy_proc.c
+++ b/drivers/ide/ide-floppy_proc.c
@@ -9,7 +9,7 @@ static int proc_idefloppy_read_capacity(char *page, char **start, off_t off,
 	ide_drive_t*drive = (ide_drive_t *)data;
 	int len;
 
-	len = sprintf(page, "%llu\n", (long long)ide_floppy_capacity(drive));
+	len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive));
 	PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
 }
 
diff --git a/drivers/ide/ide-gd-floppy.c b/drivers/ide/ide-gd-floppy.c
index 7afd013b4c55..986253418794 100644
--- a/drivers/ide/ide-gd-floppy.c
+++ b/drivers/ide/ide-gd-floppy.c
@@ -16,75 +16,75 @@
 static unsigned long debug_mask;
 module_param(debug_mask, ulong, 0644);
 
-static DEFINE_MUTEX(idefloppy_ref_mutex);
+static DEFINE_MUTEX(ide_disk_ref_mutex);
 
-static void idefloppy_cleanup_obj(struct kref *);
+static void ide_disk_release(struct kref *);
 
-static struct ide_floppy_obj *ide_floppy_get(struct gendisk *disk)
+static struct ide_floppy_obj *ide_disk_get(struct gendisk *disk)
 {
-	struct ide_floppy_obj *floppy = NULL;
+	struct ide_floppy_obj *idkp = NULL;
 
-	mutex_lock(&idefloppy_ref_mutex);
-	floppy = ide_drv_g(disk, ide_floppy_obj);
-	if (floppy) {
-		if (ide_device_get(floppy->drive))
-			floppy = NULL;
+	mutex_lock(&ide_disk_ref_mutex);
+	idkp = ide_drv_g(disk, ide_floppy_obj);
+	if (idkp) {
+		if (ide_device_get(idkp->drive))
+			idkp = NULL;
 		else
-			kref_get(&floppy->kref);
+			kref_get(&idkp->kref);
 	}
-	mutex_unlock(&idefloppy_ref_mutex);
-	return floppy;
+	mutex_unlock(&ide_disk_ref_mutex);
+	return idkp;
 }
 
-static void ide_floppy_put(struct ide_floppy_obj *floppy)
+static void ide_disk_put(struct ide_floppy_obj *idkp)
 {
-	ide_drive_t *drive = floppy->drive;
+	ide_drive_t *drive = idkp->drive;
 
-	mutex_lock(&idefloppy_ref_mutex);
-	kref_put(&floppy->kref, idefloppy_cleanup_obj);
+	mutex_lock(&ide_disk_ref_mutex);
+	kref_put(&idkp->kref, ide_disk_release);
 	ide_device_put(drive);
-	mutex_unlock(&idefloppy_ref_mutex);
+	mutex_unlock(&ide_disk_ref_mutex);
 }
 
-sector_t ide_floppy_capacity(ide_drive_t *drive)
+sector_t ide_gd_capacity(ide_drive_t *drive)
 {
 	return drive->capacity64;
 }
 
-static void ide_floppy_remove(ide_drive_t *drive)
+static int ide_gd_probe(ide_drive_t *);
+
+static void ide_gd_remove(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
-	struct gendisk *g = floppy->disk;
+	struct ide_floppy_obj *idkp = drive->driver_data;
+	struct gendisk *g = idkp->disk;
 
-	ide_proc_unregister_driver(drive, floppy->driver);
+	ide_proc_unregister_driver(drive, idkp->driver);
 
 	del_gendisk(g);
 
-	ide_floppy_put(floppy);
+	ide_disk_put(idkp);
 }
 
-static void idefloppy_cleanup_obj(struct kref *kref)
+static void ide_disk_release(struct kref *kref)
 {
-	struct ide_floppy_obj *floppy = to_ide_drv(kref, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-	struct gendisk *g = floppy->disk;
+	struct ide_floppy_obj *idkp = to_ide_drv(kref, ide_floppy_obj);
+	ide_drive_t *drive = idkp->drive;
+	struct gendisk *g = idkp->disk;
 
 	drive->driver_data = NULL;
 	g->private_data = NULL;
 	put_disk(g);
-	kfree(floppy);
+	kfree(idkp);
 }
 
-static int ide_floppy_probe(ide_drive_t *);
-
-static ide_driver_t idefloppy_driver = {
+static ide_driver_t ide_gd_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
 		.name		= "ide-floppy",
 		.bus		= &ide_bus_type,
 	},
-	.probe			= ide_floppy_probe,
-	.remove			= ide_floppy_remove,
+	.probe			= ide_gd_probe,
+	.remove			= ide_gd_remove,
 	.version		= IDEFLOPPY_VERSION,
 	.do_request		= ide_floppy_do_request,
 	.end_request		= ide_floppy_end_request,
@@ -95,24 +95,24 @@ static ide_driver_t idefloppy_driver = {
 #endif
 };
 
-static int idefloppy_open(struct inode *inode, struct file *filp)
+static int ide_gd_open(struct inode *inode, struct file *filp)
 {
 	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *floppy;
+	struct ide_floppy_obj *idkp;
 	ide_drive_t *drive;
 	int ret = 0;
 
-	floppy = ide_floppy_get(disk);
-	if (!floppy)
+	idkp = ide_disk_get(disk);
+	if (idkp == NULL)
 		return -ENXIO;
 
-	drive = floppy->drive;
+	drive = idkp->drive;
 
 	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
 
-	floppy->openers++;
+	idkp->openers++;
 
-	if (floppy->openers == 1) {
+	if (idkp->openers == 1) {
 		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 		/* Just in case */
 
@@ -121,7 +121,7 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 
 		ret = ide_floppy_get_capacity(drive);
 
-		set_capacity(disk, ide_floppy_capacity(drive));
+		set_capacity(disk, ide_gd_capacity(drive));
 
 		if (ret && (filp->f_flags & O_NDELAY) == 0) {
 		    /*
@@ -130,12 +130,12 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 		     * of the drive or begin the format - Sam
 		     */
 			ret = -EIO;
-			goto out_put_floppy;
+			goto out_put_idkp;
 		}
 
 		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
 			ret = -EROFS;
-			goto out_put_floppy;
+			goto out_put_idkp;
 		}
 
 		ide_set_media_lock(drive, disk, 1);
@@ -143,41 +143,40 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 		check_disk_change(inode->i_bdev);
 	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
 		ret = -EBUSY;
-		goto out_put_floppy;
+		goto out_put_idkp;
 	}
 	return 0;
 
-out_put_floppy:
-	floppy->openers--;
-	ide_floppy_put(floppy);
+out_put_idkp:
+	idkp->openers--;
+	ide_disk_put(idkp);
 	return ret;
 }
 
-static int idefloppy_release(struct inode *inode, struct file *filp)
+static int ide_gd_release(struct inode *inode, struct file *filp)
 {
 	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
+	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
+	ide_drive_t *drive = idkp->drive;
 
 	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
 
-	if (floppy->openers == 1) {
+	if (idkp->openers == 1) {
 		ide_set_media_lock(drive, disk, 0);
 		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 	}
 
-	floppy->openers--;
+	idkp->openers--;
 
-	ide_floppy_put(floppy);
+	ide_disk_put(idkp);
 
 	return 0;
 }
 
-static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-	struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
-						     ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
+	struct ide_floppy_obj *idkp = ide_drv_g(bdev->bd_disk, ide_floppy_obj);
+	ide_drive_t *drive = idkp->drive;
 
 	geo->heads = drive->bios_head;
 	geo->sectors = drive->bios_sect;
@@ -185,10 +184,10 @@ static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static int idefloppy_media_changed(struct gendisk *disk)
+static int ide_gd_media_changed(struct gendisk *disk)
 {
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
+	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
+	ide_drive_t *drive = idkp->drive;
 	int ret;
 
 	/* do not scan partitions twice if this is a removable device */
@@ -203,26 +202,26 @@ static int idefloppy_media_changed(struct gendisk *disk)
 	return ret;
 }
 
-static int idefloppy_revalidate_disk(struct gendisk *disk)
+static int ide_gd_revalidate_disk(struct gendisk *disk)
 {
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	set_capacity(disk, ide_floppy_capacity(floppy->drive));
+	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
+	set_capacity(disk, ide_gd_capacity(idkp->drive));
 	return 0;
 }
 
-static struct block_device_operations idefloppy_ops = {
+static struct block_device_operations ide_gd_ops = {
 	.owner			= THIS_MODULE,
-	.open			= idefloppy_open,
-	.release		= idefloppy_release,
+	.open			= ide_gd_open,
+	.release		= ide_gd_release,
 	.ioctl			= ide_floppy_ioctl,
-	.getgeo			= idefloppy_getgeo,
-	.media_changed		= idefloppy_media_changed,
-	.revalidate_disk	= idefloppy_revalidate_disk
+	.getgeo			= ide_gd_getgeo,
+	.media_changed		= ide_gd_media_changed,
+	.revalidate_disk	= ide_gd_revalidate_disk
 };
 
-static int ide_floppy_probe(ide_drive_t *drive)
+static int ide_gd_probe(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy;
+	struct ide_floppy_obj *idkp;
 	struct gendisk *g;
 
 	if (!strstr("ide-floppy", drive->driver_req))
@@ -236,8 +235,8 @@ static int ide_floppy_probe(ide_drive_t *drive)
 		       DRV_NAME "\n", drive->name);
 		goto failed;
 	}
-	floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL);
-	if (!floppy) {
+	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
+	if (!idkp) {
 		printk(KERN_ERR PFX "%s: Can't allocate a floppy structure\n",
 		       drive->name);
 		goto failed;
@@ -245,54 +244,54 @@ static int ide_floppy_probe(ide_drive_t *drive)
 
 	g = alloc_disk_node(1 << PARTN_BITS, hwif_to_node(drive->hwif));
 	if (!g)
-		goto out_free_floppy;
+		goto out_free_idkp;
 
 	ide_init_disk(g, drive);
 
-	kref_init(&floppy->kref);
+	kref_init(&idkp->kref);
 
-	floppy->drive = drive;
-	floppy->driver = &idefloppy_driver;
-	floppy->disk = g;
+	idkp->drive = drive;
+	idkp->driver = &ide_gd_driver;
+	idkp->disk = g;
 
-	g->private_data = &floppy->driver;
+	g->private_data = &idkp->driver;
 
-	drive->driver_data = floppy;
+	drive->driver_data = idkp;
 
 	drive->debug_mask = debug_mask;
 
 	ide_floppy_setup(drive);
 
-	set_capacity(g, ide_floppy_capacity(drive));
+	set_capacity(g, ide_gd_capacity(drive));
 
 	g->minors = 1 << PARTN_BITS;
 	g->driverfs_dev = &drive->gendev;
 	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
 		g->flags = GENHD_FL_REMOVABLE;
-	g->fops = &idefloppy_ops;
+	g->fops = &ide_gd_ops;
 	add_disk(g);
 	return 0;
 
-out_free_floppy:
-	kfree(floppy);
+out_free_idkp:
+	kfree(idkp);
 failed:
 	return -ENODEV;
 }
 
-static int __init idefloppy_init(void)
+static int __init ide_gd_init(void)
 {
 	printk(KERN_INFO DRV_NAME " driver " IDEFLOPPY_VERSION "\n");
-	return driver_register(&idefloppy_driver.gen_driver);
+	return driver_register(&ide_gd_driver.gen_driver);
 }
 
-static void __exit idefloppy_exit(void)
+static void __exit ide_gd_exit(void)
 {
-	driver_unregister(&idefloppy_driver.gen_driver);
+	driver_unregister(&ide_gd_driver.gen_driver);
 }
 
 MODULE_ALIAS("ide:*m-floppy*");
 MODULE_ALIAS("ide-floppy");
-module_init(idefloppy_init);
-module_exit(idefloppy_exit);
+module_init(ide_gd_init);
+module_exit(ide_gd_exit);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("ATAPI FLOPPY Driver");
-- 
GitLab


From 79cb380397c834a35952d8497651d93b543ef968 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:13 +0200
Subject: [PATCH 582/895] ide: allow device drivers to specify per-device type
 /proc settings

Turn ide_driver_t's 'proc' field into ->proc_entries method
(and also 'settings' field into ->proc_devsets method).  Then
update all device drivers accordingly.

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c        | 14 ++++++++++++--
 drivers/ide/ide-gd-floppy.c | 16 ++++++++++++++--
 drivers/ide/ide-gd.c        | 16 ++++++++++++++--
 drivers/ide/ide-proc.c      |  6 +++---
 drivers/ide/ide-tape.c      | 14 ++++++++++++--
 drivers/scsi/ide-scsi.c     | 26 +++++++++++++++++---------
 include/linux/ide.h         |  4 ++--
 7 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 2f4cc10391e5..32073666b9ca 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1908,6 +1908,16 @@ static const struct ide_proc_devset idecd_settings[] = {
 	IDE_PROC_DEVSET(dsc_overlap, 0, 1),
 	{ 0 },
 };
+
+static ide_proc_entry_t *ide_cd_proc_entries(ide_drive_t *drive)
+{
+	return idecd_proc;
+}
+
+static const struct ide_proc_devset *ide_cd_proc_devsets(ide_drive_t *drive)
+{
+	return idecd_settings;
+}
 #endif
 
 static const struct cd_list_entry ide_cd_quirks_list[] = {
@@ -2069,8 +2079,8 @@ static ide_driver_t ide_cdrom_driver = {
 	.end_request		= ide_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= idecd_proc,
-	.settings		= idecd_settings,
+	.proc_entries		= ide_cd_proc_entries,
+	.proc_devsets		= ide_cd_proc_devsets,
 #endif
 };
 
diff --git a/drivers/ide/ide-gd-floppy.c b/drivers/ide/ide-gd-floppy.c
index 986253418794..082800b9a558 100644
--- a/drivers/ide/ide-gd-floppy.c
+++ b/drivers/ide/ide-gd-floppy.c
@@ -77,6 +77,18 @@ static void ide_disk_release(struct kref *kref)
 	kfree(idkp);
 }
 
+#ifdef CONFIG_IDE_PROC_FS
+static ide_proc_entry_t *ide_floppy_proc_entries(ide_drive_t *drive)
+{
+	return ide_floppy_proc;
+}
+
+static const struct ide_proc_devset *ide_floppy_proc_devsets(ide_drive_t *drive)
+{
+	return ide_floppy_settings;
+}
+#endif
+
 static ide_driver_t ide_gd_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
@@ -90,8 +102,8 @@ static ide_driver_t ide_gd_driver = {
 	.end_request		= ide_floppy_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= ide_floppy_proc,
-	.settings		= ide_floppy_settings,
+	.proc_entries		= ide_floppy_proc_entries,
+	.proc_devsets		= ide_floppy_proc_devsets,
 #endif
 };
 
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index c08500270b9d..a3d4ad7db2af 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -119,6 +119,18 @@ static void ide_gd_shutdown(ide_drive_t *drive)
 	drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
 }
 
+#ifdef CONFIG_IDE_PROC_FS
+static ide_proc_entry_t *ide_disk_proc_entries(ide_drive_t *drive)
+{
+	return ide_disk_proc;
+}
+
+static const struct ide_proc_devset *ide_disk_proc_devsets(ide_drive_t *drive)
+{
+	return ide_disk_settings;
+}
+#endif
+
 static ide_driver_t ide_gd_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
@@ -134,8 +146,8 @@ static ide_driver_t ide_gd_driver = {
 	.end_request		= ide_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= ide_disk_proc,
-	.settings		= ide_disk_settings,
+	.proc_entries		= ide_disk_proc_entries,
+	.proc_devsets		= ide_disk_proc_devsets,
 #endif
 };
 
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index b26926487cc0..c31d0dd7a532 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -567,10 +567,10 @@ static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t
 void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver)
 {
 	mutex_lock(&ide_setting_mtx);
-	drive->settings = driver->settings;
+	drive->settings = driver->proc_devsets(drive);
 	mutex_unlock(&ide_setting_mtx);
 
-	ide_add_proc_entries(drive->proc, driver->proc, drive);
+	ide_add_proc_entries(drive->proc, driver->proc_entries(drive), drive);
 }
 
 EXPORT_SYMBOL(ide_proc_register_driver);
@@ -591,7 +591,7 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver)
 {
 	unsigned long flags;
 
-	ide_remove_proc_entries(drive->proc, driver->proc);
+	ide_remove_proc_entries(drive->proc, driver->proc_entries(drive));
 
 	mutex_lock(&ide_setting_mtx);
 	spin_lock_irqsave(&ide_lock, flags);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a99e28f45156..b2b2e5e8d38e 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2298,6 +2298,16 @@ static ide_proc_entry_t idetape_proc[] = {
 	{ "name",	S_IFREG|S_IRUGO,	proc_idetape_read_name,	NULL },
 	{ NULL, 0, NULL, NULL }
 };
+
+static ide_proc_entry_t *ide_tape_proc_entries(ide_drive_t *drive)
+{
+	return idetape_proc;
+}
+
+static const struct ide_proc_devset *ide_tape_proc_devsets(ide_drive_t *drive)
+{
+	return idetape_settings;
+}
 #endif
 
 static int ide_tape_probe(ide_drive_t *);
@@ -2315,8 +2325,8 @@ static ide_driver_t idetape_driver = {
 	.end_request		= idetape_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= idetape_proc,
-	.settings		= idetape_settings,
+	.proc_entries		= ide_tape_proc_entries,
+	.proc_devsets		= ide_tape_proc_devsets,
 #endif
 };
 
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 740bad435995..afc96e844a25 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -343,6 +343,11 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r
 }
 
 #ifdef CONFIG_IDE_PROC_FS
+static ide_proc_entry_t idescsi_proc[] = {
+	{ "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
+	{ NULL, 0, NULL, NULL }
+};
+
 #define ide_scsi_devset_get(name, field) \
 static int get_##name(ide_drive_t *drive) \
 { \
@@ -378,6 +383,16 @@ static const struct ide_proc_devset idescsi_settings[] = {
 	IDE_PROC_DEVSET(transform, 0,	 3),
 	{ 0 },
 };
+
+static ide_proc_entry_t *ide_scsi_proc_entries(ide_drive_t *drive)
+{
+	return idescsi_proc;
+}
+
+static const struct ide_proc_devset *ide_scsi_proc_devsets(ide_drive_t *drive)
+{
+	return idescsi_settings;
+}
 #endif
 
 /*
@@ -419,13 +434,6 @@ static void ide_scsi_remove(ide_drive_t *drive)
 
 static int ide_scsi_probe(ide_drive_t *);
 
-#ifdef CONFIG_IDE_PROC_FS
-static ide_proc_entry_t idescsi_proc[] = {
-	{ "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
-	{ NULL, 0, NULL, NULL }
-};
-#endif
-
 static ide_driver_t idescsi_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
@@ -439,8 +447,8 @@ static ide_driver_t idescsi_driver = {
 	.end_request		= idescsi_end_request,
 	.error                  = idescsi_atapi_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= idescsi_proc,
-	.settings		= idescsi_settings,
+	.proc_entries		= ide_scsi_proc_entries,
+	.proc_devsets		= ide_scsi_proc_devsets,
 #endif
 };
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index ba51a93fa547..488808891acb 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1120,8 +1120,8 @@ struct ide_driver_s {
 	void		(*resume)(ide_drive_t *);
 	void		(*shutdown)(ide_drive_t *);
 #ifdef CONFIG_IDE_PROC_FS
-	ide_proc_entry_t		*proc;
-	const struct ide_proc_devset	*settings;
+	ide_proc_entry_t *		(*proc_entries)(ide_drive_t *);
+	const struct ide_proc_devset *	(*proc_devsets)(ide_drive_t *);
 #endif
 };
 
-- 
GitLab


From 806f80a6fc203ad0bde84e5a9e94572617d2ae45 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:14 +0200
Subject: [PATCH 583/895] ide: add generic ATA/ATAPI disk driver

* Add struct ide_disk_ops containing protocol specific methods.

* Add 'struct ide_disk_ops *' to ide_drive_t.

* Convert ide-{disk,floppy} drivers to use struct ide_disk_ops.

* Merge ide-{disk,floppy} drivers into generic ide-gd driver.

While at it:
- ide_disk_init_capacity() -> ide_disk_get_capacity()

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Kconfig            |  64 ++++---
 drivers/ide/Makefile           |  19 +-
 drivers/ide/ide-disk.c         |  39 ++++-
 drivers/ide/ide-disk.h         |  24 +--
 drivers/ide/ide-disk_ioctl.c   |   4 +-
 drivers/ide/ide-floppy.c       |  45 ++++-
 drivers/ide/ide-floppy.h       |  58 ++-----
 drivers/ide/ide-floppy_ioctl.c |   9 +-
 drivers/ide/ide-gd-floppy.c    | 309 ---------------------------------
 drivers/ide/ide-gd.c           | 122 +++++++++++--
 drivers/ide/ide-gd.h           |  44 +++++
 drivers/leds/Kconfig           |   2 +-
 include/linux/ide.h            |  19 ++
 13 files changed, 303 insertions(+), 455 deletions(-)
 delete mode 100644 drivers/ide/ide-gd-floppy.c
 create mode 100644 drivers/ide/ide-gd.h

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 74a369a6116f..faa974e615da 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -84,21 +84,40 @@ config BLK_DEV_IDE_SATA
 
 	  If unsure, say N.
 
-config BLK_DEV_IDEDISK
-	tristate "Include IDE/ATA-2 DISK support"
-	---help---
-	  This will include enhanced support for MFM/RLL/IDE hard disks.  If
-	  you have a MFM/RLL/IDE disk, and there is no special reason to use
-	  the old hard disk driver instead, say Y.  If you have an SCSI-only
-	  system, you can say N here.
+config IDE_GD
+	tristate "generic ATA/ATAPI disk support"
+	default y
+	help
+	  Support for ATA/ATAPI disks (including ATAPI floppy drives).
 
-	  To compile this driver as a module, choose M here: the
-	  module will be called ide-disk.
-	  Do not compile this driver as a module if your root file system
-	  (the one containing the directory /) is located on the IDE disk.
+	  To compile this driver as a module, choose M here.
+	  The module will be called ide-gd_mod.
 
 	  If unsure, say Y.
 
+config IDE_GD_ATA
+	bool "ATA disk support"
+	depends on IDE_GD
+	default y
+	help
+	  This will include support for ATA hard disks.
+
+	  If unsure, say Y.
+
+config IDE_GD_ATAPI
+	bool "ATAPI floppy support"
+	depends on IDE_GD
+	select IDE_ATAPI
+	help
+	  This will include support for ATAPI floppy drives
+	  (i.e. Iomega ZIP or MKE LS-120).
+
+	  For information about jumper settings and the question
+	  of when a ZIP drive uses a partition table, see
+	  <http://www.win.tue.nl/~aeb/linux/zip/zip-1.html>.
+
+	  If unsure, say N.
+
 config BLK_DEV_IDECS
 	tristate "PCMCIA IDE support"
 	depends on PCMCIA
@@ -163,29 +182,6 @@ config BLK_DEV_IDETAPE
 	  To compile this driver as a module, choose M here: the
 	  module will be called ide-tape.
 
-config BLK_DEV_IDEFLOPPY
-	tristate "Include IDE/ATAPI FLOPPY support"
-	select IDE_ATAPI
-	---help---
-	  If you have an IDE floppy drive which uses the ATAPI protocol,
-	  answer Y.  ATAPI is a newer protocol used by IDE CD-ROM/tape/floppy
-	  drives, similar to the SCSI protocol.
-
-	  The LS-120 and the IDE/ATAPI Iomega ZIP drive are also supported by
-	  this driver. For information about jumper settings and the question
-	  of when a ZIP drive uses a partition table, see
-	  <http://www.win.tue.nl/~aeb/linux/zip/zip-1.html>.
-	  (ATAPI PD-CD/CDR drives are not supported by this driver; support
-	  for PD-CD/CDR drives is available if you answer Y to
-	  "SCSI emulation support", below).
-
-	  If you say Y here, the FLOPPY drive will be identified along with
-	  other IDE devices, as "hdb" or "hdc", or something similar (check
-	  the boot messages with dmesg).
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ide-floppy.
-
 config BLK_DEV_IDESCSI
 	tristate "SCSI emulation support (DEPRECATED)"
 	depends on SCSI
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 7eeeab597959..093d3248ca89 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -37,18 +37,25 @@ obj-$(CONFIG_IDE_H8300)			+= h8300/
 obj-$(CONFIG_IDE_GENERIC)		+= ide-generic.o
 obj-$(CONFIG_BLK_DEV_IDEPNP)		+= ide-pnp.o
 
-ide-disk_mod-y += ide-gd.o ide-disk.o ide-disk_ioctl.o
+ide-gd_mod-y += ide-gd.o
 ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o
-ide-floppy_mod-y += ide-gd-floppy.o ide-floppy.o ide-floppy_ioctl.o
 
+ifeq ($(CONFIG_IDE_GD_ATA), y)
+	ide-gd_mod-y += ide-disk.o ide-disk_ioctl.o
 ifeq ($(CONFIG_IDE_PROC_FS), y)
-	ide-disk_mod-y += ide-disk_proc.o
-	ide-floppy_mod-y += ide-floppy_proc.o
+	ide-gd_mod-y += ide-disk_proc.o
+endif
+endif
+
+ifeq ($(CONFIG_IDE_GD_ATAPI), y)
+	ide-gd_mod-y += ide-floppy.o ide-floppy_ioctl.o
+ifeq ($(CONFIG_IDE_PROC_FS), y)
+	ide-gd_mod-y += ide-floppy_proc.o
+endif
 endif
 
-obj-$(CONFIG_BLK_DEV_IDEDISK)		+= ide-disk_mod.o
+obj-$(CONFIG_IDE_GD)			+= ide-gd_mod.o
 obj-$(CONFIG_BLK_DEV_IDECD)		+= ide-cd_mod.o
-obj-$(CONFIG_BLK_DEV_IDEFLOPPY)		+= ide-floppy_mod.o
 obj-$(CONFIG_BLK_DEV_IDETAPE)		+= ide-tape.o
 
 ifeq ($(CONFIG_BLK_DEV_IDECS), y)
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 751be7af22c2..223750c1b5a6 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -184,8 +184,8 @@ static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
  * 1073741822 == 549756 MB or 48bit addressing fake drive
  */
 
-ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
-			       sector_t block)
+static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
+				      sector_t block)
 {
 	ide_hwif_t *hwif = HWIF(drive);
 
@@ -333,7 +333,7 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 	}
 }
 
-void ide_disk_init_capacity(ide_drive_t *drive)
+static int ide_disk_get_capacity(ide_drive_t *drive)
 {
 	u16 *id = drive->id;
 	int lba;
@@ -382,6 +382,8 @@ void ide_disk_init_capacity(ide_drive_t *drive)
 		} else
 			drive->dev_flags &= ~IDE_DFLAG_LBA48;
 	}
+
+	return 0;
 }
 
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
@@ -590,7 +592,12 @@ ide_ext_devset_rw(wcache, wcache);
 
 ide_ext_devset_rw_sync(nowerr, nowerr);
 
-void ide_disk_setup(ide_drive_t *drive)
+static int ide_disk_check(ide_drive_t *drive, const char *s)
+{
+	return 1;
+}
+
+static void ide_disk_setup(ide_drive_t *drive)
 {
 	struct ide_disk_obj *idkp = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
@@ -626,7 +633,7 @@ void ide_disk_setup(ide_drive_t *drive)
 			 drive->queue->max_sectors / 2);
 
 	/* calculate drive capacity, and select LBA if possible */
-	ide_disk_init_capacity(drive);
+	ide_disk_get_capacity(drive);
 
 	/*
 	 * if possible, give fdisk access to more of the drive,
@@ -682,7 +689,7 @@ void ide_disk_setup(ide_drive_t *drive)
 		drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
-void ide_disk_flush(ide_drive_t *drive)
+static void ide_disk_flush(ide_drive_t *drive)
 {
 	if (ata_id_flush_enabled(drive->id) == 0 ||
 	    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0)
@@ -692,7 +699,13 @@ void ide_disk_flush(ide_drive_t *drive)
 		printk(KERN_INFO "%s: wcache flush failed!\n", drive->name);
 }
 
-int ide_disk_set_doorlock(ide_drive_t *drive, int on)
+static int ide_disk_init_media(ide_drive_t *drive, struct gendisk *disk)
+{
+	return 0;
+}
+
+static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk,
+				 int on)
 {
 	ide_task_t task;
 	int ret;
@@ -711,3 +724,15 @@ int ide_disk_set_doorlock(ide_drive_t *drive, int on)
 
 	return ret;
 }
+
+const struct ide_disk_ops ide_ata_disk_ops = {
+	.check		= ide_disk_check,
+	.get_capacity	= ide_disk_get_capacity,
+	.setup		= ide_disk_setup,
+	.flush		= ide_disk_flush,
+	.init_media	= ide_disk_init_media,
+	.set_doorlock	= ide_disk_set_doorlock,
+	.do_request	= ide_do_rw_disk,
+	.end_request	= ide_end_request,
+	.ioctl		= ide_disk_ioctl,
+};
diff --git a/drivers/ide/ide-disk.h b/drivers/ide/ide-disk.h
index 104ad71288a5..b234b0feaf7b 100644
--- a/drivers/ide/ide-disk.h
+++ b/drivers/ide/ide-disk.h
@@ -1,22 +1,11 @@
 #ifndef __IDE_DISK_H
 #define __IDE_DISK_H
 
-struct ide_disk_obj {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
-	unsigned int	openers;	/* protected by BKL for now */
-};
-
-sector_t ide_gd_capacity(ide_drive_t *);
+#include "ide-gd.h"
 
+#ifdef CONFIG_IDE_GD_ATA
 /* ide-disk.c */
-void ide_disk_init_capacity(ide_drive_t *);
-void ide_disk_setup(ide_drive_t *);
-void ide_disk_flush(ide_drive_t *);
-int ide_disk_set_doorlock(ide_drive_t *, int);
-ide_startstop_t ide_do_rw_disk(ide_drive_t *, struct request *, sector_t);
+extern const struct ide_disk_ops ide_ata_disk_ops;
 ide_decl_devset(address);
 ide_decl_devset(multcount);
 ide_decl_devset(nowerr);
@@ -24,12 +13,17 @@ ide_decl_devset(wcache);
 ide_decl_devset(acoustic);
 
 /* ide-disk_ioctl.c */
-int ide_disk_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+int ide_disk_ioctl(ide_drive_t *, struct inode *, struct file *, unsigned int,
+		   unsigned long);
 
 #ifdef CONFIG_IDE_PROC_FS
 /* ide-disk_proc.c */
 extern ide_proc_entry_t ide_disk_proc[];
 extern const struct ide_proc_devset ide_disk_settings[];
 #endif
+#else
+#define ide_disk_proc		NULL
+#define ide_disk_settings	NULL
+#endif
 
 #endif /* __IDE_DISK_H */
diff --git a/drivers/ide/ide-disk_ioctl.c b/drivers/ide/ide-disk_ioctl.c
index e6624eda9e69..a49698bcf966 100644
--- a/drivers/ide/ide-disk_ioctl.c
+++ b/drivers/ide/ide-disk_ioctl.c
@@ -13,12 +13,10 @@ static const struct ide_ioctl_devset ide_disk_ioctl_settings[] = {
 { 0 }
 };
 
-int ide_disk_ioctl(struct inode *inode, struct file *file,
+int ide_disk_ioctl(ide_drive_t *drive, struct inode *inode, struct file *file,
 		   unsigned int cmd, unsigned long arg)
 {
 	struct block_device *bdev = inode->i_bdev;
-	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
 	int err;
 
 	err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings);
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 802e0968e32f..58746c748c12 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -68,7 +68,7 @@
  * Used to finish servicing a request. For read/write requests, we will call
  * ide_end_request to pass to the next buffer.
  */
-int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
+static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	struct request *rq = HWGROUP(drive)->rq;
@@ -280,13 +280,12 @@ static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
 	pc->req_xfer = pc->buf_size = rq->data_len;
 }
 
-ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, struct request *rq,
-				      sector_t block_s)
+static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
+					     struct request *rq, sector_t block)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
 	struct ide_atapi_pc *pc;
-	unsigned long block = (unsigned long)block_s;
 
 	ide_debug_log(IDE_DBG_FUNC, "%s: dev: %s, cmd: 0x%x, cmd_type: %x, "
 		      "errors: %d\n",
@@ -316,7 +315,7 @@ ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, struct request *rq,
 			return ide_stopped;
 		}
 		pc = &floppy->queued_pc;
-		idefloppy_create_rw_cmd(drive, pc, rq, block);
+		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
 	} else if (blk_special_request(rq)) {
 		pc = (struct ide_atapi_pc *) rq->buffer;
 	} else if (blk_pc_request(rq)) {
@@ -406,7 +405,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
  * Determine if a media is present in the floppy drive, and if so, its LBA
  * capacity.
  */
-int ide_floppy_get_capacity(ide_drive_t *drive)
+static int ide_floppy_get_capacity(ide_drive_t *drive)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
@@ -505,9 +504,9 @@ int ide_floppy_get_capacity(ide_drive_t *drive)
 	return rc;
 }
 
-void ide_floppy_setup(ide_drive_t *drive)
+static void ide_floppy_setup(ide_drive_t *drive)
 {
-	struct ide_floppy_obj *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	u16 *id = drive->id;
 
 	drive->pc_callback	 = ide_floppy_callback;
@@ -547,3 +546,33 @@ void ide_floppy_setup(ide_drive_t *drive)
 
 	drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
+
+static void ide_floppy_flush(ide_drive_t *drive)
+{
+}
+
+static int ide_floppy_init_media(ide_drive_t *drive, struct gendisk *disk)
+{
+	int ret = 0;
+
+	if (ide_do_test_unit_ready(drive, disk))
+		ide_do_start_stop(drive, disk, 1);
+
+	ret = ide_floppy_get_capacity(drive);
+
+	set_capacity(disk, ide_gd_capacity(drive));
+
+	return ret;
+}
+
+const struct ide_disk_ops ide_atapi_disk_ops = {
+	.check		= ide_check_atapi_device,
+	.get_capacity	= ide_floppy_get_capacity,
+	.setup		= ide_floppy_setup,
+	.flush		= ide_floppy_flush,
+	.init_media	= ide_floppy_init_media,
+	.set_doorlock	= ide_set_media_lock,
+	.do_request	= ide_floppy_do_request,
+	.end_request	= ide_floppy_end_request,
+	.ioctl		= ide_floppy_ioctl,
+};
diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
index b965da2f41ce..acebc8c5a827 100644
--- a/drivers/ide/ide-floppy.h
+++ b/drivers/ide/ide-floppy.h
@@ -1,48 +1,10 @@
 #ifndef __IDE_FLOPPY_H
 #define __IDE_FLOPPY_H
 
-#define DRV_NAME "ide-floppy"
-#define PFX DRV_NAME ": "
+#include "ide-gd.h"
 
-/* define to see debug info */
-#define IDEFLOPPY_DEBUG_LOG	0
-
-#if IDEFLOPPY_DEBUG_LOG
-#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
-#else
-#define ide_debug_log(lvl, fmt, args...) do {} while (0)
-#endif
-
-/*
- * Most of our global data which we need to save even as we leave the driver
- * due to an interrupt or a timer event is stored in a variable of type
- * idefloppy_floppy_t, defined below.
- */
-typedef struct ide_floppy_obj {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
-	unsigned int	openers;	/* protected by BKL for now */
-
-	/* Last failed packet command */
-	struct ide_atapi_pc *failed_pc;
-	/* used for blk_{fs,pc}_request() requests */
-	struct ide_atapi_pc queued_pc;
-
-	/* Last error information */
-	u8 sense_key, asc, ascq;
-
-	int progress_indication;
-
-	/* Device information */
-	/* Current format */
-	int blocks, block_size, bs_factor;
-	/* Last format capacity descriptor */
-	u8 cap_desc[8];
-	/* Copy of the flexible disk page */
-	u8 flexible_disk_page[32];
-} idefloppy_floppy_t;
+#ifdef CONFIG_IDE_GD_ATAPI
+typedef struct ide_disk_obj idefloppy_floppy_t;
 
 /*
  * Pages of the SELECT SENSE / MODE SENSE packet commands.
@@ -57,23 +19,23 @@ typedef struct ide_floppy_obj {
 #define	IDEFLOPPY_IOCTL_FORMAT_START		0x4602
 #define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS	0x4603
 
-sector_t ide_gd_capacity(ide_drive_t *);
-
 /* ide-floppy.c */
+extern const struct ide_disk_ops ide_atapi_disk_ops;
 void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8);
 void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *);
-int ide_floppy_get_capacity(ide_drive_t *);
-void ide_floppy_setup(ide_drive_t *);
-ide_startstop_t ide_floppy_do_request(ide_drive_t *, struct request *, sector_t);
-int ide_floppy_end_request(ide_drive_t *, int, int);
 
 /* ide-floppy_ioctl.c */
-int ide_floppy_ioctl(struct inode *, struct file *, unsigned, unsigned long);
+int ide_floppy_ioctl(ide_drive_t *, struct inode *, struct file *, unsigned int,
+		     unsigned long);
 
 #ifdef CONFIG_IDE_PROC_FS
 /* ide-floppy_proc.c */
 extern ide_proc_entry_t ide_floppy_proc[];
 extern const struct ide_proc_devset ide_floppy_settings[];
 #endif
+#else
+#define ide_floppy_proc		NULL
+#define ide_floppy_settings	NULL
+#endif
 
 #endif /*__IDE_FLOPPY_H */
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index b1f391df6cca..e8aa0a5bf5dc 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -33,7 +33,7 @@
 
 static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
 {
-	struct ide_floppy_obj *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 	u8 header_len, desc_cnt;
 	int i, blocks, length, u_array_size, u_index;
@@ -260,13 +260,10 @@ static int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file,
 	}
 }
 
-int ide_floppy_ioctl(struct inode *inode, struct file *file,
-		    unsigned int cmd, unsigned long arg)
+int ide_floppy_ioctl(ide_drive_t *drive, struct inode *inode,
+		     struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct block_device *bdev = inode->i_bdev;
-	struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
-						     ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
 	struct ide_atapi_pc pc;
 	void __user *argp = (void __user *)arg;
 	int err;
diff --git a/drivers/ide/ide-gd-floppy.c b/drivers/ide/ide-gd-floppy.c
deleted file mode 100644
index 082800b9a558..000000000000
--- a/drivers/ide/ide-gd-floppy.c
+++ /dev/null
@@ -1,309 +0,0 @@
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/mutex.h>
-#include <linux/ide.h>
-#include <linux/hdreg.h>
-
-#include "ide-floppy.h"
-
-#define IDEFLOPPY_VERSION "1.00"
-
-/* module parameters */
-static unsigned long debug_mask;
-module_param(debug_mask, ulong, 0644);
-
-static DEFINE_MUTEX(ide_disk_ref_mutex);
-
-static void ide_disk_release(struct kref *);
-
-static struct ide_floppy_obj *ide_disk_get(struct gendisk *disk)
-{
-	struct ide_floppy_obj *idkp = NULL;
-
-	mutex_lock(&ide_disk_ref_mutex);
-	idkp = ide_drv_g(disk, ide_floppy_obj);
-	if (idkp) {
-		if (ide_device_get(idkp->drive))
-			idkp = NULL;
-		else
-			kref_get(&idkp->kref);
-	}
-	mutex_unlock(&ide_disk_ref_mutex);
-	return idkp;
-}
-
-static void ide_disk_put(struct ide_floppy_obj *idkp)
-{
-	ide_drive_t *drive = idkp->drive;
-
-	mutex_lock(&ide_disk_ref_mutex);
-	kref_put(&idkp->kref, ide_disk_release);
-	ide_device_put(drive);
-	mutex_unlock(&ide_disk_ref_mutex);
-}
-
-sector_t ide_gd_capacity(ide_drive_t *drive)
-{
-	return drive->capacity64;
-}
-
-static int ide_gd_probe(ide_drive_t *);
-
-static void ide_gd_remove(ide_drive_t *drive)
-{
-	struct ide_floppy_obj *idkp = drive->driver_data;
-	struct gendisk *g = idkp->disk;
-
-	ide_proc_unregister_driver(drive, idkp->driver);
-
-	del_gendisk(g);
-
-	ide_disk_put(idkp);
-}
-
-static void ide_disk_release(struct kref *kref)
-{
-	struct ide_floppy_obj *idkp = to_ide_drv(kref, ide_floppy_obj);
-	ide_drive_t *drive = idkp->drive;
-	struct gendisk *g = idkp->disk;
-
-	drive->driver_data = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-	kfree(idkp);
-}
-
-#ifdef CONFIG_IDE_PROC_FS
-static ide_proc_entry_t *ide_floppy_proc_entries(ide_drive_t *drive)
-{
-	return ide_floppy_proc;
-}
-
-static const struct ide_proc_devset *ide_floppy_proc_devsets(ide_drive_t *drive)
-{
-	return ide_floppy_settings;
-}
-#endif
-
-static ide_driver_t ide_gd_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-floppy",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_gd_probe,
-	.remove			= ide_gd_remove,
-	.version		= IDEFLOPPY_VERSION,
-	.do_request		= ide_floppy_do_request,
-	.end_request		= ide_floppy_end_request,
-	.error			= __ide_error,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc_entries		= ide_floppy_proc_entries,
-	.proc_devsets		= ide_floppy_proc_devsets,
-#endif
-};
-
-static int ide_gd_open(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *idkp;
-	ide_drive_t *drive;
-	int ret = 0;
-
-	idkp = ide_disk_get(disk);
-	if (idkp == NULL)
-		return -ENXIO;
-
-	drive = idkp->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	idkp->openers++;
-
-	if (idkp->openers == 1) {
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-		/* Just in case */
-
-		if (ide_do_test_unit_ready(drive, disk))
-			ide_do_start_stop(drive, disk, 1);
-
-		ret = ide_floppy_get_capacity(drive);
-
-		set_capacity(disk, ide_gd_capacity(drive));
-
-		if (ret && (filp->f_flags & O_NDELAY) == 0) {
-		    /*
-		     * Allow O_NDELAY to open a drive without a disk, or with an
-		     * unreadable disk, so that we can get the format capacity
-		     * of the drive or begin the format - Sam
-		     */
-			ret = -EIO;
-			goto out_put_idkp;
-		}
-
-		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
-			ret = -EROFS;
-			goto out_put_idkp;
-		}
-
-		ide_set_media_lock(drive, disk, 1);
-		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
-		check_disk_change(inode->i_bdev);
-	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
-		ret = -EBUSY;
-		goto out_put_idkp;
-	}
-	return 0;
-
-out_put_idkp:
-	idkp->openers--;
-	ide_disk_put(idkp);
-	return ret;
-}
-
-static int ide_gd_release(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	if (idkp->openers == 1) {
-		ide_set_media_lock(drive, disk, 0);
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-	}
-
-	idkp->openers--;
-
-	ide_disk_put(idkp);
-
-	return 0;
-}
-
-static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	struct ide_floppy_obj *idkp = ide_drv_g(bdev->bd_disk, ide_floppy_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	geo->heads = drive->bios_head;
-	geo->sectors = drive->bios_sect;
-	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
-	return 0;
-}
-
-static int ide_gd_media_changed(struct gendisk *disk)
-{
-	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = idkp->drive;
-	int ret;
-
-	/* do not scan partitions twice if this is a removable device */
-	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
-		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-		return 0;
-	}
-
-	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
-	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
-
-	return ret;
-}
-
-static int ide_gd_revalidate_disk(struct gendisk *disk)
-{
-	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
-	set_capacity(disk, ide_gd_capacity(idkp->drive));
-	return 0;
-}
-
-static struct block_device_operations ide_gd_ops = {
-	.owner			= THIS_MODULE,
-	.open			= ide_gd_open,
-	.release		= ide_gd_release,
-	.ioctl			= ide_floppy_ioctl,
-	.getgeo			= ide_gd_getgeo,
-	.media_changed		= ide_gd_media_changed,
-	.revalidate_disk	= ide_gd_revalidate_disk
-};
-
-static int ide_gd_probe(ide_drive_t *drive)
-{
-	struct ide_floppy_obj *idkp;
-	struct gendisk *g;
-
-	if (!strstr("ide-floppy", drive->driver_req))
-		goto failed;
-
-	if (drive->media != ide_floppy)
-		goto failed;
-
-	if (!ide_check_atapi_device(drive, DRV_NAME)) {
-		printk(KERN_ERR PFX "%s: not supported by this version of "
-		       DRV_NAME "\n", drive->name);
-		goto failed;
-	}
-	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
-	if (!idkp) {
-		printk(KERN_ERR PFX "%s: Can't allocate a floppy structure\n",
-		       drive->name);
-		goto failed;
-	}
-
-	g = alloc_disk_node(1 << PARTN_BITS, hwif_to_node(drive->hwif));
-	if (!g)
-		goto out_free_idkp;
-
-	ide_init_disk(g, drive);
-
-	kref_init(&idkp->kref);
-
-	idkp->drive = drive;
-	idkp->driver = &ide_gd_driver;
-	idkp->disk = g;
-
-	g->private_data = &idkp->driver;
-
-	drive->driver_data = idkp;
-
-	drive->debug_mask = debug_mask;
-
-	ide_floppy_setup(drive);
-
-	set_capacity(g, ide_gd_capacity(drive));
-
-	g->minors = 1 << PARTN_BITS;
-	g->driverfs_dev = &drive->gendev;
-	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-		g->flags = GENHD_FL_REMOVABLE;
-	g->fops = &ide_gd_ops;
-	add_disk(g);
-	return 0;
-
-out_free_idkp:
-	kfree(idkp);
-failed:
-	return -ENODEV;
-}
-
-static int __init ide_gd_init(void)
-{
-	printk(KERN_INFO DRV_NAME " driver " IDEFLOPPY_VERSION "\n");
-	return driver_register(&ide_gd_driver.gen_driver);
-}
-
-static void __exit ide_gd_exit(void)
-{
-	driver_unregister(&ide_gd_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-floppy*");
-MODULE_ALIAS("ide-floppy");
-module_init(ide_gd_init);
-module_exit(ide_gd_exit);
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("ATAPI FLOPPY Driver");
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index a3d4ad7db2af..d44898f46c33 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -15,9 +15,14 @@
 #endif
 
 #include "ide-disk.h"
+#include "ide-floppy.h"
 
 #define IDE_GD_VERSION	"1.18"
 
+/* module parameters */
+static unsigned long debug_mask;
+module_param(debug_mask, ulong, 0644);
+
 static DEFINE_MUTEX(ide_disk_ref_mutex);
 
 static void ide_disk_release(struct kref *);
@@ -64,7 +69,7 @@ static void ide_gd_remove(ide_drive_t *drive)
 
 	del_gendisk(g);
 
-	ide_disk_flush(drive);
+	drive->disk_ops->flush(drive);
 
 	ide_disk_put(idkp);
 }
@@ -75,6 +80,7 @@ static void ide_disk_release(struct kref *kref)
 	ide_drive_t *drive = idkp->drive;
 	struct gendisk *g = idkp->disk;
 
+	drive->disk_ops = NULL;
 	drive->driver_data = NULL;
 	g->private_data = NULL;
 	put_disk(g);
@@ -89,7 +95,7 @@ static void ide_disk_release(struct kref *kref)
 static void ide_gd_resume(ide_drive_t *drive)
 {
 	if (ata_id_hpa_enabled(drive->id))
-		ide_disk_init_capacity(drive);
+		(void)drive->disk_ops->get_capacity(drive);
 }
 
 static void ide_gd_shutdown(ide_drive_t *drive)
@@ -110,7 +116,7 @@ static void ide_gd_shutdown(ide_drive_t *drive)
 #else
 	if (system_state == SYSTEM_RESTART) {
 #endif
-		ide_disk_flush(drive);
+		drive->disk_ops->flush(drive);
 		return;
 	}
 
@@ -122,19 +128,31 @@ static void ide_gd_shutdown(ide_drive_t *drive)
 #ifdef CONFIG_IDE_PROC_FS
 static ide_proc_entry_t *ide_disk_proc_entries(ide_drive_t *drive)
 {
-	return ide_disk_proc;
+	return (drive->media == ide_disk) ? ide_disk_proc : ide_floppy_proc;
 }
 
 static const struct ide_proc_devset *ide_disk_proc_devsets(ide_drive_t *drive)
 {
-	return ide_disk_settings;
+	return (drive->media == ide_disk) ? ide_disk_settings
+					  : ide_floppy_settings;
 }
 #endif
 
+static ide_startstop_t ide_gd_do_request(ide_drive_t *drive,
+					 struct request *rq, sector_t sector)
+{
+	return drive->disk_ops->do_request(drive, rq, sector);
+}
+
+static int ide_gd_end_request(ide_drive_t *drive, int uptodate, int nrsecs)
+{
+	return drive->disk_ops->end_request(drive, uptodate, nrsecs);
+}
+
 static ide_driver_t ide_gd_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
-		.name		= "ide-disk",
+		.name		= "ide-gd",
 		.bus		= &ide_bus_type,
 	},
 	.probe			= ide_gd_probe,
@@ -142,8 +160,8 @@ static ide_driver_t ide_gd_driver = {
 	.resume			= ide_gd_resume,
 	.shutdown		= ide_gd_shutdown,
 	.version		= IDE_GD_VERSION,
-	.do_request		= ide_do_rw_disk,
-	.end_request		= ide_end_request,
+	.do_request		= ide_gd_do_request,
+	.end_request		= ide_gd_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
 	.proc_entries		= ide_disk_proc_entries,
@@ -156,6 +174,7 @@ static int ide_gd_open(struct inode *inode, struct file *filp)
 	struct gendisk *disk = inode->i_bdev->bd_disk;
 	struct ide_disk_obj *idkp;
 	ide_drive_t *drive;
+	int ret = 0;
 
 	idkp = ide_disk_get(disk);
 	if (idkp == NULL)
@@ -163,19 +182,49 @@ static int ide_gd_open(struct inode *inode, struct file *filp)
 
 	drive = idkp->drive;
 
+	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
 	idkp->openers++;
 
 	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+		/* Just in case */
+
+		ret = drive->disk_ops->init_media(drive, disk);
+
+		/*
+		 * Allow O_NDELAY to open a drive without a disk, or with an
+		 * unreadable disk, so that we can get the format capacity
+		 * of the drive or begin the format - Sam
+		 */
+		if (ret && (filp->f_flags & O_NDELAY) == 0) {
+			ret = -EIO;
+			goto out_put_idkp;
+		}
+
+		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
+			ret = -EROFS;
+			goto out_put_idkp;
+		}
+
 		/*
 		 * Ignore the return code from door_lock,
 		 * since the open() has already succeeded,
 		 * and the door_lock is irrelevant at this point.
 		 */
-		ide_disk_set_doorlock(drive, 1);
+		drive->disk_ops->set_doorlock(drive, disk, 1);
 		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 		check_disk_change(inode->i_bdev);
+	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
+		ret = -EBUSY;
+		goto out_put_idkp;
 	}
 	return 0;
+
+out_put_idkp:
+	idkp->openers--;
+	ide_disk_put(idkp);
+	return ret;
 }
 
 static int ide_gd_release(struct inode *inode, struct file *filp)
@@ -184,11 +233,15 @@ static int ide_gd_release(struct inode *inode, struct file *filp)
 	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 
+	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
 	if (idkp->openers == 1)
-		ide_disk_flush(drive);
+		drive->disk_ops->flush(drive);
 
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1)
-		ide_disk_set_doorlock(drive, 0);
+	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
+		drive->disk_ops->set_doorlock(drive, disk, 0);
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+	}
 
 	idkp->openers--;
 
@@ -233,11 +286,21 @@ static int ide_gd_revalidate_disk(struct gendisk *disk)
 	return 0;
 }
 
+static int ide_gd_ioctl(struct inode *inode, struct file *file,
+			     unsigned int cmd, unsigned long arg)
+{
+	struct block_device *bdev = inode->i_bdev;
+	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+
+	return drive->disk_ops->ioctl(drive, inode, file, cmd, arg);
+}
+
 static struct block_device_operations ide_gd_ops = {
 	.owner			= THIS_MODULE,
 	.open			= ide_gd_open,
 	.release		= ide_gd_release,
-	.ioctl			= ide_disk_ioctl,
+	.ioctl			= ide_gd_ioctl,
 	.getgeo			= ide_gd_getgeo,
 	.media_changed		= ide_gd_media_changed,
 	.revalidate_disk	= ide_gd_revalidate_disk
@@ -245,19 +308,37 @@ static struct block_device_operations ide_gd_ops = {
 
 static int ide_gd_probe(ide_drive_t *drive)
 {
+	const struct ide_disk_ops *disk_ops = NULL;
 	struct ide_disk_obj *idkp;
 	struct gendisk *g;
 
 	/* strstr("foo", "") is non-NULL */
-	if (!strstr("ide-disk", drive->driver_req))
+	if (!strstr("ide-gd", drive->driver_req))
+		goto failed;
+
+#ifdef CONFIG_IDE_GD_ATA
+	if (drive->media == ide_disk)
+		disk_ops = &ide_ata_disk_ops;
+#endif
+#ifdef CONFIG_IDE_GD_ATAPI
+	if (drive->media == ide_floppy)
+		disk_ops = &ide_atapi_disk_ops;
+#endif
+	if (disk_ops == NULL)
 		goto failed;
 
-	if (drive->media != ide_disk)
+	if (disk_ops->check(drive, DRV_NAME) == 0) {
+		printk(KERN_ERR PFX "%s: not supported by this driver\n",
+			drive->name);
 		goto failed;
+	}
 
 	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
-	if (!idkp)
+	if (!idkp) {
+		printk(KERN_ERR PFX "%s: can't allocate a disk structure\n",
+			drive->name);
 		goto failed;
+	}
 
 	g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
 	if (!g)
@@ -274,8 +355,10 @@ static int ide_gd_probe(ide_drive_t *drive)
 	g->private_data = &idkp->driver;
 
 	drive->driver_data = idkp;
+	drive->debug_mask = debug_mask;
+	drive->disk_ops = disk_ops;
 
-	ide_disk_setup(drive);
+	disk_ops->setup(drive);
 
 	set_capacity(g, ide_gd_capacity(drive));
 
@@ -296,6 +379,7 @@ failed:
 
 static int __init ide_gd_init(void)
 {
+	printk(KERN_INFO DRV_NAME " driver " IDE_GD_VERSION "\n");
 	return driver_register(&ide_gd_driver.gen_driver);
 }
 
@@ -306,7 +390,9 @@ static void __exit ide_gd_exit(void)
 
 MODULE_ALIAS("ide:*m-disk*");
 MODULE_ALIAS("ide-disk");
+MODULE_ALIAS("ide:*m-floppy*");
+MODULE_ALIAS("ide-floppy");
 module_init(ide_gd_init);
 module_exit(ide_gd_exit);
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("ATA DISK Driver");
+MODULE_DESCRIPTION("generic ATA/ATAPI disk driver");
diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h
new file mode 100644
index 000000000000..7d3d101713e0
--- /dev/null
+++ b/drivers/ide/ide-gd.h
@@ -0,0 +1,44 @@
+#ifndef __IDE_GD_H
+#define __IDE_GD_H
+
+#define DRV_NAME "ide-gd"
+#define PFX DRV_NAME ": "
+
+/* define to see debug info */
+#define IDE_GD_DEBUG_LOG	0
+
+#if IDE_GD_DEBUG_LOG
+#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
+#else
+#define ide_debug_log(lvl, fmt, args...) do {} while (0)
+#endif
+
+struct ide_disk_obj {
+	ide_drive_t	*drive;
+	ide_driver_t	*driver;
+	struct gendisk	*disk;
+	struct kref	kref;
+	unsigned int	openers;	/* protected by BKL for now */
+
+	/* Last failed packet command */
+	struct ide_atapi_pc *failed_pc;
+	/* used for blk_{fs,pc}_request() requests */
+	struct ide_atapi_pc queued_pc;
+
+	/* Last error information */
+	u8 sense_key, asc, ascq;
+
+	int progress_indication;
+
+	/* Device information */
+	/* Current format */
+	int blocks, block_size, bs_factor;
+	/* Last format capacity descriptor */
+	u8 cap_desc[8];
+	/* Copy of the flexible disk page */
+	u8 flexible_disk_page[32];
+};
+
+sector_t ide_gd_capacity(ide_drive_t *);
+
+#endif /* __IDE_GD_H */
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index e3e40427e00e..c7ff1e11ea85 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -179,7 +179,7 @@ config LEDS_TRIGGER_TIMER
 
 config LEDS_TRIGGER_IDE_DISK
 	bool "LED IDE Disk Trigger"
-	depends on LEDS_TRIGGERS && BLK_DEV_IDEDISK
+	depends on LEDS_TRIGGERS && IDE_GD_ATA
 	help
 	  This allows LEDs to be controlled by IDE disk activity.
 	  If unsure, say Y.
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 488808891acb..89e53cfbc787 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -461,6 +461,23 @@ struct ide_acpi_drive_link;
 struct ide_acpi_hwif_link;
 #endif
 
+struct ide_drive_s;
+
+struct ide_disk_ops {
+	int		(*check)(struct ide_drive_s *, const char *);
+	int		(*get_capacity)(struct ide_drive_s *);
+	void		(*setup)(struct ide_drive_s *);
+	void		(*flush)(struct ide_drive_s *);
+	int		(*init_media)(struct ide_drive_s *, struct gendisk *);
+	int		(*set_doorlock)(struct ide_drive_s *, struct gendisk *,
+					int);
+	ide_startstop_t	(*do_request)(struct ide_drive_s *, struct request *,
+				      sector_t);
+	int		(*end_request)(struct ide_drive_s *, int, int);
+	int		(*ioctl)(struct ide_drive_s *, struct inode *,
+				 struct file *, unsigned int, unsigned long);
+};
+
 /* ATAPI device flags */
 enum {
 	IDE_AFLAG_DRQ_INTERRUPT		= (1 << 0),
@@ -594,6 +611,8 @@ struct ide_drive_s {
 #endif
 	struct hwif_s		*hwif;	/* actually (ide_hwif_t *) */
 
+	const struct ide_disk_ops *disk_ops;
+
 	unsigned long dev_flags;
 
 	unsigned long sleep;		/* sleep until this time */
-- 
GitLab


From 71b429ca4d5cb78a889128955a6ab891c5ab2a46 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 17 Oct 2008 18:09:14 +0200
Subject: [PATCH 584/895] ide-cd: debug log enhancements

Add some more verbosity to key function calls in ide-cd debug code. While at it,
delete a superfluous comment.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 32073666b9ca..c36cab5785df 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -340,8 +340,8 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 	}
 
 	ide_debug_log(IDE_DBG_RQ, "%s: stat: 0x%x, good_stat: 0x%x, "
-		      "rq->cmd_type: 0x%x, err: 0x%x\n", __func__, stat,
-		      good_stat, rq->cmd_type, err);
+		      "rq->cmd[0]: 0x%x, rq->cmd_type: 0x%x, err: 0x%x\n",
+		      __func__, stat, good_stat, rq->cmd[0], rq->cmd_type, err);
 
 	if (blk_sense_request(rq)) {
 		/*
@@ -849,7 +849,8 @@ static void ide_cd_restore_request(ide_drive_t *drive, struct request *rq)
 static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct request *rq)
 {
 
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+	ide_debug_log(IDE_DBG_FUNC, "Call %s, rq->cmd[0]: 0x%x\n",
+		      __func__, rq->cmd[0]);
 
 	/*
 	 * Some of the trailing request sense fields are optional,
@@ -876,7 +877,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 	if (!sense)
 		sense = &local_sense;
 
-	ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd[0]: 0x%x, write: 0x%x, "
+	ide_debug_log(IDE_DBG_PC, "Call %s, cmd[0]: 0x%x, write: 0x%x, "
 		      "timeout: %d, cmd_flags: 0x%x\n", __func__, cmd[0], write,
 		      timeout, cmd_flags);
 
@@ -1177,8 +1178,9 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
 	unsigned short sectors_per_frame =
 		queue_hardsect_size(drive->queue) >> SECTOR_BITS;
 
-	ide_debug_log(IDE_DBG_RQ, "Call %s, write: 0x%x, secs_per_frame: %u\n",
-		      __func__, write, sectors_per_frame);
+	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, write: 0x%x, "
+		      "secs_per_frame: %u\n",
+		      __func__, rq->cmd[0], write, sectors_per_frame);
 
 	if (write) {
 		/* disk has become write protected */
@@ -1221,7 +1223,8 @@ static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive)
 static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 {
 
-	ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd_type: 0x%x\n", __func__,
+	ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd[0]: 0x%x, "
+		      "rq->cmd_type: 0x%x\n", __func__, rq->cmd[0],
 		      rq->cmd_type);
 
 	if (blk_pc_request(rq))
@@ -1257,9 +1260,6 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 	}
 }
 
-/*
- * cdrom driver request routine.
- */
 static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 					sector_t block)
 {
@@ -1267,8 +1267,10 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 	ide_handler_t *fn;
 	int xferlen;
 
-	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd_type: 0x%x, block: %llu\n",
-		      __func__, rq->cmd_type, (unsigned long long)block);
+	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, "
+		      "rq->cmd_type: 0x%x, block: %llu\n",
+		      __func__, rq->cmd[0], rq->cmd_type,
+		      (unsigned long long)block);
 
 	if (blk_fs_request(rq)) {
 		if (drive->atapi_flags & IDE_AFLAG_SEEKING) {
@@ -1412,6 +1414,10 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
 
 	*capacity = 1 + be32_to_cpu(capbuf.lba);
 	*sectors_per_frame = blocklen >> SECTOR_BITS;
+
+	ide_debug_log(IDE_DBG_PROBE, "%s: cap: %lu, sectors_per_frame: %lu\n",
+		      __func__, *capacity, *sectors_per_frame);
+
 	return 0;
 }
 
@@ -1643,6 +1649,9 @@ void ide_cdrom_update_speed(ide_drive_t *drive, u8 *buf)
 		maxspeed = be16_to_cpup((__be16 *)&buf[8 + 8]);
 	}
 
+	ide_debug_log(IDE_DBG_PROBE, "%s: curspeed: %u, maxspeed: %u\n",
+		      __func__, curspeed, maxspeed);
+
 	cd->current_speed = (curspeed + (176/2)) / 176;
 	cd->max_speed = (maxspeed + (176/2)) / 176;
 }
-- 
GitLab


From 419a5b67c3e901f8e6369d2630877a5f59692202 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 17 Oct 2008 18:09:14 +0200
Subject: [PATCH 585/895] ide-cd: small drive type print fix

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index c36cab5785df..fd2971891321 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1786,7 +1786,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
 	if ((cdi->mask & CDC_DVD_R) == 0 || (cdi->mask & CDC_DVD_RAM) == 0)
 		printk(KERN_CONT " DVD%s%s",
 				 (cdi->mask & CDC_DVD_R) ? "" : "-R",
-				 (cdi->mask & CDC_DVD_RAM) ? "" : "-RAM");
+				 (cdi->mask & CDC_DVD_RAM) ? "" : "/RAM");
 
 	if ((cdi->mask & CDC_CD_R) == 0 || (cdi->mask & CDC_CD_RW) == 0)
 		printk(KERN_CONT " CD%s%s",
-- 
GitLab


From 2a2267e7b11fa2de95bfb707c85f2a9880e5206a Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 17 Oct 2008 18:09:14 +0200
Subject: [PATCH 586/895] ide-cd: remove stale comment

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
[bart: split-up this change from a bigger patch]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index fd2971891321..13265a8827da 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -843,12 +843,8 @@ static void ide_cd_restore_request(ide_drive_t *drive, struct request *rq)
 	rq->q->prep_rq_fn(rq->q, rq);
 }
 
-/*
- * All other packet commands.
- */
 static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct request *rq)
 {
-
 	ide_debug_log(IDE_DBG_FUNC, "Call %s, rq->cmd[0]: 0x%x\n",
 		      __func__, rq->cmd[0]);
 
-- 
GitLab


From 30c7ed5aba72bb7357282cf8f411b2e74d82081c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:14 +0200
Subject: [PATCH 587/895] ide: fix support for IDE PCI controllers using MMIO
 on frv

Just include <asm-generic/ide_iops.h> for __ide_mm_*() instead of
defining them to normal I/O helpers so PCI bus <-> CPU byte-swapping
is done as needed.

Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-frv/ide.h | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/include/asm-frv/ide.h b/include/asm-frv/ide.h
index 7ebcc56a2229..361076611855 100644
--- a/include/asm-frv/ide.h
+++ b/include/asm-frv/ide.h
@@ -18,15 +18,7 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 
-/****************************************************************************/
-/*
- * some bits needed for parts of the IDE subsystem to compile
- */
-#define __ide_mm_insw(port, addr, n)	insw((unsigned long) (port), addr, n)
-#define __ide_mm_insl(port, addr, n)	insl((unsigned long) (port), addr, n)
-#define __ide_mm_outsw(port, addr, n)	outsw((unsigned long) (port), addr, n)
-#define __ide_mm_outsl(port, addr, n)	outsl((unsigned long) (port), addr, n)
-
+#include <asm-generic/ide_iops.h>
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_IDE_H */
-- 
GitLab


From c36167de652f8acdf0b374c78805e662646cd327 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: [PATCH 588/895] ide: remove dead <asm-arm/arch-sa1100/ide.h>

It has been dead for more than 3 years and needs to be converted
to use platform PATA support anyway.

Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 arch/arm/mach-sa1100/include/mach/ide.h | 75 -------------------------
 1 file changed, 75 deletions(-)
 delete mode 100644 arch/arm/mach-sa1100/include/mach/ide.h

diff --git a/arch/arm/mach-sa1100/include/mach/ide.h b/arch/arm/mach-sa1100/include/mach/ide.h
deleted file mode 100644
index 4c99c8f5e617..000000000000
--- a/arch/arm/mach-sa1100/include/mach/ide.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * arch/arm/mach-sa1100/include/mach/ide.h
- *
- * Copyright (c) 1998 Hugo Fiennes & Nicolas Pitre
- *
- * 18-aug-2000: Cleanup by Erik Mouw (J.A.K.Mouw@its.tudelft.nl)
- *              Get rid of the special ide_init_hwif_ports() functions
- *              and make a generalised function that can be used by all
- *              architectures.
- */
-
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-
-#error "This code is broken and needs update to match with current ide support"
-
-
-/*
- * Set up a hw structure for a specified data port, control port and IRQ.
- * This should follow whatever the default interface uses.
- */
-static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
-				       unsigned long ctrl_port, int *irq)
-{
-	unsigned long reg = data_port;
-	int i;
-	int regincr = 1;
-
-	/* The Empeg board has the first two address lines unused */
-	if (machine_is_empeg())
-		regincr = 1 << 2;
-
-	/* The LART doesn't use A0 for IDE */
-	if (machine_is_lart())
-		regincr = 1 << 1;
-
-	memset(hw, 0, sizeof(*hw));
-
-	for (i = 0; i <= 7; i++) {
-		hw->io_ports_array[i] = reg;
-		reg += regincr;
-	}
-
-	hw->io_ports.ctl_addr = ctrl_port;
-
-	if (irq)
-		*irq = 0;
-}
-
-/*
- * This registers the standard ports for this architecture with the IDE
- * driver.
- */
-static __inline__ void
-ide_init_default_hwifs(void)
-{
-    if (machine_is_lart()) {
-#ifdef CONFIG_SA1100_LART
-        hw_regs_t hw;
-
-        /* Enable GPIO as interrupt line */
-        GPDR &= ~LART_GPIO_IDE;
-	set_irq_type(LART_IRQ_IDE, IRQ_TYPE_EDGE_RISING);
-
-        /* set PCMCIA interface timing */
-        MECR = 0x00060006;
-
-        /* init the interface */
-	ide_init_hwif_ports(&hw, PCMCIA_IO_0_BASE + 0x0000, PCMCIA_IO_0_BASE + 0x1000, NULL);
-        hw.irq = LART_IRQ_IDE;
-        ide_register_hw(&hw);
-#endif
-    }
-}
-- 
GitLab


From dc10f6119636a5ff13b633e722e720fe0253202b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: [PATCH 589/895] ide: remove M68K_IDE_SWAPW define from
 <asm-m68k/ide.h>

Since we solved this by overriding default ->{in,out}put_data
methods in {q40,falcon}ide M68K_IDE_SWAP define can go away.

Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-m68k/ide.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/include/asm-m68k/ide.h b/include/asm-m68k/ide.h
index 1daf6cbdd9f0..b996a3c8cff5 100644
--- a/include/asm-m68k/ide.h
+++ b/include/asm-m68k/ide.h
@@ -92,15 +92,6 @@
 #define outsw_swapw(port, addr, n)	raw_outsw_swapw((u16 *)port, addr, n)
 #endif
 
-
-/* Q40 and Atari have byteswapped IDE busses and since many interesting
- * values in the identification string are text, chars and words they
- * happened to be almost correct without swapping.. However *_capacity
- * is needed for drives over 8 GB. RZ */
-#if defined(CONFIG_Q40) || defined(CONFIG_ATARI)
-#define M68K_IDE_SWAPW  (MACH_IS_Q40 || MACH_IS_ATARI)
-#endif
-
 #ifdef CONFIG_BLK_DEV_FALCON_IDE
 #define IDE_ARCH_LOCK
 
-- 
GitLab


From 21f45eb1d381aad30094751c60ae8171d6223a66 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: [PATCH 590/895] ide: remove unused macros from <asm-parisc/ide.h>

Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-parisc/ide.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/asm-parisc/ide.h b/include/asm-parisc/ide.h
index c246ef75017d..81700a2321cf 100644
--- a/include/asm-parisc/ide.h
+++ b/include/asm-parisc/ide.h
@@ -13,10 +13,6 @@
 
 #ifdef __KERNEL__
 
-#define ide_request_irq(irq,hand,flg,dev,id)	request_irq((irq),(hand),(flg),(dev),(id))
-#define ide_free_irq(irq,dev_id)		free_irq((irq), (dev_id))
-#define ide_request_region(from,extent,name)	request_region((from), (extent), (name))
-#define ide_release_region(from,extent)		release_region((from), (extent))
 /* Generic I/O and MEMIO string operations.  */
 
 #define __ide_insw	insw
-- 
GitLab


From 79104c687ca29e214142d8e8f30964be05e1276f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: [PATCH 591/895] hpt366: fix compile warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixup for commit 1785192b5310ee25165768f5bb80f13146788e3e
("hpt366: add hpt3xx_disable_fast_irq() helper"):

Â  Â CC Â  Â  Â drivers/ide/pci/hpt366.o
drivers/ide/pci/hpt366.c: In function `init_hwif_hpt366':
drivers/ide/pci/hpt366.c:1290: warning: unused variable `dev'

Reported-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/hpt366.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 9cf171cb9376..ca0acb50bc61 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -1289,7 +1289,6 @@ static u8 hpt3xx_cable_detect(ide_hwif_t *hwif)
 
 static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 {
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 	int serialize		= HPT_SERIALIZE_IO;
 	u8  chip_type		= info->chip_type;
-- 
GitLab


From e5403bff8a4018240f012fd4c7afa24c2e75b469 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: [PATCH 592/895] ide: mask interrupt in ide_config_drive_speed()

Apparently, there is no sense in unmasking IRQ on the controller when you call
disable_irq_nosync() before doing this, set the nIEN bit afterwards, and then
unmask IRQ again after the command completion, hence 0 passed to SELECT_MASK()
before issuing the command in ide_config_drive_speed() is probably just a typo.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-iops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index b762deb2dacb..bb7a1ed8094e 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -755,7 +755,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
 	 
 	udelay(1);
 	SELECT_DRIVE(drive);
-	SELECT_MASK(drive, 0);
+	SELECT_MASK(drive, 1);
 	udelay(1);
 	tp_ops->set_irq(hwif, 0);
 
-- 
GitLab


From ea2ac5a3b7d33ff9f41ddcee2a92c95b5a32f4e2 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: [PATCH 593/895] hpt366: cleanup maskproc() method

Since the maskproc() method calls either mirror the interrupt en/disable via
the nIEN bit of the device control register done by the IDE core before issuing
a command or unmask the interrupt after a command executed in polled mode (when
interrupt is already not expected), it is pointless to manipulate the nIEN bit
in this method; therefore, just do nothing for the drives not on the quirk list.
Move the code to the left by using an early return and the 'else if' construct,
while at it....

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
[bart: fix checkpatch.pl warning]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/hpt366.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index ca0acb50bc61..a7909e9c720e 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -3,7 +3,7 @@
  * Portions Copyright (C) 2001	        Sun Microsystems, Inc.
  * Portions Copyright (C) 2003		Red Hat Inc
  * Portions Copyright (C) 2007		Bartlomiej Zolnierkiewicz
- * Portions Copyright (C) 2005-2007	MontaVista Software, Inc.
+ * Portions Copyright (C) 2005-2008	MontaVista Software, Inc.
  *
  * Thanks to HighPoint Technologies for their assistance, and hardware.
  * Special Thanks to Jon Burchmore in SanDiego for the deep pockets, his
@@ -748,26 +748,24 @@ static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 
-	if (drive->quirk_list) {
-		if (info->chip_type >= HPT370) {
-			u8 scr1 = 0;
-
-			pci_read_config_byte(dev, 0x5a, &scr1);
-			if (((scr1 & 0x10) >> 4) != mask) {
-				if (mask)
-					scr1 |=  0x10;
-				else
-					scr1 &= ~0x10;
-				pci_write_config_byte(dev, 0x5a, scr1);
-			}
-		} else {
+	if (drive->quirk_list == 0)
+		return;
+
+	if (info->chip_type >= HPT370) {
+		u8 scr1 = 0;
+
+		pci_read_config_byte(dev, 0x5a, &scr1);
+		if (((scr1 & 0x10) >> 4) != mask) {
 			if (mask)
-				disable_irq(hwif->irq);
+				scr1 |=  0x10;
 			else
-				enable_irq (hwif->irq);
+				scr1 &= ~0x10;
+			pci_write_config_byte(dev, 0x5a, scr1);
 		}
-	} else
-		outb(ATA_DEVCTL_OBS | (mask ? 2 : 0), hwif->io_ports.ctl_addr);
+	} else if (mask)
+		disable_irq(hwif->irq);
+	else
+		enable_irq(hwif->irq);
 }
 
 /*
-- 
GitLab


From ea656980f40d599400d2306b23f2fbc707ae7313 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Fri, 17 Oct 2008 18:09:16 +0200
Subject: [PATCH 594/895] sgiioc4: remove maskproc() method

Since the maskproc() method calls either mirror the interrupt en/disable via
the nIEN bit of the device control register done by the IDE core before issuing
a command or unmask the interrupt after a command executed in polled mode (when
interrupt is already not expected), it is pointless to implement this method
by manipulating the nIEN bit...

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: jeremy@sgi.com
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/sgiioc4.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index dd634541ce36..8581789352aa 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -108,13 +108,6 @@ sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
 		hw->io_ports.irq_addr = irq_port;
 }
 
-static void
-sgiioc4_maskproc(ide_drive_t * drive, int mask)
-{
-	writeb(ATA_DEVCTL_OBS | (mask ? 2 : 0),
-	       (void __iomem *)drive->hwif->io_ports.ctl_addr);
-}
-
 static int
 sgiioc4_checkirq(ide_hwif_t * hwif)
 {
@@ -563,8 +556,6 @@ static const struct ide_port_ops sgiioc4_port_ops = {
 	.set_dma_mode		= sgiioc4_set_dma_mode,
 	/* reset DMA engine, clear IRQs */
 	.resetproc		= sgiioc4_resetproc,
-	/* mask on/off NIEN register */
-	.maskproc		= sgiioc4_maskproc,
 };
 
 static const struct ide_dma_ops sgiioc4_dma_ops = {
-- 
GitLab


From 0df962777b550a4b67191b3ee2555be139da4e7d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:16 +0200
Subject: [PATCH 595/895] ide-floppy: remove idefloppy_floppy_t typedef

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c       | 18 +++++++++---------
 drivers/ide/ide-floppy.h       |  2 --
 drivers/ide/ide-floppy_ioctl.c |  8 ++++----
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 58746c748c12..aeb1ad782f54 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -70,7 +70,7 @@
  */
 static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct request *rq = HWGROUP(drive)->rq;
 	int error;
 
@@ -117,7 +117,7 @@ static void idefloppy_update_buffers(ide_drive_t *drive,
 
 static void ide_floppy_callback(ide_drive_t *drive, int dsc)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc *pc = drive->pc;
 	int uptodate = pc->error ? 0 : 1;
 
@@ -154,7 +154,7 @@ static void ide_floppy_callback(ide_drive_t *drive, int dsc)
 	ide_floppy_end_request(drive, uptodate, 0);
 }
 
-static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
+static void ide_floppy_report_error(struct ide_disk_obj *floppy,
 				    struct ide_atapi_pc *pc)
 {
 	/* supress error messages resulting from Medium not present */
@@ -173,7 +173,7 @@ static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
 static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
 		struct ide_atapi_pc *pc)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 
 	if (floppy->failed_pc == NULL &&
 	    pc->c[0] != GPCMD_REQUEST_SENSE)
@@ -237,7 +237,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
 				    struct ide_atapi_pc *pc, struct request *rq,
 				    unsigned long sector)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	int block = sector / floppy->bs_factor;
 	int blocks = rq->nr_sectors / floppy->bs_factor;
 	int cmd = rq_data_dir(rq);
@@ -261,7 +261,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
 	pc->flags |= PC_FLAG_DMA_OK;
 }
 
-static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
+static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
 		struct ide_atapi_pc *pc, struct request *rq)
 {
 	ide_init_pc(pc);
@@ -283,7 +283,7 @@ static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
 static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 					     struct request *rq, sector_t block)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
 	struct ide_atapi_pc *pc;
 
@@ -344,7 +344,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
  */
 static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
 	struct ide_atapi_pc pc;
 	u8 *page;
@@ -407,7 +407,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
  */
 static int ide_floppy_get_capacity(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
 	struct ide_atapi_pc pc;
 	u8 *cap_desc;
diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
index acebc8c5a827..c17124dd6079 100644
--- a/drivers/ide/ide-floppy.h
+++ b/drivers/ide/ide-floppy.h
@@ -4,8 +4,6 @@
 #include "ide-gd.h"
 
 #ifdef CONFIG_IDE_GD_ATAPI
-typedef struct ide_disk_obj idefloppy_floppy_t;
-
 /*
  * Pages of the SELECT SENSE / MODE SENSE packet commands.
  * See SFF-8070i spec.
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index e8aa0a5bf5dc..409e4c15f9b7 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -113,7 +113,7 @@ static void ide_floppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b,
 
 static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 
 	drive->atapi_flags &= ~IDE_AFLAG_SRFP;
@@ -132,7 +132,7 @@ static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
 
 static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 	int blocks, length, flags, err = 0;
 
@@ -190,7 +190,7 @@ out:
 
 static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 	int progress_indication = 0x10000;
 
@@ -226,7 +226,7 @@ static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
 static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc,
 			       unsigned long arg, unsigned int cmd)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
 	int prevent = (arg && cmd != CDROMEJECT) ? 1 : 0;
 
-- 
GitLab


From 41d1a3d31d097a31380b83eea0ec10ea1d040376 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:16 +0200
Subject: [PATCH 596/895] ide: remove broken hpt34x driver

No big loss since HPT343/363 controllers are properly supported
by pata_hpt3x3 driver from Alan Cox.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Kconfig      |  24 +----
 drivers/ide/pci/Makefile |   1 -
 drivers/ide/pci/hpt34x.c | 193 ---------------------------------------
 3 files changed, 1 insertion(+), 217 deletions(-)
 delete mode 100644 drivers/ide/pci/hpt34x.c

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index faa974e615da..a820ca6fc327 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -328,7 +328,7 @@ config IDEPCI_PCIBUS_ORDER
 # TODO: split it on per host driver config options (or module parameters)
 config BLK_DEV_OFFBOARD
 	bool "Boot off-board chipsets first support (DEPRECATED)"
-	depends on BLK_DEV_IDEPCI && (BLK_DEV_AEC62XX || BLK_DEV_GENERIC || BLK_DEV_HPT34X || BLK_DEV_HPT366 || BLK_DEV_PDC202XX_NEW || BLK_DEV_PDC202XX_OLD || BLK_DEV_TC86C001)
+	depends on BLK_DEV_IDEPCI && (BLK_DEV_AEC62XX || BLK_DEV_GENERIC || BLK_DEV_HPT366 || BLK_DEV_PDC202XX_NEW || BLK_DEV_PDC202XX_OLD || BLK_DEV_TC86C001)
 	help
 	  Normally, IDE controllers built into the motherboard (on-board
 	  controllers) are assigned to ide0 and ide1 while those on add-in PCI
@@ -478,28 +478,6 @@ config BLK_DEV_CS5535
 
 	  It is safe to say Y to this question.
 
-config BLK_DEV_HPT34X
-	tristate "HPT34X chipset support"
-	depends on BROKEN
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds up to 4 more EIDE devices sharing a single
-	  interrupt. The HPT343 chipset in its current form is a non-bootable
-	  controller; the HPT345/HPT363 chipset is a bootable (needs BIOS FIX)
-	  PCI UDMA controllers. This driver requires dynamic tuning of the
-	  chipset during the ide-probe at boot time. It is reported to support
-	  DVD II drives, by the manufacturer.
-
-config HPT34X_AUTODMA
-	bool "HPT34X AUTODMA support (EXPERIMENTAL)"
-	depends on BLK_DEV_HPT34X && EXPERIMENTAL
-	help
-	  This is a dangerous thing to attempt currently! Please read the
-	  comments at the top of <file:drivers/ide/pci/hpt34x.c>.  If you say Y
-	  here, then say Y to "Use DMA by default when available" as well.
-
-	  If unsure, say N.
-
 config BLK_DEV_HPT366
 	tristate "HPT36X/37X chipset support"
 	select BLK_DEV_IDEDMA_PCI
diff --git a/drivers/ide/pci/Makefile b/drivers/ide/pci/Makefile
index 02e6ee7d751d..ab44a1f5f5a9 100644
--- a/drivers/ide/pci/Makefile
+++ b/drivers/ide/pci/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_BLK_DEV_CS5535)		+= cs5535.o
 obj-$(CONFIG_BLK_DEV_SC1200)		+= sc1200.o
 obj-$(CONFIG_BLK_DEV_CY82C693)		+= cy82c693.o
 obj-$(CONFIG_BLK_DEV_DELKIN)		+= delkin_cb.o
-obj-$(CONFIG_BLK_DEV_HPT34X)		+= hpt34x.o
 obj-$(CONFIG_BLK_DEV_HPT366)		+= hpt366.o
 obj-$(CONFIG_BLK_DEV_IT8213)		+= it8213.o
 obj-$(CONFIG_BLK_DEV_IT821X)		+= it821x.o
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
deleted file mode 100644
index fb1a3aa57f07..000000000000
--- a/drivers/ide/pci/hpt34x.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (C) 1998-2000	Andre Hedrick <andre@linux-ide.org>
- *
- * May be copied or modified under the terms of the GNU General Public License
- *
- *
- * 00:12.0 Unknown mass storage controller:
- * Triones Technologies, Inc.
- * Unknown device 0003 (rev 01)
- *
- * hde: UDMA 2 (0x0000 0x0002) (0x0000 0x0010)
- * hdf: UDMA 2 (0x0002 0x0012) (0x0010 0x0030)
- * hde: DMA 2  (0x0000 0x0002) (0x0000 0x0010)
- * hdf: DMA 2  (0x0002 0x0012) (0x0010 0x0030)
- * hdg: DMA 1  (0x0012 0x0052) (0x0030 0x0070)
- * hdh: DMA 1  (0x0052 0x0252) (0x0070 0x00f0)
- *
- * ide-pci.c reference
- *
- * Since there are two cards that report almost identically,
- * the only discernable difference is the values reported in pcicmd.
- * Booting-BIOS card or HPT363 :: pcicmd == 0x07
- * Non-bootable card or HPT343 :: pcicmd == 0x05
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-
-#define DRV_NAME "hpt34x"
-
-#define HPT343_DEBUG_DRIVE_INFO		0
-
-static void hpt34x_set_mode(ide_drive_t *drive, const u8 speed)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u32 reg1= 0, tmp1 = 0, reg2 = 0, tmp2 = 0;
-	u8			hi_speed, lo_speed;
-
-	hi_speed = speed >> 4;
-	lo_speed = speed & 0x0f;
-
-	if (hi_speed & 7) {
-		hi_speed = (hi_speed & 4) ? 0x01 : 0x10;
-	} else {
-		lo_speed <<= 5;
-		lo_speed >>= 5;
-	}
-
-	pci_read_config_dword(dev, 0x44, &reg1);
-	pci_read_config_dword(dev, 0x48, &reg2);
-	tmp1 = ((lo_speed << (3*drive->dn)) | (reg1 & ~(7 << (3*drive->dn))));
-	tmp2 = ((hi_speed << drive->dn) | (reg2 & ~(0x11 << drive->dn)));
-	pci_write_config_dword(dev, 0x44, tmp1);
-	pci_write_config_dword(dev, 0x48, tmp2);
-
-#if HPT343_DEBUG_DRIVE_INFO
-	printk("%s: %s drive%d (0x%04x 0x%04x) (0x%04x 0x%04x)" \
-		" (0x%02x 0x%02x)\n",
-		drive->name, ide_xfer_verbose(speed),
-		drive->dn, reg1, tmp1, reg2, tmp2,
-		hi_speed, lo_speed);
-#endif /* HPT343_DEBUG_DRIVE_INFO */
-}
-
-static void hpt34x_set_pio_mode(ide_drive_t *drive, const u8 pio)
-{
-	hpt34x_set_mode(drive, XFER_PIO_0 + pio);
-}
-
-/*
- * If the BIOS does not set the IO base addaress to XX00, 343 will fail.
- */
-#define	HPT34X_PCI_INIT_REG		0x80
-
-static unsigned int init_chipset_hpt34x(struct pci_dev *dev)
-{
-	int i = 0;
-	unsigned long hpt34xIoBase = pci_resource_start(dev, 4);
-	unsigned long hpt_addr[4] = { 0x20, 0x34, 0x28, 0x3c };
-	unsigned long hpt_addr_len[4] = { 7, 3, 7, 3 };
-	u16 cmd;
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	pci_write_config_byte(dev, HPT34X_PCI_INIT_REG, 0x00);
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-
-	if (cmd & PCI_COMMAND_MEMORY)
-		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xF0);
-	else
-		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20);
-
-	/*
-	 * Since 20-23 can be assigned and are R/W, we correct them.
-	 */
-	pci_write_config_word(dev, PCI_COMMAND, cmd & ~PCI_COMMAND_IO);
-	for(i=0; i<4; i++) {
-		dev->resource[i].start = (hpt34xIoBase + hpt_addr[i]);
-		dev->resource[i].end = dev->resource[i].start + hpt_addr_len[i];
-		dev->resource[i].flags = IORESOURCE_IO;
-		pci_write_config_dword(dev,
-				(PCI_BASE_ADDRESS_0 + (i * 4)),
-				dev->resource[i].start);
-	}
-	pci_write_config_word(dev, PCI_COMMAND, cmd);
-
-	local_irq_restore(flags);
-
-	return dev->irq;
-}
-
-static const struct ide_port_ops hpt34x_port_ops = {
-	.set_pio_mode		= hpt34x_set_pio_mode,
-	.set_dma_mode		= hpt34x_set_mode,
-};
-
-#define IDE_HFLAGS_HPT34X \
-	(IDE_HFLAG_NO_ATAPI_DMA | \
-	 IDE_HFLAG_NO_DSC | \
-	 IDE_HFLAG_NO_AUTODMA)
-
-static const struct ide_port_info hpt34x_chipsets[] __devinitdata = {
-	{ /* 0: HPT343 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_hpt34x,
-		.port_ops	= &hpt34x_port_ops,
-		.host_flags	= IDE_HFLAGS_HPT34X | IDE_HFLAG_NON_BOOTABLE,
-		.pio_mask	= ATA_PIO5,
-	},
-	{ /* 1: HPT345 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_hpt34x,
-		.port_ops	= &hpt34x_port_ops,
-		.host_flags	= IDE_HFLAGS_HPT34X | IDE_HFLAG_OFF_BOARD,
-		.pio_mask	= ATA_PIO5,
-#ifdef CONFIG_HPT34X_AUTODMA
-		.swdma_mask	= ATA_SWDMA2,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA2,
-#endif
-	}
-};
-
-static int __devinit hpt34x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	const struct ide_port_info *d;
-	u16 pcicmd = 0;
-
-	pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
-
-	d = &hpt34x_chipsets[(pcicmd & PCI_COMMAND_MEMORY) ? 1 : 0];
-
-	return ide_pci_init_one(dev, d, NULL);
-}
-
-static const struct pci_device_id hpt34x_pci_tbl[] = {
-	{ PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT343), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, hpt34x_pci_tbl);
-
-static struct pci_driver hpt34x_pci_driver = {
-	.name		= "HPT34x_IDE",
-	.id_table	= hpt34x_pci_tbl,
-	.probe		= hpt34x_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init hpt34x_ide_init(void)
-{
-	return ide_pci_register_driver(&hpt34x_pci_driver);
-}
-
-static void __exit hpt34x_ide_exit(void)
-{
-	pci_unregister_driver(&hpt34x_pci_driver);
-}
-
-module_init(hpt34x_ide_init);
-module_exit(hpt34x_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick");
-MODULE_DESCRIPTION("PCI driver module for Highpoint 34x IDE");
-MODULE_LICENSE("GPL");
-- 
GitLab


From 8c061a40c293660793dab83b75223e6eaaa04f8b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:16 +0200
Subject: [PATCH 597/895] delkin_cb: add PM support

* Factor out chipset initialization code from delkin_cb_probe()
  to delkin_cb_init_chipset().

* Assign ->init_chipset in struct delkin_cb_port_info.

* Add delkin_cb_{suspend,resume}() (->suspend/->resume methods).

Fixes kernel bugzilla bug #11735:

http://bugzilla.kernel.org/show_bug.cgi?id=11735

Tested-by: bumble.bee@xs4all.nl
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/delkin_cb.c | 63 ++++++++++++++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 7 deletions(-)

diff --git a/drivers/ide/pci/delkin_cb.c b/drivers/ide/pci/delkin_cb.c
index 8689a706f537..8f1b2d9f0513 100644
--- a/drivers/ide/pci/delkin_cb.c
+++ b/drivers/ide/pci/delkin_cb.c
@@ -46,10 +46,27 @@ static const struct ide_port_ops delkin_cb_port_ops = {
 	.quirkproc		= ide_undecoded_slave,
 };
 
+static unsigned int delkin_cb_init_chipset(struct pci_dev *dev)
+{
+	unsigned long base = pci_resource_start(dev, 0);
+	int i;
+
+	outb(0x02, base + 0x1e);	/* set nIEN to block interrupts */
+	inb(base + 0x17);		/* read status to clear interrupts */
+
+	for (i = 0; i < sizeof(setup); ++i) {
+		if (setup[i])
+			outb(setup[i], base + i);
+	}
+
+	return 0;
+}
+
 static const struct ide_port_info delkin_cb_port_info = {
 	.port_ops		= &delkin_cb_port_ops,
 	.host_flags		= IDE_HFLAG_IO_32BIT | IDE_HFLAG_UNMASK_IRQS |
 				  IDE_HFLAG_NO_DMA,
+	.init_chipset		= delkin_cb_init_chipset,
 };
 
 static int __devinit
@@ -57,7 +74,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 {
 	struct ide_host *host;
 	unsigned long base;
-	int i, rc;
+	int rc;
 	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
 	rc = pci_enable_device(dev);
@@ -72,12 +89,8 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 		return rc;
 	}
 	base = pci_resource_start(dev, 0);
-	outb(0x02, base + 0x1e);	/* set nIEN to block interrupts */
-	inb(base + 0x17);		/* read status to clear interrupts */
-	for (i = 0; i < sizeof(setup); ++i) {
-		if (setup[i])
-			outb(setup[i], base + i);
-	}
+
+	delkin_cb_init_chipset(dev);
 
 	memset(&hw, 0, sizeof(hw));
 	ide_std_init_ports(&hw, base + 0x10, base + 0x1e);
@@ -110,6 +123,40 @@ delkin_cb_remove (struct pci_dev *dev)
 	pci_disable_device(dev);
 }
 
+#ifdef CONFIG_PM
+static int delkin_cb_suspend(struct pci_dev *dev, pm_message_t state)
+{
+	pci_save_state(dev);
+	pci_disable_device(dev);
+	pci_set_power_state(dev, pci_choose_state(dev, state));
+
+	return 0;
+}
+
+static int delkin_cb_resume(struct pci_dev *dev)
+{
+	struct ide_host *host = pci_get_drvdata(dev);
+	int rc;
+
+	pci_set_power_state(dev, PCI_D0);
+
+	rc = pci_enable_device(dev);
+	if (rc)
+		return rc;
+
+	pci_restore_state(dev);
+	pci_set_master(dev);
+
+	if (host->init_chipset)
+		host->init_chipset(dev);
+
+	return 0;
+}
+#else
+#define delkin_cb_suspend NULL
+#define delkin_cb_resume NULL
+#endif
+
 static struct pci_device_id delkin_cb_pci_tbl[] __devinitdata = {
 	{ 0x1145, 0xf021, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{ 0x1145, 0xf024, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
@@ -122,6 +169,8 @@ static struct pci_driver delkin_cb_pci_driver = {
 	.id_table	= delkin_cb_pci_tbl,
 	.probe		= delkin_cb_probe,
 	.remove		= delkin_cb_remove,
+	.suspend	= delkin_cb_suspend,
+	.resume		= delkin_cb_resume,
 };
 
 static int __init delkin_cb_init(void)
-- 
GitLab


From 8108b882329db7fbf9fbca6559aa36e6174dc91f Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Fri, 17 Oct 2008 18:09:16 +0200
Subject: [PATCH 598/895] sgiioc4: kill useless address checks

The driver performs a number of checks on the virtual/physical addresses which
would always evaluate as true (except ide_dma_sgiioc4() -- always false):

- for sgiioc4_init_hwif_ports(), its caller, sgiioc4_ide_setup_pci_device(),
  guarantees that 'ctrl_port' and 'irq_port' parameters are never 0;

- in sgiioc4_read_status(), we always read the IDE status register, so there's
  no need to check the register's address (must be a leftover from the times
  when this function implemented the INB() method);

- in ide_dma_sgiioc4(), 'dma_base' can never be 0 as IOC4_DMA_OFFSET is not 0.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: jeremy@sgi.com
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/sgiioc4.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index 8581789352aa..f6c5ba07be71 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -101,11 +101,8 @@ sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
 	for (i = 0; i <= 7; i++)
 		hw->io_ports_array[i] = reg + i * 4;
 
-	if (ctrl_port)
-		hw->io_ports.ctl_addr = ctrl_port;
-
-	if (irq_port)
-		hw->io_ports.irq_addr = irq_port;
+	hw->io_ports.ctl_addr = ctrl_port;
+	hw->io_ports.irq_addr = irq_port;
 }
 
 static int
@@ -303,16 +300,14 @@ static u8 sgiioc4_read_status(ide_hwif_t *hwif)
 	unsigned long port = hwif->io_ports.status_addr;
 	u8 reg = (u8) readb((void __iomem *) port);
 
-	if ((port & 0xFFF) == 0x11C) {	/* Status register of IOC4 */
-		if (!(reg & ATA_BUSY)) { /* Not busy... check for interrupt */
-			unsigned long other_ir = port - 0x110;
-			unsigned int intr_reg = (u32) readl((void __iomem *) other_ir);
+	if (!(reg & ATA_BUSY)) {	/* Not busy... check for interrupt */
+		unsigned long other_ir = port - 0x110;
+		unsigned int intr_reg = (u32) readl((void __iomem *) other_ir);
 
-			/* Clear the Interrupt, Error bits on the IOC4 */
-			if (intr_reg & 0x03) {
-				writel(0x03, (void __iomem *) other_ir);
-				intr_reg = (u32) readl((void __iomem *) other_ir);
-			}
+		/* Clear the Interrupt, Error bits on the IOC4 */
+		if (intr_reg & 0x03) {
+			writel(0x03, (void __iomem *) other_ir);
+			intr_reg = (u32) readl((void __iomem *) other_ir);
 		}
 	}
 
@@ -329,9 +324,6 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 	int num_ports = sizeof (ioc4_dma_regs_t);
 	void *pad;
 
-	if (dma_base == 0)
-		return -1;
-
 	printk(KERN_INFO "    %s: MMIO-DMA\n", hwif->name);
 
 	if (request_mem_region(dma_base, num_ports, hwif->name) == NULL) {
-- 
GitLab


From 107111d450541df8c2a8d3af1d538cc7cd85e81b Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Fri, 17 Oct 2008 18:09:17 +0200
Subject: [PATCH 599/895] sgiioc4: kill duplicate ioremap()

By the time ide_dma_sgiioc4() gets called, sgiioc4_ide_setup_pci_device() will
have called ioremap() on the whole BAR0 region, so calling ioremap() on the DMA
registers means wasting a page. Replace this call by a mere address calculation,
based on the fact that IRQ registers (pointed to by 'hwif->io_ports.irq_addr')
are situated at offset 0 from BAR0.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: jeremy@sgi.com
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/sgiioc4.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index f6c5ba07be71..8af9b23499fd 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -320,7 +320,6 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 {
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	unsigned long dma_base = pci_resource_start(dev, 0) + IOC4_DMA_OFFSET;
-	void __iomem *virt_dma_base;
 	int num_ports = sizeof (ioc4_dma_regs_t);
 	void *pad;
 
@@ -333,14 +332,8 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 		return -1;
 	}
 
-	virt_dma_base = ioremap(dma_base, num_ports);
-	if (virt_dma_base == NULL) {
-		printk(KERN_ERR "%s(%s) -- ERROR: unable to map addresses "
-		       "0x%lx to 0x%lx\n", __func__, hwif->name,
-		       dma_base, dma_base + num_ports - 1);
-		goto dma_remap_failure;
-	}
-	hwif->dma_base = (unsigned long) virt_dma_base;
+	hwif->dma_base = (unsigned long)hwif->io_ports.irq_addr +
+			 IOC4_DMA_OFFSET;
 
 	hwif->sg_max_nents = IOC4_PRD_ENTRIES;
 
@@ -364,9 +357,6 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 	printk(KERN_INFO "%s: changing from DMA to PIO mode", hwif->name);
 
 dma_pci_alloc_failure:
-	iounmap(virt_dma_base);
-
-dma_remap_failure:
 	release_mem_region(dma_base, num_ports);
 
 	return -1;
-- 
GitLab


From a11e2afa77b9e1ddffc63e37cae5685683558870 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Fri, 17 Oct 2008 18:09:17 +0200
Subject: [PATCH 600/895] scc_pata: kill unused variables

Fix the "unused variable" warning in init_hwif_scc() caused by the commit
48c3c1072651922ed153bcf0a33ea82cf20df390 (ide: add struct ide_host (take 3)).
Moreover, remove the write-only variable 'dma_status_port' in init_setup_scc()
about which gcc gives no warning and which has been there from the very start...

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/scc_pata.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c
index 9ce1d8059921..49f163aa51e3 100644
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -617,7 +617,6 @@ static int __devinit init_setup_scc(struct pci_dev *dev,
 	unsigned long intmask_port;
 	unsigned long mode_port;
 	unsigned long ecmode_port;
-	unsigned long dma_status_port;
 	u32 reg = 0;
 	struct scc_ports *ports;
 	int rc;
@@ -637,7 +636,6 @@ static int __devinit init_setup_scc(struct pci_dev *dev,
 	intmask_port = dma_base + 0x010;
 	mode_port = ctl_base + 0x024;
 	ecmode_port = ctl_base + 0xf00;
-	dma_status_port = dma_base + 0x004;
 
 	/* controller initialization */
 	reg = 0;
@@ -843,8 +841,6 @@ static u8 scc_cable_detect(ide_hwif_t *hwif)
 
 static void __devinit init_hwif_scc(ide_hwif_t *hwif)
 {
-	struct scc_ports *ports = ide_get_hwifdata(hwif);
-
 	/* PTERADD */
 	out_be32((void __iomem *)(hwif->dma_base + 0x018), hwif->dmatable_dma);
 
-- 
GitLab


From 769b49ce68386b21e45bb6e573b63c02020b17a1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 17 Oct 2008 18:09:18 +0200
Subject: [PATCH 601/895] ide: re-add TRM290 fix lost during
 ide_build_dmatable() cleanup

commit 14c123f37187aba0b4e0e893a969efc6820c4170 ("ide: cleanup
ide_build_dmatable()") accidentally reverted TRM290 fix introduced
by commit 22e05b4549bf2405d6aca128540b20cd2dd33f1f ("ide-dma: fix
ide_build_dmatable() for TRM290").

Reported-by: Sergei Shtylylov <sshtylyov@ru.mvista.com>
Acked-by: Sergei Shtylylov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-dma-sff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c
index 0903782689e9..cac431f0df17 100644
--- a/drivers/ide/ide-dma-sff.c
+++ b/drivers/ide/ide-dma-sff.c
@@ -130,7 +130,7 @@ int ide_build_dmatable(ide_drive_t *drive, struct request *rq)
 			xcount = bcount & 0xffff;
 			if (is_trm290)
 				xcount = ((xcount >> 2) - 1) << 16;
-			if (xcount == 0x0000) {
+			else if (xcount == 0x0000) {
 				if (count++ >= PRD_ENTRIES)
 					goto use_pio_instead;
 				*table++ = cpu_to_le32(0x8000);
-- 
GitLab


From 719254faa17ffedc87ba0fadb9b34e535c9758d5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 17 Oct 2008 09:59:47 +0200
Subject: [PATCH 602/895] NOHZ: unify the nohz function calls in irq_enter()

We have two separate nohz function calls in irq_enter() for no good
reason. Just call a single NOHZ function from irq_enter() and call
the bits in the tick code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/tick.h     |  7 +++----
 kernel/softirq.c         | 10 +++-------
 kernel/time/tick-sched.c | 13 ++++++++++++-
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 98921a3e1aa8..b6ec8189ac0c 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -96,9 +96,11 @@ extern cpumask_t *tick_get_broadcast_oneshot_mask(void);
 extern void tick_clock_notify(void);
 extern int tick_check_oneshot_change(int allow_nohz);
 extern struct tick_sched *tick_get_tick_sched(int cpu);
+extern void tick_check_idle(int cpu);
 # else
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+static inline void tick_check_idle(int cpu) { }
 # endif
 
 #else /* CONFIG_GENERIC_CLOCKEVENTS */
@@ -106,26 +108,23 @@ static inline void tick_init(void) { }
 static inline void tick_cancel_sched_timer(int cpu) { }
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+static inline void tick_check_idle(int cpu) { }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
 extern void tick_nohz_stop_sched_tick(int inidle);
 extern void tick_nohz_restart_sched_tick(void);
-extern void tick_nohz_update_jiffies(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
-extern void tick_nohz_stop_idle(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 # else
 static inline void tick_nohz_stop_sched_tick(int inidle) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
-static inline void tick_nohz_update_jiffies(void) { }
 static inline ktime_t tick_nohz_get_sleep_length(void)
 {
 	ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
 
 	return len;
 }
-static inline void tick_nohz_stop_idle(int cpu) { }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 # endif /* !NO_HZ */
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 37d67aa2d56f..d410014279e7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -265,16 +265,12 @@ asmlinkage void do_softirq(void)
  */
 void irq_enter(void)
 {
-#ifdef CONFIG_NO_HZ
 	int cpu = smp_processor_id();
+
 	if (idle_cpu(cpu) && !in_interrupt())
-		tick_nohz_stop_idle(cpu);
-#endif
+		tick_check_idle(cpu);
+
 	__irq_enter();
-#ifdef CONFIG_NO_HZ
-	if (idle_cpu(cpu))
-		tick_nohz_update_jiffies();
-#endif
 }
 
 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b711ffcb106c..fdcf3f93bb8d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -155,7 +155,7 @@ void tick_nohz_update_jiffies(void)
 	touch_softlockup_watchdog();
 }
 
-void tick_nohz_stop_idle(int cpu)
+static void tick_nohz_stop_idle(int cpu)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 
@@ -558,6 +558,17 @@ static inline void tick_nohz_switch_to_nohz(void) { }
 
 #endif /* NO_HZ */
 
+/*
+ * Called from irq_enter to notify about the possible interruption of idle()
+ */
+void tick_check_idle(int cpu)
+{
+#ifdef CONFIG_NO_HZ
+	tick_nohz_stop_idle(cpu);
+	tick_nohz_update_jiffies();
+#endif
+}
+
 /*
  * High resolution timer specific code
  */
-- 
GitLab


From c34bec5a44e9486597d78e7a686b2f9088a0564c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 17 Oct 2008 10:04:34 +0200
Subject: [PATCH 603/895] NOHZ: split tick_nohz_restart_sched_tick()

Split out the clock event device reprogramming. Preparatory
patch.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-sched.c | 49 ++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index fdcf3f93bb8d..7aedf4343539 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -377,6 +377,32 @@ ktime_t tick_nohz_get_sleep_length(void)
 	return ts->sleep_length;
 }
 
+static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
+{
+	hrtimer_cancel(&ts->sched_timer);
+	ts->sched_timer.expires = ts->idle_tick;
+
+	while (1) {
+		/* Forward the time to expire in the future */
+		hrtimer_forward(&ts->sched_timer, now, tick_period);
+
+		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+			hrtimer_start(&ts->sched_timer,
+				      ts->sched_timer.expires,
+				      HRTIMER_MODE_ABS);
+			/* Check, if the timer was already in the past */
+			if (hrtimer_active(&ts->sched_timer))
+				break;
+		} else {
+			if (!tick_program_event(ts->sched_timer.expires, 0))
+				break;
+		}
+		/* Update jiffies and reread time */
+		tick_do_update_jiffies64(now);
+		now = ktime_get();
+	}
+}
+
 /**
  * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
  *
@@ -430,28 +456,7 @@ void tick_nohz_restart_sched_tick(void)
 	 */
 	ts->tick_stopped  = 0;
 	ts->idle_exittime = now;
-	hrtimer_cancel(&ts->sched_timer);
-	ts->sched_timer.expires = ts->idle_tick;
-
-	while (1) {
-		/* Forward the time to expire in the future */
-		hrtimer_forward(&ts->sched_timer, now, tick_period);
-
-		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-			hrtimer_start(&ts->sched_timer,
-				      ts->sched_timer.expires,
-				      HRTIMER_MODE_ABS);
-			/* Check, if the timer was already in the past */
-			if (hrtimer_active(&ts->sched_timer))
-				break;
-		} else {
-			if (!tick_program_event(ts->sched_timer.expires, 0))
-				break;
-		}
-		/* Update jiffies and reread time */
-		tick_do_update_jiffies64(now);
-		now = ktime_get();
-	}
+	tick_nohz_restart(ts, now);
 	local_irq_enable();
 }
 
-- 
GitLab


From fb02fbc14d17837b4b7b02dbb36142c16a7bf208 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 17 Oct 2008 10:01:23 +0200
Subject: [PATCH 604/895] NOHZ: restart tick device from irq_enter()

We did not restart the tick device from irq_enter() to avoid double
reprogramming and extra events in the return immediate to idle case.

But long lasting softirqs can lead to a situation where jiffies become
stale:

idle()
  tick stopped (reprogrammed to next pending timer)
  halt()
   interrupt
     jiffies updated from irq_enter()
     interrupt handler
     softirq function 1 runs 20ms
     softirq function 2 arms a 10ms timer with a stale jiffies value
     jiffies updated from irq_exit()
     timer wheel has now an already expired timer
     (the one added in function 2)
     timer fires and timer softirq runs

This was discovered when debugging a timer problem which happend only
when the ath5k driver is active. The debugging proved that there is a
softirq function running for more than 20ms, which is a bug by itself.

To solve this we restart the tick timer right from irq_enter(), but do
not go through the other functions which are necessary to return from
idle when need_resched() is set.

Reported-by: Elias Oltmanns <eo@nebensachen.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Elias Oltmanns <eo@nebensachen.de>
---
 kernel/time/tick-broadcast.c | 13 +++++++++++++
 kernel/time/tick-internal.h  |  2 ++
 kernel/time/tick-sched.c     | 31 +++++++++++++++++++++++--------
 3 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index cb01cd8f919b..f98a1b7b16e9 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -383,6 +383,19 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 	return 0;
 }
 
+/*
+ * Called from irq_enter() when idle was interrupted to reenable the
+ * per cpu device.
+ */
+void tick_check_oneshot_broadcast(int cpu)
+{
+	if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
+		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
+
+		clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
+	}
+}
+
 /*
  * Handle oneshot mode broadcasting
  */
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 469248782c23..b1c05bf75ee0 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -36,6 +36,7 @@ extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
 extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 extern int tick_broadcast_oneshot_active(void);
+extern void tick_check_oneshot_broadcast(int cpu);
 # else /* BROADCAST */
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
@@ -45,6 +46,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
+static inline void tick_check_oneshot_broadcast(int cpu) { }
 # endif /* !BROADCAST */
 
 #else /* !ONESHOT */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7aedf4343539..0581c11fe6c6 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -508,10 +508,6 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 	update_process_times(user_mode(regs));
 	profile_tick(CPU_PROFILING);
 
-	/* Do not restart, when we are in the idle loop */
-	if (ts->tick_stopped)
-		return;
-
 	while (tick_nohz_reprogram(ts, now)) {
 		now = ktime_get();
 		tick_do_update_jiffies64(now);
@@ -557,6 +553,27 @@ static void tick_nohz_switch_to_nohz(void)
 	       smp_processor_id());
 }
 
+/*
+ * When NOHZ is enabled and the tick is stopped, we need to kick the
+ * tick timer from irq_enter() so that the jiffies update is kept
+ * alive during long running softirqs. That's ugly as hell, but
+ * correctness is key even if we need to fix the offending softirq in
+ * the first place.
+ *
+ * Note, this is different to tick_nohz_restart. We just kick the
+ * timer and do not touch the other magic bits which need to be done
+ * when idle is left.
+ */
+static void tick_nohz_kick_tick(int cpu)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+	if (!ts->tick_stopped)
+		return;
+
+	tick_nohz_restart(ts, ktime_get());
+}
+
 #else
 
 static inline void tick_nohz_switch_to_nohz(void) { }
@@ -568,9 +585,11 @@ static inline void tick_nohz_switch_to_nohz(void) { }
  */
 void tick_check_idle(int cpu)
 {
+	tick_check_oneshot_broadcast(cpu);
 #ifdef CONFIG_NO_HZ
 	tick_nohz_stop_idle(cpu);
 	tick_nohz_update_jiffies();
+	tick_nohz_kick_tick(cpu);
 #endif
 }
 
@@ -627,10 +646,6 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
 		profile_tick(CPU_PROFILING);
 	}
 
-	/* Do not restart, when we are in the idle loop */
-	if (ts->tick_stopped)
-		return HRTIMER_NORESTART;
-
 	hrtimer_forward(timer, now, tick_period);
 
 	return HRTIMER_RESTART;
-- 
GitLab


From 18de9735300756e3ca9c361ef58409d8561dfe0d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 16 Oct 2008 17:41:11 -0400
Subject: [PATCH 605/895] NFS: Enable NFSv4 callback server to listen on
 AF_INET6 sockets

Allow the NFS callback server to listen for requests via an AF_INET6 or
AF_INET socket when IPv6 support is present in the kernel.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 6a09760c5960..c2e9cfd9e5a4 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -40,6 +40,16 @@ unsigned short nfs_callback_tcpport;
 static const int nfs_set_port_min = 0;
 static const int nfs_set_port_max = 65535;
 
+/*
+ * If the kernel has IPv6 support available, always listen for
+ * both AF_INET and AF_INET6 requests.
+ */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static const sa_family_t	nfs_callback_family = AF_INET6;
+#else
+static const sa_family_t	nfs_callback_family = AF_INET;
+#endif
+
 static int param_set_port(const char *val, struct kernel_param *kp)
 {
 	char *endp;
@@ -106,7 +116,7 @@ int nfs_callback_up(void)
 	if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
 		goto out;
 	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
-				AF_INET, NULL);
+				nfs_callback_family, NULL);
 	ret = -ENOMEM;
 	if (!serv)
 		goto out_err;
@@ -116,7 +126,8 @@ int nfs_callback_up(void)
 	if (ret <= 0)
 		goto out_err;
 	nfs_callback_tcpport = ret;
-	dprintk("Callback port = 0x%x\n", nfs_callback_tcpport);
+	dprintk("NFS: Callback listener port = %u (af %u)\n",
+			nfs_callback_tcpport, nfs_callback_family);
 
 	nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
 	if (IS_ERR(nfs_callback_info.rqst)) {
@@ -149,8 +160,8 @@ out:
 	mutex_unlock(&nfs_callback_mutex);
 	return ret;
 out_err:
-	dprintk("Couldn't create callback socket or server thread; err = %d\n",
-		ret);
+	dprintk("NFS: Couldn't create callback socket or server thread; "
+		"err = %d\n", ret);
 	nfs_callback_info.users--;
 	goto out;
 }
-- 
GitLab


From 504e518953a330c8d44a95bdd65a5c9f50f1012e Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@suse.de>
Date: Thu, 16 Oct 2008 14:15:16 +1100
Subject: [PATCH 606/895] Make nfs_file_cred more robust.

As not all files have an associated open_context (e.g. device special
files), it is safest to test for the existence of the open context
before de-referencing it.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c      | 6 ++++--
 include/linux/nfs_fs.h | 8 ++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c910413eaeca..83e700a2b0c0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1659,8 +1659,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 		struct nfs_open_context *ctx;
 
 		ctx = nfs_file_open_context(sattr->ia_file);
-		cred = ctx->cred;
-		state = ctx->state;
+		if (ctx) {
+			cred = ctx->cred;
+			state = ctx->state;
+		}
 	}
 
 	status = nfs4_do_setattr(inode, cred, fattr, sattr, state);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ac8d0233b05c..4eaa8347a0d9 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -367,8 +367,12 @@ static inline struct nfs_open_context *nfs_file_open_context(struct file *filp)
 
 static inline struct rpc_cred *nfs_file_cred(struct file *file)
 {
-	if (file != NULL)
-		return nfs_file_open_context(file)->cred;
+	if (file != NULL) {
+		struct nfs_open_context *ctx =
+			nfs_file_open_context(file);
+		if (ctx)
+			return ctx->cred;
+	}
 	return NULL;
 }
 
-- 
GitLab


From ec9a05c94c7cd00b4f0ab126c3d394f2fc2e4712 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Fri, 17 Oct 2008 10:44:37 -0400
Subject: [PATCH 607/895] NFS: use correct fs type for v4 submounts and
 referrals

Signed-off-by: Andy Adamson<andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8b28b95c9e44..a3b0061dfd45 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2459,7 +2459,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
 		compare_super = NULL;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
+	s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
@@ -2544,7 +2544,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
 		compare_super = NULL;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
+	s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
-- 
GitLab


From e7f4b8f1a5893ff8296b5b581e16a0b96f60a3b5 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 17 Oct 2008 16:20:07 -0500
Subject: [PATCH 608/895] 9p: Improve debug support

The new debug support lacks some of the information that the previous fcprint
code provided -- this patch focuses on better presentation of debug data along
with more helpful debug along error paths.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/9p.h |  4 +--
 net/9p/client.c     | 76 ++++++++++++++++++++++++++++++++-------------
 net/9p/protocol.c   | 21 +++++++++----
 3 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index cb5bc731e885..d2c60c73619d 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -27,8 +27,6 @@
 #ifndef NET_9P_H
 #define NET_9P_H
 
-#ifdef CONFIG_NET_9P_DEBUG
-
 /**
  * enum p9_debug_flags - bits for mount time debug parameter
  * @P9_DEBUG_ERROR: more verbose error messages including original error string
@@ -55,10 +53,12 @@ enum p9_debug_flags {
 	P9_DEBUG_SLABS =      	(1<<7),
 	P9_DEBUG_FCALL =	(1<<8),
 	P9_DEBUG_FID =		(1<<9),
+	P9_DEBUG_PKT =		(1<<10),
 };
 
 extern unsigned int p9_debug_level;
 
+#ifdef CONFIG_NET_9P_DEBUG
 #define P9_DPRINTK(level, format, arg...) \
 do {  \
 	if ((p9_debug_level & level) == level) {\
diff --git a/net/9p/client.c b/net/9p/client.c
index 2a166bfb95a3..bbac2f72b4d2 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -389,8 +389,8 @@ p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag,
 	pdu->id = r_type;
 	pdu->tag = r_tag;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "pdu: type: %d tag: %d size=%d offset=%d\n",
-				pdu->id, pdu->tag, pdu->size, pdu->offset);
+	P9_DPRINTK(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size,
+							pdu->id, pdu->tag);
 
 	if (type)
 		*type = r_type;
@@ -672,6 +672,7 @@ int p9_client_version(struct p9_client *c)
 	err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version);
 	if (err) {
 		P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err);
+		p9pdu_dump(1, req->rc);
 		goto error;
 	}
 
@@ -810,6 +811,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
 
 	err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
 	if (err) {
+		p9pdu_dump(1, req->rc);
 		p9_free_req(clnt, req);
 		goto error;
 	}
@@ -856,6 +858,7 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname)
 
 	err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
 	if (err) {
+		p9pdu_dump(1, req->rc);
 		p9_free_req(clnt, req);
 		goto error;
 	}
@@ -910,9 +913,12 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 	}
 
 	err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids);
-	p9_free_req(clnt, req);
-	if (err)
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		p9_free_req(clnt, req);
 		goto clunk_fid;
+	}
+	p9_free_req(clnt, req);
 
 	P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
 
@@ -967,9 +973,10 @@ int p9_client_open(struct p9_fid *fid, int mode)
 	}
 
 	err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
-	p9_free_req(clnt, req);
-	if (err)
-		goto error;
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
 
 	P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n",
 				qid.type, qid.path, qid.version, iounit);
@@ -977,6 +984,8 @@ int p9_client_open(struct p9_fid *fid, int mode)
 	fid->mode = mode;
 	fid->iounit = iounit;
 
+free_and_error:
+	p9_free_req(clnt, req);
 error:
 	return err;
 }
@@ -1007,9 +1016,10 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 	}
 
 	err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
-	p9_free_req(clnt, req);
-	if (err)
-		goto error;
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
 
 	P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
 				qid.type, qid.path, qid.version, iounit);
@@ -1017,6 +1027,8 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 	fid->mode = mode;
 	fid->iounit = iounit;
 
+free_and_error:
+	p9_free_req(clnt, req);
 error:
 	return err;
 }
@@ -1103,8 +1115,10 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 	}
 
 	err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr);
-	if (err)
+	if (err) {
+		p9pdu_dump(1, req->rc);
 		goto free_and_error;
+	}
 
 	P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
 
@@ -1163,8 +1177,11 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 	}
 
 	err = p9pdu_readf(req->rc, clnt->dotu, "d", &count);
-	if (err)
+	if (err) {
+		p9pdu_dump(1, req->rc);
 		goto free_and_error;
+	}
+
 	P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
 
 	p9_free_req(clnt, req);
@@ -1200,20 +1217,27 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
 	}
 
 	err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret);
-	p9_free_req(clnt, req);
-	if (err)
-		goto error;
+	if (err) {
+		ret = ERR_PTR(err);
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
 
 	P9_DPRINTK(P9_DEBUG_9P,
-		"<<< RSTAT sz=%x type=%x dev=%x qid=%2.2x %4.4x %8.8llx"
-		" mode=%8.8x uid=%d gid=%d size=%lld %s\n",
+		"<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
+		"<<<    mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
+		"<<<    name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
+		"<<<    uid=%d gid=%d n_muid=%d\n",
 		ret->size, ret->type, ret->dev, ret->qid.type,
-		ret->qid.version, ret->qid.path, ret->mode,
-		ret->n_uid, ret->n_gid, ret->length, ret->name);
+		ret->qid.path, ret->qid.version, ret->mode,
+		ret->atime, ret->mtime, ret->length, ret->name,
+		ret->uid, ret->gid, ret->muid, ret->extension,
+		ret->n_uid, ret->n_gid, ret->n_muid);
 
-	return ret;
+free_and_error:
+	p9_free_req(clnt, req);
 error:
-	return ERR_PTR(err);
+	return ret;
 }
 EXPORT_SYMBOL(p9_client_stat);
 
@@ -1224,6 +1248,16 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 	struct p9_client *clnt;
 
 	P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P,
+		"     sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
+		"     mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
+		"     name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
+		"     uid=%d gid=%d n_muid=%d\n",
+		wst->size, wst->type, wst->dev, wst->qid.type,
+		wst->qid.path, wst->qid.version, wst->mode,
+		wst->atime, wst->mtime, wst->length, wst->name,
+		wst->uid, wst->gid, wst->muid, wst->extension,
+		wst->n_uid, wst->n_gid, wst->n_muid);
 	err = 0;
 	clnt = fid->clnt;
 
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 92cb60bb191b..84fa21271876 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/uaccess.h>
+#include <linux/sched.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include "protocol.h"
@@ -52,8 +53,6 @@
 static int
 p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...);
 
-#define PACKET_DEBUG 0
-
 void
 p9pdu_dump(int way, struct p9_fcall *pdu)
 {
@@ -78,9 +77,9 @@ p9pdu_dump(int way, struct p9_fcall *pdu)
 	n += scnprintf(buf + n, buflen - n, "\n");
 
 	if (way)
-		printk(KERN_NOTICE "[[(%d)[ %s\n", datalen, buf);
+		P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf);
 	else
-		printk(KERN_NOTICE "]](%d)] %s\n", datalen, buf);
+		P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf);
 }
 EXPORT_SYMBOL(p9pdu_dump);
 
@@ -512,13 +511,20 @@ p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...)
 int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu)
 {
 	struct p9_fcall fake_pdu;
+	int ret;
 
 	fake_pdu.size = len;
 	fake_pdu.capacity = len;
 	fake_pdu.sdata = buf;
 	fake_pdu.offset = 0;
 
-	return p9pdu_readf(&fake_pdu, dotu, "S", st);
+	ret = p9pdu_readf(&fake_pdu, dotu, "S", st);
+	if (ret) {
+		P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
+		p9pdu_dump(1, &fake_pdu);
+	}
+
+	return ret;
 }
 EXPORT_SYMBOL(p9stat_read);
 
@@ -536,9 +542,12 @@ int p9pdu_finalize(struct p9_fcall *pdu)
 	err = p9pdu_writef(pdu, 0, "d", size);
 	pdu->size = size;
 
-	if (PACKET_DEBUG)
+	if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT)
 		p9pdu_dump(0, pdu);
 
+	P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size,
+							pdu->id, pdu->tag);
+
 	return err;
 }
 
-- 
GitLab


From 57c7b4e68edf3b4fe7f977db9ad437e0f7f7c382 Mon Sep 17 00:00:00 2001
From: Magnus Deininger <dma05@web.de>
Date: Fri, 17 Oct 2008 12:44:46 -0500
Subject: [PATCH 609/895] 9p: fix device file handling

In v9fs_get_inode(), for block, as well as char devices (in theory),
the function init_special_inode() is called to set up callback functions
for file ops. this function uses the file mode's value to determine whether
to use block or char dev functions. In v9fs_inode_from_fid(), the function
p9mode2unixmode() is used, but for all devices it initially returns S_IFBLK,
then uses v9fs_get_inode() to initialise a new inode, then finally uses
v9fs_stat2inode(), which would determine whether the inode is a block or
character device. However, at that point init_special_inode() had already
decided to use the block device functions, so even if the inode's mode is
turned to a character device, the block functions are still used to operate
on them. The attached patch simply calls init_special_inode() again for devices
after parsing device node data in v9fs_stat2inode() so that the proper functions
are used.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e96d84aafbe2..8314d3f43b71 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -864,6 +864,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 				stat->extension);
 		};
 		inode->i_rdev = MKDEV(major, minor);
+		init_special_inode(inode, inode->i_mode, inode->i_rdev);
 	} else
 		inode->i_rdev = 0;
 
-- 
GitLab


From f0a0ac2ee50c62cf4ad9b06cf8a12435cc5ac44d Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 17 Oct 2008 12:45:23 -0500
Subject: [PATCH 610/895] 9p: fix oops in protocol stat parsing error path.

When we get an error on parsing a stat due to a protocol bug,
we can generate an oops during cleanup because we didn't
initialize the string pointers in the stat structure.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/protocol.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 84fa21271876..29be52439086 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -215,9 +215,9 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
 				struct p9_wstat *stbuf =
 				    va_arg(ap, struct p9_wstat *);
 
-				stbuf->extension = NULL;
+				memset(stbuf, 0, sizeof(struct p9_wstat));
 				stbuf->n_uid = stbuf->n_gid = stbuf->n_muid =
-				    -1;
+									-1;
 				errcode =
 				    p9pdu_readf(pdu, optional,
 						"wwdQdddqssss?sddd",
-- 
GitLab


From 7eb923b80c8ce16697129fb2dcdfaeabf83f0dbc Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 17 Oct 2008 12:45:40 -0500
Subject: [PATCH 611/895] 9p: add more conservative locking

During the reorganization some of the multi-theaded locking assumptions were
accidently relaxed.  This patch moves us back towards a more conservative
locking strategy.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index e8ebe2cb7e8b..be65d8242fd2 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -188,8 +188,16 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 	LIST_HEAD(cancel_list);
 
 	P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
-	m->err = err;
+
 	spin_lock_irqsave(&m->client->lock, flags);
+
+	if (m->err) {
+		spin_unlock_irqrestore(&m->client->lock, flags);
+		return;
+	}
+
+	m->err = err;
+
 	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
 		req->status = REQ_STATUS_ERROR;
 		if (!req->t_err)
@@ -352,8 +360,9 @@ static void p9_read_work(struct work_struct *work)
 	/* not an else because some packets (like clunk) have no payload */
 	if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
 		P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n");
-
+		spin_lock(&m->client->lock);
 		list_del(&m->req->req_list);
+		spin_unlock(&m->client->lock);
 		p9_client_cb(m->client, m->req);
 
 		m->rbuf = NULL;
@@ -651,9 +660,8 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 	if (m->err < 0)
 		return m->err;
 
-	req->status = REQ_STATUS_UNSENT;
-
 	spin_lock(&client->lock);
+	req->status = REQ_STATUS_UNSENT;
 	list_add_tail(&req->req_list, &m->unsent_req_list);
 	spin_unlock(&client->lock);
 
@@ -672,19 +680,21 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 {
 	struct p9_trans_fd *ts = client->trans;
 	struct p9_conn *m = ts->conn;
+	int ret = 1;
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p req %p\n", m, req);
 
 	spin_lock(&client->lock);
 	list_del(&req->req_list);
-	spin_unlock(&client->lock);
 
 	if (req->status == REQ_STATUS_UNSENT) {
 		req->status = REQ_STATUS_FLSHD;
-		return 0;
+		ret = 0;
 	}
 
-	return 1;
+	spin_unlock(&client->lock);
+
+	return ret;
 }
 
 /**
-- 
GitLab


From 81e192d6ce303b6792aa38ff35f41a1a7357f23a Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 17 Oct 2008 18:48:36 +0000
Subject: [PATCH 612/895] parisc: convert to generic compat_sys_ptrace

This patch does the compat_sys_ptrace conversion for parisc.
In addition it does convert the parisc ptrace code to use the
architecture-independent ptrace infrastructure instead of own coding.

Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
---
 arch/parisc/include/asm/ptrace.h   |  10 +
 arch/parisc/kernel/ptrace.c        | 429 ++++++++++++-----------------
 arch/parisc/kernel/syscall_table.S |   2 +-
 3 files changed, 183 insertions(+), 258 deletions(-)

diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h
index 3e94c5d85ff5..afa5333187b4 100644
--- a/arch/parisc/include/asm/ptrace.h
+++ b/arch/parisc/include/asm/ptrace.h
@@ -47,6 +47,16 @@ struct pt_regs {
 
 #define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
 
+#define __ARCH_WANT_COMPAT_SYS_PTRACE
+
+struct task_struct;
+#define arch_has_single_step()	1
+void user_disable_single_step(struct task_struct *task);
+void user_enable_single_step(struct task_struct *task);
+
+#define arch_has_block_step()	1
+void user_enable_block_step(struct task_struct *task);
+
 /* XXX should we use iaoq[1] or iaoq[0] ? */
 #define user_mode(regs)			(((regs)->iaoq[0] & 3) ? 1 : 0)
 #define user_space(regs)		(((regs)->iasq[1] != 0) ? 1 : 0)
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 49c637970789..90904f9dfc50 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2000 Hewlett-Packard Co, Linuxcare Inc.
  * Copyright (C) 2000 Matthew Wilcox <matthew@wil.cx>
  * Copyright (C) 2000 David Huggins-Daines <dhd@debian.org>
+ * Copyright (C) 2008 Helge Deller <deller@gmx.de>
  */
 
 #include <linux/kernel.h>
@@ -27,15 +28,149 @@
 /* PSW bits we allow the debugger to modify */
 #define USER_PSW_BITS	(PSW_N | PSW_V | PSW_CB)
 
-#undef DEBUG_PTRACE
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *task)
+{
+	task->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
 
-#ifdef DEBUG_PTRACE
-#define DBG(x...)	printk(x)
-#else
-#define DBG(x...)
-#endif
+	/* make sure the trap bits are not set */
+	pa_psw(task)->r = 0;
+	pa_psw(task)->t = 0;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+/*
+ * The following functions are called by ptrace_resume() when
+ * enabling or disabling single/block tracing.
+ */
+void user_disable_single_step(struct task_struct *task)
+{
+	ptrace_disable(task);
+}
+
+void user_enable_single_step(struct task_struct *task)
+{
+	task->ptrace &= ~PT_BLOCKSTEP;
+	task->ptrace |= PT_SINGLESTEP;
+
+	if (pa_psw(task)->n) {
+		struct siginfo si;
+
+		/* Nullified, just crank over the queue. */
+		task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1];
+		task_regs(task)->iasq[0] = task_regs(task)->iasq[1];
+		task_regs(task)->iaoq[1] = task_regs(task)->iaoq[0] + 4;
+		pa_psw(task)->n = 0;
+		pa_psw(task)->x = 0;
+		pa_psw(task)->y = 0;
+		pa_psw(task)->z = 0;
+		pa_psw(task)->b = 0;
+		ptrace_disable(task);
+		/* Don't wake up the task, but let the
+		   parent know something happened. */
+		si.si_code = TRAP_TRACE;
+		si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3);
+		si.si_signo = SIGTRAP;
+		si.si_errno = 0;
+		force_sig_info(SIGTRAP, &si, task);
+		/* notify_parent(task, SIGCHLD); */
+		return;
+	}
+
+	/* Enable recovery counter traps.  The recovery counter
+	 * itself will be set to zero on a task switch.  If the
+	 * task is suspended on a syscall then the syscall return
+	 * path will overwrite the recovery counter with a suitable
+	 * value such that it traps once back in user space.  We
+	 * disable interrupts in the tasks PSW here also, to avoid
+	 * interrupts while the recovery counter is decrementing.
+	 */
+	pa_psw(task)->r = 1;
+	pa_psw(task)->t = 0;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+	task->ptrace &= ~PT_SINGLESTEP;
+	task->ptrace |= PT_BLOCKSTEP;
+
+	/* Enable taken branch trap. */
+	pa_psw(task)->r = 0;
+	pa_psw(task)->t = 1;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+	unsigned long tmp;
+	long ret = -EIO;
 
-#ifdef CONFIG_64BIT
+	switch (request) {
+
+	/* Read the word at location addr in the USER area.  For ptraced
+	   processes, the kernel saves all regs on a syscall. */
+	case PTRACE_PEEKUSR:
+		if ((addr & (sizeof(long)-1)) ||
+		    (unsigned long) addr >= sizeof(struct pt_regs))
+			break;
+		tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
+		ret = put_user(tmp, (unsigned long *) data);
+		break;
+
+	/* Write the word at location addr in the USER area.  This will need
+	   to change when the kernel no longer saves all regs on a syscall.
+	   FIXME.  There is a problem at the moment in that r3-r18 are only
+	   saved if the process is ptraced on syscall entry, and even then
+	   those values are overwritten by actual register values on syscall
+	   exit. */
+	case PTRACE_POKEUSR:
+		/* Some register values written here may be ignored in
+		 * entry.S:syscall_restore_rfi; e.g. iaoq is written with
+		 * r31/r31+4, and not with the values in pt_regs.
+		 */
+		if (addr == PT_PSW) {
+			/* Allow writing to Nullify, Divide-step-correction,
+			 * and carry/borrow bits.
+			 * BEWARE, if you set N, and then single step, it won't
+			 * stop on the nullified instruction.
+			 */
+			data &= USER_PSW_BITS;
+			task_regs(child)->gr[0] &= ~USER_PSW_BITS;
+			task_regs(child)->gr[0] |= data;
+			ret = 0;
+			break;
+		}
+
+		if ((addr & (sizeof(long)-1)) ||
+		    (unsigned long) addr >= sizeof(struct pt_regs))
+			break;
+		if ((addr >= PT_GR1 && addr <= PT_GR31) ||
+				addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
+				(addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
+				addr == PT_SAR) {
+			*(unsigned long *) ((char *) task_regs(child) + addr) = data;
+			ret = 0;
+		}
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+
+#ifdef CONFIG_COMPAT
 
 /* This function is needed to translate 32 bit pt_regs offsets in to
  * 64 bit pt_regs offsets.  For example, a 32 bit gdb under a 64 bit kernel
@@ -61,106 +196,25 @@ static long translate_usr_offset(long offset)
 	else
 		return -1;
 }
-#endif
 
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure single step bits etc are not set.
- */
-void ptrace_disable(struct task_struct *child)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			compat_ulong_t addr, compat_ulong_t data)
 {
-	/* make sure the trap bits are not set */
-	pa_psw(child)->r = 0;
-	pa_psw(child)->t = 0;
-	pa_psw(child)->h = 0;
-	pa_psw(child)->l = 0;
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
-	long ret;
-#ifdef DEBUG_PTRACE
-	long oaddr=addr, odata=data;
-#endif
+	compat_uint_t tmp;
+	long ret = -EIO;
 
 	switch (request) {
-	case PTRACE_PEEKTEXT: /* read word at location addr. */ 
-	case PTRACE_PEEKDATA: {
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			int copied;
-			unsigned int tmp;
-
-			addr &= 0xffffffffL;
-			copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-			ret = -EIO;
-			if (copied != sizeof(tmp))
-				goto out_tsk;
-			ret = put_user(tmp,(unsigned int *) data);
-			DBG("sys_ptrace(PEEK%s, %d, %lx, %lx) returning %ld, data %x\n",
-				request == PTRACE_PEEKTEXT ? "TEXT" : "DATA",
-				pid, oaddr, odata, ret, tmp);
-		}
-		else
-#endif
-			ret = generic_ptrace_peekdata(child, addr, data);
-		goto out_tsk;
-	}
 
-	/* when I and D space are separate, this will have to be fixed. */
-	case PTRACE_POKETEXT: /* write the word at location addr. */
-	case PTRACE_POKEDATA:
-		ret = 0;
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			unsigned int tmp = (unsigned int)data;
-			DBG("sys_ptrace(POKE%s, %d, %lx, %lx)\n",
-				request == PTRACE_POKETEXT ? "TEXT" : "DATA",
-				pid, oaddr, odata);
-			addr &= 0xffffffffL;
-			if (access_process_vm(child, addr, &tmp, sizeof(tmp), 1) == sizeof(tmp))
-				goto out_tsk;
-		}
-		else
-#endif
-		{
-			if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
-				goto out_tsk;
-		}
-		ret = -EIO;
-		goto out_tsk;
-
-	/* Read the word at location addr in the USER area.  For ptraced
-	   processes, the kernel saves all regs on a syscall. */
-	case PTRACE_PEEKUSR: {
-		ret = -EIO;
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			unsigned int tmp;
-
-			if (addr & (sizeof(int)-1))
-				goto out_tsk;
-			if ((addr = translate_usr_offset(addr)) < 0)
-				goto out_tsk;
-
-			tmp = *(unsigned int *) ((char *) task_regs(child) + addr);
-			ret = put_user(tmp, (unsigned int *) data);
-			DBG("sys_ptrace(PEEKUSR, %d, %lx, %lx) returning %ld, addr %lx, data %x\n",
-				pid, oaddr, odata, ret, addr, tmp);
-		}
-		else
-#endif
-		{
-			unsigned long tmp;
+	case PTRACE_PEEKUSR:
+		if (addr & (sizeof(compat_uint_t)-1))
+			break;
+		addr = translate_usr_offset(addr);
+		if (addr < 0)
+			break;
 
-			if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
-				goto out_tsk;
-			tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
-			ret = put_user(tmp, (unsigned long *) data);
-		}
-		goto out_tsk;
-	}
+		tmp = *(compat_uint_t *) ((char *) task_regs(child) + addr);
+		ret = put_user(tmp, (compat_uint_t *) (unsigned long) data);
+		break;
 
 	/* Write the word at location addr in the USER area.  This will need
 	   to change when the kernel no longer saves all regs on a syscall.
@@ -169,185 +223,46 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	   those values are overwritten by actual register values on syscall
 	   exit. */
 	case PTRACE_POKEUSR:
-		ret = -EIO;
 		/* Some register values written here may be ignored in
 		 * entry.S:syscall_restore_rfi; e.g. iaoq is written with
 		 * r31/r31+4, and not with the values in pt_regs.
 		 */
-		 /* PT_PSW=0, so this is valid for 32 bit processes under 64
-		 * bit kernels.
-		 */
 		if (addr == PT_PSW) {
-			/* PT_PSW=0, so this is valid for 32 bit processes
-			 * under 64 bit kernels.
-			 *
-			 * Allow writing to Nullify, Divide-step-correction,
-			 * and carry/borrow bits.
-			 * BEWARE, if you set N, and then single step, it won't
-			 * stop on the nullified instruction.
+			/* Since PT_PSW==0, it is valid for 32 bit processes
+			 * under 64 bit kernels as well.
 			 */
-			DBG("sys_ptrace(POKEUSR, %d, %lx, %lx)\n",
-				pid, oaddr, odata);
-			data &= USER_PSW_BITS;
-			task_regs(child)->gr[0] &= ~USER_PSW_BITS;
-			task_regs(child)->gr[0] |= data;
-			ret = 0;
-			goto out_tsk;
-		}
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			if (addr & (sizeof(int)-1))
-				goto out_tsk;
-			if ((addr = translate_usr_offset(addr)) < 0)
-				goto out_tsk;
-			DBG("sys_ptrace(POKEUSR, %d, %lx, %lx) addr %lx\n",
-				pid, oaddr, odata, addr);
+			ret = arch_ptrace(child, request, addr, data);
+		} else {
+			if (addr & (sizeof(compat_uint_t)-1))
+				break;
+			addr = translate_usr_offset(addr);
+			if (addr < 0)
+				break;
 			if (addr >= PT_FR0 && addr <= PT_FR31 + 4) {
 				/* Special case, fp regs are 64 bits anyway */
-				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
+				*(__u64 *) ((char *) task_regs(child) + addr) = data;
 				ret = 0;
 			}
 			else if ((addr >= PT_GR1+4 && addr <= PT_GR31+4) ||
 					addr == PT_IAOQ0+4 || addr == PT_IAOQ1+4 ||
 					addr == PT_SAR+4) {
 				/* Zero the top 32 bits */
-				*(unsigned int *) ((char *) task_regs(child) + addr - 4) = 0;
-				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
+				*(__u32 *) ((char *) task_regs(child) + addr - 4) = 0;
+				*(__u32 *) ((char *) task_regs(child) + addr) = data;
 				ret = 0;
 			}
-			goto out_tsk;
 		}
-		else
-#endif
-		{
-			if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
-				goto out_tsk;
-			if ((addr >= PT_GR1 && addr <= PT_GR31) ||
-					addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
-					(addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
-					addr == PT_SAR) {
-				*(unsigned long *) ((char *) task_regs(child) + addr) = data;
-				ret = 0;
-			}
-			goto out_tsk;
-		}
-
-	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-	case PTRACE_CONT:
-		ret = -EIO;
-		DBG("sys_ptrace(%s)\n",
-			request == PTRACE_SYSCALL ? "SYSCALL" : "CONT");
-		if (!valid_signal(data))
-			goto out_tsk;
-		child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
-		if (request == PTRACE_SYSCALL) {
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		} else {
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		}		
-		child->exit_code = data;
-		goto out_wake_notrap;
-
-	case PTRACE_KILL:
-		/*
-		 * make the child exit.  Best I can do is send it a
-		 * sigkill.  perhaps it should be put in the status
-		 * that it wants to exit.
-		 */
-		ret = 0;
-		DBG("sys_ptrace(KILL)\n");
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			goto out_tsk;
-		child->exit_code = SIGKILL;
-		goto out_wake_notrap;
-
-	case PTRACE_SINGLEBLOCK:
-		DBG("sys_ptrace(SINGLEBLOCK)\n");
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->ptrace &= ~PT_SINGLESTEP;
-		child->ptrace |= PT_BLOCKSTEP;
-		child->exit_code = data;
-
-		/* Enable taken branch trap. */
-		pa_psw(child)->r = 0;
-		pa_psw(child)->t = 1;
-		pa_psw(child)->h = 0;
-		pa_psw(child)->l = 0;
-		goto out_wake;
-
-	case PTRACE_SINGLESTEP:
-		DBG("sys_ptrace(SINGLESTEP)\n");
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->ptrace &= ~PT_BLOCKSTEP;
-		child->ptrace |= PT_SINGLESTEP;
-		child->exit_code = data;
-
-		if (pa_psw(child)->n) {
-			struct siginfo si;
-
-			/* Nullified, just crank over the queue. */
-			task_regs(child)->iaoq[0] = task_regs(child)->iaoq[1];
-			task_regs(child)->iasq[0] = task_regs(child)->iasq[1];
-			task_regs(child)->iaoq[1] = task_regs(child)->iaoq[0] + 4;
-			pa_psw(child)->n = 0;
-			pa_psw(child)->x = 0;
-			pa_psw(child)->y = 0;
-			pa_psw(child)->z = 0;
-			pa_psw(child)->b = 0;
-			ptrace_disable(child);
-			/* Don't wake up the child, but let the
-			   parent know something happened. */
-			si.si_code = TRAP_TRACE;
-			si.si_addr = (void __user *) (task_regs(child)->iaoq[0] & ~3);
-			si.si_signo = SIGTRAP;
-			si.si_errno = 0;
-			force_sig_info(SIGTRAP, &si, child);
-			//notify_parent(child, SIGCHLD);
-			//ret = 0;
-			goto out_wake;
-		}
-
-		/* Enable recovery counter traps.  The recovery counter
-		 * itself will be set to zero on a task switch.  If the
-		 * task is suspended on a syscall then the syscall return
-		 * path will overwrite the recovery counter with a suitable
-		 * value such that it traps once back in user space.  We
-		 * disable interrupts in the childs PSW here also, to avoid
-		 * interrupts while the recovery counter is decrementing.
-		 */
-		pa_psw(child)->r = 1;
-		pa_psw(child)->t = 0;
-		pa_psw(child)->h = 0;
-		pa_psw(child)->l = 0;
-		/* give it a chance to run. */
-		goto out_wake;
-
-	case PTRACE_GETEVENTMSG:
-                ret = put_user(child->ptrace_message, (unsigned int __user *) data);
-		goto out_tsk;
+		break;
 
 	default:
-		ret = ptrace_request(child, request, addr, data);
-		goto out_tsk;
+		ret = compat_ptrace_request(child, request, addr, data);
+		break;
 	}
 
-out_wake_notrap:
-	ptrace_disable(child);
-out_wake:
-	wake_up_process(child);
-	ret = 0;
-out_tsk:
-	DBG("arch_ptrace(%ld, %d, %lx, %lx) returning %ld\n",
-		request, pid, oaddr, odata, ret);
 	return ret;
 }
+#endif
+
 
 void syscall_trace(void)
 {
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 6084667eacf9..4ed01f2d655d 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -87,7 +87,7 @@
 	ENTRY_SAME(setuid)
 	ENTRY_SAME(getuid)
 	ENTRY_COMP(stime)		/* 25 */
-	ENTRY_SAME(ptrace)
+	ENTRY_COMP(ptrace)
 	ENTRY_SAME(alarm)
 	/* see stat comment */
 	ENTRY_COMP(newfstat)
-- 
GitLab


From ecd5b3102322011610a2521c389ab5804c811837 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Sat, 26 Jul 2008 09:17:41 +0100
Subject: [PATCH 613/895] [MTD] mtdoops: Fix an off by one error

Fix an off by one error in the mtdoops driver

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdoops.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 5a680e1e61f1..f40e45727aed 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -99,7 +99,7 @@ static void mtdoops_inc_counter(struct mtdoops_context *cxt)
 	int ret;
 
 	cxt->nextpage++;
-	if (cxt->nextpage > cxt->oops_pages)
+	if (cxt->nextpage >= cxt->oops_pages)
 		cxt->nextpage = 0;
 	cxt->nextcount++;
 	if (cxt->nextcount == 0xffffffff)
@@ -141,7 +141,7 @@ static void mtdoops_workfunc_erase(struct work_struct *work)
 	mod = (cxt->nextpage * OOPS_PAGE_SIZE) % mtd->erasesize;
 	if (mod != 0) {
 		cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / OOPS_PAGE_SIZE);
-		if (cxt->nextpage > cxt->oops_pages)
+		if (cxt->nextpage >= cxt->oops_pages)
 			cxt->nextpage = 0;
 	}
 
@@ -158,7 +158,7 @@ badblock:
 				cxt->nextpage * OOPS_PAGE_SIZE);
 		i++;
 		cxt->nextpage = cxt->nextpage + (mtd->erasesize / OOPS_PAGE_SIZE);
-		if (cxt->nextpage > cxt->oops_pages)
+		if (cxt->nextpage >= cxt->oops_pages)
 			cxt->nextpage = 0;
 		if (i == (cxt->oops_pages / (mtd->erasesize / OOPS_PAGE_SIZE))) {
 			printk(KERN_ERR "mtdoops: All blocks bad!\n");
-- 
GitLab


From f0482ee3669a78bdb1e15b9f9c58a9f1ffc5a997 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Sat, 26 Jul 2008 09:22:45 +0100
Subject: [PATCH 614/895] [MTD] mtdoops: Add a magic number to logged kernel
 oops

Add a magic number to logged kernel oops messages so that they
can be more accurately detected rather than just having to rely
on the sequence number. This also allows easier detection of
saved crashes by userspace.

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdoops.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index f40e45727aed..6f6b2f3c70df 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -33,6 +33,7 @@
 #include <linux/interrupt.h>
 #include <linux/mtd/mtd.h>
 
+#define MTDOOPS_KERNMSG_MAGIC 0x5d005d00
 #define OOPS_PAGE_SIZE 4096
 
 static struct mtdoops_context {
@@ -224,31 +225,33 @@ static void find_next_position(struct mtdoops_context *cxt)
 {
 	struct mtd_info *mtd = cxt->mtd;
 	int ret, page, maxpos = 0;
-	u32 count, maxcount = 0xffffffff;
+	u32 count[2], maxcount = 0xffffffff;
 	size_t retlen;
 
 	for (page = 0; page < cxt->oops_pages; page++) {
-		ret = mtd->read(mtd, page * OOPS_PAGE_SIZE, 4, &retlen, (u_char *) &count);
-		if ((retlen != 4) || ((ret < 0) && (ret != -EUCLEAN))) {
-			printk(KERN_ERR "mtdoops: Read failure at %d (%td of 4 read)"
+		ret = mtd->read(mtd, page * OOPS_PAGE_SIZE, 8, &retlen, (u_char *) &count[0]);
+		if ((retlen != 8) || ((ret < 0) && (ret != -EUCLEAN))) {
+			printk(KERN_ERR "mtdoops: Read failure at %d (%td of 8 read)"
 				", err %d.\n", page * OOPS_PAGE_SIZE, retlen, ret);
 			continue;
 		}
 
-		if (count == 0xffffffff)
+		if (count[1] != MTDOOPS_KERNMSG_MAGIC)
+			continue;
+		if (count[0] == 0xffffffff)
 			continue;
 		if (maxcount == 0xffffffff) {
-			maxcount = count;
+			maxcount = count[0];
 			maxpos = page;
-		} else if ((count < 0x40000000) && (maxcount > 0xc0000000)) {
-			maxcount = count;
+		} else if ((count[0] < 0x40000000) && (maxcount > 0xc0000000)) {
+			maxcount = count[0];
 			maxpos = page;
-		} else if ((count > maxcount) && (count < 0xc0000000)) {
-			maxcount = count;
+		} else if ((count[0] > maxcount) && (count[0] < 0xc0000000)) {
+			maxcount = count[0];
 			maxpos = page;
-		} else if ((count > maxcount) && (count > 0xc0000000)
+		} else if ((count[0] > maxcount) && (count[0] > 0xc0000000)
 					&& (maxcount > 0x80000000)) {
-			maxcount = count;
+			maxcount = count[0];
 			maxpos = page;
 		}
 	}
@@ -358,8 +361,9 @@ mtdoops_console_write(struct console *co, const char *s, unsigned int count)
 
 	if (cxt->writecount == 0) {
 		u32 *stamp = cxt->oops_buf;
-		*stamp = cxt->nextcount;
-		cxt->writecount = 4;
+		*stamp++ = cxt->nextcount;
+		*stamp = MTDOOPS_KERNMSG_MAGIC;
+		cxt->writecount = 8;
 	}
 
 	if ((count + cxt->writecount) > OOPS_PAGE_SIZE)
-- 
GitLab


From 43b5693d404127697d62962def8c1bfe3a89811a Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Sat, 26 Jul 2008 09:25:18 +0100
Subject: [PATCH 615/895] [MTD] mtdoops: Fix a bug where block may not be
 erased

This makes the driver erase a block when it doesn't find any
existing saved log messages which is safer than assuming the
flash was already erased.

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdoops.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 6f6b2f3c70df..aebb3b27edbd 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -258,9 +258,7 @@ static void find_next_position(struct mtdoops_context *cxt)
 	if (maxcount == 0xffffffff) {
 		cxt->nextpage = 0;
 		cxt->nextcount = 1;
-		cxt->ready = 1;
-		printk(KERN_DEBUG "mtdoops: Ready %d, %d (first init)\n",
-				cxt->nextpage, cxt->nextcount);
+		schedule_work(&cxt->work_erase);
 		return;
 	}
 
-- 
GitLab


From 5bf1723723487ddb0b9c9641b6559da96b27cc93 Mon Sep 17 00:00:00 2001
From: Alexander Belyakov <abelyako@mail.ru>
Date: Fri, 17 Oct 2008 19:19:13 +0400
Subject: [PATCH 616/895] [JFFS2] Write buffer offset adjustment for NOR-ECC
 (Sibley) flash

After choosing new c->nextblock, don't leave the wbuf offset field
occasionally pointing at the start of the next physical eraseblock.
This was causing a BUG() on NOR-ECC (Sibley) flash, where we start
writing after the cleanmarker.

Among other this fix should cover write buffer offset adjustment
after flushing the last page of an eraseblock.

Signed-off-by: Alexander Belyakov <abelyako@googlemail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 fs/jffs2/nodemgmt.c | 4 ++++
 fs/jffs2/wbuf.c     | 5 +----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index a9bf9603c1ba..0875b60b4bf7 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -261,6 +261,10 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
 
 	jffs2_sum_reset_collected(c->summary); /* reset collected summary */
 
+	/* adjust write buffer offset, else we get a non contiguous write bug */
+	if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len)
+		c->wbuf_ofs = 0xffffffff;
+
 	D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset));
 
 	return 0;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 0e78b00035e4..d9a721e6db70 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -679,10 +679,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
 
 	memset(c->wbuf,0xff,c->wbuf_pagesize);
 	/* adjust write buffer offset, else we get a non contiguous write bug */
-	if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize))
-		c->wbuf_ofs += c->wbuf_pagesize;
-	else
-		c->wbuf_ofs = 0xffffffff;
+	c->wbuf_ofs += c->wbuf_pagesize;
 	c->wbuf_len = 0;
 	return 0;
 }
-- 
GitLab


From 6b8520296d67622dfa225436ef325f7dfd6ece8e Mon Sep 17 00:00:00 2001
From: Manish Katiyar <mkatiyar@gmail.com>
Date: Tue, 14 Oct 2008 23:43:29 +0530
Subject: [PATCH 617/895] [MTD] [NAND] Fix compilation warnings in
 drivers/mtd/nand/cs553x_nand.c

Below patch fixes the following compilation warnings.
drivers/mtd/nand/cs553x_nand.c:293: warning: unused variable 'mtd_parts'
drivers/mtd/nand/cs553x_nand.c:292: warning: unused variable 'mtd_parts_nb'

Signed-off-by: Manish Katiyar <mkatiyar@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/cs553x_nand.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mtd/nand/cs553x_nand.c b/drivers/mtd/nand/cs553x_nand.c
index 3370a800fd36..9f1b451005ca 100644
--- a/drivers/mtd/nand/cs553x_nand.c
+++ b/drivers/mtd/nand/cs553x_nand.c
@@ -289,8 +289,10 @@ static int __init cs553x_init(void)
 	int i;
 	uint64_t val;
 
+#ifdef CONFIG_MTD_PARTITIONS
 	int mtd_parts_nb = 0;
 	struct mtd_partition *mtd_parts = NULL;
+#endif
 
 	/* If the CPU isn't a Geode GX or LX, abort */
 	if (!is_geode())
-- 
GitLab


From 87e92c062b19eea6054532f8143a91242f104a6f Mon Sep 17 00:00:00 2001
From: Christopher Moore <moore@free.fr>
Date: Fri, 17 Oct 2008 05:32:22 +0200
Subject: [PATCH 618/895] [MTD] cfi_cmdset_0002.c: Add Macronix CFI V1.0
 TopBottom detection

This patch adds TopBottom detection for most Macronix chips with CFI V1.0.

The main purpose of this patch is to add detection of the MX29LV400C B
used on the LaCie Ethernet Disk mini V2 NAS.

It detects the following parts correctly:-
MX28F640C3B T
MX29LV002C  B
MX29LV002NC B
MX29LV004C  T
MX29LV400C  T/B
MX29LV800C  T/B
MX29LV160C  T/B
MX29SL800C  T/B
MX29SL802C  T/B

It detects the following uniform part as bottom but it should work
correctly:-
MX29LV040C

For T parts it causes the erase block table to be reversed correctly.
For other parts it avoids the bogus "Assuming top" message.

It does not detect the following correctly:-
MX28F640C3B B
MX29LV002C  T
MX29LV002NC T
MX29LV004C  B
MX29SL400C  T/B
MX29SL402C  T/B

If desired I could supply a more complicated patch to handle these as
well.

Only the MX29LV400C B has been physically tested; others were checked
against their data sheets.

Signed-off-by: Christopher Moore <moore@free.fr>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 36 +++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index a972cc6be436..db16b7b0723f 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -13,6 +13,8 @@
  * XIP support hooks by Vitaly Wool (based on code for Intel flash
  * by Nicolas Pitre)
  *
+ * 25/09/2008 Christopher Moore: TopBottom fixup for many Macronix with CFI V1.0
+ *
  * Occasionally maintained by Thayne Harbaugh tharbaugh at lnxi dot com
  *
  * This code is GPL
@@ -43,6 +45,7 @@
 
 #define MANUFACTURER_AMD	0x0001
 #define MANUFACTURER_ATMEL	0x001F
+#define MANUFACTURER_MACRONIX	0x00C2
 #define MANUFACTURER_SST	0x00BF
 #define SST49LF004B	        0x0060
 #define SST49LF040B	        0x0050
@@ -144,12 +147,44 @@ static void fixup_amd_bootblock(struct mtd_info *mtd, void* param)
 
 	if (((major << 8) | minor) < 0x3131) {
 		/* CFI version 1.0 => don't trust bootloc */
+
+		DEBUG(MTD_DEBUG_LEVEL1,
+			"%s: JEDEC Vendor ID is 0x%02X Device ID is 0x%02X\n",
+			map->name, cfi->mfr, cfi->id);
+
+		/* AFAICS all 29LV400 with a bottom boot block have a device ID
+		 * of 0x22BA in 16-bit mode and 0xBA in 8-bit mode.
+		 * These were badly detected as they have the 0x80 bit set
+		 * so treat them as a special case.
+		 */
+		if (((cfi->id == 0xBA) || (cfi->id == 0x22BA)) &&
+
+			/* Macronix added CFI to their 2nd generation
+			 * MX29LV400C B/T but AFAICS no other 29LV400 (AMD,
+			 * Fujitsu, Spansion, EON, ESI and older Macronix)
+			 * has CFI.
+			 *
+			 * Therefore also check the manufacturer.
+			 * This reduces the risk of false detection due to
+			 * the 8-bit device ID.
+			 */
+			(cfi->mfr == MANUFACTURER_MACRONIX)) {
+			DEBUG(MTD_DEBUG_LEVEL1,
+				"%s: Macronix MX29LV400C with bottom boot block"
+				" detected\n", map->name);
+			extp->TopBottom = 2;	/* bottom boot */
+		} else
 		if (cfi->id & 0x80) {
 			printk(KERN_WARNING "%s: JEDEC Device ID is 0x%02X. Assuming broken CFI table.\n", map->name, cfi->id);
 			extp->TopBottom = 3;	/* top boot */
 		} else {
 			extp->TopBottom = 2;	/* bottom boot */
 		}
+
+		DEBUG(MTD_DEBUG_LEVEL1,
+			"%s: AMD CFI PRI V%c.%c has no boot block field;"
+			" deduced %s from Device ID\n", map->name, major, minor,
+			extp->TopBottom == 2 ? "bottom" : "top");
 	}
 }
 #endif
@@ -243,6 +278,7 @@ static struct cfi_fixup cfi_fixup_table[] = {
 	{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL },
 #ifdef AMD_BOOTLOC_BUG
 	{ CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL },
+	{ MANUFACTURER_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock, NULL },
 #endif
 	{ CFI_MFR_AMD, 0x0050, fixup_use_secsi, NULL, },
 	{ CFI_MFR_AMD, 0x0053, fixup_use_secsi, NULL, },
-- 
GitLab


From a0ee24a03b1c06813c814b9f70946c8984752f01 Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@yahoo.com>
Date: Wed, 8 Oct 2008 16:32:11 -0700
Subject: [PATCH 619/895] [MTD] cmdlineparts documentation change - explain
 where mtd-id comes from

Signed-off-by: Philip Rakity <prakity@yahoo.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/cmdlinepart.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c
index 71bc07f149b7..50a340388e74 100644
--- a/drivers/mtd/cmdlinepart.c
+++ b/drivers/mtd/cmdlinepart.c
@@ -7,6 +7,7 @@
  *
  * mtdparts=<mtddef>[;<mtddef]
  * <mtddef>  := <mtd-id>:<partdef>[,<partdef>]
+ *              where <mtd-id> is the name from the "cat /proc/mtd" command
  * <partdef> := <size>[@offset][<name>][ro][lk]
  * <mtd-id>  := unique name used in mapping driver/device (mtd->name)
  * <size>    := standard linux memsize OR "-" to denote all remaining space
-- 
GitLab


From aaf7ea20000436df3cbb397ccb734ad1e2e5164d Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Wed, 15 Oct 2008 08:38:49 +0200
Subject: [PATCH 620/895] [MTD] [NAND] GPIO NAND flash driver

The patch adds support for NAND flashes connected to GPIOs.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/Kconfig      |   6 +
 drivers/mtd/nand/Makefile     |   1 +
 drivers/mtd/nand/gpio.c       | 375 ++++++++++++++++++++++++++++++++++
 include/linux/mtd/nand-gpio.h |  19 ++
 4 files changed, 401 insertions(+)
 create mode 100644 drivers/mtd/nand/gpio.c
 create mode 100644 include/linux/mtd/nand-gpio.h

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 89b4d39386ab..b9eed9925462 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -56,6 +56,12 @@ config MTD_NAND_H1900
 	help
 	  This enables the driver for the iPAQ h1900 flash.
 
+config MTD_NAND_GPIO
+	tristate "GPIO NAND Flash driver"
+	depends on GENERIC_GPIO
+	help
+	  This enables a GPIO based NAND flash driver.
+
 config MTD_NAND_SPIA
 	tristate "NAND Flash device on SPIA board"
 	depends on ARCH_P720T
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 9bfeca324b32..b661586afbfc 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_MTD_NAND_NANDSIM)		+= nandsim.o
 obj-$(CONFIG_MTD_NAND_CS553X)		+= cs553x_nand.o
 obj-$(CONFIG_MTD_NAND_NDFC)		+= ndfc.o
 obj-$(CONFIG_MTD_NAND_ATMEL)		+= atmel_nand.o
+obj-$(CONFIG_MTD_NAND_GPIO)		+= gpio.o
 obj-$(CONFIG_MTD_NAND_CM_X270)		+= cmx270_nand.o
 obj-$(CONFIG_MTD_NAND_BASLER_EXCITE)	+= excite_nandflash.o
 obj-$(CONFIG_MTD_NAND_PXA3xx)		+= pxa3xx_nand.o
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
new file mode 100644
index 000000000000..8f902e75aa85
--- /dev/null
+++ b/drivers/mtd/nand/gpio.c
@@ -0,0 +1,375 @@
+/*
+ * drivers/mtd/nand/gpio.c
+ *
+ * Updated, and converted to generic GPIO based driver by Russell King.
+ *
+ * Written by Ben Dooks <ben@simtec.co.uk>
+ *   Based on 2.4 version by Mark Whittaker
+ *
+ * Â© 2004 Simtec Electronics
+ *
+ * Device driver for NAND connected via GPIO
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/nand-gpio.h>
+
+struct gpiomtd {
+	void __iomem		*io_sync;
+	struct mtd_info		mtd_info;
+	struct nand_chip	nand_chip;
+	struct gpio_nand_platdata plat;
+};
+
+#define gpio_nand_getpriv(x) container_of(x, struct gpiomtd, mtd_info)
+
+
+#ifdef CONFIG_ARM
+/* gpio_nand_dosync()
+ *
+ * Make sure the GPIO state changes occur in-order with writes to NAND
+ * memory region.
+ * Needed on PXA due to bus-reordering within the SoC itself (see section on
+ * I/O ordering in PXA manual (section 2.3, p35)
+ */
+static void gpio_nand_dosync(struct gpiomtd *gpiomtd)
+{
+	unsigned long tmp;
+
+	if (gpiomtd->io_sync) {
+		/*
+		 * Linux memory barriers don't cater for what's required here.
+		 * What's required is what's here - a read from a separate
+		 * region with a dependency on that read.
+		 */
+		tmp = readl(gpiomtd->io_sync);
+		asm volatile("mov %1, %0\n" : "=r" (tmp) : "r" (tmp));
+	}
+}
+#else
+static inline void gpio_nand_dosync(struct gpiomtd *gpiomtd) {}
+#endif
+
+static void gpio_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+{
+	struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
+
+	gpio_nand_dosync(gpiomtd);
+
+	if (ctrl & NAND_CTRL_CHANGE) {
+		gpio_set_value(gpiomtd->plat.gpio_nce, !(ctrl & NAND_NCE));
+		gpio_set_value(gpiomtd->plat.gpio_cle, !!(ctrl & NAND_CLE));
+		gpio_set_value(gpiomtd->plat.gpio_ale, !!(ctrl & NAND_ALE));
+		gpio_nand_dosync(gpiomtd);
+	}
+	if (cmd == NAND_CMD_NONE)
+		return;
+
+	writeb(cmd, gpiomtd->nand_chip.IO_ADDR_W);
+	gpio_nand_dosync(gpiomtd);
+}
+
+static void gpio_nand_writebuf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+	struct nand_chip *this = mtd->priv;
+
+	writesb(this->IO_ADDR_W, buf, len);
+}
+
+static void gpio_nand_readbuf(struct mtd_info *mtd, u_char *buf, int len)
+{
+	struct nand_chip *this = mtd->priv;
+
+	readsb(this->IO_ADDR_R, buf, len);
+}
+
+static int gpio_nand_verifybuf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+	struct nand_chip *this = mtd->priv;
+	unsigned char read, *p = (unsigned char *) buf;
+	int i, err = 0;
+
+	for (i = 0; i < len; i++) {
+		read = readb(this->IO_ADDR_R);
+		if (read != p[i]) {
+			pr_debug("%s: err at %d (read %04x vs %04x)\n",
+			       __func__, i, read, p[i]);
+			err = -EFAULT;
+		}
+	}
+	return err;
+}
+
+static void gpio_nand_writebuf16(struct mtd_info *mtd, const u_char *buf,
+				 int len)
+{
+	struct nand_chip *this = mtd->priv;
+
+	if (IS_ALIGNED((unsigned long)buf, 2)) {
+		writesw(this->IO_ADDR_W, buf, len>>1);
+	} else {
+		int i;
+		unsigned short *ptr = (unsigned short *)buf;
+
+		for (i = 0; i < len; i += 2, ptr++)
+			writew(*ptr, this->IO_ADDR_W);
+	}
+}
+
+static void gpio_nand_readbuf16(struct mtd_info *mtd, u_char *buf, int len)
+{
+	struct nand_chip *this = mtd->priv;
+
+	if (IS_ALIGNED((unsigned long)buf, 2)) {
+		readsw(this->IO_ADDR_R, buf, len>>1);
+	} else {
+		int i;
+		unsigned short *ptr = (unsigned short *)buf;
+
+		for (i = 0; i < len; i += 2, ptr++)
+			*ptr = readw(this->IO_ADDR_R);
+	}
+}
+
+static int gpio_nand_verifybuf16(struct mtd_info *mtd, const u_char *buf,
+				 int len)
+{
+	struct nand_chip *this = mtd->priv;
+	unsigned short read, *p = (unsigned short *) buf;
+	int i, err = 0;
+	len >>= 1;
+
+	for (i = 0; i < len; i++) {
+		read = readw(this->IO_ADDR_R);
+		if (read != p[i]) {
+			pr_debug("%s: err at %d (read %04x vs %04x)\n",
+			       __func__, i, read, p[i]);
+			err = -EFAULT;
+		}
+	}
+	return err;
+}
+
+
+static int gpio_nand_devready(struct mtd_info *mtd)
+{
+	struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
+	return gpio_get_value(gpiomtd->plat.gpio_rdy);
+}
+
+static int __devexit gpio_nand_remove(struct platform_device *dev)
+{
+	struct gpiomtd *gpiomtd = platform_get_drvdata(dev);
+	struct resource *res;
+
+	nand_release(&gpiomtd->mtd_info);
+
+	res = platform_get_resource(dev, IORESOURCE_MEM, 1);
+	iounmap(gpiomtd->io_sync);
+	if (res)
+		release_mem_region(res->start, res->end - res->start + 1);
+
+	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	iounmap(gpiomtd->nand_chip.IO_ADDR_R);
+	release_mem_region(res->start, res->end - res->start + 1);
+
+	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+		gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
+	gpio_set_value(gpiomtd->plat.gpio_nce, 1);
+
+	gpio_free(gpiomtd->plat.gpio_cle);
+	gpio_free(gpiomtd->plat.gpio_ale);
+	gpio_free(gpiomtd->plat.gpio_nce);
+	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+		gpio_free(gpiomtd->plat.gpio_nwp);
+	gpio_free(gpiomtd->plat.gpio_rdy);
+
+	kfree(gpiomtd);
+
+	return 0;
+}
+
+static void __iomem *request_and_remap(struct resource *res, size_t size,
+					const char *name, int *err)
+{
+	void __iomem *ptr;
+
+	if (!request_mem_region(res->start, res->end - res->start + 1, name)) {
+		*err = -EBUSY;
+		return NULL;
+	}
+
+	ptr = ioremap(res->start, size);
+	if (!ptr) {
+		release_mem_region(res->start, res->end - res->start + 1);
+		*err = -ENOMEM;
+	}
+	return ptr;
+}
+
+static int __devinit gpio_nand_probe(struct platform_device *dev)
+{
+	struct gpiomtd *gpiomtd;
+	struct nand_chip *this;
+	struct resource *res0, *res1;
+	int ret;
+
+	if (!dev->dev.platform_data)
+		return -EINVAL;
+
+	res0 = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!res0)
+		return -EINVAL;
+
+	gpiomtd = kzalloc(sizeof(*gpiomtd), GFP_KERNEL);
+	if (gpiomtd == NULL) {
+		dev_err(&dev->dev, "failed to create NAND MTD\n");
+		return -ENOMEM;
+	}
+
+	this = &gpiomtd->nand_chip;
+	this->IO_ADDR_R = request_and_remap(res0, 2, "NAND", &ret);
+	if (!this->IO_ADDR_R) {
+		dev_err(&dev->dev, "unable to map NAND\n");
+		goto err_map;
+	}
+
+	res1 = platform_get_resource(dev, IORESOURCE_MEM, 1);
+	if (res1) {
+		gpiomtd->io_sync = request_and_remap(res1, 4, "NAND sync", &ret);
+		if (!gpiomtd->io_sync) {
+			dev_err(&dev->dev, "unable to map sync NAND\n");
+			goto err_sync;
+		}
+	}
+
+	memcpy(&gpiomtd->plat, dev->dev.platform_data, sizeof(gpiomtd->plat));
+
+	ret = gpio_request(gpiomtd->plat.gpio_nce, "NAND NCE");
+	if (ret)
+		goto err_nce;
+	gpio_direction_output(gpiomtd->plat.gpio_nce, 1);
+	if (gpio_is_valid(gpiomtd->plat.gpio_nwp)) {
+		ret = gpio_request(gpiomtd->plat.gpio_nwp, "NAND NWP");
+		if (ret)
+			goto err_nwp;
+		gpio_direction_output(gpiomtd->plat.gpio_nwp, 1);
+	}
+	ret = gpio_request(gpiomtd->plat.gpio_ale, "NAND ALE");
+	if (ret)
+		goto err_ale;
+	gpio_direction_output(gpiomtd->plat.gpio_ale, 0);
+	ret = gpio_request(gpiomtd->plat.gpio_cle, "NAND CLE");
+	if (ret)
+		goto err_cle;
+	gpio_direction_output(gpiomtd->plat.gpio_cle, 0);
+	ret = gpio_request(gpiomtd->plat.gpio_rdy, "NAND RDY");
+	if (ret)
+		goto err_rdy;
+	gpio_direction_input(gpiomtd->plat.gpio_rdy);
+
+
+	this->IO_ADDR_W  = this->IO_ADDR_R;
+	this->ecc.mode   = NAND_ECC_SOFT;
+	this->options    = gpiomtd->plat.options;
+	this->chip_delay = gpiomtd->plat.chip_delay;
+
+	/* install our routines */
+	this->cmd_ctrl   = gpio_nand_cmd_ctrl;
+	this->dev_ready  = gpio_nand_devready;
+
+	if (this->options & NAND_BUSWIDTH_16) {
+		this->read_buf   = gpio_nand_readbuf16;
+		this->write_buf  = gpio_nand_writebuf16;
+		this->verify_buf = gpio_nand_verifybuf16;
+	} else {
+		this->read_buf   = gpio_nand_readbuf;
+		this->write_buf  = gpio_nand_writebuf;
+		this->verify_buf = gpio_nand_verifybuf;
+	}
+
+	/* set the mtd private data for the nand driver */
+	gpiomtd->mtd_info.priv = this;
+	gpiomtd->mtd_info.owner = THIS_MODULE;
+
+	if (nand_scan(&gpiomtd->mtd_info, 1)) {
+		dev_err(&dev->dev, "no nand chips found?\n");
+		ret = -ENXIO;
+		goto err_wp;
+	}
+
+	if (gpiomtd->plat.adjust_parts)
+		gpiomtd->plat.adjust_parts(&gpiomtd->plat,
+					   gpiomtd->mtd_info.size);
+
+	add_mtd_partitions(&gpiomtd->mtd_info, gpiomtd->plat.parts,
+			   gpiomtd->plat.num_parts);
+	platform_set_drvdata(dev, gpiomtd);
+
+	return 0;
+
+err_wp:
+	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+		gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
+	gpio_free(gpiomtd->plat.gpio_rdy);
+err_rdy:
+	gpio_free(gpiomtd->plat.gpio_cle);
+err_cle:
+	gpio_free(gpiomtd->plat.gpio_ale);
+err_ale:
+	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+		gpio_free(gpiomtd->plat.gpio_nwp);
+err_nwp:
+	gpio_free(gpiomtd->plat.gpio_nce);
+err_nce:
+	iounmap(gpiomtd->io_sync);
+	if (res1)
+		release_mem_region(res1->start, res1->end - res1->start + 1);
+err_sync:
+	iounmap(gpiomtd->nand_chip.IO_ADDR_R);
+	release_mem_region(res0->start, res0->end - res0->start + 1);
+err_map:
+	kfree(gpiomtd);
+	return ret;
+}
+
+static struct platform_driver gpio_nand_driver = {
+	.probe		= gpio_nand_probe,
+	.remove		= gpio_nand_remove,
+	.driver		= {
+		.name	= "gpio-nand",
+	},
+};
+
+static int __init gpio_nand_init(void)
+{
+	printk(KERN_INFO "GPIO NAND driver, Â© 2004 Simtec Electronics\n");
+
+	return platform_driver_register(&gpio_nand_driver);
+}
+
+static void __exit gpio_nand_exit(void)
+{
+	platform_driver_unregister(&gpio_nand_driver);
+}
+
+module_init(gpio_nand_init);
+module_exit(gpio_nand_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
+MODULE_DESCRIPTION("GPIO NAND Driver");
diff --git a/include/linux/mtd/nand-gpio.h b/include/linux/mtd/nand-gpio.h
new file mode 100644
index 000000000000..51534e50f7fc
--- /dev/null
+++ b/include/linux/mtd/nand-gpio.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_MTD_NAND_GPIO_H
+#define __LINUX_MTD_NAND_GPIO_H
+
+#include <linux/mtd/nand.h>
+
+struct gpio_nand_platdata {
+	int	gpio_nce;
+	int	gpio_nwp;
+	int	gpio_cle;
+	int	gpio_ale;
+	int	gpio_rdy;
+	void	(*adjust_parts)(struct gpio_nand_platdata *, size_t);
+	struct mtd_partition *parts;
+	unsigned int num_parts;
+	unsigned int options;
+	int	chip_delay;
+};
+
+#endif
-- 
GitLab


From be8f78b8e8b5bcafc19ac85b815e98049aa86314 Mon Sep 17 00:00:00 2001
From: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Date: Tue, 30 Sep 2008 13:55:33 +0200
Subject: [PATCH 621/895] [MTD] [NOR] AT49BV6416 has swapped erase regions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The CFI information read from AT49BV6416 lists the erase regions in the
wrong order, causing problems when trying to erase or update the first
or last 64KiB block.

Work around this by inverting the "top boot" flag, which will
effectively reverse the order of the erase regions.

This chip is obsolete, but it's used in some existing designs.

Signed-off-by: HÃ¥vard Skinnemoen <haavard.skinnemoen@atmel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index db16b7b0723f..3e6f5d8609e8 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -213,10 +213,18 @@ static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
 	if (atmel_pri.Features & 0x02)
 		extp->EraseSuspend = 2;
 
-	if (atmel_pri.BottomBoot)
-		extp->TopBottom = 2;
-	else
-		extp->TopBottom = 3;
+	/* Some chips got it backwards... */
+	if (cfi->id == AT49BV6416) {
+		if (atmel_pri.BottomBoot)
+			extp->TopBottom = 3;
+		else
+			extp->TopBottom = 2;
+	} else {
+		if (atmel_pri.BottomBoot)
+			extp->TopBottom = 2;
+		else
+			extp->TopBottom = 3;
+	}
 
 	/* burst write mode not supported */
 	cfi->cfiq->BufWriteTimeoutTyp = 0;
-- 
GitLab


From dd3a1db900f2a215a7d7dd71b836e149a6cf5fed Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 16 Oct 2008 18:20:58 +0200
Subject: [PATCH 622/895] genirq: improve include files

Move the irq_desc related iterators out of irq.h, into irqnr.h, also
available via interrupt.h.

This way non-genirq (and even non-hardirq) architectures get the
common definitions and iterators.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/interrupt.h |  1 +
 include/linux/irq.h       | 20 +-------------------
 include/linux/irqnr.h     | 24 ++++++++++++++++++++++++
 3 files changed, 26 insertions(+), 19 deletions(-)
 create mode 100644 include/linux/irqnr.h

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 58ff4e74b2f3..72fcfcff5637 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -8,6 +8,7 @@
 #include <linux/preempt.h>
 #include <linux/cpumask.h>
 #include <linux/irqreturn.h>
+#include <linux/irqnr.h>
 #include <linux/hardirq.h>
 #include <linux/sched.h>
 #include <linux/irqflags.h>
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 0618fb362cb4..d058c57be02d 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -11,25 +11,6 @@
 
 #include <linux/smp.h>
 
-#ifndef CONFIG_GENERIC_HARDIRQS
-# define nr_irqs		NR_IRQS
-
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0; irq < nr_irqs; irq++)
-#else
-extern int nr_irqs;
-
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
-
-# define for_each_irq_desc_reverse(irq, desc)			\
-	for (irq = nr_irqs -1, desc = irq_desc + (nr_irqs -1 );	\
-	     irq > 0; irq--, desc--)
-#endif
-
-#define for_each_irq_nr(irq)			\
-	for (irq = 0; irq < nr_irqs; irq++)
-
 #ifndef CONFIG_S390
 
 #include <linux/linkage.h>
@@ -37,6 +18,7 @@ extern int nr_irqs;
 #include <linux/spinlock.h>
 #include <linux/cpumask.h>
 #include <linux/irqreturn.h>
+#include <linux/irqnr.h>
 #include <linux/errno.h>
 
 #include <asm/irq.h>
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
new file mode 100644
index 000000000000..3171ddc3b39d
--- /dev/null
+++ b/include/linux/irqnr.h
@@ -0,0 +1,24 @@
+#ifndef _LINUX_IRQNR_H
+#define _LINUX_IRQNR_H
+
+#ifndef CONFIG_GENERIC_HARDIRQS
+#include <asm/irq.h>
+# define nr_irqs		NR_IRQS
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0; irq < nr_irqs; irq++)
+#else
+extern int nr_irqs;
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+
+# define for_each_irq_desc_reverse(irq, desc)			\
+	for (irq = nr_irqs -1, desc = irq_desc + (nr_irqs -1 );	\
+	     irq > 0; irq--, desc--)
+#endif
+
+#define for_each_irq_nr(irq)			\
+	for (irq = 0; irq < nr_irqs; irq++)
+
+#endif
-- 
GitLab


From 727d2dc045930b29dc68d56d5032d23661ba8503 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Fri, 17 Oct 2008 16:52:10 +0300
Subject: [PATCH 623/895] UBIFS: do not read unnecessary bytes when unpacking
 bits

Fixes the following Oops:

BUG: unable to handle kernel paging request at f8d24000
IP: [<f8ff0657>] :ubifs:ubifs_unpack_bits+0xcd/0x231
*pde = 34333067 *pte = 00000000
Oops: 0000 [#1] PREEMPT SMP
Modules linked in: deflate zlib_deflate lzo lzo_decompress lzo_compress
ubifs ubi nandsim nand nand_ids nand_ecc mtd nfsd lockd sunrpc exportfs
[last unloaded: nand_ecc]

Pid: 7450, comm: sync Not tainted (2.6.27-rc8-ubifs-2.6 #27)
EIP: 0060:[<f8ff0657>] EFLAGS: 00010206 CPU: 0
EIP is at ubifs_unpack_bits+0xcd/0x231 [ubifs]
EAX: 00000000 EBX: 00000000 ECX: d7e43dc0 EDX: 0000ff00
ESI: 00000004 EDI: f8d23ffe EBP: d7e43db4 ESP: d7e43d8c
DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process sync (pid: 7450, ti=d7e42000 task=eb6f9530 task.ti=d7e42000)
Stack: 00000400 c0103db4 dc5e8090 d7e43dc0 d7e43dc0 d7e43dc4 0000001c 00000004
      f496d1e0 f8d23ffc d7e43dd4 f8ffac7e f8d23ffe 00000000 f8d23ffe f2b7af68
      f496d1e0 f8d23ffc d7e43e2c f8ffadc5 00000000 0001f000 00000000 c03b10a7
Call Trace:
[<c0103db4>] ? restore_nocheck_notrace+0x0/0xe
[<f8ffac7e>] ? is_a_node+0x43/0x92 [ubifs]
[<f8ffadc5>] ? dbg_check_ltab+0xf8/0x5c9 [ubifs]
[<c03b10a7>] ? mutex_lock_nested+0x1b2/0x2a0
[<f8ffc86e>] ? ubifs_lpt_start_commit+0x49/0xecb [ubifs]
[<c03b0ef3>] ? mutex_unlock+0xd/0xf
[<f8fef017>] ? ubifs_tnc_start_commit+0x1cf/0xef8 [ubifs]
[<f8fe65d8>] ? do_commit+0x18f/0x52d [ubifs]
[<f8fe69f6>] ? ubifs_run_commit+0x80/0xca [ubifs]
[<f8fd8d35>] ? ubifs_sync_fs+0xdb/0xf6 [ubifs]
[<c0181a07>] ? sync_filesystems+0xc6/0x10c
[<c019f279>] ? do_sync+0x3b/0x6a
[<c019f2ba>] ? sys_sync+0x12/0x18
[<c0103ced>] ? sysenter_do_call+0x12/0x35
=======================
Code: 4d ec 89 01 8b 45 e8 89 10 89 d8 89 f1 d3 e8 85 c0 74 07 29 d6 83 fe
20 75 2a 89 d8 83 c4 1c 5b 5e 5f 5d c3 0f b6 57 01 c1 e2 08 <0f> b6 47 02
c1 e0 10 09 c2 0f b6 07 09 c2 0f b
EIP: [<f8ff0657>] ubifs_unpack_bits+0xcd/0x231 [ubifs] SS:ESP 0068:d7e43d8c
---[ end trace 1bbb4c407a6dd816 ]---

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
---
 fs/ubifs/lpt.c | 45 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index cd11b23a1875..db8bd0e518b2 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -288,25 +288,56 @@ uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
 	const int k = 32 - nrbits;
 	uint8_t *p = *addr;
 	int b = *pos;
-	uint32_t val;
+	uint32_t uninitialized_var(val);
+	const int bytes = (nrbits + b + 7) >> 3;
 
 	ubifs_assert(nrbits > 0);
 	ubifs_assert(nrbits <= 32);
 	ubifs_assert(*pos >= 0);
 	ubifs_assert(*pos < 8);
 	if (b) {
-		val = p[1] | ((uint32_t)p[2] << 8) | ((uint32_t)p[3] << 16) |
-		      ((uint32_t)p[4] << 24);
+		switch (bytes) {
+		case 2:
+			val = p[1];
+			break;
+		case 3:
+			val = p[1] | ((uint32_t)p[2] << 8);
+			break;
+		case 4:
+			val = p[1] | ((uint32_t)p[2] << 8) |
+				     ((uint32_t)p[3] << 16);
+			break;
+		case 5:
+			val = p[1] | ((uint32_t)p[2] << 8) |
+				     ((uint32_t)p[3] << 16) |
+				     ((uint32_t)p[4] << 24);
+		}
 		val <<= (8 - b);
 		val |= *p >> b;
 		nrbits += b;
-	} else
-		val = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
-		      ((uint32_t)p[3] << 24);
+	} else {
+		switch (bytes) {
+		case 1:
+			val = p[0];
+			break;
+		case 2:
+			val = p[0] | ((uint32_t)p[1] << 8);
+			break;
+		case 3:
+			val = p[0] | ((uint32_t)p[1] << 8) |
+				     ((uint32_t)p[2] << 16);
+			break;
+		case 4:
+			val = p[0] | ((uint32_t)p[1] << 8) |
+				     ((uint32_t)p[2] << 16) |
+				     ((uint32_t)p[3] << 24);
+			break;
+		}
+	}
 	val <<= k;
 	val >>= k;
 	b = nrbits & 7;
-	p += nrbits / 8;
+	p += nrbits >> 3;
 	*addr = p;
 	*pos = b;
 	ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32);
-- 
GitLab


From fae7fb299f382355145c6cb4eec6aa84f4cd1fca Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 17 Oct 2008 18:49:23 +0300
Subject: [PATCH 624/895] UBIFS: amend printk

It is better to print "Reserved for root" than
"Reserved pool size", because it is more obvious for users
what this means.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 7fd759dde796..046bf0879235 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1217,7 +1217,7 @@ static int mount_ubifs(struct ubifs_info *c)
 	ubifs_msg("media format:       %d (latest is %d)",
 		  c->fmt_version, UBIFS_FORMAT_VERSION);
 	ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
-	ubifs_msg("reserved pool size: %llu bytes (%llu KiB)",
+	ubifs_msg("reserved for root:  %llu bytes (%llu KiB)",
 		c->report_rp_size, c->report_rp_size >> 10);
 
 	dbg_msg("compiled on:         " __DATE__ " at " __TIME__);
-- 
GitLab


From 54779aabb0183bbe049d2b52e96cd148366dfb0b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Fri, 17 Oct 2008 16:16:10 +0200
Subject: [PATCH 625/895] UBIFS: fix ubifs_compress commentary

Update the comment for ubifs_compress(), which incorrectly states that it
returnsa success/failure indicator.

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/compress.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 5bb51dac3c16..a0ada596b17c 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -91,8 +91,6 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
  *
  * Note, if the input buffer was not compressed, it is copied to the output
  * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
- *
- * This functions returns %0 on success or a negative error code on failure.
  */
 void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
 		    int *compr_type)
-- 
GitLab


From d9d332e0874f46b91d8ac4604b68ee42b8a7a2c6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 19 Oct 2008 10:32:20 -0700
Subject: [PATCH 626/895] anon_vma_prepare: properly lock even newly allocated
 entries

The anon_vma code is very subtle, and we end up doing optimistic lookups
of anon_vmas under RCU in page_lock_anon_vma() with no locking.  Other
CPU's can also see the newly allocated entry immediately after we've
exposed it by setting "vma->anon_vma" to the new value.

We protect against the anon_vma being destroyed by having the SLAB
marked as SLAB_DESTROY_BY_RCU, so the RCU lookup can depend on the
allocation not being destroyed - but it might still be free'd and
re-allocated here to a new vma.

As a result, we should not do the anon_vma list ops on a newly allocated
vma without proper locking.

Acked-by: Nick Piggin <npiggin@suse.de>
Acked-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/rmap.c | 42 ++++++++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 0383acfcb068..e8d639b16c6d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -55,7 +55,33 @@
 
 struct kmem_cache *anon_vma_cachep;
 
-/* This must be called under the mmap_sem. */
+/**
+ * anon_vma_prepare - attach an anon_vma to a memory region
+ * @vma: the memory region in question
+ *
+ * This makes sure the memory mapping described by 'vma' has
+ * an 'anon_vma' attached to it, so that we can associate the
+ * anonymous pages mapped into it with that anon_vma.
+ *
+ * The common case will be that we already have one, but if
+ * if not we either need to find an adjacent mapping that we
+ * can re-use the anon_vma from (very common when the only
+ * reason for splitting a vma has been mprotect()), or we
+ * allocate a new one.
+ *
+ * Anon-vma allocations are very subtle, because we may have
+ * optimistically looked up an anon_vma in page_lock_anon_vma()
+ * and that may actually touch the spinlock even in the newly
+ * allocated vma (it depends on RCU to make sure that the
+ * anon_vma isn't actually destroyed).
+ *
+ * As a result, we need to do proper anon_vma locking even
+ * for the new allocation. At the same time, we do not want
+ * to do any locking for the common case of already having
+ * an anon_vma.
+ *
+ * This must be called with the mmap_sem held for reading.
+ */
 int anon_vma_prepare(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
@@ -63,20 +89,17 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 	might_sleep();
 	if (unlikely(!anon_vma)) {
 		struct mm_struct *mm = vma->vm_mm;
-		struct anon_vma *allocated, *locked;
+		struct anon_vma *allocated;
 
 		anon_vma = find_mergeable_anon_vma(vma);
-		if (anon_vma) {
-			allocated = NULL;
-			locked = anon_vma;
-			spin_lock(&locked->lock);
-		} else {
+		allocated = NULL;
+		if (!anon_vma) {
 			anon_vma = anon_vma_alloc();
 			if (unlikely(!anon_vma))
 				return -ENOMEM;
 			allocated = anon_vma;
-			locked = NULL;
 		}
+		spin_lock(&anon_vma->lock);
 
 		/* page_table_lock to protect against threads */
 		spin_lock(&mm->page_table_lock);
@@ -87,8 +110,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		}
 		spin_unlock(&mm->page_table_lock);
 
-		if (locked)
-			spin_unlock(&locked->lock);
+		spin_unlock(&anon_vma->lock);
 		if (unlikely(allocated))
 			anon_vma_free(allocated);
 	}
-- 
GitLab


From a87903f3b4fdbb2088d50a12eef872a1b3fa2ba4 Mon Sep 17 00:00:00 2001
From: Ian Molton <spyro@f2s.com>
Date: Fri, 25 Jul 2008 22:59:29 +0100
Subject: [PATCH 627/895] mfd: reduce stack usage in mfd-core.c

This patch moves the allocation of the resources off the stack in
mfd_add_device().

Signed-off-by: Ian Molton <spyro@f2s.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/mfd-core.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index 9c9c126ed334..6c0d1bec4b76 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -20,7 +20,7 @@ static int mfd_add_device(struct device *parent, int id,
 			  struct resource *mem_base,
 			  int irq_base)
 {
-	struct resource res[cell->num_resources];
+	struct resource *res;
 	struct platform_device *pdev;
 	int ret = -ENOMEM;
 	int r;
@@ -29,14 +29,17 @@ static int mfd_add_device(struct device *parent, int id,
 	if (!pdev)
 		goto fail_alloc;
 
+	res = kzalloc(sizeof(*res) * cell->num_resources, GFP_KERNEL);
+	if (!res)
+		goto fail_device;
+
 	pdev->dev.parent = parent;
 
 	ret = platform_device_add_data(pdev,
 			cell->platform_data, cell->data_size);
 	if (ret)
-		goto fail_device;
+		goto fail_res;
 
-	memset(res, 0, sizeof(res));
 	for (r = 0; r < cell->num_resources; r++) {
 		res[r].name = cell->resources[r].name;
 		res[r].flags = cell->resources[r].flags;
@@ -64,11 +67,15 @@ static int mfd_add_device(struct device *parent, int id,
 
 	ret = platform_device_add(pdev);
 	if (ret)
-		goto fail_device;
+		goto fail_res;
+
+	kfree(res);
 
 	return 0;
 
 /*	platform_device_del(pdev); */
+fail_res:
+	kfree(res);
 fail_device:
 	platform_device_put(pdev);
 fail_alloc:
-- 
GitLab


From 80e74a805f0a6662b9b8de519439afd06ac35427 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Sun, 10 Aug 2008 23:45:24 +0200
Subject: [PATCH 628/895] mfd: update sm501 debugging/low information messages

Make the default output of the SM501 driver to be less noisy wrt to
message that have low informational value, or simply should be debug.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/sm501.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 7aebad4c06ff..220e4371266b 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -623,8 +623,8 @@ unsigned long sm501_set_clock(struct device *dev,
 
 	sm501_sync_regs(sm);
 
-	dev_info(sm->dev, "gate %08lx, clock %08lx, mode %08lx\n",
-		 gate, clock, mode);
+	dev_dbg(sm->dev, "gate %08lx, clock %08lx, mode %08lx\n",
+		gate, clock, mode);
 
 	sm501_mdelay(sm, 16);
 	mutex_unlock(&sm->clock_lock);
@@ -742,7 +742,7 @@ static int sm501_register_device(struct sm501_devdata *sm,
 	int ret;
 
 	for (ptr = 0; ptr < pdev->num_resources; ptr++) {
-		printk("%s[%d] flags %08lx: %08llx..%08llx\n",
+		printk(KERN_DEBUG "%s[%d] flags %08lx: %08llx..%08llx\n",
 		       pdev->name, ptr,
 		       pdev->resource[ptr].flags,
 		       (unsigned long long)pdev->resource[ptr].start,
-- 
GitLab


From 1c1b6ffce5737d764cc474b9bd6677bb9a344094 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Wed, 24 Sep 2008 23:36:23 +0200
Subject: [PATCH 629/895] mfd: provide and use setup hook for tc6393xb

Instead of using bitfields for initial gpio setup,
provide generic setup/teardown hooks that can be used
to set the gpio states, register child devices, etc.

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 arch/arm/mach-pxa/include/mach/tosa.h |  2 --
 arch/arm/mach-pxa/tosa.c              | 35 ++++++++++++++++++++++-----
 drivers/mfd/tc6393xb.c                | 21 ++++++++++------
 include/linux/mfd/tc6393xb.h          |  4 +--
 4 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/arch/arm/mach-pxa/include/mach/tosa.h b/arch/arm/mach-pxa/include/mach/tosa.h
index a72803f0461b..8bce6d8615b9 100644
--- a/arch/arm/mach-pxa/include/mach/tosa.h
+++ b/arch/arm/mach-pxa/include/mach/tosa.h
@@ -59,8 +59,6 @@
  * TC6393XB GPIOs
  */
 #define TOSA_TC6393XB_GPIO_BASE		(NR_BUILTIN_GPIO + 2 * 12)
-#define TOSA_TC6393XB_GPIO(i)		(TOSA_TC6393XB_GPIO_BASE + (i))
-#define TOSA_TC6393XB_GPIO_BIT(gpio)	(1 << (gpio - TOSA_TC6393XB_GPIO_BASE))
 
 #define TOSA_GPIO_TG_ON			(TOSA_TC6393XB_GPIO_BASE + 0)
 #define TOSA_GPIO_L_MUTE		(TOSA_TC6393XB_GPIO_BASE + 1)
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 130e37e4ebdd..fac846b0d070 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -706,16 +706,39 @@ static struct tmio_nand_data tosa_tc6393xb_nand_config = {
 	.badblock_pattern = &tosa_tc6393xb_nand_bbt,
 };
 
-static struct tc6393xb_platform_data tosa_tc6393xb_setup = {
+static int tosa_tc6393xb_setup(struct platform_device *dev)
+{
+	int rc;
+
+	rc = gpio_request(TOSA_GPIO_CARD_VCC_ON, "CARD_VCC_ON");
+	if (rc)
+		goto err_req;
+
+	rc = gpio_direction_output(TOSA_GPIO_CARD_VCC_ON, 1);
+	if (rc)
+		goto err_dir;
+
+	return rc;
+
+err_dir:
+	gpio_free(TOSA_GPIO_CARD_VCC_ON);
+err_req:
+	return rc;
+}
+
+static void tosa_tc6393xb_teardown(struct platform_device *dev)
+{
+	gpio_free(TOSA_GPIO_CARD_VCC_ON);
+}
+
+static struct tc6393xb_platform_data tosa_tc6393xb_data = {
 	.scr_pll2cr	= 0x0cc1,
 	.scr_gper	= 0x3300,
-	.scr_gpo_dsr	=
-		TOSA_TC6393XB_GPIO_BIT(TOSA_GPIO_CARD_VCC_ON),
-	.scr_gpo_doecr	=
-		TOSA_TC6393XB_GPIO_BIT(TOSA_GPIO_CARD_VCC_ON),
 
 	.irq_base	= IRQ_BOARD_START,
 	.gpio_base	= TOSA_TC6393XB_GPIO_BASE,
+	.setup		= tosa_tc6393xb_setup,
+	.teardown	= tosa_tc6393xb_teardown,
 
 	.enable		= tosa_tc6393xb_enable,
 	.disable	= tosa_tc6393xb_disable,
@@ -730,7 +753,7 @@ static struct platform_device tc6393xb_device = {
 	.name	= "tc6393xb",
 	.id	= -1,
 	.dev	= {
-		.platform_data	= &tosa_tc6393xb_setup,
+		.platform_data	= &tosa_tc6393xb_data,
 	},
 	.num_resources	= ARRAY_SIZE(tc6393xb_resources),
 	.resource	= tc6393xb_resources,
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index e4c1c788b5f8..83dc703f3767 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -460,13 +460,6 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 
 	tc6393xb->suspend_state.fer = 0;
 
-	for (i = 0; i < 3; i++) {
-		tc6393xb->suspend_state.gpo_dsr[i] =
-			(tcpd->scr_gpo_dsr >> (8 * i)) & 0xff;
-		tc6393xb->suspend_state.gpo_doecr[i] =
-			(tcpd->scr_gpo_doecr >> (8 * i)) & 0xff;
-	}
-
 	tc6393xb->suspend_state.ccr = SCR_CCR_UNK1 |
 					SCR_CCR_HCLK_48;
 
@@ -488,6 +481,12 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 
 	tc6393xb_attach_irq(dev);
 
+	if (tcpd->setup) {
+		ret = tcpd->setup(dev);
+		if (ret)
+			goto err_setup;
+	}
+
 	tc6393xb_cells[TC6393XB_CELL_NAND].driver_data = tcpd->nand_data;
 	tc6393xb_cells[TC6393XB_CELL_NAND].platform_data =
 		&tc6393xb_cells[TC6393XB_CELL_NAND];
@@ -506,6 +505,10 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 	if (!ret)
 		return 0;
 
+	if (tcpd->teardown)
+		tcpd->teardown(dev);
+
+err_setup:
 	tc6393xb_detach_irq(dev);
 
 err_gpio_add:
@@ -535,6 +538,10 @@ static int __devexit tc6393xb_remove(struct platform_device *dev)
 	int ret;
 
 	mfd_remove_devices(&dev->dev);
+
+	if (tcpd->teardown)
+		tcpd->teardown(dev);
+
 	tc6393xb_detach_irq(dev);
 
 	if (tc6393xb->gpio.base != -1) {
diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h
index fec7b3f7a81f..1fa820646d98 100644
--- a/include/linux/mfd/tc6393xb.h
+++ b/include/linux/mfd/tc6393xb.h
@@ -21,8 +21,6 @@
 struct tc6393xb_platform_data {
 	u16	scr_pll2cr;	/* PLL2 Control */
 	u16	scr_gper;	/* GP Enable */
-	u32	scr_gpo_doecr;	/* GPO Data OE Control */
-	u32	scr_gpo_dsr;	/* GPO Data Set */
 
 	int	(*enable)(struct platform_device *dev);
 	int	(*disable)(struct platform_device *dev);
@@ -31,6 +29,8 @@ struct tc6393xb_platform_data {
 
 	int	irq_base;	/* base for subdevice irqs */
 	int	gpio_base;
+	int	(*setup)(struct platform_device *dev);
+	void	(*teardown)(struct platform_device *dev);
 
 	struct tmio_nand_data	*nand_data;
 };
-- 
GitLab


From f98a0bd0e4b77b12e49ce01f4c9f04503931c291 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Wed, 24 Sep 2008 23:46:10 +0200
Subject: [PATCH 630/895] mfd: do tcb6393xb state restore on resume only if
 requested

As requested by Ian make state restore only if it's requested
by platform data: some platforms do correctly save the state of
the chip during suspend/resume, but some (like tosa) incorrectly
power off the chip at suspend, so the driver supports restoring
some bits of the tc6393xb state (not full, merely enough to support
resume on tosa). With this patch this code is disabled by default.

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Acked-by: Ian Molton <spyro@f2s.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 arch/arm/mach-pxa/tosa.c     |  2 +
 drivers/mfd/tc6393xb.c       | 74 +++++++++++++++++-------------------
 include/linux/mfd/tc6393xb.h |  4 ++
 3 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index fac846b0d070..a6c4694359ca 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -746,6 +746,8 @@ static struct tc6393xb_platform_data tosa_tc6393xb_data = {
 	.resume		= tosa_tc6393xb_resume,
 
 	.nand_data	= &tosa_tc6393xb_nand_config,
+
+	.resume_restore = 1,
 };
 
 
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 83dc703f3767..c3c64aeeb12a 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -369,41 +369,12 @@ static void tc6393xb_detach_irq(struct platform_device *dev)
 
 /*--------------------------------------------------------------------------*/
 
-static int tc6393xb_hw_init(struct platform_device *dev)
-{
-	struct tc6393xb_platform_data *tcpd = dev->dev.platform_data;
-	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
-	int i;
-
-	iowrite8(tc6393xb->suspend_state.fer,	tc6393xb->scr + SCR_FER);
-	iowrite16(tcpd->scr_pll2cr,		tc6393xb->scr + SCR_PLL2CR);
-	iowrite16(tc6393xb->suspend_state.ccr,	tc6393xb->scr + SCR_CCR);
-	iowrite16(SCR_MCR_RDY_OPENDRAIN | SCR_MCR_RDY_UNK | SCR_MCR_RDY_EN |
-		  SCR_MCR_INT_OPENDRAIN | SCR_MCR_INT_UNK | SCR_MCR_INT_EN |
-		  BIT(15),			tc6393xb->scr + SCR_MCR);
-	iowrite16(tcpd->scr_gper,		tc6393xb->scr + SCR_GPER);
-	iowrite8(0,				tc6393xb->scr + SCR_IRR);
-	iowrite8(0xbf,				tc6393xb->scr + SCR_IMR);
-
-	for (i = 0; i < 3; i++) {
-		iowrite8(tc6393xb->suspend_state.gpo_dsr[i],
-					tc6393xb->scr + SCR_GPO_DSR(i));
-		iowrite8(tc6393xb->suspend_state.gpo_doecr[i],
-					tc6393xb->scr + SCR_GPO_DOECR(i));
-		iowrite8(tc6393xb->suspend_state.gpi_bcr[i],
-					tc6393xb->scr + SCR_GPI_BCR(i));
-	}
-
-	return 0;
-}
-
 static int __devinit tc6393xb_probe(struct platform_device *dev)
 {
 	struct tc6393xb_platform_data *tcpd = dev->dev.platform_data;
 	struct tc6393xb *tc6393xb;
 	struct resource *iomem, *rscr;
 	int ret, temp;
-	int i;
 
 	iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	if (!iomem)
@@ -458,14 +429,16 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 	if (ret)
 		goto err_enable;
 
-	tc6393xb->suspend_state.fer = 0;
-
-	tc6393xb->suspend_state.ccr = SCR_CCR_UNK1 |
-					SCR_CCR_HCLK_48;
-
-	ret = tc6393xb_hw_init(dev);
-	if (ret)
-		goto err_hw_init;
+	iowrite8(0,				tc6393xb->scr + SCR_FER);
+	iowrite16(tcpd->scr_pll2cr,		tc6393xb->scr + SCR_PLL2CR);
+	iowrite16(SCR_CCR_UNK1 | SCR_CCR_HCLK_48,
+						tc6393xb->scr + SCR_CCR);
+	iowrite16(SCR_MCR_RDY_OPENDRAIN | SCR_MCR_RDY_UNK | SCR_MCR_RDY_EN |
+		  SCR_MCR_INT_OPENDRAIN | SCR_MCR_INT_UNK | SCR_MCR_INT_EN |
+		  BIT(15),			tc6393xb->scr + SCR_MCR);
+	iowrite16(tcpd->scr_gper,		tc6393xb->scr + SCR_GPER);
+	iowrite8(0,				tc6393xb->scr + SCR_IRR);
+	iowrite8(0xbf,				tc6393xb->scr + SCR_IMR);
 
 	printk(KERN_INFO "Toshiba tc6393xb revision %d at 0x%08lx, irq %d\n",
 			tmio_ioread8(tc6393xb->scr + SCR_REVID),
@@ -514,7 +487,6 @@ err_setup:
 err_gpio_add:
 	if (tc6393xb->gpio.base != -1)
 		temp = gpiochip_remove(&tc6393xb->gpio);
-err_hw_init:
 	tcpd->disable(dev);
 err_clk_enable:
 	clk_disable(tc6393xb->clk);
@@ -592,15 +564,37 @@ static int tc6393xb_resume(struct platform_device *dev)
 	struct tc6393xb_platform_data *tcpd = dev->dev.platform_data;
 	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
 	int ret;
+	int i;
 
 	clk_enable(tc6393xb->clk);
 
 	ret = tcpd->resume(dev);
-
 	if (ret)
 		return ret;
 
-	return tc6393xb_hw_init(dev);
+	if (!tcpd->resume_restore)
+		return 0;
+
+	iowrite8(tc6393xb->suspend_state.fer,	tc6393xb->scr + SCR_FER);
+	iowrite16(tcpd->scr_pll2cr,		tc6393xb->scr + SCR_PLL2CR);
+	iowrite16(tc6393xb->suspend_state.ccr,	tc6393xb->scr + SCR_CCR);
+	iowrite16(SCR_MCR_RDY_OPENDRAIN | SCR_MCR_RDY_UNK | SCR_MCR_RDY_EN |
+		  SCR_MCR_INT_OPENDRAIN | SCR_MCR_INT_UNK | SCR_MCR_INT_EN |
+		  BIT(15),			tc6393xb->scr + SCR_MCR);
+	iowrite16(tcpd->scr_gper,		tc6393xb->scr + SCR_GPER);
+	iowrite8(0,				tc6393xb->scr + SCR_IRR);
+	iowrite8(0xbf,				tc6393xb->scr + SCR_IMR);
+
+	for (i = 0; i < 3; i++) {
+		iowrite8(tc6393xb->suspend_state.gpo_dsr[i],
+					tc6393xb->scr + SCR_GPO_DSR(i));
+		iowrite8(tc6393xb->suspend_state.gpo_doecr[i],
+					tc6393xb->scr + SCR_GPO_DOECR(i));
+		iowrite8(tc6393xb->suspend_state.gpi_bcr[i],
+					tc6393xb->scr + SCR_GPI_BCR(i));
+	}
+
+	return 0;
 }
 #else
 #define tc6393xb_suspend NULL
diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h
index 1fa820646d98..3ce10ae0f397 100644
--- a/include/linux/mfd/tc6393xb.h
+++ b/include/linux/mfd/tc6393xb.h
@@ -33,6 +33,10 @@ struct tc6393xb_platform_data {
 	void	(*teardown)(struct platform_device *dev);
 
 	struct tmio_nand_data	*nand_data;
+
+	unsigned resume_restore : 1; /* make special actions
+					to preserve the state
+					on suspend/resume */
 };
 
 /*
-- 
GitLab


From 504f97f884bafcd14d28ab52533e7b92996c8b98 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@openedhand.com>
Date: Thu, 25 Sep 2008 00:17:00 +0200
Subject: [PATCH 631/895] mfd: Fix htc-egpio compile warning

resource_size_t can be a 64 bits value. Since this is just a printk, we'll
cast it to a u32.

Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/htc-egpio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index 6be43172dc65..50dff6e0088d 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -289,7 +289,7 @@ static int __init egpio_probe(struct platform_device *pdev)
 	ei->base_addr = ioremap_nocache(res->start, res->end - res->start);
 	if (!ei->base_addr)
 		goto fail;
-	pr_debug("EGPIO phys=%08x virt=%p\n", res->start, ei->base_addr);
+	pr_debug("EGPIO phys=%08x virt=%p\n", (u32)res->start, ei->base_addr);
 
 	if ((pdata->bus_width != 16) && (pdata->bus_width != 32))
 		goto fail;
-- 
GitLab


From 51a55623565c6ca864f7cf19e87c2d4bde1c0c5e Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Fri, 3 Oct 2008 20:11:36 +0200
Subject: [PATCH 632/895] mfd: add OHCI cell to tc6393xb

Add information regarding OHCI cell of the tc6393xb

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Acked-by: Ian Molton <spyro@f2s.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/tc6393xb.c       | 87 ++++++++++++++++++++++++++++++++++++
 include/linux/mfd/tc6393xb.h |  1 +
 2 files changed, 88 insertions(+)

diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index c3c64aeeb12a..6197db7d4859 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -113,6 +113,7 @@ struct tc6393xb {
 enum {
 	TC6393XB_CELL_NAND,
 	TC6393XB_CELL_MMC,
+	TC6393XB_CELL_OHCI,
 };
 
 /*--------------------------------------------------------------------------*/
@@ -170,6 +171,78 @@ static struct resource __devinitdata tc6393xb_mmc_resources[] = {
 	},
 };
 
+const static struct resource tc6393xb_ohci_resources[] = {
+	{
+		.start	= 0x3000,
+		.end	= 0x31ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= 0x0300,
+		.end	= 0x03ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= 0x010000,
+		.end	= 0x017fff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= 0x018000,
+		.end	= 0x01ffff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= IRQ_TC6393_OHCI,
+		.end	= IRQ_TC6393_OHCI,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static int tc6393xb_ohci_enable(struct platform_device *dev)
+{
+	struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent);
+	unsigned long flags;
+	u16 ccr;
+	u8 fer;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	ccr = tmio_ioread16(tc6393xb->scr + SCR_CCR);
+	ccr |= SCR_CCR_USBCK;
+	tmio_iowrite16(ccr, tc6393xb->scr + SCR_CCR);
+
+	fer = tmio_ioread8(tc6393xb->scr + SCR_FER);
+	fer |= SCR_FER_USBEN;
+	tmio_iowrite8(fer, tc6393xb->scr + SCR_FER);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+
+static int tc6393xb_ohci_disable(struct platform_device *dev)
+{
+	struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent);
+	unsigned long flags;
+	u16 ccr;
+	u8 fer;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	fer = tmio_ioread8(tc6393xb->scr + SCR_FER);
+	fer &= ~SCR_FER_USBEN;
+	tmio_iowrite8(fer, tc6393xb->scr + SCR_FER);
+
+	ccr = tmio_ioread16(tc6393xb->scr + SCR_CCR);
+	ccr &= ~SCR_CCR_USBCK;
+	tmio_iowrite16(ccr, tc6393xb->scr + SCR_CCR);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+
 static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 	[TC6393XB_CELL_NAND] = {
 		.name = "tmio-nand",
@@ -182,6 +255,15 @@ static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 		.num_resources = ARRAY_SIZE(tc6393xb_mmc_resources),
 		.resources = tc6393xb_mmc_resources,
 	},
+	[TC6393XB_CELL_OHCI] = {
+		.name = "tmio-ohci",
+		.num_resources = ARRAY_SIZE(tc6393xb_ohci_resources),
+		.resources = tc6393xb_ohci_resources,
+		.enable = tc6393xb_ohci_enable,
+		.suspend = tc6393xb_ohci_disable,
+		.resume = tc6393xb_ohci_enable,
+		.disable = tc6393xb_ohci_disable,
+	},
 };
 
 /*--------------------------------------------------------------------------*/
@@ -470,6 +552,11 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 	tc6393xb_cells[TC6393XB_CELL_MMC].data_size =
 		sizeof(tc6393xb_cells[TC6393XB_CELL_MMC]);
 
+	tc6393xb_cells[TC6393XB_CELL_OHCI].platform_data =
+		&tc6393xb_cells[TC6393XB_CELL_OHCI];
+	tc6393xb_cells[TC6393XB_CELL_OHCI].data_size =
+		sizeof(tc6393xb_cells[TC6393XB_CELL_OHCI]);
+
 
 	ret = mfd_add_devices(&dev->dev, dev->id,
 			tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells),
diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h
index 3ce10ae0f397..4437736ebe19 100644
--- a/include/linux/mfd/tc6393xb.h
+++ b/include/linux/mfd/tc6393xb.h
@@ -44,6 +44,7 @@ struct tc6393xb_platform_data {
  */
 #define	IRQ_TC6393_NAND		0
 #define	IRQ_TC6393_MMC		1
+#define	IRQ_TC6393_OHCI		2
 
 #define	TC6393XB_NR_IRQS	8
 
-- 
GitLab


From 9e78cfe53f3c2bc1b37870697c3cde1543fefa8b Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Sat, 4 Oct 2008 00:50:36 +0200
Subject: [PATCH 633/895] mfd: support tmiofb cell on tc6393xb

Add support for tmiofb cell found in tc6393xb chip.

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Cc: Ian Molton <spyro@f2s.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/tc6393xb.c       | 114 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/tc6393xb.h |   8 +++
 2 files changed, 122 insertions(+)

diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 6197db7d4859..f856e9463a9f 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -114,6 +114,7 @@ enum {
 	TC6393XB_CELL_NAND,
 	TC6393XB_CELL_MMC,
 	TC6393XB_CELL_OHCI,
+	TC6393XB_CELL_FB,
 };
 
 /*--------------------------------------------------------------------------*/
@@ -199,6 +200,29 @@ const static struct resource tc6393xb_ohci_resources[] = {
 	},
 };
 
+static struct resource __devinitdata tc6393xb_fb_resources[] = {
+	{
+		.start	= 0x5000,
+		.end	= 0x51ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= 0x0500,
+		.end	= 0x05ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= 0x100000,
+		.end	= 0x1fffff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= IRQ_TC6393_FB,
+		.end	= IRQ_TC6393_FB,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
 static int tc6393xb_ohci_enable(struct platform_device *dev)
 {
 	struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent);
@@ -243,6 +267,81 @@ static int tc6393xb_ohci_disable(struct platform_device *dev)
 	return 0;
 }
 
+static int tc6393xb_fb_enable(struct platform_device *dev)
+{
+	struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent);
+	unsigned long flags;
+	u16 ccr;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	ccr = tmio_ioread16(tc6393xb->scr + SCR_CCR);
+	ccr &= ~SCR_CCR_MCLK_MASK;
+	ccr |= SCR_CCR_MCLK_48;
+	tmio_iowrite16(ccr, tc6393xb->scr + SCR_CCR);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+
+static int tc6393xb_fb_disable(struct platform_device *dev)
+{
+	struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent);
+	unsigned long flags;
+	u16 ccr;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	ccr = tmio_ioread16(tc6393xb->scr + SCR_CCR);
+	ccr &= ~SCR_CCR_MCLK_MASK;
+	ccr |= SCR_CCR_MCLK_OFF;
+	tmio_iowrite16(ccr, tc6393xb->scr + SCR_CCR);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+
+int tc6393xb_lcd_set_power(struct platform_device *fb, bool on)
+{
+	struct platform_device *dev = to_platform_device(fb->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+	u8 fer;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	fer = ioread8(tc6393xb->scr + SCR_FER);
+	if (on)
+		fer |= SCR_FER_SLCDEN;
+	else
+		fer &= ~SCR_FER_SLCDEN;
+	iowrite8(fer, tc6393xb->scr + SCR_FER);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(tc6393xb_lcd_set_power);
+
+int tc6393xb_lcd_mode(struct platform_device *fb,
+					const struct fb_videomode *mode) {
+	struct platform_device *dev = to_platform_device(fb->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	iowrite16(mode->pixclock, tc6393xb->scr + SCR_PLL1CR + 0);
+	iowrite16(mode->pixclock >> 16, tc6393xb->scr + SCR_PLL1CR + 2);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(tc6393xb_lcd_mode);
+
 static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 	[TC6393XB_CELL_NAND] = {
 		.name = "tmio-nand",
@@ -264,6 +363,15 @@ static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 		.resume = tc6393xb_ohci_enable,
 		.disable = tc6393xb_ohci_disable,
 	},
+	[TC6393XB_CELL_FB] = {
+		.name = "tmio-fb",
+		.num_resources = ARRAY_SIZE(tc6393xb_fb_resources),
+		.resources = tc6393xb_fb_resources,
+		.enable = tc6393xb_fb_enable,
+		.suspend = tc6393xb_fb_disable,
+		.resume = tc6393xb_fb_enable,
+		.disable = tc6393xb_fb_disable,
+	},
 };
 
 /*--------------------------------------------------------------------------*/
@@ -547,6 +655,7 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 		&tc6393xb_cells[TC6393XB_CELL_NAND];
 	tc6393xb_cells[TC6393XB_CELL_NAND].data_size =
 		sizeof(tc6393xb_cells[TC6393XB_CELL_NAND]);
+
 	tc6393xb_cells[TC6393XB_CELL_MMC].platform_data =
 		&tc6393xb_cells[TC6393XB_CELL_MMC];
 	tc6393xb_cells[TC6393XB_CELL_MMC].data_size =
@@ -557,6 +666,11 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 	tc6393xb_cells[TC6393XB_CELL_OHCI].data_size =
 		sizeof(tc6393xb_cells[TC6393XB_CELL_OHCI]);
 
+	tc6393xb_cells[TC6393XB_CELL_FB].driver_data = tcpd->fb_data;
+	tc6393xb_cells[TC6393XB_CELL_FB].platform_data =
+		&tc6393xb_cells[TC6393XB_CELL_FB];
+	tc6393xb_cells[TC6393XB_CELL_FB].data_size =
+		sizeof(tc6393xb_cells[TC6393XB_CELL_FB]);
 
 	ret = mfd_add_devices(&dev->dev, dev->id,
 			tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells),
diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h
index 4437736ebe19..626e448205c5 100644
--- a/include/linux/mfd/tc6393xb.h
+++ b/include/linux/mfd/tc6393xb.h
@@ -17,6 +17,8 @@
 #ifndef MFD_TC6393XB_H
 #define MFD_TC6393XB_H
 
+#include <linux/fb.h>
+
 /* Also one should provide the CK3P6MI clock */
 struct tc6393xb_platform_data {
 	u16	scr_pll2cr;	/* PLL2 Control */
@@ -33,18 +35,24 @@ struct tc6393xb_platform_data {
 	void	(*teardown)(struct platform_device *dev);
 
 	struct tmio_nand_data	*nand_data;
+	struct tmio_fb_data	*fb_data;
 
 	unsigned resume_restore : 1; /* make special actions
 					to preserve the state
 					on suspend/resume */
 };
 
+extern int tc6393xb_lcd_mode(struct platform_device *fb,
+			     const struct fb_videomode *mode);
+extern int tc6393xb_lcd_set_power(struct platform_device *fb, bool on);
+
 /*
  * Relative to irq_base
  */
 #define	IRQ_TC6393_NAND		0
 #define	IRQ_TC6393_MMC		1
 #define	IRQ_TC6393_OHCI		2
+#define	IRQ_TC6393_FB		4
 
 #define	TC6393XB_NR_IRQS	8
 
-- 
GitLab


From a603a7fa8717fb778bba91b5a879babf333dc6a3 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 15 Oct 2008 12:15:39 +0200
Subject: [PATCH 634/895] mfd: TWL4030 core driver

This patch adds the core of the TWL4030 driver, which supports
chips including the TPS65950.  These chips are multi-function; see

  http://focus.ti.com/docs/prod/folders/print/tps65950.html

Public specs are in the works.  For now, the block diagram on
the second page of the datasheet is fairly informative.

There are some known issues with this core code.  Most notably,
the IRQ dispatching needs simplification (to use more of genirq),
generalization (integrating support for secondary IRQ dispatch
as well as primary, and removing the build dependency on OMAP),
and then probably updating to leverage threaded IRQ support
(expected to arrive in mainline "soon").

Once the core is in mainline, drivers for other parts of this
chip can follow its lead and start swimming upstream too.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/Kconfig         |   14 +
 drivers/mfd/Makefile        |    2 +
 drivers/mfd/twl4030-core.c  | 1257 +++++++++++++++++++++++++++++++++++
 include/linux/i2c/twl4030.h |  339 ++++++++++
 4 files changed, 1612 insertions(+)
 create mode 100644 drivers/mfd/twl4030-core.c
 create mode 100644 include/linux/i2c/twl4030.h

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 5eff8ad834d6..cccda99328f3 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -59,6 +59,20 @@ config UCB1400_CORE
 	  To compile this driver as a module, choose M here: the
 	  module will be called ucb1400_core.
 
+config TWL4030_CORE
+	bool "Texas Instruments TWL4030/TPS659x0 Support"
+	depends on I2C=y && GENERIC_HARDIRQS && (ARCH_OMAP2 || ARCH_OMAP3)
+	help
+	  Say yes here if you have TWL4030 family chip on your board.
+	  This core driver provides register access and IRQ handling
+	  facilities, and registers devices for the various functions
+	  so that function-specific drivers can bind to them.
+
+	  These multi-function chips are found on many OMAP2 and OMAP3
+	  boards, providing power management, RTC, GPIO, keypad, a
+	  high speed USB OTG transceiver, an audio codec (on most
+	  versions) and many other features.
+
 config MFD_TMIO
 	bool
 	default n
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 759b1fe1c891..68e237b830ad 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -17,6 +17,8 @@ wm8350-objs			:= wm8350-core.o wm8350-regmap.o wm8350-gpio.o
 obj-$(CONFIG_MFD_WM8350)	+= wm8350.o
 obj-$(CONFIG_MFD_WM8350_I2C)	+= wm8350-i2c.o
 
+obj-$(CONFIG_TWL4030_CORE)	+= twl4030-core.o
+
 obj-$(CONFIG_MFD_CORE)		+= mfd-core.o
 
 obj-$(CONFIG_MCP)		+= mcp-core.o
diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
new file mode 100644
index 000000000000..4af1624987c5
--- /dev/null
+++ b/drivers/mfd/twl4030-core.c
@@ -0,0 +1,1257 @@
+/*
+ * twl4030_core.c - driver for TWL4030/TPS659x0 PM and audio CODEC devices
+ *
+ * Copyright (C) 2005-2006 Texas Instruments, Inc.
+ *
+ * Modifications to defer interrupt handling to a kernel thread:
+ * Copyright (C) 2006 MontaVista Software, Inc.
+ *
+ * Based on tlv320aic23.c:
+ * Copyright (c) by Kai Svahn <kai.svahn@nokia.com>
+ *
+ * Code cleanup and modifications to IRQ handler.
+ * by syed khasim <x0khasim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/random.h>
+#include <linux/kthread.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c/twl4030.h>
+
+
+/*
+ * The TWL4030 "Triton 2" is one of a family of a multi-function "Power
+ * Management and System Companion Device" chips originally designed for
+ * use in OMAP2 and OMAP 3 based systems.  Its control interfaces use I2C,
+ * often at around 3 Mbit/sec, including for interrupt handling.
+ *
+ * This driver core provides genirq support for the interrupts emitted,
+ * by the various modules, and exports register access primitives.
+ *
+ * FIXME this driver currently requires use of the first interrupt line
+ * (and associated registers).
+ */
+
+#define DRIVER_NAME			"twl4030"
+
+#if defined(CONFIG_TWL4030_BCI_BATTERY) || \
+	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
+#define twl_has_bci()		true
+#else
+#define twl_has_bci()		false
+#endif
+
+#if defined(CONFIG_KEYBOARD_TWL4030) || defined(CONFIG_KEYBOARD_TWL4030_MODULE)
+#define twl_has_keypad()	true
+#else
+#define twl_has_keypad()	false
+#endif
+
+#if defined(CONFIG_GPIO_TWL4030) || defined(CONFIG_GPIO_TWL4030_MODULE)
+#define twl_has_gpio()	true
+#else
+#define twl_has_gpio()	false
+#endif
+
+#if defined(CONFIG_TWL4030_MADC) || defined(CONFIG_TWL4030_MADC_MODULE)
+#define twl_has_madc()	true
+#else
+#define twl_has_madc()	false
+#endif
+
+#if defined(CONFIG_RTC_DRV_TWL4030) || defined(CONFIG_RTC_DRV_TWL4030_MODULE)
+#define twl_has_rtc()	true
+#else
+#define twl_has_rtc()	false
+#endif
+
+#if defined(CONFIG_TWL4030_USB) || defined(CONFIG_TWL4030_USB_MODULE)
+#define twl_has_usb()	true
+#else
+#define twl_has_usb()	false
+#endif
+
+static inline void activate_irq(int irq)
+{
+#ifdef CONFIG_ARM
+	/* ARM requires an extra step to clear IRQ_NOREQUEST, which it
+	 * sets on behalf of every irq_chip.  Also sets IRQ_NOPROBE.
+	 */
+	set_irq_flags(irq, IRQF_VALID);
+#else
+	/* same effect on other architectures */
+	set_irq_noprobe(irq);
+#endif
+}
+
+/* Primary Interrupt Handler on TWL4030 Registers */
+
+/* Register Definitions */
+
+#define REG_PIH_ISR_P1			(0x1)
+#define REG_PIH_ISR_P2			(0x2)
+#define REG_PIH_SIR			(0x3)
+
+/* Triton Core internal information (BEGIN) */
+
+/* Last - for index max*/
+#define TWL4030_MODULE_LAST		TWL4030_MODULE_SECURED_REG
+
+#define TWL4030_NUM_SLAVES		4
+
+
+/* Base Address defns for twl4030_map[] */
+
+/* subchip/slave 0 - USB ID */
+#define TWL4030_BASEADD_USB		0x0000
+
+/* subchip/slave 1 - AUD ID */
+#define TWL4030_BASEADD_AUDIO_VOICE	0x0000
+#define TWL4030_BASEADD_GPIO		0x0098
+#define TWL4030_BASEADD_INTBR		0x0085
+#define TWL4030_BASEADD_PIH		0x0080
+#define TWL4030_BASEADD_TEST		0x004C
+
+/* subchip/slave 2 - AUX ID */
+#define TWL4030_BASEADD_INTERRUPTS	0x00B9
+#define TWL4030_BASEADD_LED		0x00EE
+#define TWL4030_BASEADD_MADC		0x0000
+#define TWL4030_BASEADD_MAIN_CHARGE	0x0074
+#define TWL4030_BASEADD_PRECHARGE	0x00AA
+#define TWL4030_BASEADD_PWM0		0x00F8
+#define TWL4030_BASEADD_PWM1		0x00FB
+#define TWL4030_BASEADD_PWMA		0x00EF
+#define TWL4030_BASEADD_PWMB		0x00F1
+#define TWL4030_BASEADD_KEYPAD		0x00D2
+
+/* subchip/slave 3 - POWER ID */
+#define TWL4030_BASEADD_BACKUP		0x0014
+#define TWL4030_BASEADD_INT		0x002E
+#define TWL4030_BASEADD_PM_MASTER	0x0036
+#define TWL4030_BASEADD_PM_RECEIVER	0x005B
+#define TWL4030_BASEADD_RTC		0x001C
+#define TWL4030_BASEADD_SECURED_REG	0x0000
+
+/* Triton Core internal information (END) */
+
+
+/* Few power values */
+#define R_CFG_BOOT			0x05
+#define R_PROTECT_KEY			0x0E
+
+/* access control values for R_PROTECT_KEY */
+#define KEY_UNLOCK1			0xce
+#define KEY_UNLOCK2			0xec
+#define KEY_LOCK			0x00
+
+/* some fields in R_CFG_BOOT */
+#define HFCLK_FREQ_19p2_MHZ		(1 << 0)
+#define HFCLK_FREQ_26_MHZ		(2 << 0)
+#define HFCLK_FREQ_38p4_MHZ		(3 << 0)
+#define HIGH_PERF_SQ			(1 << 3)
+
+
+/*----------------------------------------------------------------------*/
+
+/**
+ * struct twl4030_mod_iregs - TWL module IMR/ISR regs to mask/clear at init
+ * @mod_no: TWL4030 module number (e.g., TWL4030_MODULE_GPIO)
+ * @sih_ctrl: address of module SIH_CTRL register
+ * @reg_cnt: number of IMR/ISR regs
+ * @imrs: pointer to array of TWL module interrupt mask register indices
+ * @isrs: pointer to array of TWL module interrupt status register indices
+ *
+ * Ties together TWL4030 modules and lists of IMR/ISR registers to mask/clear
+ * during twl_init_irq().
+ */
+struct twl4030_mod_iregs {
+	const u8 mod_no;
+	const u8 sih_ctrl;
+	const u8 reg_cnt;
+	const u8 *imrs;
+	const u8 *isrs;
+};
+
+/* TWL4030 INT module interrupt mask registers */
+static const u8 __initconst twl4030_int_imr_regs[] = {
+	TWL4030_INT_PWR_IMR1,
+	TWL4030_INT_PWR_IMR2,
+};
+
+/* TWL4030 INT module interrupt status registers */
+static const u8 __initconst twl4030_int_isr_regs[] = {
+	TWL4030_INT_PWR_ISR1,
+	TWL4030_INT_PWR_ISR2,
+};
+
+/* TWL4030 INTERRUPTS module interrupt mask registers */
+static const u8 __initconst twl4030_interrupts_imr_regs[] = {
+	TWL4030_INTERRUPTS_BCIIMR1A,
+	TWL4030_INTERRUPTS_BCIIMR1B,
+	TWL4030_INTERRUPTS_BCIIMR2A,
+	TWL4030_INTERRUPTS_BCIIMR2B,
+};
+
+/* TWL4030 INTERRUPTS module interrupt status registers */
+static const u8 __initconst twl4030_interrupts_isr_regs[] = {
+	TWL4030_INTERRUPTS_BCIISR1A,
+	TWL4030_INTERRUPTS_BCIISR1B,
+	TWL4030_INTERRUPTS_BCIISR2A,
+	TWL4030_INTERRUPTS_BCIISR2B,
+};
+
+/* TWL4030 MADC module interrupt mask registers */
+static const u8 __initconst twl4030_madc_imr_regs[] = {
+	TWL4030_MADC_IMR1,
+	TWL4030_MADC_IMR2,
+};
+
+/* TWL4030 MADC module interrupt status registers */
+static const u8 __initconst twl4030_madc_isr_regs[] = {
+	TWL4030_MADC_ISR1,
+	TWL4030_MADC_ISR2,
+};
+
+/* TWL4030 keypad module interrupt mask registers */
+static const u8 __initconst twl4030_keypad_imr_regs[] = {
+	TWL4030_KEYPAD_KEYP_IMR1,
+	TWL4030_KEYPAD_KEYP_IMR2,
+};
+
+/* TWL4030 keypad module interrupt status registers */
+static const u8 __initconst twl4030_keypad_isr_regs[] = {
+	TWL4030_KEYPAD_KEYP_ISR1,
+	TWL4030_KEYPAD_KEYP_ISR2,
+};
+
+/* TWL4030 GPIO module interrupt mask registers */
+static const u8 __initconst twl4030_gpio_imr_regs[] = {
+	REG_GPIO_IMR1A,
+	REG_GPIO_IMR1B,
+	REG_GPIO_IMR2A,
+	REG_GPIO_IMR2B,
+	REG_GPIO_IMR3A,
+	REG_GPIO_IMR3B,
+};
+
+/* TWL4030 GPIO module interrupt status registers */
+static const u8 __initconst twl4030_gpio_isr_regs[] = {
+	REG_GPIO_ISR1A,
+	REG_GPIO_ISR1B,
+	REG_GPIO_ISR2A,
+	REG_GPIO_ISR2B,
+	REG_GPIO_ISR3A,
+	REG_GPIO_ISR3B,
+};
+
+/* TWL4030 modules that have IMR/ISR registers that must be masked/cleared */
+static const struct twl4030_mod_iregs __initconst twl4030_mod_regs[] = {
+	{
+		.mod_no	  = TWL4030_MODULE_INT,
+		.sih_ctrl = TWL4030_INT_PWR_SIH_CTRL,
+		.reg_cnt  = ARRAY_SIZE(twl4030_int_imr_regs),
+		.imrs	  = twl4030_int_imr_regs,
+		.isrs	  = twl4030_int_isr_regs,
+	},
+	{
+		.mod_no	  = TWL4030_MODULE_INTERRUPTS,
+		.sih_ctrl = TWL4030_INTERRUPTS_BCISIHCTRL,
+		.reg_cnt  = ARRAY_SIZE(twl4030_interrupts_imr_regs),
+		.imrs	  = twl4030_interrupts_imr_regs,
+		.isrs	  = twl4030_interrupts_isr_regs,
+	},
+	{
+		.mod_no	  = TWL4030_MODULE_MADC,
+		.sih_ctrl = TWL4030_MADC_SIH_CTRL,
+		.reg_cnt  = ARRAY_SIZE(twl4030_madc_imr_regs),
+		.imrs	  = twl4030_madc_imr_regs,
+		.isrs	  = twl4030_madc_isr_regs,
+	},
+	{
+		.mod_no	  = TWL4030_MODULE_KEYPAD,
+		.sih_ctrl = TWL4030_KEYPAD_KEYP_SIH_CTRL,
+		.reg_cnt  = ARRAY_SIZE(twl4030_keypad_imr_regs),
+		.imrs	  = twl4030_keypad_imr_regs,
+		.isrs	  = twl4030_keypad_isr_regs,
+	},
+	{
+		.mod_no	  = TWL4030_MODULE_GPIO,
+		.sih_ctrl = REG_GPIO_SIH_CTRL,
+		.reg_cnt  = ARRAY_SIZE(twl4030_gpio_imr_regs),
+		.imrs	  = twl4030_gpio_imr_regs,
+		.isrs	  = twl4030_gpio_isr_regs,
+	},
+};
+
+/*----------------------------------------------------------------*/
+
+/* is driver active, bound to a chip? */
+static bool inuse;
+
+/* Structure for each TWL4030 Slave */
+struct twl4030_client {
+	struct i2c_client *client;
+	u8 address;
+
+	/* max numb of i2c_msg required is for read =2 */
+	struct i2c_msg xfer_msg[2];
+
+	/* To lock access to xfer_msg */
+	struct mutex xfer_lock;
+};
+
+static struct twl4030_client twl4030_modules[TWL4030_NUM_SLAVES];
+
+
+/* mapping the module id to slave id and base address */
+struct twl4030mapping {
+	unsigned char sid;	/* Slave ID */
+	unsigned char base;	/* base address */
+};
+
+static struct twl4030mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
+	/*
+	 * NOTE:  don't change this table without updating the
+	 * <linux/i2c/twl4030.h> defines for TWL4030_MODULE_*
+	 * so they continue to match the order in this table.
+	 */
+
+	{ 0, TWL4030_BASEADD_USB },
+
+	{ 1, TWL4030_BASEADD_AUDIO_VOICE },
+	{ 1, TWL4030_BASEADD_GPIO },
+	{ 1, TWL4030_BASEADD_INTBR },
+	{ 1, TWL4030_BASEADD_PIH },
+	{ 1, TWL4030_BASEADD_TEST },
+
+	{ 2, TWL4030_BASEADD_KEYPAD },
+	{ 2, TWL4030_BASEADD_MADC },
+	{ 2, TWL4030_BASEADD_INTERRUPTS },
+	{ 2, TWL4030_BASEADD_LED },
+	{ 2, TWL4030_BASEADD_MAIN_CHARGE },
+	{ 2, TWL4030_BASEADD_PRECHARGE },
+	{ 2, TWL4030_BASEADD_PWM0 },
+	{ 2, TWL4030_BASEADD_PWM1 },
+	{ 2, TWL4030_BASEADD_PWMA },
+	{ 2, TWL4030_BASEADD_PWMB },
+
+	{ 3, TWL4030_BASEADD_BACKUP },
+	{ 3, TWL4030_BASEADD_INT },
+	{ 3, TWL4030_BASEADD_PM_MASTER },
+	{ 3, TWL4030_BASEADD_PM_RECEIVER },
+	{ 3, TWL4030_BASEADD_RTC },
+	{ 3, TWL4030_BASEADD_SECURED_REG },
+};
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * TWL4030 doesn't have PIH mask, hence dummy function for mask
+ * and unmask of the (eight) interrupts reported at that level ...
+ * masking is only available from SIH (secondary) modules.
+ */
+
+static void twl4030_i2c_ackirq(unsigned int irq)
+{
+}
+
+static void twl4030_i2c_disableint(unsigned int irq)
+{
+}
+
+static void twl4030_i2c_enableint(unsigned int irq)
+{
+}
+
+static struct irq_chip twl4030_irq_chip = {
+	.name	= "twl4030",
+	.ack	= twl4030_i2c_ackirq,
+	.mask	= twl4030_i2c_disableint,
+	.unmask	= twl4030_i2c_enableint,
+};
+
+/*----------------------------------------------------------------------*/
+
+/* Exported Functions */
+
+/**
+ * twl4030_i2c_write - Writes a n bit register in TWL4030
+ * @mod_no: module number
+ * @value: an array of num_bytes+1 containing data to write
+ * @reg: register address (just offset will do)
+ * @num_bytes: number of bytes to transfer
+ *
+ * IMPORTANT: for 'value' parameter: Allocate value num_bytes+1 and
+ * valid data starts at Offset 1.
+ *
+ * Returns the result of operation - 0 is success
+ */
+int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, u8 num_bytes)
+{
+	int ret;
+	int sid;
+	struct twl4030_client *twl;
+	struct i2c_msg *msg;
+
+	if (unlikely(mod_no > TWL4030_MODULE_LAST)) {
+		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
+		return -EPERM;
+	}
+	sid = twl4030_map[mod_no].sid;
+	twl = &twl4030_modules[sid];
+
+	if (unlikely(!inuse)) {
+		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
+		return -EPERM;
+	}
+	mutex_lock(&twl->xfer_lock);
+	/*
+	 * [MSG1]: fill the register address data
+	 * fill the data Tx buffer
+	 */
+	msg = &twl->xfer_msg[0];
+	msg->addr = twl->address;
+	msg->len = num_bytes + 1;
+	msg->flags = 0;
+	msg->buf = value;
+	/* over write the first byte of buffer with the register address */
+	*value = twl4030_map[mod_no].base + reg;
+	ret = i2c_transfer(twl->client->adapter, twl->xfer_msg, 1);
+	mutex_unlock(&twl->xfer_lock);
+
+	/* i2cTransfer returns num messages.translate it pls.. */
+	if (ret >= 0)
+		ret = 0;
+	return ret;
+}
+EXPORT_SYMBOL(twl4030_i2c_write);
+
+/**
+ * twl4030_i2c_read - Reads a n bit register in TWL4030
+ * @mod_no: module number
+ * @value: an array of num_bytes containing data to be read
+ * @reg: register address (just offset will do)
+ * @num_bytes: number of bytes to transfer
+ *
+ * Returns result of operation - num_bytes is success else failure.
+ */
+int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, u8 num_bytes)
+{
+	int ret;
+	u8 val;
+	int sid;
+	struct twl4030_client *twl;
+	struct i2c_msg *msg;
+
+	if (unlikely(mod_no > TWL4030_MODULE_LAST)) {
+		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
+		return -EPERM;
+	}
+	sid = twl4030_map[mod_no].sid;
+	twl = &twl4030_modules[sid];
+
+	if (unlikely(!inuse)) {
+		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
+		return -EPERM;
+	}
+	mutex_lock(&twl->xfer_lock);
+	/* [MSG1] fill the register address data */
+	msg = &twl->xfer_msg[0];
+	msg->addr = twl->address;
+	msg->len = 1;
+	msg->flags = 0;	/* Read the register value */
+	val = twl4030_map[mod_no].base + reg;
+	msg->buf = &val;
+	/* [MSG2] fill the data rx buffer */
+	msg = &twl->xfer_msg[1];
+	msg->addr = twl->address;
+	msg->flags = I2C_M_RD;	/* Read the register value */
+	msg->len = num_bytes;	/* only n bytes */
+	msg->buf = value;
+	ret = i2c_transfer(twl->client->adapter, twl->xfer_msg, 2);
+	mutex_unlock(&twl->xfer_lock);
+
+	/* i2cTransfer returns num messages.translate it pls.. */
+	if (ret >= 0)
+		ret = 0;
+	return ret;
+}
+EXPORT_SYMBOL(twl4030_i2c_read);
+
+/**
+ * twl4030_i2c_write_u8 - Writes a 8 bit register in TWL4030
+ * @mod_no: module number
+ * @value: the value to be written 8 bit
+ * @reg: register address (just offset will do)
+ *
+ * Returns result of operation - 0 is success
+ */
+int twl4030_i2c_write_u8(u8 mod_no, u8 value, u8 reg)
+{
+
+	/* 2 bytes offset 1 contains the data offset 0 is used by i2c_write */
+	u8 temp_buffer[2] = { 0 };
+	/* offset 1 contains the data */
+	temp_buffer[1] = value;
+	return twl4030_i2c_write(mod_no, temp_buffer, reg, 1);
+}
+EXPORT_SYMBOL(twl4030_i2c_write_u8);
+
+/**
+ * twl4030_i2c_read_u8 - Reads a 8 bit register from TWL4030
+ * @mod_no: module number
+ * @value: the value read 8 bit
+ * @reg: register address (just offset will do)
+ *
+ * Returns result of operation - 0 is success
+ */
+int twl4030_i2c_read_u8(u8 mod_no, u8 *value, u8 reg)
+{
+	return twl4030_i2c_read(mod_no, value, reg, 1);
+}
+EXPORT_SYMBOL(twl4030_i2c_read_u8);
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * do_twl4030_module_irq() is the desc->handle method for each of the twl4030
+ * module interrupts that doesn't chain to another irq_chip (GPIO, power, etc).
+ * It executes in kernel thread context.  On entry, cpu interrupts are disabled.
+ */
+static void do_twl4030_module_irq(unsigned int irq, irq_desc_t *desc)
+{
+	struct irqaction *action;
+	const unsigned int cpu = smp_processor_id();
+
+	/*
+	 * Earlier this was desc->triggered = 1;
+	 */
+	desc->status |= IRQ_LEVEL;
+
+	/*
+	 * The desc->handle method would normally call the desc->chip->ack
+	 * method here, but we won't bother since our ack method is NULL.
+	 */
+
+	if (!desc->depth) {
+		kstat_cpu(cpu).irqs[irq]++;
+
+		action = desc->action;
+		if (action) {
+			int ret;
+			int status = 0;
+			int retval = 0;
+
+			local_irq_enable();
+
+			do {
+				/* Call the ISR with cpu interrupts enabled */
+				ret = action->handler(irq, action->dev_id);
+				if (ret == IRQ_HANDLED)
+					status |= action->flags;
+				retval |= ret;
+				action = action->next;
+			} while (action);
+
+			if (status & IRQF_SAMPLE_RANDOM)
+				add_interrupt_randomness(irq);
+
+			local_irq_disable();
+
+			if (retval != IRQ_HANDLED)
+				printk(KERN_ERR "ISR for TWL4030 module"
+					" irq %d can't handle interrupt\n",
+					irq);
+
+			/*
+			 * Here is where we should call the unmask method, but
+			 * again we won't bother since it is NULL.
+			 */
+		} else
+			printk(KERN_CRIT "TWL4030 module irq %d has no ISR"
+					" but can't be masked!\n", irq);
+	} else
+		printk(KERN_CRIT "TWL4030 module irq %d is disabled but can't"
+				" be masked!\n", irq);
+}
+
+static unsigned twl4030_irq_base;
+
+static struct completion irq_event;
+
+/*
+ * This thread processes interrupts reported by the Primary Interrupt Handler.
+ */
+static int twl4030_irq_thread(void *data)
+{
+	long irq = (long)data;
+	irq_desc_t *desc = irq_desc + irq;
+	static unsigned i2c_errors;
+	const static unsigned max_i2c_errors = 100;
+
+	daemonize("twl4030-irq");
+	current->flags |= PF_NOFREEZE;
+
+	while (!kthread_should_stop()) {
+		int ret;
+		int module_irq;
+		u8 pih_isr;
+
+		/* Wait for IRQ, then read PIH irq status (also blocking) */
+		wait_for_completion_interruptible(&irq_event);
+
+		ret = twl4030_i2c_read_u8(TWL4030_MODULE_PIH, &pih_isr,
+					  REG_PIH_ISR_P1);
+		if (ret) {
+			pr_warning("%s: I2C error %d reading PIH ISR\n",
+					DRIVER_NAME, ret);
+			if (++i2c_errors >= max_i2c_errors) {
+				printk(KERN_ERR "Maximum I2C error count"
+						" exceeded.  Terminating %s.\n",
+						__func__);
+				break;
+			}
+			complete(&irq_event);
+			continue;
+		}
+
+		/* these handlers deal with the relevant SIH irq status */
+		local_irq_disable();
+		for (module_irq = twl4030_irq_base;
+				pih_isr;
+				pih_isr >>= 1, module_irq++) {
+			if (pih_isr & 0x1) {
+				irq_desc_t *d = irq_desc + module_irq;
+
+				d->handle_irq(module_irq, d);
+			}
+		}
+		local_irq_enable();
+
+		desc->chip->unmask(irq);
+	}
+
+	return 0;
+}
+
+/*
+ * do_twl4030_irq() is the desc->handle method for the twl4030 interrupt.
+ * This is a chained interrupt, so there is no desc->action method for it.
+ * Now we need to query the interrupt controller in the twl4030 to determine
+ * which module is generating the interrupt request.  However, we can't do i2c
+ * transactions in interrupt context, so we must defer that work to a kernel
+ * thread.  All we do here is acknowledge and mask the interrupt and wakeup
+ * the kernel thread.
+ */
+static void do_twl4030_irq(unsigned int irq, irq_desc_t *desc)
+{
+	const unsigned int cpu = smp_processor_id();
+
+	/*
+	 * Earlier this was desc->triggered = 1;
+	 */
+	desc->status |= IRQ_LEVEL;
+
+	/*
+	 * Acknowledge, clear _AND_ disable the interrupt.
+	 */
+	desc->chip->ack(irq);
+
+	if (!desc->depth) {
+		kstat_cpu(cpu).irqs[irq]++;
+
+		complete(&irq_event);
+	}
+}
+
+static struct task_struct * __init start_twl4030_irq_thread(long irq)
+{
+	struct task_struct *thread;
+
+	init_completion(&irq_event);
+	thread = kthread_run(twl4030_irq_thread, (void *)irq,
+			     "twl4030 irq %ld", irq);
+	if (!thread)
+		pr_err("%s: could not create twl4030 irq %ld thread!\n",
+		       DRIVER_NAME, irq);
+
+	return thread;
+}
+
+/*----------------------------------------------------------------------*/
+
+static int add_children(struct twl4030_platform_data *pdata)
+{
+	struct platform_device	*pdev = NULL;
+	struct twl4030_client	*twl = NULL;
+	int			status = 0;
+
+	if (twl_has_bci() && pdata->bci) {
+		twl = &twl4030_modules[3];
+
+		pdev = platform_device_alloc("twl4030_bci", -1);
+		if (!pdev) {
+			pr_debug("%s: can't alloc bci dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+			goto err;
+		}
+
+		if (status == 0) {
+			pdev->dev.parent = &twl->client->dev;
+			status = platform_device_add_data(pdev, pdata->bci,
+					sizeof(*pdata->bci));
+			if (status < 0) {
+				dev_dbg(&twl->client->dev,
+					"can't add bci data, %d\n",
+					status);
+				goto err;
+			}
+		}
+
+		if (status == 0) {
+			struct resource r = {
+				.start = TWL4030_PWRIRQ_CHG_PRES,
+				.flags = IORESOURCE_IRQ,
+			};
+
+			status = platform_device_add_resources(pdev, &r, 1);
+		}
+
+		if (status == 0)
+			status = platform_device_add(pdev);
+
+		if (status < 0) {
+			platform_device_put(pdev);
+			dev_dbg(&twl->client->dev,
+					"can't create bci dev, %d\n",
+					status);
+			goto err;
+		}
+	}
+
+	if (twl_has_gpio() && pdata->gpio) {
+		twl = &twl4030_modules[1];
+
+		pdev = platform_device_alloc("twl4030_gpio", -1);
+		if (!pdev) {
+			pr_debug("%s: can't alloc gpio dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+			goto err;
+		}
+
+		/* more driver model init */
+		if (status == 0) {
+			pdev->dev.parent = &twl->client->dev;
+			/* device_init_wakeup(&pdev->dev, 1); */
+
+			status = platform_device_add_data(pdev, pdata->gpio,
+					sizeof(*pdata->gpio));
+			if (status < 0) {
+				dev_dbg(&twl->client->dev,
+					"can't add gpio data, %d\n",
+					status);
+				goto err;
+			}
+		}
+
+		/* GPIO module IRQ */
+		if (status == 0) {
+			struct resource	r = {
+				.start = pdata->irq_base + 0,
+				.flags = IORESOURCE_IRQ,
+			};
+
+			status = platform_device_add_resources(pdev, &r, 1);
+		}
+
+		if (status == 0)
+			status = platform_device_add(pdev);
+
+		if (status < 0) {
+			platform_device_put(pdev);
+			dev_dbg(&twl->client->dev,
+					"can't create gpio dev, %d\n",
+					status);
+			goto err;
+		}
+	}
+
+	if (twl_has_keypad() && pdata->keypad) {
+		pdev = platform_device_alloc("twl4030_keypad", -1);
+		if (pdev) {
+			twl = &twl4030_modules[2];
+			pdev->dev.parent = &twl->client->dev;
+			device_init_wakeup(&pdev->dev, 1);
+			status = platform_device_add_data(pdev, pdata->keypad,
+					sizeof(*pdata->keypad));
+			if (status < 0) {
+				dev_dbg(&twl->client->dev,
+					"can't add keypad data, %d\n",
+					status);
+				platform_device_put(pdev);
+				goto err;
+			}
+			status = platform_device_add(pdev);
+			if (status < 0) {
+				platform_device_put(pdev);
+				dev_dbg(&twl->client->dev,
+						"can't create keypad dev, %d\n",
+						status);
+				goto err;
+			}
+		} else {
+			pr_debug("%s: can't alloc keypad dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+			goto err;
+		}
+	}
+
+	if (twl_has_madc() && pdata->madc) {
+		pdev = platform_device_alloc("twl4030_madc", -1);
+		if (pdev) {
+			twl = &twl4030_modules[2];
+			pdev->dev.parent = &twl->client->dev;
+			device_init_wakeup(&pdev->dev, 1);
+			status = platform_device_add_data(pdev, pdata->madc,
+					sizeof(*pdata->madc));
+			if (status < 0) {
+				platform_device_put(pdev);
+				dev_dbg(&twl->client->dev,
+					"can't add madc data, %d\n",
+					status);
+				goto err;
+			}
+			status = platform_device_add(pdev);
+			if (status < 0) {
+				platform_device_put(pdev);
+				dev_dbg(&twl->client->dev,
+						"can't create madc dev, %d\n",
+						status);
+				goto err;
+			}
+		} else {
+			pr_debug("%s: can't alloc madc dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+			goto err;
+		}
+	}
+
+	if (twl_has_rtc()) {
+		twl = &twl4030_modules[3];
+
+		pdev = platform_device_alloc("twl4030_rtc", -1);
+		if (!pdev) {
+			pr_debug("%s: can't alloc rtc dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+		} else {
+			pdev->dev.parent = &twl->client->dev;
+			device_init_wakeup(&pdev->dev, 1);
+		}
+
+		/*
+		 * REVISIT platform_data here currently might use of
+		 * "msecure" line ... but for now we just expect board
+		 * setup to tell the chip "we are secure" at all times.
+		 * Eventually, Linux might become more aware of such
+		 * HW security concerns, and "least privilege".
+		 */
+
+		/* RTC module IRQ */
+		if (status == 0) {
+			struct resource	r = {
+				/* REVISIT don't hard-wire this stuff */
+				.start = TWL4030_PWRIRQ_RTC,
+				.flags = IORESOURCE_IRQ,
+			};
+
+			status = platform_device_add_resources(pdev, &r, 1);
+		}
+
+		if (status == 0)
+			status = platform_device_add(pdev);
+
+		if (status < 0) {
+			platform_device_put(pdev);
+			dev_dbg(&twl->client->dev,
+					"can't create rtc dev, %d\n",
+					status);
+			goto err;
+		}
+	}
+
+	if (twl_has_usb() && pdata->usb) {
+		twl = &twl4030_modules[0];
+
+		pdev = platform_device_alloc("twl4030_usb", -1);
+		if (!pdev) {
+			pr_debug("%s: can't alloc usb dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+			goto err;
+		}
+
+		if (status == 0) {
+			pdev->dev.parent = &twl->client->dev;
+			device_init_wakeup(&pdev->dev, 1);
+			status = platform_device_add_data(pdev, pdata->usb,
+					sizeof(*pdata->usb));
+			if (status < 0) {
+				platform_device_put(pdev);
+				dev_dbg(&twl->client->dev,
+					"can't add usb data, %d\n",
+					status);
+				goto err;
+			}
+		}
+
+		if (status == 0) {
+			struct resource r = {
+				.start = TWL4030_PWRIRQ_USB_PRES,
+				.flags = IORESOURCE_IRQ,
+			};
+
+			status = platform_device_add_resources(pdev, &r, 1);
+		}
+
+		if (status == 0)
+			status = platform_device_add(pdev);
+
+		if (status < 0) {
+			platform_device_put(pdev);
+			dev_dbg(&twl->client->dev,
+					"can't create usb dev, %d\n",
+					status);
+		}
+	}
+
+err:
+	if (status)
+		pr_err("failed to add twl4030's children (status %d)\n", status);
+	return status;
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * These three functions initialize the on-chip clock framework,
+ * letting it generate the right frequencies for USB, MADC, and
+ * other purposes.
+ */
+static inline int __init protect_pm_master(void)
+{
+	int e = 0;
+
+	e = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_LOCK,
+			R_PROTECT_KEY);
+	return e;
+}
+
+static inline int __init unprotect_pm_master(void)
+{
+	int e = 0;
+
+	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_UNLOCK1,
+			R_PROTECT_KEY);
+	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_UNLOCK2,
+			R_PROTECT_KEY);
+	return e;
+}
+
+static void __init clocks_init(void)
+{
+	int e = 0;
+	struct clk *osc;
+	u32 rate;
+	u8 ctrl = HFCLK_FREQ_26_MHZ;
+
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+	if (cpu_is_omap2430())
+		osc = clk_get(NULL, "osc_ck");
+	else
+		osc = clk_get(NULL, "osc_sys_ck");
+#else
+	/* REVISIT for non-OMAP systems, pass the clock rate from
+	 * board init code, using platform_data.
+	 */
+	osc = ERR_PTR(-EIO);
+#endif
+	if (IS_ERR(osc)) {
+		printk(KERN_WARNING "Skipping twl4030 internal clock init and "
+				"using bootloader value (unknown osc rate)\n");
+		return;
+	}
+
+	rate = clk_get_rate(osc);
+	clk_put(osc);
+
+	switch (rate) {
+	case 19200000:
+		ctrl = HFCLK_FREQ_19p2_MHZ;
+		break;
+	case 26000000:
+		ctrl = HFCLK_FREQ_26_MHZ;
+		break;
+	case 38400000:
+		ctrl = HFCLK_FREQ_38p4_MHZ;
+		break;
+	}
+
+	ctrl |= HIGH_PERF_SQ;
+	e |= unprotect_pm_master();
+	/* effect->MADC+USB ck en */
+	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, ctrl, R_CFG_BOOT);
+	e |= protect_pm_master();
+
+	if (e < 0)
+		pr_err("%s: clock init err [%d]\n", DRIVER_NAME, e);
+}
+
+/*----------------------------------------------------------------------*/
+
+/**
+ * twl4030_i2c_clear_isr - clear TWL4030 SIH ISR regs via read + write
+ * @mod_no: TWL4030 module number
+ * @reg: register index to clear
+ * @cor: value of the <module>_SIH_CTRL.COR bit (1 or 0)
+ *
+ * Either reads (cor == 1) or writes (cor == 0) to a TWL4030 interrupt
+ * status register to ensure that any prior interrupts are cleared.
+ * Returns the status from the I2C read operation.
+ */
+static int __init twl4030_i2c_clear_isr(u8 mod_no, u8 reg, u8 cor)
+{
+	u8 tmp;
+
+	return (cor) ? twl4030_i2c_read_u8(mod_no, &tmp, reg) :
+		twl4030_i2c_write_u8(mod_no, 0xff, reg);
+}
+
+/**
+ * twl4030_read_cor_bit - are TWL module ISRs cleared by reads or writes?
+ * @mod_no: TWL4030 module number
+ * @reg: register index to clear
+ *
+ * Returns 1 if the TWL4030 SIH interrupt status registers (ISRs) for
+ * the specified TWL module are cleared by reads, or 0 if cleared by
+ * writes.
+ */
+static int twl4030_read_cor_bit(u8 mod_no, u8 reg)
+{
+	u8 tmp = 0;
+
+	WARN_ON(twl4030_i2c_read_u8(mod_no, &tmp, reg) < 0);
+
+	tmp &= TWL4030_SIH_CTRL_COR_MASK;
+	tmp >>= __ffs(TWL4030_SIH_CTRL_COR_MASK);
+
+	return tmp;
+}
+
+/**
+ * twl4030_mask_clear_intrs - mask and clear all TWL4030 interrupts
+ * @t: pointer to twl4030_mod_iregs array
+ * @t_sz: ARRAY_SIZE(t) (starting at 1)
+ *
+ * Mask all TWL4030 interrupt mask registers (IMRs) and clear all
+ * interrupt status registers (ISRs).  No return value, but will WARN if
+ * any I2C operations fail.
+ */
+static void __init twl4030_mask_clear_intrs(const struct twl4030_mod_iregs *t,
+					    const u8 t_sz)
+{
+	int i, j;
+
+	/*
+	 * N.B. - further efficiency is possible here.  Eight I2C
+	 * operations on BCI and GPIO modules are avoidable if I2C
+	 * burst read/write transactions were implemented.  Would
+	 * probably save about 1ms of boot time and a small amount of
+	 * power.
+	 */
+	for (i = 0; i < t_sz; i++) {
+		const struct twl4030_mod_iregs tmr = t[i];
+		int cor;
+
+		/* Are ISRs cleared by reads or writes? */
+		cor = twl4030_read_cor_bit(tmr.mod_no, tmr.sih_ctrl);
+
+		for (j = 0; j < tmr.reg_cnt; j++) {
+
+			/* Mask interrupts at the TWL4030 */
+			WARN_ON(twl4030_i2c_write_u8(tmr.mod_no, 0xff,
+						     tmr.imrs[j]) < 0);
+
+			/* Clear TWL4030 ISRs */
+			WARN_ON(twl4030_i2c_clear_isr(tmr.mod_no,
+						      tmr.isrs[j], cor) < 0);
+		}
+	}
+}
+
+
+static void twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
+{
+	int	i;
+
+	/*
+	 * Mask and clear all TWL4030 interrupts since initially we do
+	 * not have any TWL4030 module interrupt handlers present
+	 */
+	twl4030_mask_clear_intrs(twl4030_mod_regs,
+				 ARRAY_SIZE(twl4030_mod_regs));
+
+	twl4030_irq_base = irq_base;
+
+	/* install an irq handler for each of the PIH modules */
+	for (i = irq_base; i < irq_end; i++) {
+		set_irq_chip_and_handler(i, &twl4030_irq_chip,
+				do_twl4030_module_irq);
+		activate_irq(i);
+	}
+
+	/* install an irq handler to demultiplex the TWL4030 interrupt */
+	set_irq_data(irq_num, start_twl4030_irq_thread(irq_num));
+	set_irq_chained_handler(irq_num, do_twl4030_irq);
+}
+
+/*----------------------------------------------------------------------*/
+
+static int twl4030_remove(struct i2c_client *client)
+{
+	unsigned i;
+
+	/* FIXME undo twl_init_irq() */
+	if (twl4030_irq_base) {
+		dev_err(&client->dev, "can't yet clean up IRQs?\n");
+		return -ENOSYS;
+	}
+
+	for (i = 0; i < TWL4030_NUM_SLAVES; i++) {
+		struct twl4030_client	*twl = &twl4030_modules[i];
+
+		if (twl->client && twl->client != client)
+			i2c_unregister_device(twl->client);
+		twl4030_modules[i].client = NULL;
+	}
+	inuse = false;
+	return 0;
+}
+
+/* NOTE:  this driver only handles a single twl4030/tps659x0 chip */
+static int
+twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	int				status;
+	unsigned			i;
+	struct twl4030_platform_data	*pdata = client->dev.platform_data;
+
+	if (!pdata) {
+		dev_dbg(&client->dev, "no platform data?\n");
+		return -EINVAL;
+	}
+
+	if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C) == 0) {
+		dev_dbg(&client->dev, "can't talk I2C?\n");
+		return -EIO;
+	}
+
+	if (inuse || twl4030_irq_base) {
+		dev_dbg(&client->dev, "driver is already in use\n");
+		return -EBUSY;
+	}
+
+	for (i = 0; i < TWL4030_NUM_SLAVES; i++) {
+		struct twl4030_client	*twl = &twl4030_modules[i];
+
+		twl->address = client->addr + i;
+		if (i == 0)
+			twl->client = client;
+		else {
+			twl->client = i2c_new_dummy(client->adapter,
+					twl->address);
+			if (!twl->client) {
+				dev_err(&twl->client->dev,
+					"can't attach client %d\n", i);
+				status = -ENOMEM;
+				goto fail;
+			}
+			strlcpy(twl->client->name, id->name,
+					sizeof(twl->client->name));
+		}
+		mutex_init(&twl->xfer_lock);
+	}
+	inuse = true;
+
+	/* setup clock framework */
+	clocks_init();
+
+	/* Maybe init the T2 Interrupt subsystem */
+	if (client->irq
+			&& pdata->irq_base
+			&& pdata->irq_end > pdata->irq_base) {
+		twl_init_irq(client->irq, pdata->irq_base, pdata->irq_end);
+		dev_info(&client->dev, "IRQ %d chains IRQs %d..%d\n",
+				client->irq, pdata->irq_base, pdata->irq_end - 1);
+	}
+
+	status = add_children(pdata);
+fail:
+	if (status < 0)
+		twl4030_remove(client);
+	return status;
+}
+
+static const struct i2c_device_id twl4030_ids[] = {
+	{ "twl4030", 0 },	/* "Triton 2" */
+	{ "tps65950", 0 },	/* catalog version of twl4030 */
+	{ "tps65930", 0 },	/* fewer LDOs and DACs; no charger */
+	{ "tps65920", 0 },	/* fewer LDOs; no codec or charger */
+	{ "twl5030", 0 },	/* T2 updated */
+	{ /* end of list */ },
+};
+MODULE_DEVICE_TABLE(i2c, twl4030_ids);
+
+/* One Client Driver , 4 Clients */
+static struct i2c_driver twl4030_driver = {
+	.driver.name	= DRIVER_NAME,
+	.id_table	= twl4030_ids,
+	.probe		= twl4030_probe,
+	.remove		= twl4030_remove,
+};
+
+static int __init twl4030_init(void)
+{
+	return i2c_add_driver(&twl4030_driver);
+}
+subsys_initcall(twl4030_init);
+
+static void __exit twl4030_exit(void)
+{
+	i2c_del_driver(&twl4030_driver);
+}
+module_exit(twl4030_exit);
+
+MODULE_AUTHOR("Texas Instruments, Inc.");
+MODULE_DESCRIPTION("I2C Core interface for TWL4030");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
new file mode 100644
index 000000000000..cdb453162a97
--- /dev/null
+++ b/include/linux/i2c/twl4030.h
@@ -0,0 +1,339 @@
+/*
+ * twl4030.h - header for TWL4030 PM and audio CODEC device
+ *
+ * Copyright (C) 2005-2006 Texas Instruments, Inc.
+ *
+ * Based on tlv320aic23.c:
+ * Copyright (c) by Kai Svahn <kai.svahn@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ */
+
+#ifndef __TWL4030_H_
+#define __TWL4030_H_
+
+/*
+ * Using the twl4030 core we address registers using a pair
+ *	{ module id, relative register offset }
+ * which that core then maps to the relevant
+ *	{ i2c slave, absolute register address }
+ *
+ * The module IDs are meaningful only to the twl4030 core code,
+ * which uses them as array indices to look up the first register
+ * address each module uses within a given i2c slave.
+ */
+
+/* Slave 0 (i2c address 0x48) */
+#define TWL4030_MODULE_USB		0x00
+
+/* Slave 1 (i2c address 0x49) */
+#define TWL4030_MODULE_AUDIO_VOICE	0x01
+#define TWL4030_MODULE_GPIO		0x02
+#define TWL4030_MODULE_INTBR		0x03
+#define TWL4030_MODULE_PIH		0x04
+#define TWL4030_MODULE_TEST		0x05
+
+/* Slave 2 (i2c address 0x4a) */
+#define TWL4030_MODULE_KEYPAD		0x06
+#define TWL4030_MODULE_MADC		0x07
+#define TWL4030_MODULE_INTERRUPTS	0x08
+#define TWL4030_MODULE_LED		0x09
+#define TWL4030_MODULE_MAIN_CHARGE	0x0A
+#define TWL4030_MODULE_PRECHARGE	0x0B
+#define TWL4030_MODULE_PWM0		0x0C
+#define TWL4030_MODULE_PWM1		0x0D
+#define TWL4030_MODULE_PWMA		0x0E
+#define TWL4030_MODULE_PWMB		0x0F
+
+/* Slave 3 (i2c address 0x4b) */
+#define TWL4030_MODULE_BACKUP		0x10
+#define TWL4030_MODULE_INT		0x11
+#define TWL4030_MODULE_PM_MASTER	0x12
+#define TWL4030_MODULE_PM_RECEIVER	0x13
+#define TWL4030_MODULE_RTC		0x14
+#define TWL4030_MODULE_SECURED_REG	0x15
+
+/*
+ * Read and write single 8-bit registers
+ */
+int twl4030_i2c_write_u8(u8 mod_no, u8 val, u8 reg);
+int twl4030_i2c_read_u8(u8 mod_no, u8 *val, u8 reg);
+
+/*
+ * Read and write several 8-bit registers at once.
+ *
+ * IMPORTANT:  For twl4030_i2c_write(), allocate num_bytes + 1
+ * for the value, and populate your data starting at offset 1.
+ */
+int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, u8 num_bytes);
+int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, u8 num_bytes);
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * NOTE:  at up to 1024 registers, this is a big chip.
+ *
+ * Avoid putting register declarations in this file, instead of into
+ * a driver-private file, unless some of the registers in a block
+ * need to be shared with other drivers.  One example is blocks that
+ * have Secondary IRQ Handler (SIH) registers.
+ */
+
+#define TWL4030_SIH_CTRL_EXCLEN_MASK	BIT(0)
+#define TWL4030_SIH_CTRL_PENDDIS_MASK	BIT(1)
+#define TWL4030_SIH_CTRL_COR_MASK	BIT(2)
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * GPIO Block Register offsets (use TWL4030_MODULE_GPIO)
+ */
+
+#define REG_GPIODATAIN1			0x0
+#define REG_GPIODATAIN2			0x1
+#define REG_GPIODATAIN3			0x2
+#define REG_GPIODATADIR1		0x3
+#define REG_GPIODATADIR2		0x4
+#define REG_GPIODATADIR3		0x5
+#define REG_GPIODATAOUT1		0x6
+#define REG_GPIODATAOUT2		0x7
+#define REG_GPIODATAOUT3		0x8
+#define REG_CLEARGPIODATAOUT1		0x9
+#define REG_CLEARGPIODATAOUT2		0xA
+#define REG_CLEARGPIODATAOUT3		0xB
+#define REG_SETGPIODATAOUT1		0xC
+#define REG_SETGPIODATAOUT2		0xD
+#define REG_SETGPIODATAOUT3		0xE
+#define REG_GPIO_DEBEN1			0xF
+#define REG_GPIO_DEBEN2			0x10
+#define REG_GPIO_DEBEN3			0x11
+#define REG_GPIO_CTRL			0x12
+#define REG_GPIOPUPDCTR1		0x13
+#define REG_GPIOPUPDCTR2		0x14
+#define REG_GPIOPUPDCTR3		0x15
+#define REG_GPIOPUPDCTR4		0x16
+#define REG_GPIOPUPDCTR5		0x17
+#define REG_GPIO_ISR1A			0x19
+#define REG_GPIO_ISR2A			0x1A
+#define REG_GPIO_ISR3A			0x1B
+#define REG_GPIO_IMR1A			0x1C
+#define REG_GPIO_IMR2A			0x1D
+#define REG_GPIO_IMR3A			0x1E
+#define REG_GPIO_ISR1B			0x1F
+#define REG_GPIO_ISR2B			0x20
+#define REG_GPIO_ISR3B			0x21
+#define REG_GPIO_IMR1B			0x22
+#define REG_GPIO_IMR2B			0x23
+#define REG_GPIO_IMR3B			0x24
+#define REG_GPIO_EDR1			0x28
+#define REG_GPIO_EDR2			0x29
+#define REG_GPIO_EDR3			0x2A
+#define REG_GPIO_EDR4			0x2B
+#define REG_GPIO_EDR5			0x2C
+#define REG_GPIO_SIH_CTRL		0x2D
+
+/* Up to 18 signals are available as GPIOs, when their
+ * pins are not assigned to another use (such as ULPI/USB).
+ */
+#define TWL4030_GPIO_MAX		18
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Keypad register offsets (use TWL4030_MODULE_KEYPAD)
+ * ... SIH/interrupt only
+ */
+
+#define TWL4030_KEYPAD_KEYP_ISR1	0x11
+#define TWL4030_KEYPAD_KEYP_IMR1	0x12
+#define TWL4030_KEYPAD_KEYP_ISR2	0x13
+#define TWL4030_KEYPAD_KEYP_IMR2	0x14
+#define TWL4030_KEYPAD_KEYP_SIR		0x15	/* test register */
+#define TWL4030_KEYPAD_KEYP_EDR		0x16
+#define TWL4030_KEYPAD_KEYP_SIH_CTRL	0x17
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Multichannel ADC register offsets (use TWL4030_MODULE_MADC)
+ * ... SIH/interrupt only
+ */
+
+#define TWL4030_MADC_ISR1		0x61
+#define TWL4030_MADC_IMR1		0x62
+#define TWL4030_MADC_ISR2		0x63
+#define TWL4030_MADC_IMR2		0x64
+#define TWL4030_MADC_SIR		0x65	/* test register */
+#define TWL4030_MADC_EDR		0x66
+#define TWL4030_MADC_SIH_CTRL		0x67
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Battery charger register offsets (use TWL4030_MODULE_INTERRUPTS)
+ */
+
+#define TWL4030_INTERRUPTS_BCIISR1A	0x0
+#define TWL4030_INTERRUPTS_BCIISR2A	0x1
+#define TWL4030_INTERRUPTS_BCIIMR1A	0x2
+#define TWL4030_INTERRUPTS_BCIIMR2A	0x3
+#define TWL4030_INTERRUPTS_BCIISR1B	0x4
+#define TWL4030_INTERRUPTS_BCIISR2B	0x5
+#define TWL4030_INTERRUPTS_BCIIMR1B	0x6
+#define TWL4030_INTERRUPTS_BCIIMR2B	0x7
+#define TWL4030_INTERRUPTS_BCISIR1	0x8	/* test register */
+#define TWL4030_INTERRUPTS_BCISIR2	0x9	/* test register */
+#define TWL4030_INTERRUPTS_BCIEDR1	0xa
+#define TWL4030_INTERRUPTS_BCIEDR2	0xb
+#define TWL4030_INTERRUPTS_BCIEDR3	0xc
+#define TWL4030_INTERRUPTS_BCISIHCTRL	0xd
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Power Interrupt block register offsets (use TWL4030_MODULE_INT)
+ */
+
+#define TWL4030_INT_PWR_ISR1		0x0
+#define TWL4030_INT_PWR_IMR1		0x1
+#define TWL4030_INT_PWR_ISR2		0x2
+#define TWL4030_INT_PWR_IMR2		0x3
+#define TWL4030_INT_PWR_SIR		0x4	/* test register */
+#define TWL4030_INT_PWR_EDR1		0x5
+#define TWL4030_INT_PWR_EDR2		0x6
+#define TWL4030_INT_PWR_SIH_CTRL	0x7
+
+/*----------------------------------------------------------------------*/
+
+struct twl4030_bci_platform_data {
+	int *battery_tmp_tbl;
+	unsigned int tblsize;
+};
+
+/* TWL4030_GPIO_MAX (18) GPIOs, with interrupts */
+struct twl4030_gpio_platform_data {
+	int		gpio_base;
+	unsigned	irq_base, irq_end;
+
+	/* For gpio-N, bit (1 << N) in "pullups" is set if that pullup
+	 * should be enabled.  Else, if that bit is set in "pulldowns",
+	 * that pulldown is enabled.  Don't waste power by letting any
+	 * digital inputs float...
+	 */
+	u32		pullups;
+	u32		pulldowns;
+
+	int		(*setup)(struct device *dev,
+				unsigned gpio, unsigned ngpio);
+	int		(*teardown)(struct device *dev,
+				unsigned gpio, unsigned ngpio);
+};
+
+struct twl4030_madc_platform_data {
+	int		irq_line;
+};
+
+struct twl4030_keypad_data {
+	int rows;
+	int cols;
+	int *keymap;
+	int irq;
+	unsigned int keymapsize;
+	unsigned int rep:1;
+};
+
+enum twl4030_usb_mode {
+	T2_USB_MODE_ULPI = 1,
+	T2_USB_MODE_CEA2011_3PIN = 2,
+};
+
+struct twl4030_usb_data {
+	enum twl4030_usb_mode	usb_mode;
+};
+
+struct twl4030_platform_data {
+	unsigned				irq_base, irq_end;
+	struct twl4030_bci_platform_data	*bci;
+	struct twl4030_gpio_platform_data	*gpio;
+	struct twl4030_madc_platform_data	*madc;
+	struct twl4030_keypad_data		*keypad;
+	struct twl4030_usb_data			*usb;
+
+	/* REVISIT more to come ... _nothing_ should be hard-wired */
+};
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * FIXME completely stop using TWL4030_IRQ_BASE ... instead, pass the
+ * IRQ data to subsidiary devices using platform device resources.
+ */
+
+/* IRQ information-need base */
+#include <mach/irqs.h>
+/* TWL4030 interrupts */
+
+/* #define TWL4030_MODIRQ_GPIO		(TWL4030_IRQ_BASE + 0) */
+#define TWL4030_MODIRQ_KEYPAD		(TWL4030_IRQ_BASE + 1)
+#define TWL4030_MODIRQ_BCI		(TWL4030_IRQ_BASE + 2)
+#define TWL4030_MODIRQ_MADC		(TWL4030_IRQ_BASE + 3)
+/* #define TWL4030_MODIRQ_USB		(TWL4030_IRQ_BASE + 4) */
+#define TWL4030_MODIRQ_PWR		(TWL4030_IRQ_BASE + 5)
+
+#define TWL4030_PWRIRQ_PWRBTN		(TWL4030_PWR_IRQ_BASE + 0)
+#define TWL4030_PWRIRQ_CHG_PRES		(TWL4030_PWR_IRQ_BASE + 1)
+#define TWL4030_PWRIRQ_USB_PRES		(TWL4030_PWR_IRQ_BASE + 2)
+#define TWL4030_PWRIRQ_RTC		(TWL4030_PWR_IRQ_BASE + 3)
+#define TWL4030_PWRIRQ_HOT_DIE		(TWL4030_PWR_IRQ_BASE + 4)
+#define TWL4030_PWRIRQ_PWROK_TIMEOUT	(TWL4030_PWR_IRQ_BASE + 5)
+#define TWL4030_PWRIRQ_MBCHG		(TWL4030_PWR_IRQ_BASE + 6)
+#define TWL4030_PWRIRQ_SC_DETECT	(TWL4030_PWR_IRQ_BASE + 7)
+
+/* Rest are unsued currently*/
+
+/* Offsets to Power Registers */
+#define TWL4030_VDAC_DEV_GRP		0x3B
+#define TWL4030_VDAC_DEDICATED		0x3E
+#define TWL4030_VAUX1_DEV_GRP		0x17
+#define TWL4030_VAUX1_DEDICATED		0x1A
+#define TWL4030_VAUX2_DEV_GRP		0x1B
+#define TWL4030_VAUX2_DEDICATED		0x1E
+#define TWL4030_VAUX3_DEV_GRP		0x1F
+#define TWL4030_VAUX3_DEDICATED		0x22
+
+/* TWL4030 GPIO interrupt definitions */
+
+#define TWL4030_GPIO_IRQ_NO(n)		(TWL4030_GPIO_IRQ_BASE + (n))
+#define TWL4030_GPIO_IS_ENABLE		1
+
+/*
+ * Exported TWL4030 GPIO APIs
+ *
+ * WARNING -- use standard GPIO and IRQ calls instead; these will vanish.
+ */
+int twl4030_get_gpio_datain(int gpio);
+int twl4030_request_gpio(int gpio);
+int twl4030_set_gpio_debounce(int gpio, int enable);
+int twl4030_free_gpio(int gpio);
+
+#if defined(CONFIG_TWL4030_BCI_BATTERY) || \
+	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
+	extern int twl4030charger_usb_en(int enable);
+#else
+	static inline int twl4030charger_usb_en(int enable) { return 0; }
+#endif
+
+#endif /* End of __TWL4030_H */
-- 
GitLab


From 26b8f5e1e2d1229c186d8e61d26513c43a058c5e Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Wed, 15 Oct 2008 12:20:06 +0200
Subject: [PATCH 635/895] mfd: add base support for Dialog DA9030/DA9034 PMICs

DA9030 (a.k.a ARAVA) and DA9034 (a.k.a MICCO) are PMICs designed by
Dialog Semiconductor, usually found on PXA-based platforms. These
PMICs are I2C-based, multi-function devices, usually with LEDs, PWMs
for backlight, BUCKs and LDOs, ADCs and touchscreen controller (on
DA9034).

This is the base support for the I2C operations, event registration
and handling, sub-devices management.

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>
Signed-off-by: Liam Girdwood <lrg@kernel.org>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/da903x.c       | 563 +++++++++++++++++++++++++++++++++++++
 include/linux/mfd/da903x.h | 201 +++++++++++++
 2 files changed, 764 insertions(+)
 create mode 100644 drivers/mfd/da903x.c
 create mode 100644 include/linux/mfd/da903x.h

diff --git a/drivers/mfd/da903x.c b/drivers/mfd/da903x.c
new file mode 100644
index 000000000000..b57326ae464d
--- /dev/null
+++ b/drivers/mfd/da903x.c
@@ -0,0 +1,563 @@
+/*
+ * Base driver for Dialog Semiconductor DA9030/DA9034
+ *
+ * Copyright (C) 2008 Compulab, Ltd.
+ * 	Mike Rapoport <mike@compulab.co.il>
+ *
+ * Copyright (C) 2006-2008 Marvell International Ltd.
+ * 	Eric Miao <eric.miao@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/mfd/da903x.h>
+
+#define DA9030_CHIP_ID		0x00
+#define DA9030_EVENT_A		0x01
+#define DA9030_EVENT_B		0x02
+#define DA9030_EVENT_C		0x03
+#define DA9030_STATUS		0x04
+#define DA9030_IRQ_MASK_A	0x05
+#define DA9030_IRQ_MASK_B	0x06
+#define DA9030_IRQ_MASK_C	0x07
+#define DA9030_SYS_CTRL_A	0x08
+#define DA9030_SYS_CTRL_B	0x09
+#define DA9030_FAULT_LOG	0x0a
+
+#define DA9034_CHIP_ID		0x00
+#define DA9034_EVENT_A		0x01
+#define DA9034_EVENT_B		0x02
+#define DA9034_EVENT_C		0x03
+#define DA9034_EVENT_D		0x04
+#define DA9034_STATUS_A		0x05
+#define DA9034_STATUS_B		0x06
+#define DA9034_IRQ_MASK_A	0x07
+#define DA9034_IRQ_MASK_B	0x08
+#define DA9034_IRQ_MASK_C	0x09
+#define DA9034_IRQ_MASK_D	0x0a
+#define DA9034_SYS_CTRL_A	0x0b
+#define DA9034_SYS_CTRL_B	0x0c
+#define DA9034_FAULT_LOG	0x0d
+
+struct da903x_chip;
+
+struct da903x_chip_ops {
+	int	(*init_chip)(struct da903x_chip *);
+	int	(*unmask_events)(struct da903x_chip *, unsigned int events);
+	int	(*mask_events)(struct da903x_chip *, unsigned int events);
+	int	(*read_events)(struct da903x_chip *, unsigned int *events);
+	int	(*read_status)(struct da903x_chip *, unsigned int *status);
+};
+
+struct da903x_chip {
+	struct i2c_client	*client;
+	struct device		*dev;
+	struct da903x_chip_ops	*ops;
+
+	int			type;
+	uint32_t		events_mask;
+
+	struct mutex		lock;
+	struct work_struct	irq_work;
+
+	struct blocking_notifier_head notifier_list;
+};
+
+static inline int __da903x_read(struct i2c_client *client,
+				int reg, uint8_t *val)
+{
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, reg);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed reading at 0x%02x\n", reg);
+		return ret;
+	}
+
+	*val = (uint8_t)ret;
+	return 0;
+}
+
+static inline int __da903x_reads(struct i2c_client *client, int reg,
+				 int len, uint8_t *val)
+{
+	int ret;
+
+	ret = i2c_smbus_read_i2c_block_data(client, reg, len, val);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed reading from 0x%02x\n", reg);
+		return ret;
+	}
+	return 0;
+}
+
+static inline int __da903x_write(struct i2c_client *client,
+				 int reg, uint8_t val)
+{
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(client, reg, val);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed writing 0x%02x to 0x%02x\n",
+				val, reg);
+		return ret;
+	}
+	return 0;
+}
+
+static inline int __da903x_writes(struct i2c_client *client, int reg,
+				  int len, uint8_t *val)
+{
+	int ret;
+
+	ret = i2c_smbus_write_i2c_block_data(client, reg, len, val);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed writings to 0x%02x\n", reg);
+		return ret;
+	}
+	return 0;
+}
+
+int da903x_register_notifier(struct device *dev, struct notifier_block *nb,
+				unsigned int events)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+
+	chip->ops->unmask_events(chip, events);
+	return blocking_notifier_chain_register(&chip->notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(da903x_register_notifier);
+
+int da903x_unregister_notifier(struct device *dev, struct notifier_block *nb,
+				unsigned int events)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+
+	chip->ops->mask_events(chip, events);
+	return blocking_notifier_chain_unregister(&chip->notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(da903x_unregister_notifier);
+
+int da903x_write(struct device *dev, int reg, uint8_t val)
+{
+	return __da903x_write(to_i2c_client(dev), reg, val);
+}
+EXPORT_SYMBOL_GPL(da903x_write);
+
+int da903x_read(struct device *dev, int reg, uint8_t *val)
+{
+	return __da903x_read(to_i2c_client(dev), reg, val);
+}
+EXPORT_SYMBOL_GPL(da903x_read);
+
+int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+	uint8_t reg_val;
+	int ret = 0;
+
+	mutex_lock(&chip->lock);
+
+	ret = __da903x_read(chip->client, reg, &reg_val);
+	if (ret)
+		goto out;
+
+	if ((reg_val & bit_mask) == 0) {
+		reg_val |= bit_mask;
+		ret = __da903x_write(chip->client, reg, reg_val);
+	}
+out:
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(da903x_set_bits);
+
+int da903x_clr_bits(struct device *dev, int reg, uint8_t bit_mask)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+	uint8_t reg_val;
+	int ret = 0;
+
+	mutex_lock(&chip->lock);
+
+	ret = __da903x_read(chip->client, reg, &reg_val);
+	if (ret)
+		goto out;
+
+	if (reg_val & bit_mask) {
+		reg_val &= ~bit_mask;
+		ret = __da903x_write(chip->client, reg, reg_val);
+	}
+out:
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(da903x_clr_bits);
+
+int da903x_update(struct device *dev, int reg, uint8_t val, uint8_t mask)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+	uint8_t reg_val;
+	int ret = 0;
+
+	mutex_lock(&chip->lock);
+
+	ret = __da903x_read(chip->client, reg, &reg_val);
+	if (ret)
+		goto out;
+
+	if ((reg_val & mask) != val) {
+		reg_val = (reg_val & ~mask) | val;
+		ret = __da903x_write(chip->client, reg, reg_val);
+	}
+out:
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(da903x_update);
+
+int da903x_query_status(struct device *dev, unsigned int sbits)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+	unsigned int status = 0;
+
+	chip->ops->read_status(chip, &status);
+	return ((status & sbits) == sbits);
+}
+EXPORT_SYMBOL(da903x_query_status);
+
+static int __devinit da9030_init_chip(struct da903x_chip *chip)
+{
+	uint8_t chip_id;
+	int err;
+
+	err = __da903x_read(chip->client, DA9030_CHIP_ID, &chip_id);
+	if (err)
+		return err;
+
+	err = __da903x_write(chip->client, DA9030_SYS_CTRL_A, 0xE8);
+	if (err)
+		return err;
+
+	dev_info(chip->dev, "DA9030 (CHIP ID: 0x%02x) detected\n", chip_id);
+	return 0;
+}
+
+static int da9030_unmask_events(struct da903x_chip *chip, unsigned int events)
+{
+	uint8_t v[3];
+
+	chip->events_mask &= ~events;
+
+	v[0] = (chip->events_mask & 0xff);
+	v[1] = (chip->events_mask >> 8) & 0xff;
+	v[2] = (chip->events_mask >> 16) & 0xff;
+
+	return __da903x_writes(chip->client, DA9030_IRQ_MASK_A, 3, v);
+}
+
+static int da9030_mask_events(struct da903x_chip *chip, unsigned int events)
+{
+	uint8_t v[3];
+
+	chip->events_mask &= ~events;
+
+	v[0] = (chip->events_mask & 0xff);
+	v[1] = (chip->events_mask >> 8) & 0xff;
+	v[2] = (chip->events_mask >> 16) & 0xff;
+
+	return __da903x_writes(chip->client, DA9030_IRQ_MASK_A, 3, v);
+}
+
+static int da9030_read_events(struct da903x_chip *chip, unsigned int *events)
+{
+	uint8_t v[3] = {0, 0, 0};
+	int ret;
+
+	ret = __da903x_reads(chip->client, DA9030_EVENT_A, 3, v);
+	if (ret < 0)
+		return ret;
+
+	*events = (v[2] << 16) | (v[1] << 8) | v[0];
+	return 0;
+}
+
+static int da9030_read_status(struct da903x_chip *chip, unsigned int *status)
+{
+	return __da903x_read(chip->client, DA9030_STATUS, (uint8_t *)status);
+}
+
+static int da9034_init_chip(struct da903x_chip *chip)
+{
+	uint8_t chip_id;
+	int err;
+
+	err = __da903x_read(chip->client, DA9034_CHIP_ID, &chip_id);
+	if (err)
+		return err;
+
+	err = __da903x_write(chip->client, DA9034_SYS_CTRL_A, 0xE8);
+	if (err)
+		return err;
+
+	/* avoid SRAM power off during sleep*/
+	__da903x_write(chip->client, 0x10, 0x07);
+	__da903x_write(chip->client, 0x11, 0xff);
+	__da903x_write(chip->client, 0x12, 0xff);
+
+	/* Enable the ONKEY power down functionality */
+	__da903x_write(chip->client, DA9034_SYS_CTRL_B, 0x20);
+	__da903x_write(chip->client, DA9034_SYS_CTRL_A, 0x60);
+
+	/* workaround to make LEDs work */
+	__da903x_write(chip->client, 0x90, 0x01);
+	__da903x_write(chip->client, 0xB0, 0x08);
+
+	/* make ADTV1 and SDTV1 effective */
+	__da903x_write(chip->client, 0x20, 0x00);
+
+	dev_info(chip->dev, "DA9034 (CHIP ID: 0x%02x) detected\n", chip_id);
+	return 0;
+}
+
+static int da9034_unmask_events(struct da903x_chip *chip, unsigned int events)
+{
+	uint8_t v[4];
+
+	chip->events_mask &= ~events;
+
+	v[0] = (chip->events_mask & 0xff);
+	v[1] = (chip->events_mask >> 8) & 0xff;
+	v[2] = (chip->events_mask >> 16) & 0xff;
+	v[3] = (chip->events_mask >> 24) & 0xff;
+
+	return __da903x_writes(chip->client, DA9034_IRQ_MASK_A, 4, v);
+}
+
+static int da9034_mask_events(struct da903x_chip *chip, unsigned int events)
+{
+	uint8_t v[4];
+
+	chip->events_mask |= events;
+
+	v[0] = (chip->events_mask & 0xff);
+	v[1] = (chip->events_mask >> 8) & 0xff;
+	v[2] = (chip->events_mask >> 16) & 0xff;
+	v[3] = (chip->events_mask >> 24) & 0xff;
+
+	return __da903x_writes(chip->client, DA9034_IRQ_MASK_A, 4, v);
+}
+
+static int da9034_read_events(struct da903x_chip *chip, unsigned int *events)
+{
+	uint8_t v[4] = {0, 0, 0, 0};
+	int ret;
+
+	ret = __da903x_reads(chip->client, DA9034_EVENT_A, 4, v);
+	if (ret < 0)
+		return ret;
+
+	*events = (v[3] << 24) | (v[2] << 16) | (v[1] << 8) | v[0];
+	return 0;
+}
+
+static int da9034_read_status(struct da903x_chip *chip, unsigned int *status)
+{
+	uint8_t v[2] = {0, 0};
+	int ret = 0;
+
+	ret = __da903x_reads(chip->client, DA9034_STATUS_A, 2, v);
+	if (ret)
+		return ret;
+
+	*status = (v[1] << 8) | v[0];
+	return 0;
+}
+
+static void da903x_irq_work(struct work_struct *work)
+{
+	struct da903x_chip *chip =
+		container_of(work, struct da903x_chip, irq_work);
+	unsigned int events = 0;
+
+	while (1) {
+		if (chip->ops->read_events(chip, &events))
+			break;
+
+		events &= ~chip->events_mask;
+		if (events == 0)
+			break;
+
+		blocking_notifier_call_chain(
+				&chip->notifier_list, events, NULL);
+	}
+	enable_irq(chip->client->irq);
+}
+
+static int da903x_irq_handler(int irq, void *data)
+{
+	struct da903x_chip *chip = data;
+
+	disable_irq_nosync(irq);
+	(void)schedule_work(&chip->irq_work);
+
+	return IRQ_HANDLED;
+}
+
+static struct da903x_chip_ops da903x_ops[] = {
+	[0] = {
+		.init_chip	= da9030_init_chip,
+		.unmask_events	= da9030_unmask_events,
+		.mask_events	= da9030_mask_events,
+		.read_events	= da9030_read_events,
+		.read_status	= da9030_read_status,
+	},
+	[1] = {
+		.init_chip	= da9034_init_chip,
+		.unmask_events	= da9034_unmask_events,
+		.mask_events	= da9034_mask_events,
+		.read_events	= da9034_read_events,
+		.read_status	= da9034_read_status,
+	}
+};
+
+static const struct i2c_device_id da903x_id_table[] = {
+	{ "da9030", 0 },
+	{ "da9034", 1 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, da903x_id_table);
+
+static int __devexit __remove_subdev(struct device *dev, void *unused)
+{
+	platform_device_unregister(to_platform_device(dev));
+	return 0;
+}
+
+static int __devexit da903x_remove_subdevs(struct da903x_chip *chip)
+{
+	return device_for_each_child(chip->dev, NULL, __remove_subdev);
+}
+
+static int __devinit da903x_add_subdevs(struct da903x_chip *chip,
+					struct da903x_platform_data *pdata)
+{
+	struct da903x_subdev_info *subdev;
+	struct platform_device *pdev;
+	int i, ret = 0;
+
+	for (i = 0; i < pdata->num_subdevs; i++) {
+		subdev = &pdata->subdevs[i];
+
+		pdev = platform_device_alloc(subdev->name, subdev->id);
+
+		pdev->dev.parent = chip->dev;
+		pdev->dev.platform_data = subdev->platform_data;
+
+		ret = platform_device_add(pdev);
+		if (ret)
+			goto failed;
+	}
+	return 0;
+
+failed:
+	da903x_remove_subdevs(chip);
+	return ret;
+}
+
+static int __devinit da903x_probe(struct i2c_client *client,
+				  const struct i2c_device_id *id)
+{
+	struct da903x_platform_data *pdata = client->dev.platform_data;
+	struct da903x_chip *chip;
+	unsigned int tmp;
+	int ret;
+
+	chip = kzalloc(sizeof(struct da903x_chip), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+
+	chip->client = client;
+	chip->dev = &client->dev;
+	chip->ops = &da903x_ops[id->driver_data];
+
+	mutex_init(&chip->lock);
+	INIT_WORK(&chip->irq_work, da903x_irq_work);
+	BLOCKING_INIT_NOTIFIER_HEAD(&chip->notifier_list);
+
+	i2c_set_clientdata(client, chip);
+
+	ret = chip->ops->init_chip(chip);
+	if (ret)
+		goto out_free_chip;
+
+	/* mask and clear all IRQs */
+	chip->events_mask = 0xffffffff;
+	chip->ops->mask_events(chip, chip->events_mask);
+	chip->ops->read_events(chip, &tmp);
+
+	ret = request_irq(client->irq, da903x_irq_handler,
+			IRQF_DISABLED | IRQF_TRIGGER_FALLING,
+			"da903x", chip);
+	if (ret) {
+		dev_err(&client->dev, "failed to request irq %d\n",
+				client->irq);
+		goto out_free_chip;
+	}
+
+	ret = da903x_add_subdevs(chip, pdata);
+	if (ret)
+		goto out_free_irq;
+
+	return 0;
+
+out_free_irq:
+	free_irq(client->irq, chip);
+out_free_chip:
+	i2c_set_clientdata(client, NULL);
+	kfree(chip);
+	return ret;
+}
+
+static int __devexit da903x_remove(struct i2c_client *client)
+{
+	struct da903x_chip *chip = i2c_get_clientdata(client);
+
+	da903x_remove_subdevs(chip);
+	kfree(chip);
+	return 0;
+}
+
+static struct i2c_driver da903x_driver = {
+	.driver	= {
+		.name	= "da903x",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= da903x_probe,
+	.remove		= __devexit_p(da903x_remove),
+	.id_table	= da903x_id_table,
+};
+
+static int __init da903x_init(void)
+{
+	return i2c_add_driver(&da903x_driver);
+}
+module_init(da903x_init);
+
+static void __exit da903x_exit(void)
+{
+	i2c_del_driver(&da903x_driver);
+}
+module_exit(da903x_exit);
+
+MODULE_DESCRIPTION("PMIC Driver for Dialog Semiconductor DA9034");
+MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>"
+	      "Mike Rapoport <mike@compulab.co.il>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/da903x.h b/include/linux/mfd/da903x.h
new file mode 100644
index 000000000000..cad314c12439
--- /dev/null
+++ b/include/linux/mfd/da903x.h
@@ -0,0 +1,201 @@
+#ifndef __LINUX_PMIC_DA903X_H
+#define __LINUX_PMIC_DA903X_H
+
+/* Unified sub device IDs for DA9030/DA9034 */
+enum {
+	DA9030_ID_LED_1,
+	DA9030_ID_LED_2,
+	DA9030_ID_LED_3,
+	DA9030_ID_LED_4,
+	DA9030_ID_LED_PC,
+	DA9030_ID_VIBRA,
+	DA9030_ID_WLED,
+	DA9030_ID_BUCK1,
+	DA9030_ID_BUCK2,
+	DA9030_ID_LDO1,
+	DA9030_ID_LDO2,
+	DA9030_ID_LDO3,
+	DA9030_ID_LDO4,
+	DA9030_ID_LDO5,
+	DA9030_ID_LDO6,
+	DA9030_ID_LDO7,
+	DA9030_ID_LDO8,
+	DA9030_ID_LDO9,
+	DA9030_ID_LDO10,
+	DA9030_ID_LDO11,
+	DA9030_ID_LDO12,
+	DA9030_ID_LDO13,
+	DA9030_ID_LDO14,
+	DA9030_ID_LDO15,
+	DA9030_ID_LDO16,
+	DA9030_ID_LDO17,
+	DA9030_ID_LDO18,
+	DA9030_ID_LDO19,
+	DA9030_ID_LDO_INT,	/* LDO Internal */
+
+	DA9034_ID_LED_1,
+	DA9034_ID_LED_2,
+	DA9034_ID_VIBRA,
+	DA9034_ID_WLED,
+	DA9034_ID_TOUCH,
+
+	DA9034_ID_BUCK1,
+	DA9034_ID_BUCK2,
+	DA9034_ID_LDO1,
+	DA9034_ID_LDO2,
+	DA9034_ID_LDO3,
+	DA9034_ID_LDO4,
+	DA9034_ID_LDO5,
+	DA9034_ID_LDO6,
+	DA9034_ID_LDO7,
+	DA9034_ID_LDO8,
+	DA9034_ID_LDO9,
+	DA9034_ID_LDO10,
+	DA9034_ID_LDO11,
+	DA9034_ID_LDO12,
+	DA9034_ID_LDO13,
+	DA9034_ID_LDO14,
+	DA9034_ID_LDO15,
+};
+
+/*
+ * DA9030/DA9034 LEDs sub-devices uses generic "struct led_info"
+ * as the platform_data
+ */
+
+/* DA9030 flags for "struct led_info"
+ */
+#define DA9030_LED_RATE_ON	(0 << 5)
+#define DA9030_LED_RATE_052S	(1 << 5)
+#define DA9030_LED_DUTY_1_16	(0 << 3)
+#define DA9030_LED_DUTY_1_8	(1 << 3)
+#define DA9030_LED_DUTY_1_4	(2 << 3)
+#define DA9030_LED_DUTY_1_2	(3 << 3)
+
+#define DA9030_VIBRA_MODE_1P3V	(0 << 1)
+#define DA9030_VIBRA_MODE_2P7V	(1 << 1)
+#define DA9030_VIBRA_FREQ_1HZ	(0 << 2)
+#define DA9030_VIBRA_FREQ_2HZ	(1 << 2)
+#define DA9030_VIBRA_FREQ_4HZ	(2 << 2)
+#define DA9030_VIBRA_FREQ_8HZ	(3 << 2)
+#define DA9030_VIBRA_DUTY_ON	(0 << 4)
+#define DA9030_VIBRA_DUTY_75P	(1 << 4)
+#define DA9030_VIBRA_DUTY_50P	(2 << 4)
+#define DA9030_VIBRA_DUTY_25P	(3 << 4)
+
+/* DA9034 flags for "struct led_info" */
+#define DA9034_LED_RAMP		(1 << 7)
+
+/* DA9034 touch screen platform data */
+struct da9034_touch_pdata {
+	int	interval_ms;	/* sampling interval while pen down */
+	int	x_inverted;
+	int	y_inverted;
+};
+
+struct da903x_subdev_info {
+	int		id;
+	const char	*name;
+	void		*platform_data;
+};
+
+struct da903x_platform_data {
+	int num_subdevs;
+	struct da903x_subdev_info *subdevs;
+};
+
+/* bit definitions for DA9030 events */
+#define DA9030_EVENT_ONKEY		(1 << 0)
+#define	DA9030_EVENT_PWREN		(1 << 1)
+#define	DA9030_EVENT_EXTON		(1 << 2)
+#define	DA9030_EVENT_CHDET		(1 << 3)
+#define	DA9030_EVENT_TBAT		(1 << 4)
+#define	DA9030_EVENT_VBATMON		(1 << 5)
+#define	DA9030_EVENT_VBATMON_TXON	(1 << 6)
+#define	DA9030_EVENT_CHIOVER		(1 << 7)
+#define	DA9030_EVENT_TCTO		(1 << 8)
+#define	DA9030_EVENT_CCTO		(1 << 9)
+#define	DA9030_EVENT_ADC_READY		(1 << 10)
+#define	DA9030_EVENT_VBUS_4P4		(1 << 11)
+#define	DA9030_EVENT_VBUS_4P0		(1 << 12)
+#define	DA9030_EVENT_SESS_VALID		(1 << 13)
+#define	DA9030_EVENT_SRP_DETECT		(1 << 14)
+#define	DA9030_EVENT_WATCHDOG		(1 << 15)
+#define	DA9030_EVENT_LDO15		(1 << 16)
+#define	DA9030_EVENT_LDO16		(1 << 17)
+#define	DA9030_EVENT_LDO17		(1 << 18)
+#define	DA9030_EVENT_LDO18		(1 << 19)
+#define	DA9030_EVENT_LDO19		(1 << 20)
+#define	DA9030_EVENT_BUCK2		(1 << 21)
+
+/* bit definitions for DA9034 events */
+#define DA9034_EVENT_ONKEY		(1 << 0)
+#define DA9034_EVENT_EXTON		(1 << 2)
+#define DA9034_EVENT_CHDET		(1 << 3)
+#define DA9034_EVENT_TBAT		(1 << 4)
+#define DA9034_EVENT_VBATMON		(1 << 5)
+#define DA9034_EVENT_REV_IOVER		(1 << 6)
+#define DA9034_EVENT_CH_IOVER		(1 << 7)
+#define DA9034_EVENT_CH_TCTO		(1 << 8)
+#define DA9034_EVENT_CH_CCTO		(1 << 9)
+#define DA9034_EVENT_USB_DEV		(1 << 10)
+#define DA9034_EVENT_OTGCP_IOVER	(1 << 11)
+#define DA9034_EVENT_VBUS_4P55		(1 << 12)
+#define DA9034_EVENT_VBUS_3P8		(1 << 13)
+#define DA9034_EVENT_SESS_1P8		(1 << 14)
+#define DA9034_EVENT_SRP_READY		(1 << 15)
+#define DA9034_EVENT_ADC_MAN		(1 << 16)
+#define DA9034_EVENT_ADC_AUTO4		(1 << 17)
+#define DA9034_EVENT_ADC_AUTO5		(1 << 18)
+#define DA9034_EVENT_ADC_AUTO6		(1 << 19)
+#define DA9034_EVENT_PEN_DOWN		(1 << 20)
+#define DA9034_EVENT_TSI_READY		(1 << 21)
+#define DA9034_EVENT_UART_TX		(1 << 22)
+#define DA9034_EVENT_UART_RX		(1 << 23)
+#define DA9034_EVENT_HEADSET		(1 << 25)
+#define DA9034_EVENT_HOOKSWITCH		(1 << 26)
+#define DA9034_EVENT_WATCHDOG		(1 << 27)
+
+extern int da903x_register_notifier(struct device *dev,
+		struct notifier_block *nb, unsigned int events);
+extern int da903x_unregister_notifier(struct device *dev,
+		struct notifier_block *nb, unsigned int events);
+
+/* Status Query Interface */
+#define DA9030_STATUS_ONKEY		(1 << 0)
+#define DA9030_STATUS_PWREN1		(1 << 1)
+#define DA9030_STATUS_EXTON		(1 << 2)
+#define DA9030_STATUS_CHDET		(1 << 3)
+#define DA9030_STATUS_TBAT		(1 << 4)
+#define DA9030_STATUS_VBATMON		(1 << 5)
+#define DA9030_STATUS_VBATMON_TXON	(1 << 6)
+#define DA9030_STATUS_MCLKDET		(1 << 7)
+
+#define DA9034_STATUS_ONKEY		(1 << 0)
+#define DA9034_STATUS_EXTON		(1 << 2)
+#define DA9034_STATUS_CHDET		(1 << 3)
+#define DA9034_STATUS_TBAT		(1 << 4)
+#define DA9034_STATUS_VBATMON		(1 << 5)
+#define DA9034_STATUS_PEN_DOWN		(1 << 6)
+#define DA9034_STATUS_MCLKDET		(1 << 7)
+#define DA9034_STATUS_USB_DEV		(1 << 8)
+#define DA9034_STATUS_HEADSET		(1 << 9)
+#define DA9034_STATUS_HOOKSWITCH	(1 << 10)
+#define DA9034_STATUS_REMCON		(1 << 11)
+#define DA9034_STATUS_VBUS_VALID_4P55	(1 << 12)
+#define DA9034_STATUS_VBUS_VALID_3P8	(1 << 13)
+#define DA9034_STATUS_SESS_VALID_1P8	(1 << 14)
+#define DA9034_STATUS_SRP_READY		(1 << 15)
+
+extern int da903x_query_status(struct device *dev, unsigned int status);
+
+
+/* NOTE: the two functions below are not intended for use outside
+ * of the DA9034 sub-device drivers
+ */
+extern int da903x_write(struct device *dev, int reg, uint8_t val);
+extern int da903x_read(struct device *dev, int reg, uint8_t *val);
+extern int da903x_update(struct device *dev, int reg, uint8_t val, uint8_t mask);
+extern int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask);
+extern int da903x_clr_bits(struct device *dev, int reg, uint8_t bit_mask);
+#endif /* __LINUX_PMIC_DA903X_H */
-- 
GitLab


From 9297a0e7e033b7f2776610197fcbb9ff563efe32 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Thu, 9 Oct 2008 00:30:11 +0200
Subject: [PATCH 636/895] mfd: twl4030-core irq simplification

Simplify twl4030 IRQ handling by removing a needless custom flow
handler.  The top level IRQs, from the PIH, are well suited for
handle_simple_irq() ... they can't be acked or masked.

Switching resolves some issues with how IRQs were dispatched.
Notably, abuse of desc->status, IRQ accounting, and handling
of various faults.

In short, use standard genirq code.

Drivers that request_irq() to the PIH will need to pay more
attention to things like setting IRQF_DISABLED (since it's
no longer ignored), and making I2C calls from handlers (you'll
need a lockdep workaround).

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/twl4030-core.c | 68 ++------------------------------------
 1 file changed, 2 insertions(+), 66 deletions(-)

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index 4af1624987c5..fd9a0160202c 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -535,68 +535,6 @@ EXPORT_SYMBOL(twl4030_i2c_read_u8);
 
 /*----------------------------------------------------------------------*/
 
-/*
- * do_twl4030_module_irq() is the desc->handle method for each of the twl4030
- * module interrupts that doesn't chain to another irq_chip (GPIO, power, etc).
- * It executes in kernel thread context.  On entry, cpu interrupts are disabled.
- */
-static void do_twl4030_module_irq(unsigned int irq, irq_desc_t *desc)
-{
-	struct irqaction *action;
-	const unsigned int cpu = smp_processor_id();
-
-	/*
-	 * Earlier this was desc->triggered = 1;
-	 */
-	desc->status |= IRQ_LEVEL;
-
-	/*
-	 * The desc->handle method would normally call the desc->chip->ack
-	 * method here, but we won't bother since our ack method is NULL.
-	 */
-
-	if (!desc->depth) {
-		kstat_cpu(cpu).irqs[irq]++;
-
-		action = desc->action;
-		if (action) {
-			int ret;
-			int status = 0;
-			int retval = 0;
-
-			local_irq_enable();
-
-			do {
-				/* Call the ISR with cpu interrupts enabled */
-				ret = action->handler(irq, action->dev_id);
-				if (ret == IRQ_HANDLED)
-					status |= action->flags;
-				retval |= ret;
-				action = action->next;
-			} while (action);
-
-			if (status & IRQF_SAMPLE_RANDOM)
-				add_interrupt_randomness(irq);
-
-			local_irq_disable();
-
-			if (retval != IRQ_HANDLED)
-				printk(KERN_ERR "ISR for TWL4030 module"
-					" irq %d can't handle interrupt\n",
-					irq);
-
-			/*
-			 * Here is where we should call the unmask method, but
-			 * again we won't bother since it is NULL.
-			 */
-		} else
-			printk(KERN_CRIT "TWL4030 module irq %d has no ISR"
-					" but can't be masked!\n", irq);
-	} else
-		printk(KERN_CRIT "TWL4030 module irq %d is disabled but can't"
-				" be masked!\n", irq);
-}
-
 static unsigned twl4030_irq_base;
 
 static struct completion irq_event;
@@ -611,7 +549,6 @@ static int twl4030_irq_thread(void *data)
 	static unsigned i2c_errors;
 	const static unsigned max_i2c_errors = 100;
 
-	daemonize("twl4030-irq");
 	current->flags |= PF_NOFREEZE;
 
 	while (!kthread_should_stop()) {
@@ -691,8 +628,7 @@ static struct task_struct * __init start_twl4030_irq_thread(long irq)
 	struct task_struct *thread;
 
 	init_completion(&irq_event);
-	thread = kthread_run(twl4030_irq_thread, (void *)irq,
-			     "twl4030 irq %ld", irq);
+	thread = kthread_run(twl4030_irq_thread, (void *)irq, "twl4030-irq");
 	if (!thread)
 		pr_err("%s: could not create twl4030 irq %ld thread!\n",
 		       DRIVER_NAME, irq);
@@ -1126,7 +1062,7 @@ static void twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
 	/* install an irq handler for each of the PIH modules */
 	for (i = irq_base; i < irq_end; i++) {
 		set_irq_chip_and_handler(i, &twl4030_irq_chip,
-				do_twl4030_module_irq);
+				handle_simple_irq);
 		activate_irq(i);
 	}
 
-- 
GitLab


From 7acb706ca97fce84bda4a902a33de2f3dae10260 Mon Sep 17 00:00:00 2001
From: Ian Molton <spyro@f2s.com>
Date: Thu, 9 Oct 2008 20:06:09 +0200
Subject: [PATCH 637/895] mfd: update TMIO drivers to use the clock API

This patch updates the remaining two TMIO drivers to use the clock API
rather than callback hooks into platform code.

Signed-off-by: Ian Molton <spyro@f2s.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/t7l66xb.c        | 40 ++++++++++++++++++++++++------
 drivers/mfd/tc6387xb.c       | 47 ++++++++++++++++++++++--------------
 include/linux/mfd/t7l66xb.h  |  2 --
 include/linux/mfd/tc6387xb.h |  3 ---
 4 files changed, 61 insertions(+), 31 deletions(-)

diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index 49a0fffc02af..9f7024c0f8ec 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c
@@ -24,8 +24,10 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/err.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/tmio.h>
@@ -56,6 +58,8 @@ struct t7l66xb {
 	spinlock_t		lock;
 
 	struct resource		rscr;
+	struct clk		*clk48m;
+	struct clk		*clk32k;
 	int			irq;
 	int			irq_base;
 };
@@ -65,13 +69,11 @@ struct t7l66xb {
 static int t7l66xb_mmc_enable(struct platform_device *mmc)
 {
 	struct platform_device *dev = to_platform_device(mmc->dev.parent);
-	struct t7l66xb_platform_data   *pdata = dev->dev.platform_data;
 	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
 	unsigned long flags;
 	u8 dev_ctl;
 
-	if (pdata->enable_clk32k)
-		pdata->enable_clk32k(dev);
+	clk_enable(t7l66xb->clk32k);
 
 	spin_lock_irqsave(&t7l66xb->lock, flags);
 
@@ -87,7 +89,6 @@ static int t7l66xb_mmc_enable(struct platform_device *mmc)
 static int t7l66xb_mmc_disable(struct platform_device *mmc)
 {
 	struct platform_device *dev = to_platform_device(mmc->dev.parent);
-	struct t7l66xb_platform_data   *pdata = dev->dev.platform_data;
 	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
 	unsigned long flags;
 	u8 dev_ctl;
@@ -100,8 +101,7 @@ static int t7l66xb_mmc_disable(struct platform_device *mmc)
 
 	spin_unlock_irqrestore(&t7l66xb->lock, flags);
 
-	if (pdata->disable_clk32k)
-		pdata->disable_clk32k(dev);
+	clk_disable(t7l66xb->clk32k);
 
 	return 0;
 }
@@ -258,18 +258,22 @@ static void t7l66xb_detach_irq(struct platform_device *dev)
 #ifdef CONFIG_PM
 static int t7l66xb_suspend(struct platform_device *dev, pm_message_t state)
 {
+	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
 	struct t7l66xb_platform_data *pdata = dev->dev.platform_data;
 
 	if (pdata && pdata->suspend)
 		pdata->suspend(dev);
+	clk_disable(t7l66xb->clk48m);
 
 	return 0;
 }
 
 static int t7l66xb_resume(struct platform_device *dev)
 {
+	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
 	struct t7l66xb_platform_data *pdata = dev->dev.platform_data;
 
+	clk_enable(t7l66xb->clk48m);
 	if (pdata && pdata->resume)
 		pdata->resume(dev);
 
@@ -309,6 +313,19 @@ static int t7l66xb_probe(struct platform_device *dev)
 
 	t7l66xb->irq_base = pdata->irq_base;
 
+	t7l66xb->clk32k = clk_get(&dev->dev, "CLK_CK32K");
+	if (IS_ERR(t7l66xb->clk32k)) {
+		ret = PTR_ERR(t7l66xb->clk32k);
+		goto err_clk32k_get;
+	}
+
+	t7l66xb->clk48m = clk_get(&dev->dev, "CLK_CK48M");
+	if (IS_ERR(t7l66xb->clk48m)) {
+		ret = PTR_ERR(t7l66xb->clk48m);
+		clk_put(t7l66xb->clk32k);
+		goto err_clk48m_get;
+	}
+
 	rscr = &t7l66xb->rscr;
 	rscr->name = "t7l66xb-core";
 	rscr->start = iomem->start;
@@ -325,6 +342,8 @@ static int t7l66xb_probe(struct platform_device *dev)
 		goto err_ioremap;
 	}
 
+	clk_enable(t7l66xb->clk48m);
+
 	if (pdata && pdata->enable)
 		pdata->enable(dev);
 
@@ -359,9 +378,13 @@ static int t7l66xb_probe(struct platform_device *dev)
 	iounmap(t7l66xb->scr);
 err_ioremap:
 	release_resource(&t7l66xb->rscr);
-err_noirq:
 err_request_scr:
 	kfree(t7l66xb);
+	clk_put(t7l66xb->clk48m);
+err_clk48m_get:
+	clk_put(t7l66xb->clk32k);
+err_clk32k_get:
+err_noirq:
 	return ret;
 }
 
@@ -372,7 +395,8 @@ static int t7l66xb_remove(struct platform_device *dev)
 	int ret;
 
 	ret = pdata->disable(dev);
-
+	clk_disable(t7l66xb->clk48m);
+	clk_put(t7l66xb->clk48m);
 	t7l66xb_detach_irq(dev);
 	iounmap(t7l66xb->scr);
 	release_resource(&t7l66xb->rscr);
diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index a22b21ac6cf8..43222c12fec1 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/tmio.h>
@@ -24,18 +25,22 @@ enum {
 #ifdef CONFIG_PM
 static int tc6387xb_suspend(struct platform_device *dev, pm_message_t state)
 {
-	struct tc6387xb_platform_data *pdata = platform_get_drvdata(dev);
+	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 
 	if (pdata && pdata->suspend)
 		pdata->suspend(dev);
+	clk_disable(clk32k);
 
 	return 0;
 }
 
 static int tc6387xb_resume(struct platform_device *dev)
 {
-	struct tc6387xb_platform_data *pdata = platform_get_drvdata(dev);
+	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 
+	clk_enable(clk32k);
 	if (pdata && pdata->resume)
 		pdata->resume(dev);
 
@@ -51,10 +56,9 @@ static int tc6387xb_resume(struct platform_device *dev)
 static int tc6387xb_mmc_enable(struct platform_device *mmc)
 {
 	struct platform_device *dev      = to_platform_device(mmc->dev.parent);
-	struct tc6387xb_platform_data *tc6387xb = dev->dev.platform_data;
+	struct clk *clk32k = platform_get_drvdata(dev);
 
-	if (tc6387xb->enable_clk32k)
-		tc6387xb->enable_clk32k(dev);
+	clk_enable(clk32k);
 
 	return 0;
 }
@@ -62,10 +66,9 @@ static int tc6387xb_mmc_enable(struct platform_device *mmc)
 static int tc6387xb_mmc_disable(struct platform_device *mmc)
 {
 	struct platform_device *dev      = to_platform_device(mmc->dev.parent);
-	struct tc6387xb_platform_data *tc6387xb = dev->dev.platform_data;
+	struct clk *clk32k = platform_get_drvdata(dev);
 
-	if (tc6387xb->disable_clk32k)
-		tc6387xb->disable_clk32k(dev);
+	clk_disable(clk32k);
 
 	return 0;
 }
@@ -102,14 +105,14 @@ static struct mfd_cell tc6387xb_cells[] = {
 
 static int tc6387xb_probe(struct platform_device *dev)
 {
-	struct tc6387xb_platform_data *data = platform_get_drvdata(dev);
+	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 	struct resource *iomem;
+	struct clk *clk32k;
 	int irq, ret;
 
 	iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	if (!iomem) {
-		ret = -EINVAL;
-		goto err_resource;
+		return -EINVAL;
 	}
 
 	ret  = platform_get_irq(dev, 0);
@@ -118,8 +121,15 @@ static int tc6387xb_probe(struct platform_device *dev)
 	else
 		goto err_resource;
 
-	if (data && data->enable)
-		data->enable(dev);
+	clk32k = clk_get(&dev->dev, "CLK_CK32K");
+	if (IS_ERR(clk32k)) {
+		ret = PTR_ERR(clk32k);
+		goto err_resource;
+	}
+	platform_set_drvdata(dev, clk32k);
+
+	if (pdata && pdata->enable)
+		pdata->enable(dev);
 
 	printk(KERN_INFO "Toshiba tc6387xb initialised\n");
 
@@ -134,18 +144,19 @@ static int tc6387xb_probe(struct platform_device *dev)
 	if (!ret)
 		return 0;
 
+	clk_put(clk32k);
 err_resource:
 	return ret;
 }
 
 static int tc6387xb_remove(struct platform_device *dev)
 {
-	struct tc6387xb_platform_data *data = platform_get_drvdata(dev);
-
-	if (data && data->disable)
-		data->disable(dev);
+	struct clk *clk32k = platform_get_drvdata(dev);
 
-	/* FIXME - free the resources! */
+	mfd_remove_devices(&dev->dev);
+	clk_disable(clk32k);
+	clk_put(clk32k);
+	platform_set_drvdata(dev, NULL);
 
 	return 0;
 }
diff --git a/include/linux/mfd/t7l66xb.h b/include/linux/mfd/t7l66xb.h
index e83c7f2036f9..b4629818aea5 100644
--- a/include/linux/mfd/t7l66xb.h
+++ b/include/linux/mfd/t7l66xb.h
@@ -15,8 +15,6 @@
 #include <linux/mfd/tmio.h>
 
 struct t7l66xb_platform_data {
-	int (*enable_clk32k)(struct platform_device *dev);
-	void (*disable_clk32k)(struct platform_device *dev);
 	int (*enable)(struct platform_device *dev);
 	int (*disable)(struct platform_device *dev);
 	int (*suspend)(struct platform_device *dev);
diff --git a/include/linux/mfd/tc6387xb.h b/include/linux/mfd/tc6387xb.h
index fa06e0610b8e..b4888209494a 100644
--- a/include/linux/mfd/tc6387xb.h
+++ b/include/linux/mfd/tc6387xb.h
@@ -11,9 +11,6 @@
 #define MFD_TC6387XB_H
 
 struct tc6387xb_platform_data {
-	int (*enable_clk32k)(struct platform_device *dev);
-	void (*disable_clk32k)(struct platform_device *dev);
-
 	int (*enable)(struct platform_device *dev);
 	int (*disable)(struct platform_device *dev);
 	int (*suspend)(struct platform_device *dev);
-- 
GitLab


From c7752351c34d852ca0da697f812534101eecd82e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 15 Oct 2008 12:28:06 +0200
Subject: [PATCH 638/895] mfd: Don't use NO_IRQ in WM8350

NO_IRQ is only defined on some architectures - the general way to test
for an invalid IRQ in the modern kernel is by comparing with zero.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/wm8350-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 25a7a5d08bce..bf87f675e7fa 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1217,7 +1217,7 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 
 	mutex_init(&wm8350->irq_mutex);
 	INIT_WORK(&wm8350->irq_work, wm8350_irq_worker);
-	if (irq != NO_IRQ) {
+	if (irq) {
 		ret = request_irq(irq, wm8350_irq, 0,
 				  "wm8350", wm8350);
 		if (ret != 0) {
-- 
GitLab


From e48318025477272d09720f09085cd0ec71caeb73 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 15 Oct 2008 13:29:07 +0200
Subject: [PATCH 639/895] mfd: ucb1400 sound driver uses/depends on AC97_BUS:

ERROR: "ac97_bus_type" [drivers/mfd/ucb1400_core.ko] undefined!

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/mfd/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index cccda99328f3..57d972f39cf0 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -52,6 +52,7 @@ config HTC_PASIC3
 
 config UCB1400_CORE
 	tristate "Philips UCB1400 Core driver"
+	depends on AC97_BUS
 	help
 	  This enables support for the Philips UCB1400 core functions.
 	  The UCB1400 is an AC97 audio codec.
-- 
GitLab


From 5a49a540df7c9984976aaabcb13f79ed5ce9672c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 15 Oct 2008 13:30:47 +0200
Subject: [PATCH 640/895] mfd: ucb1400 needs GPIO

ia64 allmodconfig:

In file included from include/linux/ucb1400.h:27,
                 from drivers/mfd/ucb1400_core.c:24:
include/asm-generic/gpio.h: In function `gpio_get_value_cansleep':
include/asm-generic/gpio.h:147: error: implicit declaration of function `gpio_get_value'
include/asm-generic/gpio.h: In function `gpio_set_value_cansleep':
include/asm-generic/gpio.h:153: error: implicit declaration of function `gpio_set_value'
drivers/mfd/ucb1400_core.c: At top level:

Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/mfd/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 57d972f39cf0..5a79d2d4cdae 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -53,6 +53,7 @@ config HTC_PASIC3
 config UCB1400_CORE
 	tristate "Philips UCB1400 Core driver"
 	depends on AC97_BUS
+	depends on GPIOLIB
 	help
 	  This enables support for the Philips UCB1400 core functions.
 	  The UCB1400 is an AC97 audio codec.
-- 
GitLab


From 64c12e9b7edb0bfce9c5d7db43091b67894d8339 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 15 Oct 2008 13:50:20 +0200
Subject: [PATCH 641/895] mfd: further unbork the ucb1400 ac97_bus dependencies

Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/input/touchscreen/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 6e1e8c624f9e..8317fdef1691 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -219,7 +219,7 @@ config TOUCHSCREEN_ATMEL_TSADCC
 
 config TOUCHSCREEN_UCB1400
 	tristate "Philips UCB1400 touchscreen"
-	select AC97_BUS
+	depends on AC97_BUS
 	depends on UCB1400_CORE
 	help
 	  This enables support for the Philips UCB1400 touchscreen interface.
-- 
GitLab


From 2967dab1ae37e30f1b71316513b49fd25c42eabe Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 8 Oct 2008 20:41:43 +0900
Subject: [PATCH 642/895] sh: GPIO and pinmux base code

This patch adds gpio code together with the pinmux table parser.
In the future we should optimize this and switch back to gpiolib.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig            |   3 +
 arch/sh/include/asm/gpio.h |  92 ++++++-
 arch/sh/kernel/Makefile_32 |   2 +-
 arch/sh/kernel/Makefile_64 |   2 +-
 arch/sh/kernel/gpio.c      | 498 +++++++++++++++++++++++++++++++++++++
 5 files changed, 593 insertions(+), 4 deletions(-)
 create mode 100644 arch/sh/kernel/gpio.c

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index dca54ccdf291..b4aa2a03e19b 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -59,6 +59,9 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
 config GENERIC_IRQ_PROBE
 	def_bool y
 
+config GENERIC_GPIO
+	def_bool n
+
 config GENERIC_CALIBRATE_DELAY
 	bool
 
diff --git a/arch/sh/include/asm/gpio.h b/arch/sh/include/asm/gpio.h
index cf32bd2df881..9650e7c9c39e 100644
--- a/arch/sh/include/asm/gpio.h
+++ b/arch/sh/include/asm/gpio.h
@@ -1,9 +1,9 @@
 /*
  *  include/asm-sh/gpio.h
  *
- *  Copyright (C) 2007 Markus Brunner, Mark Jonas
+ * Generic GPIO API and pinmux table support for SuperH.
  *
- *  Addresses for the Pin Function Controller
+ * Copyright (c) 2008 Magnus Damm
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -16,4 +16,92 @@
 #include <cpu/gpio.h>
 #endif
 
+typedef unsigned short pinmux_enum_t;
+typedef unsigned char pinmux_flag_t;
+
+#define PINMUX_TYPE_NONE            0
+#define PINMUX_TYPE_FUNCTION        1
+#define PINMUX_TYPE_GPIO            2
+#define PINMUX_TYPE_OUTPUT          3
+#define PINMUX_TYPE_INPUT           4
+#define PINMUX_TYPE_INPUT_PULLUP    5
+#define PINMUX_TYPE_INPUT_PULLDOWN  6
+
+#define PINMUX_FLAG_TYPE            (0x7)
+#define PINMUX_FLAG_WANT_PULLUP     (1 << 3)
+#define PINMUX_FLAG_WANT_PULLDOWN   (1 << 4)
+
+struct pinmux_gpio {
+	pinmux_enum_t enum_id;
+	pinmux_flag_t flags;
+};
+
+#define PINMUX_GPIO(gpio, data_or_mark) [gpio] = { data_or_mark }
+#define PINMUX_DATA(data_or_mark, ids...) data_or_mark, ids, 0
+
+struct pinmux_cfg_reg {
+	unsigned long reg, reg_width, field_width;
+	unsigned long *cnt;
+	pinmux_enum_t *enum_ids;
+};
+
+#define PINMUX_CFG_REG(name, r, r_width, f_width) \
+	.reg = r, .reg_width = r_width, .field_width = f_width,		\
+	.cnt = (unsigned long [r_width / f_width]) {}, \
+	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)]) \
+
+struct pinmux_data_reg {
+	unsigned long reg, reg_width;
+	pinmux_enum_t *enum_ids;
+};
+
+#define PINMUX_DATA_REG(name, r, r_width) \
+	.reg = r, .reg_width = r_width,	\
+	.enum_ids = (pinmux_enum_t [r_width]) \
+
+struct pinmux_range {
+	pinmux_enum_t begin;
+	pinmux_enum_t end;
+};
+
+struct pinmux_info {
+	char *name;
+	pinmux_enum_t reserved_id;
+	struct pinmux_range data;
+	struct pinmux_range input;
+	struct pinmux_range input_pd;
+	struct pinmux_range input_pu;
+	struct pinmux_range output;
+	struct pinmux_range mark;
+	struct pinmux_range function;
+
+	unsigned first_gpio, last_gpio;
+
+	struct pinmux_gpio *gpios;
+	struct pinmux_cfg_reg *cfg_regs;
+	struct pinmux_data_reg *data_regs;
+
+	pinmux_enum_t *gpio_data;
+	unsigned int gpio_data_size;
+
+	unsigned long *gpio_in_use;
+};
+
+int register_pinmux(struct pinmux_info *pip);
+
+int __gpio_request(unsigned gpio);
+static inline int gpio_request(unsigned gpio, const char *label)
+{
+	return __gpio_request(gpio);
+}
+void gpio_free(unsigned gpio);
+int gpio_direction_input(unsigned gpio);
+int gpio_direction_output(unsigned gpio, int value);
+int gpio_get_value(unsigned gpio);
+void gpio_set_value(unsigned gpio, int value);
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	return 0;
+}
+
 #endif /* __ASM_SH_GPIO_H */
diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
index 8497de8afbff..7c41ac7c298c 100644
--- a/arch/sh/kernel/Makefile_32
+++ b/arch/sh/kernel/Makefile_32
@@ -5,7 +5,7 @@
 extra-y	:= head_32.o init_task.o vmlinux.lds
 
 obj-y	:= debugtraps.o io.o io_generic.o irq.o machvec.o process_32.o \
-	   ptrace_32.o setup.o signal_32.o sys_sh.o sys_sh32.o \
+	   ptrace_32.o setup.o signal_32.o sys_sh.o sys_sh32.o gpio.o \
 	   syscalls_32.o time_32.o topology.o traps.o traps_32.o
 
 obj-y				+= cpu/ timers/
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index e987eb2e9bb5..2a7a5e694a1f 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -1,7 +1,7 @@
 extra-y	:= head_64.o init_task.o vmlinux.lds
 
 obj-y	:= debugtraps.o io.o io_generic.o irq.o machvec.o process_64.o \
-	   ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o \
+	   ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o gpio.o \
 	   syscalls_64.o time_64.o topology.o traps.o traps_64.o
 
 obj-y				+= cpu/ timers/
diff --git a/arch/sh/kernel/gpio.c b/arch/sh/kernel/gpio.c
new file mode 100644
index 000000000000..bb8b812c6895
--- /dev/null
+++ b/arch/sh/kernel/gpio.c
@@ -0,0 +1,498 @@
+/*
+ * Pinmuxed GPIO support for SuperH.
+ *
+ * Copyright (C) 2008 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/bitops.h>
+#include <linux/gpio.h>
+
+static struct pinmux_info *registered_gpio;
+
+static struct pinmux_info *gpio_controller(unsigned gpio)
+{
+	if (!registered_gpio)
+		return NULL;
+
+	if (gpio < registered_gpio->first_gpio)
+		return NULL;
+
+	if (gpio > registered_gpio->last_gpio)
+		return NULL;
+
+	return registered_gpio;
+}
+
+static int enum_in_range(pinmux_enum_t enum_id, struct pinmux_range *r)
+{
+	if (enum_id < r->begin)
+		return 0;
+
+	if (enum_id > r->end)
+		return 0;
+
+	return 1;
+}
+
+static int read_write_reg(unsigned long reg, unsigned long reg_width,
+			  unsigned long field_width, unsigned long in_pos,
+			  unsigned long value, int do_write)
+{
+	unsigned long data, mask, pos;
+
+	data = 0;
+	mask = (1 << field_width) - 1;
+	pos = reg_width - ((in_pos + 1) * field_width);
+
+#ifdef DEBUG
+	pr_info("%s, addr = %lx, value = %ld, pos = %ld, "
+		"r_width = %ld, f_width = %ld\n",
+		do_write ? "write" : "read", reg, value, pos,
+		reg_width, field_width);
+#endif
+
+	switch (reg_width) {
+	case 8:
+		data = ctrl_inb(reg);
+		break;
+	case 16:
+		data = ctrl_inw(reg);
+		break;
+	case 32:
+		data = ctrl_inl(reg);
+		break;
+	}
+
+	if (!do_write)
+		return (data >> pos) & mask;
+
+	data &= ~(mask << pos);
+	data |= value << pos;
+
+	switch (reg_width) {
+	case 8:
+		ctrl_outb(data, reg);
+		break;
+	case 16:
+		ctrl_outw(data, reg);
+		break;
+	case 32:
+		ctrl_outl(data, reg);
+		break;
+	}
+	return 0;
+}
+
+static int get_data_reg(struct pinmux_info *gpioc, unsigned gpio,
+			struct pinmux_data_reg **drp, int *bitp)
+{
+	pinmux_enum_t enum_id = gpioc->gpios[gpio].enum_id;
+	struct pinmux_data_reg *data_reg;
+	int k, n;
+
+	if (!enum_in_range(enum_id, &gpioc->data))
+		return -1;
+
+	k = 0;
+	while (1) {
+		data_reg = gpioc->data_regs + k;
+
+		if (!data_reg->reg_width)
+			break;
+
+		for (n = 0; n < data_reg->reg_width; n++) {
+			if (data_reg->enum_ids[n] == enum_id) {
+				*drp = data_reg;
+				*bitp = n;
+				return 0;
+
+			}
+		}
+		k++;
+	}
+
+	return -1;
+}
+
+static int get_config_reg(struct pinmux_info *gpioc, pinmux_enum_t enum_id,
+			  struct pinmux_cfg_reg **crp, int *indexp,
+			  unsigned long **cntp)
+{
+	struct pinmux_cfg_reg *config_reg;
+	unsigned long r_width, f_width;
+	int k, n;
+
+	k = 0;
+	while (1) {
+		config_reg = gpioc->cfg_regs + k;
+
+		r_width = config_reg->reg_width;
+		f_width = config_reg->field_width;
+
+		if (!r_width)
+			break;
+		for (n = 0; n < (r_width / f_width) * 1 << f_width; n++) {
+			if (config_reg->enum_ids[n] == enum_id) {
+				*crp = config_reg;
+				*indexp = n;
+				*cntp = &config_reg->cnt[n / (1 << f_width)];
+				return 0;
+			}
+		}
+		k++;
+	}
+
+	return -1;
+}
+
+static int get_gpio_enum_id(struct pinmux_info *gpioc, unsigned gpio,
+			    int pos, pinmux_enum_t *enum_idp)
+{
+	pinmux_enum_t enum_id = gpioc->gpios[gpio].enum_id;
+	pinmux_enum_t *data = gpioc->gpio_data;
+	int k;
+
+	if (!enum_in_range(enum_id, &gpioc->data)) {
+		if (!enum_in_range(enum_id, &gpioc->mark)) {
+			pr_err("non data/mark enum_id for gpio %d\n", gpio);
+			return -1;
+		}
+	}
+
+	if (pos) {
+		*enum_idp = data[pos + 1];
+		return pos + 1;
+	}
+
+	for (k = 0; k < gpioc->gpio_data_size; k++) {
+		if (data[k] == enum_id) {
+			*enum_idp = data[k + 1];
+			return k + 1;
+		}
+	}
+
+	pr_err("cannot locate data/mark enum_id for gpio %d\n", gpio);
+	return -1;
+}
+
+static int write_config_reg(struct pinmux_info *gpioc,
+			    struct pinmux_cfg_reg *crp,
+			    int index)
+{
+	unsigned long ncomb, pos, value;
+
+	ncomb = 1 << crp->field_width;
+	pos = index / ncomb;
+	value = index % ncomb;
+
+	return read_write_reg(crp->reg, crp->reg_width,
+			      crp->field_width, pos, value, 1);
+}
+
+static int check_config_reg(struct pinmux_info *gpioc,
+			    struct pinmux_cfg_reg *crp,
+			    int index)
+{
+	unsigned long ncomb, pos, value;
+
+	ncomb = 1 << crp->field_width;
+	pos = index / ncomb;
+	value = index % ncomb;
+
+	if (read_write_reg(crp->reg, crp->reg_width,
+			   crp->field_width, pos, 0, 0) == value)
+		return 0;
+
+	return -1;
+}
+
+enum { GPIO_CFG_DRYRUN, GPIO_CFG_REQ, GPIO_CFG_FREE };
+
+int pinmux_config_gpio(struct pinmux_info *gpioc, unsigned gpio,
+		       int pinmux_type, int cfg_mode)
+{
+	struct pinmux_cfg_reg *cr = NULL;
+	pinmux_enum_t enum_id;
+	struct pinmux_range *range;
+	int in_range, pos, index;
+	unsigned long *cntp;
+
+	switch (pinmux_type) {
+
+	case PINMUX_TYPE_FUNCTION:
+		range = NULL;
+		break;
+
+	case PINMUX_TYPE_OUTPUT:
+		range = &gpioc->output;
+		break;
+
+	case PINMUX_TYPE_INPUT:
+		range = &gpioc->input;
+		break;
+
+	case PINMUX_TYPE_INPUT_PULLUP:
+		range = &gpioc->input_pu;
+		break;
+
+	case PINMUX_TYPE_INPUT_PULLDOWN:
+		range = &gpioc->input_pd;
+		break;
+
+	default:
+		goto out_err;
+	}
+
+	pos = 0;
+	enum_id = 0;
+	index = 0;
+	while (1) {
+		pos = get_gpio_enum_id(gpioc, gpio, pos, &enum_id);
+		if (pos <= 0)
+			goto out_err;
+
+		if (!enum_id)
+			break;
+
+		in_range = enum_in_range(enum_id, &gpioc->function);
+		if (!in_range && range)
+			in_range = enum_in_range(enum_id, range);
+
+		if (!in_range)
+			continue;
+
+		if (get_config_reg(gpioc, enum_id, &cr, &index, &cntp) != 0)
+			goto out_err;
+
+		switch (cfg_mode) {
+		case GPIO_CFG_DRYRUN:
+			if (!*cntp || !check_config_reg(gpioc, cr, index))
+				continue;
+			break;
+
+		case GPIO_CFG_REQ:
+			if (write_config_reg(gpioc, cr, index) != 0)
+				goto out_err;
+			*cntp = *cntp + 1;
+			break;
+
+		case GPIO_CFG_FREE:
+			*cntp = *cntp - 1;
+			break;
+		}
+	}
+
+	return 0;
+ out_err:
+	return -1;
+}
+
+static DEFINE_SPINLOCK(gpio_lock);
+
+int __gpio_request(unsigned gpio)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	struct pinmux_data_reg *dummy;
+	unsigned long flags;
+	int i, ret, pinmux_type;
+
+	ret = -EINVAL;
+
+	if (!gpioc)
+		goto err_out;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	if ((gpioc->gpios[gpio].flags & PINMUX_FLAG_TYPE) != PINMUX_TYPE_NONE)
+		goto err_unlock;
+
+	/* setup pin function here if no data is associated with pin */
+
+	if (get_data_reg(gpioc, gpio, &dummy, &i) != 0)
+		pinmux_type = PINMUX_TYPE_FUNCTION;
+	else
+		pinmux_type = PINMUX_TYPE_GPIO;
+
+	if (pinmux_type == PINMUX_TYPE_FUNCTION) {
+		if (pinmux_config_gpio(gpioc, gpio,
+				       pinmux_type,
+				       GPIO_CFG_DRYRUN) != 0)
+			goto err_unlock;
+
+		if (pinmux_config_gpio(gpioc, gpio,
+				       pinmux_type,
+				       GPIO_CFG_REQ) != 0)
+			BUG();
+	}
+
+	gpioc->gpios[gpio].flags = pinmux_type;
+
+	ret = 0;
+ err_unlock:
+	spin_unlock_irqrestore(&gpio_lock, flags);
+ err_out:
+	return ret;
+}
+EXPORT_SYMBOL(__gpio_request);
+
+void gpio_free(unsigned gpio)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	unsigned long flags;
+	int pinmux_type;
+
+	if (!gpioc)
+		return;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	pinmux_type = gpioc->gpios[gpio].flags & PINMUX_FLAG_TYPE;
+	pinmux_config_gpio(gpioc, gpio, pinmux_type, GPIO_CFG_FREE);
+	gpioc->gpios[gpio].flags = PINMUX_TYPE_NONE;
+
+	spin_unlock_irqrestore(&gpio_lock, flags);
+}
+EXPORT_SYMBOL(gpio_free);
+
+static int pinmux_direction(struct pinmux_info *gpioc,
+			    unsigned gpio, int new_pinmux_type)
+{
+	int ret, pinmux_type;
+
+	ret = -EINVAL;
+	pinmux_type = gpioc->gpios[gpio].flags & PINMUX_FLAG_TYPE;
+
+	switch (pinmux_type) {
+	case PINMUX_TYPE_GPIO:
+		break;
+	case PINMUX_TYPE_OUTPUT:
+	case PINMUX_TYPE_INPUT:
+	case PINMUX_TYPE_INPUT_PULLUP:
+	case PINMUX_TYPE_INPUT_PULLDOWN:
+		pinmux_config_gpio(gpioc, gpio, pinmux_type, GPIO_CFG_FREE);
+		break;
+	default:
+		goto err_out;
+	}
+
+	if (pinmux_config_gpio(gpioc, gpio,
+			       new_pinmux_type,
+			       GPIO_CFG_DRYRUN) != 0)
+		goto err_out;
+
+	if (pinmux_config_gpio(gpioc, gpio,
+			       new_pinmux_type,
+			       GPIO_CFG_REQ) != 0)
+		BUG();
+
+	gpioc->gpios[gpio].flags = new_pinmux_type;
+
+	ret = 0;
+ err_out:
+	return ret;
+}
+
+int gpio_direction_input(unsigned gpio)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	if (!gpioc)
+		goto err_out;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+	ret = pinmux_direction(gpioc, gpio, PINMUX_TYPE_INPUT);
+	spin_unlock_irqrestore(&gpio_lock, flags);
+ err_out:
+	return ret;
+}
+EXPORT_SYMBOL(gpio_direction_input);
+
+static int __gpio_get_set_value(struct pinmux_info *gpioc,
+				unsigned gpio, int value,
+				int do_write)
+{
+	struct pinmux_data_reg *dr = NULL;
+	int bit = 0;
+
+	if (get_data_reg(gpioc, gpio, &dr, &bit) != 0)
+		BUG();
+	else
+		value = read_write_reg(dr->reg, dr->reg_width,
+				       1, bit, value, do_write);
+
+	return value;
+}
+
+int gpio_direction_output(unsigned gpio, int value)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	if (!gpioc)
+		goto err_out;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+	__gpio_get_set_value(gpioc, gpio, value, 1);
+	ret = pinmux_direction(gpioc, gpio, PINMUX_TYPE_OUTPUT);
+	spin_unlock_irqrestore(&gpio_lock, flags);
+ err_out:
+	return ret;
+}
+EXPORT_SYMBOL(gpio_direction_output);
+
+int gpio_get_value(unsigned gpio)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	unsigned long flags;
+	int value = 0;
+
+	if (!gpioc)
+		BUG();
+	else {
+		spin_lock_irqsave(&gpio_lock, flags);
+		value = __gpio_get_set_value(gpioc, gpio, 0, 0);
+		spin_unlock_irqrestore(&gpio_lock, flags);
+	}
+
+	return value;
+}
+EXPORT_SYMBOL(gpio_get_value);
+
+void gpio_set_value(unsigned gpio, int value)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	unsigned long flags;
+
+	if (!gpioc)
+		BUG();
+	else {
+		spin_lock_irqsave(&gpio_lock, flags);
+		__gpio_get_set_value(gpioc, gpio, value, 1);
+		spin_unlock_irqrestore(&gpio_lock, flags);
+	}
+}
+EXPORT_SYMBOL(gpio_set_value);
+
+int register_pinmux(struct pinmux_info *pip)
+{
+	registered_gpio = pip;
+	pr_info("pinmux: %s handling gpio %d -> %d\n",
+		pip->name, pip->first_gpio, pip->last_gpio);
+
+	return 0;
+}
-- 
GitLab


From 8d7b5b0af7e070b9ae0852541f67aa84da078315 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 8 Oct 2008 20:41:52 +0900
Subject: [PATCH 643/895] sh: Add sh7722 pinmux code

This patch adds pinmux and gpio support for the sh7722 processor.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/sh7722.h            |  210 +++
 arch/sh/kernel/cpu/sh4a/Makefile        |    4 +
 arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c | 1783 +++++++++++++++++++++++
 3 files changed, 1997 insertions(+)
 create mode 100644 arch/sh/include/asm/sh7722.h
 create mode 100644 arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c

diff --git a/arch/sh/include/asm/sh7722.h b/arch/sh/include/asm/sh7722.h
new file mode 100644
index 000000000000..4b3096f5307b
--- /dev/null
+++ b/arch/sh/include/asm/sh7722.h
@@ -0,0 +1,210 @@
+#ifndef __ASM_SH7722_H__
+#define __ASM_SH7722_H__
+
+enum {
+	/* PTA */
+	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
+	GPIO_PTA3, GPIO_PTA2, GPIO_PTA1, GPIO_PTA0,
+
+	/* PTB */
+	GPIO_PTB7, GPIO_PTB6, GPIO_PTB5, GPIO_PTB4,
+	GPIO_PTB3, GPIO_PTB2, GPIO_PTB1, GPIO_PTB0,
+
+	/* PTC */
+	GPIO_PTC7, GPIO_PTC5, GPIO_PTC4, GPIO_PTC3,
+	GPIO_PTC2, GPIO_PTC0,
+
+	/* PTD */
+	GPIO_PTD7, GPIO_PTD6, GPIO_PTD5, GPIO_PTD4,
+	GPIO_PTD3, GPIO_PTD2, GPIO_PTD1, GPIO_PTD0,
+
+	/* PTE */
+	GPIO_PTE7, GPIO_PTE6, GPIO_PTE5, GPIO_PTE4,
+	GPIO_PTE1, GPIO_PTE0,
+
+	/* PTF */
+	GPIO_PTF6, GPIO_PTF5, GPIO_PTF4, GPIO_PTF3,
+	GPIO_PTF2, GPIO_PTF1, GPIO_PTF0,
+
+	/* PTG */
+	GPIO_PTG4, GPIO_PTG3, GPIO_PTG2, GPIO_PTG1, GPIO_PTG0,
+
+	/* PTH */
+	GPIO_PTH7, GPIO_PTH6, GPIO_PTH5, GPIO_PTH4,
+	GPIO_PTH3, GPIO_PTH2, GPIO_PTH1, GPIO_PTH0,
+
+	/* PTJ */
+	GPIO_PTJ7, GPIO_PTJ6, GPIO_PTJ5, GPIO_PTJ1, GPIO_PTJ0,
+
+	/* PTK */
+	GPIO_PTK6, GPIO_PTK5, GPIO_PTK4, GPIO_PTK3,
+	GPIO_PTK2, GPIO_PTK1, GPIO_PTK0,
+
+	/* PTL */
+	GPIO_PTL7, GPIO_PTL6, GPIO_PTL5, GPIO_PTL4,
+	GPIO_PTL3, GPIO_PTL2, GPIO_PTL1, GPIO_PTL0,
+
+	/* PTM */
+	GPIO_PTM7, GPIO_PTM6, GPIO_PTM5, GPIO_PTM4,
+	GPIO_PTM3, GPIO_PTM2, GPIO_PTM1, GPIO_PTM0,
+
+	/* PTN */
+	GPIO_PTN7, GPIO_PTN6, GPIO_PTN5, GPIO_PTN4,
+	GPIO_PTN3, GPIO_PTN2, GPIO_PTN1, GPIO_PTN0,
+
+	/* PTQ */
+	GPIO_PTQ7, GPIO_PTQ6, GPIO_PTQ5, GPIO_PTQ4,
+	GPIO_PTQ3, GPIO_PTQ2, GPIO_PTQ1, GPIO_PTQ0,
+
+	/* PTR */
+	GPIO_PTR4, GPIO_PTR3, GPIO_PTR2, GPIO_PTR1, GPIO_PTR0,
+
+	/* PTS */
+	GPIO_PTS4, GPIO_PTS3, GPIO_PTS2, GPIO_PTS1, GPIO_PTS0,
+
+	/* PTT */
+	GPIO_PTT4, GPIO_PTT3, GPIO_PTT2, GPIO_PTT1, GPIO_PTT0,
+
+	/* PTU */
+	GPIO_PTU4, GPIO_PTU3, GPIO_PTU2, GPIO_PTU1, GPIO_PTU0,
+
+	/* PTV */
+	GPIO_PTV4, GPIO_PTV3, GPIO_PTV2, GPIO_PTV1, GPIO_PTV0,
+
+	/* PTW */
+	GPIO_PTW6, GPIO_PTW5, GPIO_PTW4, GPIO_PTW3,
+	GPIO_PTW2, GPIO_PTW1, GPIO_PTW0,
+
+	/* PTX */
+	GPIO_PTX6, GPIO_PTX5, GPIO_PTX4, GPIO_PTX3,
+	GPIO_PTX2, GPIO_PTX1, GPIO_PTX0,
+
+	/* PTY */
+	GPIO_PTY5, GPIO_PTY4, GPIO_PTY3, GPIO_PTY2,
+	GPIO_PTY1, GPIO_PTY0,
+
+	/* PTZ */
+	GPIO_PTZ5, GPIO_PTZ4, GPIO_PTZ3, GPIO_PTZ2, GPIO_PTZ1,
+
+	/* SCIF0 */
+	GPIO_FN_SCIF0_TXD, GPIO_FN_SCIF0_RXD,
+	GPIO_FN_SCIF0_RTS, GPIO_FN_SCIF0_CTS, GPIO_FN_SCIF0_SCK,
+
+	/* SCIF1 */
+	GPIO_FN_SCIF1_TXD, GPIO_FN_SCIF1_RXD,
+	GPIO_FN_SCIF1_RTS, GPIO_FN_SCIF1_CTS, GPIO_FN_SCIF1_SCK,
+
+	/* SCIF2 */
+	GPIO_FN_SCIF2_TXD, GPIO_FN_SCIF2_RXD,
+	GPIO_FN_SCIF2_RTS, GPIO_FN_SCIF2_CTS, GPIO_FN_SCIF2_SCK,
+
+	/* SIO */
+	GPIO_FN_SIOTXD, GPIO_FN_SIORXD,
+	GPIO_FN_SIOD, GPIO_FN_SIOSTRB0, GPIO_FN_SIOSTRB1,
+	GPIO_FN_SIOSCK, GPIO_FN_SIOMCK,
+
+	/* CEU */
+	GPIO_FN_VIO_D15, GPIO_FN_VIO_D14, GPIO_FN_VIO_D13, GPIO_FN_VIO_D12,
+	GPIO_FN_VIO_D11, GPIO_FN_VIO_D10, GPIO_FN_VIO_D9, GPIO_FN_VIO_D8,
+	GPIO_FN_VIO_D7, GPIO_FN_VIO_D6, GPIO_FN_VIO_D5, GPIO_FN_VIO_D4,
+	GPIO_FN_VIO_D3, GPIO_FN_VIO_D2, GPIO_FN_VIO_D1, GPIO_FN_VIO_D0,
+	GPIO_FN_VIO_FLD, GPIO_FN_VIO_CKO, GPIO_FN_VIO_STEX, GPIO_FN_VIO_STEM,
+	GPIO_FN_VIO_VD, GPIO_FN_VIO_HD, GPIO_FN_VIO_CLK,
+	GPIO_FN_VIO_VD2, GPIO_FN_VIO_HD2, GPIO_FN_VIO_CLK2,
+
+	/* LCDC */
+	GPIO_FN_LCDD23, GPIO_FN_LCDD22, GPIO_FN_LCDD21, GPIO_FN_LCDD20,
+	GPIO_FN_LCDD19, GPIO_FN_LCDD18, GPIO_FN_LCDD17, GPIO_FN_LCDD16,
+	GPIO_FN_LCDD15, GPIO_FN_LCDD14, GPIO_FN_LCDD13, GPIO_FN_LCDD12,
+	GPIO_FN_LCDD11, GPIO_FN_LCDD10, GPIO_FN_LCDD9, GPIO_FN_LCDD8,
+	GPIO_FN_LCDD7, GPIO_FN_LCDD6, GPIO_FN_LCDD5, GPIO_FN_LCDD4,
+	GPIO_FN_LCDD3, GPIO_FN_LCDD2, GPIO_FN_LCDD1, GPIO_FN_LCDD0,
+	GPIO_FN_LCDLCLK,
+	/* Main LCD */
+	GPIO_FN_LCDDON, GPIO_FN_LCDVCPWC, GPIO_FN_LCDVEPWC, GPIO_FN_LCDVSYN,
+	/* Main LCD - RGB Mode */
+	GPIO_FN_LCDDCK, GPIO_FN_LCDHSYN, GPIO_FN_LCDDISP,
+	/* Main LCD - SYS Mode */
+	GPIO_FN_LCDRS, GPIO_FN_LCDCS, GPIO_FN_LCDWR, GPIO_FN_LCDRD,
+	/* Sub LCD - SYS Mode */
+	GPIO_FN_LCDDON2, GPIO_FN_LCDVCPWC2, GPIO_FN_LCDVEPWC2,
+	GPIO_FN_LCDVSYN2, GPIO_FN_LCDCS2,
+
+	/* BSC */
+	GPIO_FN_IOIS16, GPIO_FN_A25, GPIO_FN_A24, GPIO_FN_A23, GPIO_FN_A22,
+	GPIO_FN_BS, GPIO_FN_CS6B_CE1B, GPIO_FN_WAIT, GPIO_FN_CS6A_CE2B,
+
+	/* SBSC */
+	GPIO_FN_HPD63, GPIO_FN_HPD62, GPIO_FN_HPD61, GPIO_FN_HPD60,
+	GPIO_FN_HPD59, GPIO_FN_HPD58, GPIO_FN_HPD57, GPIO_FN_HPD56,
+	GPIO_FN_HPD55, GPIO_FN_HPD54, GPIO_FN_HPD53, GPIO_FN_HPD52,
+	GPIO_FN_HPD51, GPIO_FN_HPD50, GPIO_FN_HPD49, GPIO_FN_HPD48,
+	GPIO_FN_HPDQM7, GPIO_FN_HPDQM6, GPIO_FN_HPDQM5, GPIO_FN_HPDQM4,
+
+	/* IRQ */
+	GPIO_FN_IRQ0, GPIO_FN_IRQ1, GPIO_FN_IRQ2, GPIO_FN_IRQ3,
+	GPIO_FN_IRQ4, GPIO_FN_IRQ5, GPIO_FN_IRQ6, GPIO_FN_IRQ7,
+
+	/* SDHI */
+	GPIO_FN_SDHICD, GPIO_FN_SDHIWP, GPIO_FN_SDHID3, GPIO_FN_SDHID2,
+	GPIO_FN_SDHID1, GPIO_FN_SDHID0, GPIO_FN_SDHICMD, GPIO_FN_SDHICLK,
+
+	/* SIU - Port A */
+	GPIO_FN_SIUAOLR, GPIO_FN_SIUAOBT, GPIO_FN_SIUAISLD, GPIO_FN_SIUAILR,
+	GPIO_FN_SIUAIBT, GPIO_FN_SIUAOSLD, GPIO_FN_SIUMCKA, GPIO_FN_SIUFCKA,
+
+	/* SIU - Port B */
+	GPIO_FN_SIUBOLR, GPIO_FN_SIUBOBT, GPIO_FN_SIUBISLD, GPIO_FN_SIUBILR,
+	GPIO_FN_SIUBIBT, GPIO_FN_SIUBOSLD, GPIO_FN_SIUMCKB, GPIO_FN_SIUFCKB,
+
+	/* AUD */
+	GPIO_FN_AUDSYNC, GPIO_FN_AUDATA3, GPIO_FN_AUDATA2, GPIO_FN_AUDATA1,
+	GPIO_FN_AUDATA0,
+
+	/* DMAC */
+	GPIO_FN_DACK, GPIO_FN_DREQ0,
+
+	/* VOU */
+	GPIO_FN_DV_CLKI, GPIO_FN_DV_CLK, GPIO_FN_DV_HSYNC, GPIO_FN_DV_VSYNC,
+	GPIO_FN_DV_D15, GPIO_FN_DV_D14, GPIO_FN_DV_D13, GPIO_FN_DV_D12,
+	GPIO_FN_DV_D11, GPIO_FN_DV_D10, GPIO_FN_DV_D9, GPIO_FN_DV_D8,
+	GPIO_FN_DV_D7, GPIO_FN_DV_D6, GPIO_FN_DV_D5, GPIO_FN_DV_D4,
+	GPIO_FN_DV_D3, GPIO_FN_DV_D2, GPIO_FN_DV_D1, GPIO_FN_DV_D0,
+
+	/* CPG */
+	GPIO_FN_STATUS0, GPIO_FN_PDSTATUS,
+
+	/* SIOF0 */
+	GPIO_FN_SIOF0_MCK, GPIO_FN_SIOF0_SCK,
+	GPIO_FN_SIOF0_SYNC, GPIO_FN_SIOF0_SS1, GPIO_FN_SIOF0_SS2,
+	GPIO_FN_SIOF0_TXD, GPIO_FN_SIOF0_RXD,
+
+	/* SIOF1 */
+	GPIO_FN_SIOF1_MCK, GPIO_FN_SIOF1_SCK,
+	GPIO_FN_SIOF1_SYNC, GPIO_FN_SIOF1_SS1, GPIO_FN_SIOF1_SS2,
+	GPIO_FN_SIOF1_TXD, GPIO_FN_SIOF1_RXD,
+
+	/* SIM */
+	GPIO_FN_SIM_D, GPIO_FN_SIM_CLK, GPIO_FN_SIM_RST,
+
+	/* TSIF */
+	GPIO_FN_TS_SDAT, GPIO_FN_TS_SCK, GPIO_FN_TS_SDEN, GPIO_FN_TS_SPSYNC,
+
+	/* IRDA */
+	GPIO_FN_IRDA_IN, GPIO_FN_IRDA_OUT,
+
+	/* TPU */
+	GPIO_FN_TPUTO,
+
+	/* FLCTL */
+	GPIO_FN_FCE, GPIO_FN_NAF7, GPIO_FN_NAF6, GPIO_FN_NAF5, GPIO_FN_NAF4,
+	GPIO_FN_NAF3, GPIO_FN_NAF2, GPIO_FN_NAF1, GPIO_FN_NAF0, GPIO_FN_FCDE,
+	GPIO_FN_FOE, GPIO_FN_FSC, GPIO_FN_FWE, GPIO_FN_FRB,
+
+	/* KEYSC */
+	GPIO_FN_KEYIN0, GPIO_FN_KEYIN1, GPIO_FN_KEYIN2, GPIO_FN_KEYIN3,
+	GPIO_FN_KEYIN4, GPIO_FN_KEYOUT0, GPIO_FN_KEYOUT1, GPIO_FN_KEYOUT2,
+	GPIO_FN_KEYOUT3, GPIO_FN_KEYOUT4_IN6, GPIO_FN_KEYOUT5_IN5,
+};
+
+#endif /* __ASM_SH7722_H__ */
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index 9381ad8da263..aced813b03ca 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -27,5 +27,9 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7723)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7366)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SHX3)	:= clock-shx3.o
 
+# Pinmux setup
+pinmux-$(CONFIG_CPU_SUBTYPE_SH7722)	:= pinmux-sh7722.o
+
 obj-y			+= $(clock-y)
 obj-$(CONFIG_SMP)	+= $(smp-y)
+obj-$(CONFIG_GENERIC_GPIO)	+= $(pinmux-y)
diff --git a/arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c b/arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c
new file mode 100644
index 000000000000..51b12ab58c9e
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c
@@ -0,0 +1,1783 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <asm/sh7722.h>
+
+enum {
+	PINMUX_RESERVED = 0,
+
+	PINMUX_DATA_BEGIN,
+	PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+	PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA,
+	PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+	PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA,
+	PTC7_DATA, PTC5_DATA, PTC4_DATA, PTC3_DATA, PTC2_DATA, PTC0_DATA,
+	PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+	PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA,
+	PTE7_DATA, PTE6_DATA, PTE5_DATA, PTE4_DATA, PTE1_DATA, PTE0_DATA,
+	PTF6_DATA, PTF5_DATA, PTF4_DATA,
+	PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA,
+	PTG4_DATA, PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA,
+	PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+	PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA,
+	PTJ7_DATA, PTJ6_DATA, PTJ5_DATA, PTJ1_DATA, PTJ0_DATA,
+	PTK6_DATA, PTK5_DATA, PTK4_DATA,
+	PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA,
+	PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+	PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA,
+	PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+	PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA,
+	PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+	PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA,
+	PTQ6_DATA, PTQ5_DATA, PTQ4_DATA,
+	PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA,
+	PTR4_DATA, PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA,
+	PTS4_DATA, PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA,
+	PTT4_DATA, PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA,
+	PTU4_DATA, PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA,
+	PTV4_DATA, PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA,
+	PTW6_DATA, PTW5_DATA, PTW4_DATA,
+	PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA,
+	PTX6_DATA, PTX5_DATA, PTX4_DATA,
+	PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA,
+	PTY6_DATA, PTY5_DATA, PTY4_DATA,
+	PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA,
+	PTZ5_DATA, PTZ4_DATA, PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA,
+	PINMUX_DATA_END,
+
+	PINMUX_INPUT_BEGIN,
+	PTA7_IN, PTA6_IN, PTA5_IN, PTA4_IN,
+	PTA3_IN, PTA2_IN, PTA1_IN, PTA0_IN,
+	PTB7_IN, PTB6_IN, PTB5_IN, PTB4_IN,
+	PTB3_IN, PTB2_IN, PTB1_IN, PTB0_IN,
+	PTC7_IN, PTC5_IN, PTC4_IN, PTC3_IN, PTC2_IN, PTC0_IN,
+	PTD7_IN, PTD6_IN, PTD5_IN, PTD4_IN, PTD3_IN, PTD2_IN, PTD1_IN,
+	PTE7_IN, PTE6_IN, PTE5_IN, PTE4_IN, PTE1_IN, PTE0_IN,
+	PTF6_IN, PTF5_IN, PTF4_IN, PTF3_IN, PTF2_IN, PTF1_IN,
+	PTH6_IN, PTH5_IN, PTH1_IN, PTH0_IN,
+	PTJ1_IN, PTJ0_IN,
+	PTK6_IN, PTK5_IN, PTK4_IN, PTK3_IN, PTK2_IN, PTK0_IN,
+	PTL7_IN, PTL6_IN, PTL5_IN, PTL4_IN,
+	PTL3_IN, PTL2_IN, PTL1_IN, PTL0_IN,
+	PTM7_IN, PTM6_IN, PTM5_IN, PTM4_IN,
+	PTM3_IN, PTM2_IN, PTM1_IN, PTM0_IN,
+	PTN7_IN, PTN6_IN, PTN5_IN, PTN4_IN,
+	PTN3_IN, PTN2_IN, PTN1_IN, PTN0_IN,
+	PTQ5_IN, PTQ4_IN, PTQ3_IN, PTQ2_IN, PTQ0_IN,
+	PTR2_IN,
+	PTS4_IN, PTS2_IN, PTS1_IN,
+	PTT4_IN, PTT3_IN, PTT2_IN, PTT1_IN,
+	PTU4_IN, PTU3_IN, PTU2_IN, PTU1_IN, PTU0_IN,
+	PTV4_IN, PTV3_IN, PTV2_IN, PTV1_IN, PTV0_IN,
+	PTW6_IN, PTW4_IN, PTW3_IN, PTW2_IN, PTW1_IN, PTW0_IN,
+	PTX6_IN, PTX5_IN, PTX4_IN, PTX3_IN, PTX2_IN, PTX1_IN, PTX0_IN,
+	PTY5_IN, PTY4_IN, PTY3_IN, PTY2_IN, PTY0_IN,
+	PTZ5_IN, PTZ4_IN, PTZ3_IN, PTZ2_IN, PTZ1_IN,
+	PINMUX_INPUT_END,
+
+	PINMUX_INPUT_PULLDOWN_BEGIN,
+	PTA7_IN_PD, PTA6_IN_PD, PTA5_IN_PD, PTA4_IN_PD,
+	PTA3_IN_PD, PTA2_IN_PD, PTA1_IN_PD, PTA0_IN_PD,
+	PTE7_IN_PD, PTE6_IN_PD, PTE5_IN_PD, PTE4_IN_PD,	PTE1_IN_PD, PTE0_IN_PD,
+	PTF6_IN_PD, PTF5_IN_PD, PTF4_IN_PD, PTF3_IN_PD, PTF2_IN_PD, PTF1_IN_PD,
+	PTH6_IN_PD, PTH5_IN_PD, PTH1_IN_PD, PTH0_IN_PD,
+	PTK6_IN_PD, PTK5_IN_PD, PTK4_IN_PD, PTK3_IN_PD, PTK2_IN_PD, PTK0_IN_PD,
+	PTL7_IN_PD, PTL6_IN_PD, PTL5_IN_PD, PTL4_IN_PD,
+	PTL3_IN_PD, PTL2_IN_PD, PTL1_IN_PD, PTL0_IN_PD,
+	PTM7_IN_PD, PTM6_IN_PD, PTM5_IN_PD, PTM4_IN_PD,
+	PTM3_IN_PD, PTM2_IN_PD, PTM1_IN_PD, PTM0_IN_PD,
+	PTQ5_IN_PD, PTQ4_IN_PD, PTQ3_IN_PD, PTQ2_IN_PD,
+	PTS4_IN_PD, PTS2_IN_PD, PTS1_IN_PD,
+	PTT4_IN_PD, PTT3_IN_PD, PTT2_IN_PD, PTT1_IN_PD,
+	PTU4_IN_PD, PTU3_IN_PD, PTU2_IN_PD, PTU1_IN_PD, PTU0_IN_PD,
+	PTV4_IN_PD, PTV3_IN_PD, PTV2_IN_PD, PTV1_IN_PD, PTV0_IN_PD,
+	PTW6_IN_PD, PTW4_IN_PD,	PTW3_IN_PD, PTW2_IN_PD, PTW1_IN_PD, PTW0_IN_PD,
+	PTX6_IN_PD, PTX5_IN_PD, PTX4_IN_PD,
+	PTX3_IN_PD, PTX2_IN_PD, PTX1_IN_PD, PTX0_IN_PD,
+	PINMUX_INPUT_PULLDOWN_END,
+
+	PINMUX_INPUT_PULLUP_BEGIN,
+	PTC7_IN_PU, PTC5_IN_PU,
+	PTD7_IN_PU, PTD6_IN_PU, PTD5_IN_PU, PTD4_IN_PU,
+	PTD3_IN_PU, PTD2_IN_PU, PTD1_IN_PU,
+	PTJ1_IN_PU, PTJ0_IN_PU,
+	PTQ0_IN_PU,
+	PTR2_IN_PU,
+	PTX6_IN_PU,
+	PTY5_IN_PU, PTY4_IN_PU, PTY3_IN_PU, PTY2_IN_PU, PTY0_IN_PU,
+	PTZ5_IN_PU, PTZ4_IN_PU, PTZ3_IN_PU, PTZ2_IN_PU, PTZ1_IN_PU,
+	PINMUX_INPUT_PULLUP_END,
+
+	PINMUX_OUTPUT_BEGIN,
+	PTA7_OUT, PTA5_OUT,
+	PTB7_OUT, PTB6_OUT, PTB5_OUT, PTB4_OUT,
+	PTB3_OUT, PTB2_OUT, PTB1_OUT, PTB0_OUT,
+	PTC4_OUT, PTC3_OUT, PTC2_OUT, PTC0_OUT,
+	PTD6_OUT, PTD5_OUT, PTD4_OUT,
+	PTD3_OUT, PTD2_OUT, PTD1_OUT, PTD0_OUT,
+	PTE7_OUT, PTE6_OUT, PTE5_OUT, PTE4_OUT, PTE1_OUT, PTE0_OUT,
+	PTF6_OUT, PTF5_OUT, PTF4_OUT, PTF3_OUT, PTF2_OUT, PTF0_OUT,
+	PTG4_OUT, PTG3_OUT, PTG2_OUT, PTG1_OUT, PTG0_OUT,
+	PTH7_OUT, PTH6_OUT, PTH5_OUT, PTH4_OUT,
+	PTH3_OUT, PTH2_OUT, PTH1_OUT, PTH0_OUT,
+	PTJ7_OUT, PTJ6_OUT, PTJ5_OUT, PTJ1_OUT, PTJ0_OUT,
+	PTK6_OUT, PTK5_OUT, PTK4_OUT, PTK3_OUT, PTK1_OUT, PTK0_OUT,
+	PTL7_OUT, PTL6_OUT, PTL5_OUT, PTL4_OUT,
+	PTL3_OUT, PTL2_OUT, PTL1_OUT, PTL0_OUT,
+	PTM7_OUT, PTM6_OUT, PTM5_OUT, PTM4_OUT,
+	PTM3_OUT, PTM2_OUT, PTM1_OUT, PTM0_OUT,
+	PTN7_OUT, PTN6_OUT, PTN5_OUT, PTN4_OUT,
+	PTN3_OUT, PTN2_OUT, PTN1_OUT, PTN0_OUT,	PTQ6_OUT, PTQ5_OUT, PTQ4_OUT,
+	PTQ3_OUT, PTQ2_OUT, PTQ1_OUT, PTQ0_OUT,
+	PTR4_OUT, PTR3_OUT, PTR1_OUT, PTR0_OUT,
+	PTS3_OUT, PTS2_OUT, PTS0_OUT,
+	PTT4_OUT, PTT3_OUT, PTT2_OUT, PTT0_OUT,
+	PTU4_OUT, PTU3_OUT, PTU2_OUT, PTU0_OUT,
+	PTV4_OUT, PTV3_OUT, PTV2_OUT, PTV1_OUT, PTV0_OUT,
+	PTW5_OUT, PTW4_OUT, PTW3_OUT, PTW2_OUT, PTW1_OUT, PTW0_OUT,
+	PTX6_OUT, PTX5_OUT, PTX4_OUT, PTX3_OUT, PTX2_OUT, PTX1_OUT, PTX0_OUT,
+	PTY5_OUT, PTY4_OUT, PTY3_OUT, PTY2_OUT, PTY1_OUT, PTY0_OUT,
+	PINMUX_OUTPUT_END,
+
+	PINMUX_MARK_BEGIN,
+	SCIF0_TXD_MARK, SCIF0_RXD_MARK,
+	SCIF0_RTS_MARK, SCIF0_CTS_MARK, SCIF0_SCK_MARK,
+	SCIF1_TXD_MARK, SCIF1_RXD_MARK,
+	SCIF1_RTS_MARK, SCIF1_CTS_MARK, SCIF1_SCK_MARK,
+	SCIF2_TXD_MARK, SCIF2_RXD_MARK,
+	SCIF2_RTS_MARK, SCIF2_CTS_MARK, SCIF2_SCK_MARK,
+	SIOTXD_MARK, SIORXD_MARK,
+	SIOD_MARK, SIOSTRB0_MARK, SIOSTRB1_MARK,
+	SIOSCK_MARK, SIOMCK_MARK,
+	VIO_D15_MARK, VIO_D14_MARK, VIO_D13_MARK, VIO_D12_MARK,
+	VIO_D11_MARK, VIO_D10_MARK, VIO_D9_MARK, VIO_D8_MARK,
+	VIO_D7_MARK, VIO_D6_MARK, VIO_D5_MARK, VIO_D4_MARK,
+	VIO_D3_MARK, VIO_D2_MARK, VIO_D1_MARK, VIO_D0_MARK,
+	VIO_CLK_MARK, VIO_VD_MARK, VIO_HD_MARK, VIO_FLD_MARK,
+	VIO_CKO_MARK, VIO_STEX_MARK, VIO_STEM_MARK, VIO_VD2_MARK,
+	VIO_HD2_MARK, VIO_CLK2_MARK,
+	LCDD23_MARK, LCDD22_MARK, LCDD21_MARK, LCDD20_MARK,
+	LCDD19_MARK, LCDD18_MARK, LCDD17_MARK, LCDD16_MARK,
+	LCDD15_MARK, LCDD14_MARK, LCDD13_MARK, LCDD12_MARK,
+	LCDD11_MARK, LCDD10_MARK, LCDD9_MARK, LCDD8_MARK,
+	LCDD7_MARK, LCDD6_MARK, LCDD5_MARK, LCDD4_MARK,
+	LCDD3_MARK, LCDD2_MARK, LCDD1_MARK, LCDD0_MARK,
+	LCDLCLK_MARK, LCDDON_MARK, LCDVCPWC_MARK, LCDVEPWC_MARK,
+	LCDVSYN_MARK, LCDDCK_MARK, LCDHSYN_MARK, LCDDISP_MARK,
+	LCDRS_MARK, LCDCS_MARK, LCDWR_MARK, LCDRD_MARK,
+	LCDDON2_MARK, LCDVCPWC2_MARK, LCDVEPWC2_MARK, LCDVSYN2_MARK,
+	LCDCS2_MARK,
+	IOIS16_MARK, A25_MARK, A24_MARK, A23_MARK, A22_MARK,
+	BS_MARK, CS6B_CE1B_MARK, WAIT_MARK, CS6A_CE2B_MARK,
+	HPD63_MARK, HPD62_MARK, HPD61_MARK, HPD60_MARK,
+	HPD59_MARK, HPD58_MARK, HPD57_MARK, HPD56_MARK,
+	HPD55_MARK, HPD54_MARK, HPD53_MARK, HPD52_MARK,
+	HPD51_MARK, HPD50_MARK, HPD49_MARK, HPD48_MARK,
+	HPDQM7_MARK, HPDQM6_MARK, HPDQM5_MARK, HPDQM4_MARK,
+	IRQ0_MARK, IRQ1_MARK, IRQ2_MARK, IRQ3_MARK,
+	IRQ4_MARK, IRQ5_MARK, IRQ6_MARK, IRQ7_MARK,
+	SDHICD_MARK, SDHIWP_MARK, SDHID3_MARK, SDHID2_MARK,
+	SDHID1_MARK, SDHID0_MARK, SDHICMD_MARK, SDHICLK_MARK,
+	SIUAOLR_MARK, SIUAOBT_MARK, SIUAISLD_MARK, SIUAILR_MARK,
+	SIUAIBT_MARK, SIUAOSLD_MARK, SIUMCKA_MARK, SIUFCKA_MARK,
+	SIUBOLR_MARK, SIUBOBT_MARK, SIUBISLD_MARK, SIUBILR_MARK,
+	SIUBIBT_MARK, SIUBOSLD_MARK, SIUMCKB_MARK, SIUFCKB_MARK,
+	AUDSYNC_MARK, AUDATA3_MARK, AUDATA2_MARK, AUDATA1_MARK,	AUDATA0_MARK,
+	DACK_MARK, DREQ0_MARK,
+	DV_CLKI_MARK, DV_CLK_MARK, DV_HSYNC_MARK, DV_VSYNC_MARK,
+	DV_D15_MARK, DV_D14_MARK, DV_D13_MARK, DV_D12_MARK,
+	DV_D11_MARK, DV_D10_MARK, DV_D9_MARK, DV_D8_MARK,
+	DV_D7_MARK, DV_D6_MARK, DV_D5_MARK, DV_D4_MARK,
+	DV_D3_MARK, DV_D2_MARK, DV_D1_MARK, DV_D0_MARK,
+	STATUS0_MARK, PDSTATUS_MARK,
+	SIOF0_MCK_MARK, SIOF0_SCK_MARK,
+	SIOF0_SYNC_MARK, SIOF0_SS1_MARK, SIOF0_SS2_MARK,
+	SIOF0_TXD_MARK,	SIOF0_RXD_MARK,
+	SIOF1_MCK_MARK, SIOF1_SCK_MARK,
+	SIOF1_SYNC_MARK, SIOF1_SS1_MARK, SIOF1_SS2_MARK,
+	SIOF1_TXD_MARK, SIOF1_RXD_MARK,
+	SIM_D_MARK, SIM_CLK_MARK, SIM_RST_MARK,
+	TS_SDAT_MARK, TS_SCK_MARK, TS_SDEN_MARK, TS_SPSYNC_MARK,
+	IRDA_IN_MARK, IRDA_OUT_MARK,
+	TPUTO_MARK,
+	FCE_MARK, NAF7_MARK, NAF6_MARK, NAF5_MARK, NAF4_MARK,
+	NAF3_MARK, NAF2_MARK, NAF1_MARK, NAF0_MARK, FCDE_MARK,
+	FOE_MARK, FSC_MARK, FWE_MARK, FRB_MARK,
+	KEYIN0_MARK, KEYIN1_MARK, KEYIN2_MARK, KEYIN3_MARK, KEYIN4_MARK,
+	KEYOUT0_MARK, KEYOUT1_MARK, KEYOUT2_MARK, KEYOUT3_MARK,
+	KEYOUT4_IN6_MARK, KEYOUT5_IN5_MARK,
+	PINMUX_MARK_END,
+
+	PINMUX_FUNCTION_BEGIN,
+	VIO_D7_SCIF1_SCK, VIO_D6_SCIF1_RXD, VIO_D5_SCIF1_TXD, VIO_D4,
+	VIO_D3, VIO_D2, VIO_D1, VIO_D0_LCDLCLK,
+	HPD55, HPD54, HPD53, HPD52, HPD51, HPD50, HPD49, HPD48,
+	IOIS16, HPDQM7, HPDQM6, HPDQM5, HPDQM4,
+	SDHICD, SDHIWP, SDHID3, IRQ2_SDHID2, SDHID1, SDHID0, SDHICMD, SDHICLK,
+	A25, A24, A23, A22, IRQ5, IRQ4_BS,
+	PTF6, SIOSCK_SIUBOBT, SIOSTRB1_SIUBOLR,
+	SIOSTRB0_SIUBIBT, SIOD_SIUBILR, SIORXD_SIUBISLD, SIOTXD_SIUBOSLD,
+	AUDSYNC, AUDATA3, AUDATA2, AUDATA1, AUDATA0,
+	LCDVCPWC_LCDVCPWC2, LCDVSYN2_DACK, LCDVSYN, LCDDISP_LCDRS,
+	LCDHSYN_LCDCS, LCDDON_LCDDON2, LCDD17_DV_HSYNC, LCDD16_DV_VSYNC,
+	STATUS0, PDSTATUS, IRQ1, IRQ0,
+	SIUAILR_SIOF1_SS2, SIUAIBT_SIOF1_SS1, SIUAOLR_SIOF1_SYNC,
+	SIUAOBT_SIOF1_SCK, SIUAISLD_SIOF1_RXD, SIUAOSLD_SIOF1_TXD, PTK0,
+	LCDD15_DV_D15, LCDD14_DV_D14, LCDD13_DV_D13, LCDD12_DV_D12,
+	LCDD11_DV_D11, LCDD10_DV_D10, LCDD9_DV_D9, LCDD8_DV_D8,
+	LCDD7_DV_D7, LCDD6_DV_D6, LCDD5_DV_D5, LCDD4_DV_D4,
+	LCDD3_DV_D3, LCDD2_DV_D2, LCDD1_DV_D1, LCDD0_DV_D0,
+	HPD63, HPD62, HPD61, HPD60, HPD59, HPD58, HPD57, HPD56,
+	SIOF0_SS2_SIM_RST, SIOF0_SS1_TS_SPSYNC, SIOF0_SYNC_TS_SDEN,
+	SIOF0_SCK_TS_SCK, PTQ2, PTQ1, PTQ0,
+	LCDRD, CS6B_CE1B_LCDCS2, WAIT, LCDDCK_LCDWR, LCDVEPWC_LCDVEPWC2,
+	SCIF0_CTS_SIUAISPD, SCIF0_RTS_SIUAOSPD,
+	SCIF0_SCK_TPUTO, SCIF0_RXD, SCIF0_TXD,
+	FOE_VIO_VD2, FWE, FSC, DREQ0, FCDE,
+	NAF2_VIO_D10, NAF1_VIO_D9, NAF0_VIO_D8,
+	FRB_VIO_CLK2, FCE_VIO_HD2,
+	NAF7_VIO_D15, NAF6_VIO_D14, NAF5_VIO_D13, NAF4_VIO_D12, NAF3_VIO_D11,
+	VIO_FLD_SCIF2_CTS, VIO_CKO_SCIF2_RTS, VIO_STEX_SCIF2_SCK,
+	VIO_STEM_SCIF2_TXD, VIO_HD_SCIF2_RXD,
+	VIO_VD_SCIF1_CTS, VIO_CLK_SCIF1_RTS,
+	CS6A_CE2B, LCDD23, LCDD22, LCDD21, LCDD20,
+	LCDD19_DV_CLKI, LCDD18_DV_CLK,
+	KEYOUT5_IN5, KEYOUT4_IN6, KEYOUT3, KEYOUT2, KEYOUT1, KEYOUT0,
+	KEYIN4_IRQ7, KEYIN3, KEYIN2, KEYIN1, KEYIN0_IRQ6,
+
+	PSA15_KEYIN0, PSA15_IRQ6, PSA14_KEYIN4, PSA14_IRQ7,
+	PSA9_IRQ4, PSA9_BS, PSA4_IRQ2, PSA4_SDHID2,
+	PSB15_SIOTXD, PSB15_SIUBOSLD, PSB14_SIORXD, PSB14_SIUBISLD,
+	PSB13_SIOD, PSB13_SIUBILR, PSB12_SIOSTRB0, PSB12_SIUBIBT,
+	PSB11_SIOSTRB1, PSB11_SIUBOLR, PSB10_SIOSCK, PSB10_SIUBOBT,
+	PSB9_SIOMCK, PSB9_SIUMCKB, PSB8_SIOF0_MCK, PSB8_IRQ3,
+	PSB7_SIOF0_TXD, PSB7_IRDA_OUT, PSB6_SIOF0_RXD, PSB6_IRDA_IN,
+	PSB5_SIOF0_SCK, PSB5_TS_SCK, PSB4_SIOF0_SYNC, PSB4_TS_SDEN,
+	PSB3_SIOF0_SS1, PSB3_TS_SPSYNC, PSB2_SIOF0_SS2, PSB2_SIM_RST,
+	PSB1_SIUMCKA, PSB1_SIOF1_MCK, PSB0_SIUAOSLD, PSB0_SIOF1_TXD,
+	PSC15_SIUAISLD, PSC15_SIOF1_RXD, PSC14_SIUAOBT, PSC14_SIOF1_SCK,
+	PSC13_SIUAOLR, PSC13_SIOF1_SYNC, PSC12_SIUAIBT, PSC12_SIOF1_SS1,
+	PSC11_SIUAILR, PSC11_SIOF1_SS2, PSC0_NAF, PSC0_VIO,
+	PSD13_VIO, PSD13_SCIF2, PSD12_VIO, PSD12_SCIF1,
+	PSD11_VIO, PSD11_SCIF1, PSD10_VIO_D0, PSD10_LCDLCLK,
+	PSD9_SIOMCK_SIUMCKB, PSD9_SIUFCKB, PSD8_SCIF0_SCK, PSD8_TPUTO,
+	PSD7_SCIF0_RTS, PSD7_SIUAOSPD, PSD6_SCIF0_CTS, PSD6_SIUAISPD,
+	PSD5_CS6B_CE1B, PSD5_LCDCS2,
+	PSD3_LCDVEPWC_LCDVCPWC, PSD3_LCDVEPWC2_LCDVCPWC2,
+	PSD2_LCDDON, PSD2_LCDDON2, PSD0_LCDD19_LCDD0, PSD0_DV,
+	PSE15_SIOF0_MCK_IRQ3, PSE15_SIM_D,
+	PSE14_SIOF0_TXD_IRDA_OUT, PSE14_SIM_CLK,
+	PSE13_SIOF0_RXD_IRDA_IN, PSE13_TS_SDAT, PSE12_LCDVSYN2, PSE12_DACK,
+	PSE11_SIUMCKA_SIOF1_MCK, PSE11_SIUFCKA,
+	PSE3_FLCTL, PSE3_VIO, PSE2_NAF2, PSE2_VIO_D10,
+	PSE1_NAF1, PSE1_VIO_D9, PSE0_NAF0, PSE0_VIO_D8,
+
+	HIZA14_KEYSC, HIZA14_HIZ,
+	HIZA10_NAF, HIZA10_HIZ,
+	HIZA9_VIO, HIZA9_HIZ,
+	HIZA8_LCDC, HIZA8_HIZ,
+	HIZA7_LCDC, HIZA7_HIZ,
+	HIZA6_LCDC, HIZA6_HIZ,
+	HIZB1_VIO, HIZB1_HIZ,
+	HIZB0_VIO, HIZB0_HIZ,
+	HIZC15_IRQ7, HIZC15_HIZ,
+	HIZC14_IRQ6, HIZC14_HIZ,
+	HIZC13_IRQ5, HIZC13_HIZ,
+	HIZC12_IRQ4, HIZC12_HIZ,
+	HIZC11_IRQ3, HIZC11_HIZ,
+	HIZC10_IRQ2, HIZC10_HIZ,
+	HIZC9_IRQ1, HIZC9_HIZ,
+	HIZC8_IRQ0, HIZC8_HIZ,
+	MSELB9_VIO, MSELB9_VIO2,
+	MSELB8_RGB, MSELB8_SYS,
+	PINMUX_FUNCTION_END,
+};
+
+static pinmux_enum_t pinmux_data[] = {
+	/* PTA */
+	PINMUX_DATA(PTA7_DATA, PTA7_IN, PTA7_IN_PD, PTA7_OUT),
+	PINMUX_DATA(PTA6_DATA, PTA6_IN, PTA6_IN_PD),
+	PINMUX_DATA(PTA5_DATA, PTA5_IN, PTA5_IN_PD, PTA5_OUT),
+	PINMUX_DATA(PTA4_DATA, PTA4_IN, PTA4_IN_PD),
+	PINMUX_DATA(PTA3_DATA, PTA3_IN, PTA3_IN_PD),
+	PINMUX_DATA(PTA2_DATA, PTA2_IN, PTA2_IN_PD),
+	PINMUX_DATA(PTA1_DATA, PTA1_IN, PTA1_IN_PD),
+	PINMUX_DATA(PTA0_DATA, PTA0_IN, PTA0_IN_PD),
+
+	/* PTB */
+	PINMUX_DATA(PTB7_DATA, PTB7_IN, PTB7_OUT),
+	PINMUX_DATA(PTB6_DATA, PTB6_IN, PTB6_OUT),
+	PINMUX_DATA(PTB5_DATA, PTB5_IN, PTB5_OUT),
+	PINMUX_DATA(PTB4_DATA, PTB4_IN, PTB4_OUT),
+	PINMUX_DATA(PTB3_DATA, PTB3_IN, PTB3_OUT),
+	PINMUX_DATA(PTB2_DATA, PTB2_IN, PTB2_OUT),
+	PINMUX_DATA(PTB1_DATA, PTB1_IN, PTB1_OUT),
+	PINMUX_DATA(PTB0_DATA, PTB0_IN, PTB0_OUT),
+
+	/* PTC */
+	PINMUX_DATA(PTC7_DATA, PTC7_IN, PTC7_IN_PU),
+	PINMUX_DATA(PTC5_DATA, PTC5_IN, PTC5_IN_PU),
+	PINMUX_DATA(PTC4_DATA, PTC4_IN, PTC4_OUT),
+	PINMUX_DATA(PTC3_DATA, PTC3_IN, PTC3_OUT),
+	PINMUX_DATA(PTC2_DATA, PTC2_IN, PTC2_OUT),
+	PINMUX_DATA(PTC0_DATA, PTC0_IN, PTC0_OUT),
+
+	/* PTD */
+	PINMUX_DATA(PTD7_DATA, PTD7_IN, PTD7_IN_PU),
+	PINMUX_DATA(PTD6_DATA, PTD6_OUT, PTD6_IN, PTD6_IN_PU),
+	PINMUX_DATA(PTD5_DATA, PTD5_OUT, PTD5_IN, PTD5_IN_PU),
+	PINMUX_DATA(PTD4_DATA, PTD4_OUT, PTD4_IN, PTD4_IN_PU),
+	PINMUX_DATA(PTD3_DATA, PTD3_OUT, PTD3_IN, PTD3_IN_PU),
+	PINMUX_DATA(PTD2_DATA, PTD2_OUT, PTD2_IN, PTD2_IN_PU),
+	PINMUX_DATA(PTD1_DATA, PTD1_OUT, PTD1_IN, PTD1_IN_PU),
+	PINMUX_DATA(PTD0_DATA, PTD0_OUT),
+
+	/* PTE */
+	PINMUX_DATA(PTE7_DATA, PTE7_OUT, PTE7_IN, PTE7_IN_PD),
+	PINMUX_DATA(PTE6_DATA, PTE6_OUT, PTE6_IN, PTE6_IN_PD),
+	PINMUX_DATA(PTE5_DATA, PTE5_OUT, PTE5_IN, PTE5_IN_PD),
+	PINMUX_DATA(PTE4_DATA, PTE4_OUT, PTE4_IN, PTE4_IN_PD),
+	PINMUX_DATA(PTE1_DATA, PTE1_OUT, PTE1_IN, PTE1_IN_PD),
+	PINMUX_DATA(PTE0_DATA, PTE0_OUT, PTE0_IN, PTE0_IN_PD),
+
+	/* PTF */
+	PINMUX_DATA(PTF6_DATA, PTF6_OUT, PTF6_IN, PTF6_IN_PD),
+	PINMUX_DATA(PTF5_DATA, PTF5_OUT, PTF5_IN, PTF5_IN_PD),
+	PINMUX_DATA(PTF4_DATA, PTF4_OUT, PTF4_IN, PTF4_IN_PD),
+	PINMUX_DATA(PTF3_DATA, PTF3_OUT, PTF3_IN, PTF3_IN_PD),
+	PINMUX_DATA(PTF2_DATA, PTF2_OUT, PTF2_IN, PTF2_IN_PD),
+	PINMUX_DATA(PTF1_DATA, PTF1_IN, PTF1_IN_PD),
+	PINMUX_DATA(PTF0_DATA, PTF0_OUT),
+
+	/* PTG */
+	PINMUX_DATA(PTG4_DATA, PTG4_OUT),
+	PINMUX_DATA(PTG3_DATA, PTG3_OUT),
+	PINMUX_DATA(PTG2_DATA, PTG2_OUT),
+	PINMUX_DATA(PTG1_DATA, PTG1_OUT),
+	PINMUX_DATA(PTG0_DATA, PTG0_OUT),
+
+	/* PTH */
+	PINMUX_DATA(PTH7_DATA, PTH7_OUT),
+	PINMUX_DATA(PTH6_DATA, PTH6_OUT, PTH6_IN, PTH6_IN_PD),
+	PINMUX_DATA(PTH5_DATA, PTH5_OUT, PTH5_IN, PTH5_IN_PD),
+	PINMUX_DATA(PTH4_DATA, PTH4_OUT),
+	PINMUX_DATA(PTH3_DATA, PTH3_OUT),
+	PINMUX_DATA(PTH2_DATA, PTH2_OUT),
+	PINMUX_DATA(PTH1_DATA, PTH1_OUT, PTH1_IN, PTH1_IN_PD),
+	PINMUX_DATA(PTH0_DATA, PTH0_OUT, PTH0_IN, PTH0_IN_PD),
+
+	/* PTJ */
+	PINMUX_DATA(PTJ7_DATA, PTJ7_OUT),
+	PINMUX_DATA(PTJ6_DATA, PTJ6_OUT),
+	PINMUX_DATA(PTJ5_DATA, PTJ5_OUT),
+	PINMUX_DATA(PTJ1_DATA, PTJ1_OUT, PTJ1_IN, PTJ1_IN_PU),
+	PINMUX_DATA(PTJ0_DATA, PTJ0_OUT, PTJ0_IN, PTJ0_IN_PU),
+
+	/* PTK */
+	PINMUX_DATA(PTK6_DATA, PTK6_OUT, PTK6_IN, PTK6_IN_PD),
+	PINMUX_DATA(PTK5_DATA, PTK5_OUT, PTK5_IN, PTK5_IN_PD),
+	PINMUX_DATA(PTK4_DATA, PTK4_OUT, PTK4_IN, PTK4_IN_PD),
+	PINMUX_DATA(PTK3_DATA, PTK3_OUT, PTK3_IN, PTK3_IN_PD),
+	PINMUX_DATA(PTK2_DATA, PTK2_IN, PTK2_IN_PD),
+	PINMUX_DATA(PTK1_DATA, PTK1_OUT),
+	PINMUX_DATA(PTK0_DATA, PTK0_OUT, PTK0_IN, PTK0_IN_PD),
+
+	/* PTL */
+	PINMUX_DATA(PTL7_DATA, PTL7_OUT, PTL7_IN, PTL7_IN_PD),
+	PINMUX_DATA(PTL6_DATA, PTL6_OUT, PTL6_IN, PTL6_IN_PD),
+	PINMUX_DATA(PTL5_DATA, PTL5_OUT, PTL5_IN, PTL5_IN_PD),
+	PINMUX_DATA(PTL4_DATA, PTL4_OUT, PTL4_IN, PTL4_IN_PD),
+	PINMUX_DATA(PTL3_DATA, PTL3_OUT, PTL3_IN, PTL3_IN_PD),
+	PINMUX_DATA(PTL2_DATA, PTL2_OUT, PTL2_IN, PTL2_IN_PD),
+	PINMUX_DATA(PTL1_DATA, PTL1_OUT, PTL1_IN, PTL1_IN_PD),
+	PINMUX_DATA(PTL0_DATA, PTL0_OUT, PTL0_IN, PTL0_IN_PD),
+
+	/* PTM */
+	PINMUX_DATA(PTM7_DATA, PTM7_OUT, PTM7_IN, PTM7_IN_PD),
+	PINMUX_DATA(PTM6_DATA, PTM6_OUT, PTM6_IN, PTM6_IN_PD),
+	PINMUX_DATA(PTM5_DATA, PTM5_OUT, PTM5_IN, PTM5_IN_PD),
+	PINMUX_DATA(PTM4_DATA, PTM4_OUT, PTM4_IN, PTM4_IN_PD),
+	PINMUX_DATA(PTM3_DATA, PTM3_OUT, PTM3_IN, PTM3_IN_PD),
+	PINMUX_DATA(PTM2_DATA, PTM2_OUT, PTM2_IN, PTM2_IN_PD),
+	PINMUX_DATA(PTM1_DATA, PTM1_OUT, PTM1_IN, PTM1_IN_PD),
+	PINMUX_DATA(PTM0_DATA, PTM0_OUT, PTM0_IN, PTM0_IN_PD),
+
+	/* PTN */
+	PINMUX_DATA(PTN7_DATA, PTN7_OUT, PTN7_IN),
+	PINMUX_DATA(PTN6_DATA, PTN6_OUT, PTN6_IN),
+	PINMUX_DATA(PTN5_DATA, PTN5_OUT, PTN5_IN),
+	PINMUX_DATA(PTN4_DATA, PTN4_OUT, PTN4_IN),
+	PINMUX_DATA(PTN3_DATA, PTN3_OUT, PTN3_IN),
+	PINMUX_DATA(PTN2_DATA, PTN2_OUT, PTN2_IN),
+	PINMUX_DATA(PTN1_DATA, PTN1_OUT, PTN1_IN),
+	PINMUX_DATA(PTN0_DATA, PTN0_OUT, PTN0_IN),
+
+	/* PTQ */
+	PINMUX_DATA(PTQ6_DATA, PTQ6_OUT),
+	PINMUX_DATA(PTQ5_DATA, PTQ5_OUT, PTQ5_IN, PTQ5_IN_PD),
+	PINMUX_DATA(PTQ4_DATA, PTQ4_OUT, PTQ4_IN, PTQ4_IN_PD),
+	PINMUX_DATA(PTQ3_DATA, PTQ3_OUT, PTQ3_IN, PTQ3_IN_PD),
+	PINMUX_DATA(PTQ2_DATA, PTQ2_IN, PTQ2_IN_PD),
+	PINMUX_DATA(PTQ1_DATA, PTQ1_OUT),
+	PINMUX_DATA(PTQ0_DATA, PTQ0_OUT, PTQ0_IN, PTQ0_IN_PU),
+
+	/* PTR */
+	PINMUX_DATA(PTR4_DATA, PTR4_OUT),
+	PINMUX_DATA(PTR3_DATA, PTR3_OUT),
+	PINMUX_DATA(PTR2_DATA, PTR2_IN, PTR2_IN_PU),
+	PINMUX_DATA(PTR1_DATA, PTR1_OUT),
+	PINMUX_DATA(PTR0_DATA, PTR0_OUT),
+
+	/* PTS */
+	PINMUX_DATA(PTS4_DATA, PTS4_IN, PTS4_IN_PD),
+	PINMUX_DATA(PTS3_DATA, PTS3_OUT),
+	PINMUX_DATA(PTS2_DATA, PTS2_OUT, PTS2_IN, PTS2_IN_PD),
+	PINMUX_DATA(PTS1_DATA, PTS1_IN, PTS1_IN_PD),
+	PINMUX_DATA(PTS0_DATA, PTS0_OUT),
+
+	/* PTT */
+	PINMUX_DATA(PTT4_DATA, PTT4_OUT, PTT4_IN, PTT4_IN_PD),
+	PINMUX_DATA(PTT3_DATA, PTT3_OUT, PTT3_IN, PTT3_IN_PD),
+	PINMUX_DATA(PTT2_DATA, PTT2_OUT, PTT2_IN, PTT2_IN_PD),
+	PINMUX_DATA(PTT1_DATA, PTT1_IN, PTT1_IN_PD),
+	PINMUX_DATA(PTT0_DATA, PTT0_OUT),
+
+	/* PTU */
+	PINMUX_DATA(PTU4_DATA, PTU4_OUT, PTU4_IN, PTU4_IN_PD),
+	PINMUX_DATA(PTU3_DATA, PTU3_OUT, PTU3_IN, PTU3_IN_PD),
+	PINMUX_DATA(PTU2_DATA, PTU2_OUT, PTU2_IN, PTU2_IN_PD),
+	PINMUX_DATA(PTU1_DATA, PTU1_IN, PTU1_IN_PD),
+	PINMUX_DATA(PTU0_DATA, PTU0_OUT, PTU0_IN, PTU0_IN_PD),
+
+	/* PTV */
+	PINMUX_DATA(PTV4_DATA, PTV4_OUT, PTV4_IN, PTV4_IN_PD),
+	PINMUX_DATA(PTV3_DATA, PTV3_OUT, PTV3_IN, PTV3_IN_PD),
+	PINMUX_DATA(PTV2_DATA, PTV2_OUT, PTV2_IN, PTV2_IN_PD),
+	PINMUX_DATA(PTV1_DATA, PTV1_OUT, PTV1_IN, PTV1_IN_PD),
+	PINMUX_DATA(PTV0_DATA, PTV0_OUT, PTV0_IN, PTV0_IN_PD),
+
+	/* PTW */
+	PINMUX_DATA(PTW6_DATA, PTW6_IN, PTW6_IN_PD),
+	PINMUX_DATA(PTW5_DATA, PTW5_OUT),
+	PINMUX_DATA(PTW4_DATA, PTW4_OUT, PTW4_IN, PTW4_IN_PD),
+	PINMUX_DATA(PTW3_DATA, PTW3_OUT, PTW3_IN, PTW3_IN_PD),
+	PINMUX_DATA(PTW2_DATA, PTW2_OUT, PTW2_IN, PTW2_IN_PD),
+	PINMUX_DATA(PTW1_DATA, PTW1_OUT, PTW1_IN, PTW1_IN_PD),
+	PINMUX_DATA(PTW0_DATA, PTW0_OUT, PTW0_IN, PTW0_IN_PD),
+
+	/* PTX */
+	PINMUX_DATA(PTX6_DATA, PTX6_OUT, PTX6_IN, PTX6_IN_PD),
+	PINMUX_DATA(PTX5_DATA, PTX5_OUT, PTX5_IN, PTX5_IN_PD),
+	PINMUX_DATA(PTX4_DATA, PTX4_OUT, PTX4_IN, PTX4_IN_PD),
+	PINMUX_DATA(PTX3_DATA, PTX3_OUT, PTX3_IN, PTX3_IN_PD),
+	PINMUX_DATA(PTX2_DATA, PTX2_OUT, PTX2_IN, PTX2_IN_PD),
+	PINMUX_DATA(PTX1_DATA, PTX1_OUT, PTX1_IN, PTX1_IN_PD),
+	PINMUX_DATA(PTX0_DATA, PTX0_OUT, PTX0_IN, PTX0_IN_PD),
+
+	/* PTY */
+	PINMUX_DATA(PTY5_DATA, PTY5_OUT, PTY5_IN, PTY5_IN_PU),
+	PINMUX_DATA(PTY4_DATA, PTY4_OUT, PTY4_IN, PTY4_IN_PU),
+	PINMUX_DATA(PTY3_DATA, PTY3_OUT, PTY3_IN, PTY3_IN_PU),
+	PINMUX_DATA(PTY2_DATA, PTY2_OUT, PTY2_IN, PTY2_IN_PU),
+	PINMUX_DATA(PTY1_DATA, PTY1_OUT),
+	PINMUX_DATA(PTY0_DATA, PTY0_OUT, PTY0_IN, PTY0_IN_PU),
+
+	/* PTZ */
+	PINMUX_DATA(PTZ5_DATA, PTZ5_IN, PTZ5_IN_PU),
+	PINMUX_DATA(PTZ4_DATA, PTZ4_IN, PTZ4_IN_PU),
+	PINMUX_DATA(PTZ3_DATA, PTZ3_IN, PTZ3_IN_PU),
+	PINMUX_DATA(PTZ2_DATA, PTZ2_IN, PTZ2_IN_PU),
+	PINMUX_DATA(PTZ1_DATA, PTZ1_IN, PTZ1_IN_PU),
+
+	/* SCIF0 */
+	PINMUX_DATA(SCIF0_TXD_MARK, SCIF0_TXD),
+	PINMUX_DATA(SCIF0_RXD_MARK, SCIF0_RXD),
+	PINMUX_DATA(SCIF0_RTS_MARK, PSD7_SCIF0_RTS, SCIF0_RTS_SIUAOSPD),
+	PINMUX_DATA(SCIF0_CTS_MARK, PSD6_SCIF0_CTS, SCIF0_CTS_SIUAISPD),
+	PINMUX_DATA(SCIF0_SCK_MARK, PSD8_SCIF0_SCK, SCIF0_SCK_TPUTO),
+
+	/* SCIF1 */
+	PINMUX_DATA(SCIF1_TXD_MARK, PSD11_SCIF1, VIO_D5_SCIF1_TXD),
+	PINMUX_DATA(SCIF1_RXD_MARK, PSD11_SCIF1, VIO_D6_SCIF1_RXD),
+	PINMUX_DATA(SCIF1_RTS_MARK, PSD12_SCIF1, VIO_CLK_SCIF1_RTS),
+	PINMUX_DATA(SCIF1_CTS_MARK, PSD12_SCIF1, VIO_VD_SCIF1_CTS),
+	PINMUX_DATA(SCIF1_SCK_MARK, PSD11_SCIF1, VIO_D7_SCIF1_SCK),
+
+	/* SCIF2 */
+	PINMUX_DATA(SCIF2_TXD_MARK, PSD13_SCIF2, VIO_STEM_SCIF2_TXD),
+	PINMUX_DATA(SCIF2_RXD_MARK, PSD13_SCIF2, VIO_HD_SCIF2_RXD),
+	PINMUX_DATA(SCIF2_RTS_MARK, PSD13_SCIF2, VIO_CKO_SCIF2_RTS),
+	PINMUX_DATA(SCIF2_CTS_MARK, PSD13_SCIF2, VIO_FLD_SCIF2_CTS),
+	PINMUX_DATA(SCIF2_SCK_MARK, PSD13_SCIF2, VIO_STEX_SCIF2_SCK),
+
+	/* SIO */
+	PINMUX_DATA(SIOTXD_MARK, PSB15_SIOTXD, SIOTXD_SIUBOSLD),
+	PINMUX_DATA(SIORXD_MARK, PSB14_SIORXD, SIORXD_SIUBISLD),
+	PINMUX_DATA(SIOD_MARK, PSB13_SIOD, SIOD_SIUBILR),
+	PINMUX_DATA(SIOSTRB0_MARK, PSB12_SIOSTRB0, SIOSTRB0_SIUBIBT),
+	PINMUX_DATA(SIOSTRB1_MARK, PSB11_SIOSTRB1, SIOSTRB1_SIUBOLR),
+	PINMUX_DATA(SIOSCK_MARK, PSB10_SIOSCK, SIOSCK_SIUBOBT),
+	PINMUX_DATA(SIOMCK_MARK, PSD9_SIOMCK_SIUMCKB, PSB9_SIOMCK, PTF6),
+
+	/* CEU */
+	PINMUX_DATA(VIO_D15_MARK, PSC0_VIO, HIZA10_NAF, NAF7_VIO_D15),
+	PINMUX_DATA(VIO_D14_MARK, PSC0_VIO, HIZA10_NAF, NAF6_VIO_D14),
+	PINMUX_DATA(VIO_D13_MARK, PSC0_VIO, HIZA10_NAF, NAF5_VIO_D13),
+	PINMUX_DATA(VIO_D12_MARK, PSC0_VIO, HIZA10_NAF, NAF4_VIO_D12),
+	PINMUX_DATA(VIO_D11_MARK, PSC0_VIO, HIZA10_NAF, NAF3_VIO_D11),
+	PINMUX_DATA(VIO_D10_MARK, PSE2_VIO_D10, HIZB0_VIO, NAF2_VIO_D10),
+	PINMUX_DATA(VIO_D9_MARK, PSE1_VIO_D9, HIZB0_VIO, NAF1_VIO_D9),
+	PINMUX_DATA(VIO_D8_MARK, PSE0_VIO_D8, HIZB0_VIO, NAF0_VIO_D8),
+	PINMUX_DATA(VIO_D7_MARK, PSD11_VIO, VIO_D7_SCIF1_SCK),
+	PINMUX_DATA(VIO_D6_MARK, PSD11_VIO, VIO_D6_SCIF1_RXD),
+	PINMUX_DATA(VIO_D5_MARK, PSD11_VIO, VIO_D5_SCIF1_TXD),
+	PINMUX_DATA(VIO_D4_MARK, VIO_D4),
+	PINMUX_DATA(VIO_D3_MARK, VIO_D3),
+	PINMUX_DATA(VIO_D2_MARK, VIO_D2),
+	PINMUX_DATA(VIO_D1_MARK, VIO_D1),
+	PINMUX_DATA(VIO_D0_MARK, PSD10_VIO_D0, VIO_D0_LCDLCLK),
+	PINMUX_DATA(VIO_CLK_MARK, PSD12_VIO, MSELB9_VIO, VIO_CLK_SCIF1_RTS),
+	PINMUX_DATA(VIO_VD_MARK, PSD12_VIO, MSELB9_VIO, VIO_VD_SCIF1_CTS),
+	PINMUX_DATA(VIO_HD_MARK, PSD13_VIO, MSELB9_VIO, VIO_HD_SCIF2_RXD),
+	PINMUX_DATA(VIO_FLD_MARK, PSD13_VIO, HIZA9_VIO, VIO_FLD_SCIF2_CTS),
+	PINMUX_DATA(VIO_CKO_MARK, PSD13_VIO, HIZA9_VIO, VIO_CKO_SCIF2_RTS),
+	PINMUX_DATA(VIO_STEX_MARK, PSD13_VIO, HIZA9_VIO, VIO_STEX_SCIF2_SCK),
+	PINMUX_DATA(VIO_STEM_MARK, PSD13_VIO, HIZA9_VIO, VIO_STEM_SCIF2_TXD),
+	PINMUX_DATA(VIO_VD2_MARK, PSE3_VIO, MSELB9_VIO2,
+		    HIZB0_VIO, FOE_VIO_VD2),
+	PINMUX_DATA(VIO_HD2_MARK, PSE3_VIO, MSELB9_VIO2,
+		    HIZB1_VIO, HIZB1_VIO, FCE_VIO_HD2),
+	PINMUX_DATA(VIO_CLK2_MARK, PSE3_VIO, MSELB9_VIO2,
+		    HIZB1_VIO, FRB_VIO_CLK2),
+
+	/* LCDC */
+	PINMUX_DATA(LCDD23_MARK, HIZA8_LCDC, LCDD23),
+	PINMUX_DATA(LCDD22_MARK, HIZA8_LCDC, LCDD22),
+	PINMUX_DATA(LCDD21_MARK, HIZA8_LCDC, LCDD21),
+	PINMUX_DATA(LCDD20_MARK, HIZA8_LCDC, LCDD20),
+	PINMUX_DATA(LCDD19_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD19_DV_CLKI),
+	PINMUX_DATA(LCDD18_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD18_DV_CLK),
+	PINMUX_DATA(LCDD17_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC,
+		    LCDD17_DV_HSYNC),
+	PINMUX_DATA(LCDD16_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC,
+		    LCDD16_DV_VSYNC),
+	PINMUX_DATA(LCDD15_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD15_DV_D15),
+	PINMUX_DATA(LCDD14_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD14_DV_D14),
+	PINMUX_DATA(LCDD13_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD13_DV_D13),
+	PINMUX_DATA(LCDD12_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD12_DV_D12),
+	PINMUX_DATA(LCDD11_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD11_DV_D11),
+	PINMUX_DATA(LCDD10_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD10_DV_D10),
+	PINMUX_DATA(LCDD9_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD9_DV_D9),
+	PINMUX_DATA(LCDD8_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD8_DV_D8),
+	PINMUX_DATA(LCDD7_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD7_DV_D7),
+	PINMUX_DATA(LCDD6_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD6_DV_D6),
+	PINMUX_DATA(LCDD5_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD5_DV_D5),
+	PINMUX_DATA(LCDD4_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD4_DV_D4),
+	PINMUX_DATA(LCDD3_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD3_DV_D3),
+	PINMUX_DATA(LCDD2_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD2_DV_D2),
+	PINMUX_DATA(LCDD1_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD1_DV_D1),
+	PINMUX_DATA(LCDD0_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD0_DV_D0),
+	PINMUX_DATA(LCDLCLK_MARK, PSD10_LCDLCLK, VIO_D0_LCDLCLK),
+	/* Main LCD */
+	PINMUX_DATA(LCDDON_MARK, PSD2_LCDDON, HIZA7_LCDC, LCDDON_LCDDON2),
+	PINMUX_DATA(LCDVCPWC_MARK, PSD3_LCDVEPWC_LCDVCPWC,
+		    HIZA6_LCDC, LCDVCPWC_LCDVCPWC2),
+	PINMUX_DATA(LCDVEPWC_MARK, PSD3_LCDVEPWC_LCDVCPWC,
+		    HIZA6_LCDC, LCDVEPWC_LCDVEPWC2),
+	PINMUX_DATA(LCDVSYN_MARK, HIZA7_LCDC, LCDVSYN),
+	/* Main LCD - RGB Mode */
+	PINMUX_DATA(LCDDCK_MARK, MSELB8_RGB, HIZA8_LCDC, LCDDCK_LCDWR),
+	PINMUX_DATA(LCDHSYN_MARK, MSELB8_RGB, HIZA7_LCDC, LCDHSYN_LCDCS),
+	PINMUX_DATA(LCDDISP_MARK, MSELB8_RGB, HIZA7_LCDC, LCDDISP_LCDRS),
+	/* Main LCD - SYS Mode */
+	PINMUX_DATA(LCDRS_MARK, MSELB8_SYS, HIZA7_LCDC, LCDDISP_LCDRS),
+	PINMUX_DATA(LCDCS_MARK, MSELB8_SYS, HIZA7_LCDC, LCDHSYN_LCDCS),
+	PINMUX_DATA(LCDWR_MARK, MSELB8_SYS, HIZA8_LCDC, LCDDCK_LCDWR),
+	PINMUX_DATA(LCDRD_MARK, HIZA7_LCDC, LCDRD),
+	/* Sub LCD - SYS Mode */
+	PINMUX_DATA(LCDDON2_MARK, PSD2_LCDDON2, HIZA7_LCDC, LCDDON_LCDDON2),
+	PINMUX_DATA(LCDVCPWC2_MARK, PSD3_LCDVEPWC2_LCDVCPWC2,
+		    HIZA6_LCDC, LCDVCPWC_LCDVCPWC2),
+	PINMUX_DATA(LCDVEPWC2_MARK, PSD3_LCDVEPWC2_LCDVCPWC2,
+		    HIZA6_LCDC, LCDVEPWC_LCDVEPWC2),
+	PINMUX_DATA(LCDVSYN2_MARK, PSE12_LCDVSYN2, HIZA8_LCDC, LCDVSYN2_DACK),
+	PINMUX_DATA(LCDCS2_MARK, PSD5_LCDCS2, CS6B_CE1B_LCDCS2),
+
+	/* BSC */
+	PINMUX_DATA(IOIS16_MARK, IOIS16),
+	PINMUX_DATA(A25_MARK, A25),
+	PINMUX_DATA(A24_MARK, A24),
+	PINMUX_DATA(A23_MARK, A23),
+	PINMUX_DATA(A22_MARK, A22),
+	PINMUX_DATA(BS_MARK, PSA9_BS, IRQ4_BS),
+	PINMUX_DATA(CS6B_CE1B_MARK, PSD5_CS6B_CE1B, CS6B_CE1B_LCDCS2),
+	PINMUX_DATA(WAIT_MARK, WAIT),
+	PINMUX_DATA(CS6A_CE2B_MARK, CS6A_CE2B),
+
+	/* SBSC */
+	PINMUX_DATA(HPD63_MARK, HPD63),
+	PINMUX_DATA(HPD62_MARK, HPD62),
+	PINMUX_DATA(HPD61_MARK, HPD61),
+	PINMUX_DATA(HPD60_MARK, HPD60),
+	PINMUX_DATA(HPD59_MARK, HPD59),
+	PINMUX_DATA(HPD58_MARK, HPD58),
+	PINMUX_DATA(HPD57_MARK, HPD57),
+	PINMUX_DATA(HPD56_MARK, HPD56),
+	PINMUX_DATA(HPD55_MARK, HPD55),
+	PINMUX_DATA(HPD54_MARK, HPD54),
+	PINMUX_DATA(HPD53_MARK, HPD53),
+	PINMUX_DATA(HPD52_MARK, HPD52),
+	PINMUX_DATA(HPD51_MARK, HPD51),
+	PINMUX_DATA(HPD50_MARK, HPD50),
+	PINMUX_DATA(HPD49_MARK, HPD49),
+	PINMUX_DATA(HPD48_MARK, HPD48),
+	PINMUX_DATA(HPDQM7_MARK, HPDQM7),
+	PINMUX_DATA(HPDQM6_MARK, HPDQM6),
+	PINMUX_DATA(HPDQM5_MARK, HPDQM5),
+	PINMUX_DATA(HPDQM4_MARK, HPDQM4),
+
+	/* IRQ */
+	PINMUX_DATA(IRQ0_MARK, HIZC8_IRQ0, IRQ0),
+	PINMUX_DATA(IRQ1_MARK, HIZC9_IRQ1, IRQ1),
+	PINMUX_DATA(IRQ2_MARK, PSA4_IRQ2, HIZC10_IRQ2, IRQ2_SDHID2),
+	PINMUX_DATA(IRQ3_MARK, PSE15_SIOF0_MCK_IRQ3, PSB8_IRQ3,
+		    HIZC11_IRQ3, PTQ0),
+	PINMUX_DATA(IRQ4_MARK, PSA9_IRQ4, HIZC12_IRQ4, IRQ4_BS),
+	PINMUX_DATA(IRQ5_MARK, HIZC13_IRQ5, IRQ5),
+	PINMUX_DATA(IRQ6_MARK, PSA15_IRQ6, HIZC14_IRQ6, KEYIN0_IRQ6),
+	PINMUX_DATA(IRQ7_MARK, PSA14_IRQ7, HIZC15_IRQ7, KEYIN4_IRQ7),
+
+	/* SDHI */
+	PINMUX_DATA(SDHICD_MARK, SDHICD),
+	PINMUX_DATA(SDHIWP_MARK, SDHIWP),
+	PINMUX_DATA(SDHID3_MARK, SDHID3),
+	PINMUX_DATA(SDHID2_MARK, PSA4_SDHID2, IRQ2_SDHID2),
+	PINMUX_DATA(SDHID1_MARK, SDHID1),
+	PINMUX_DATA(SDHID0_MARK, SDHID0),
+	PINMUX_DATA(SDHICMD_MARK, SDHICMD),
+	PINMUX_DATA(SDHICLK_MARK, SDHICLK),
+
+	/* SIU - Port A */
+	PINMUX_DATA(SIUAOLR_MARK, PSC13_SIUAOLR, SIUAOLR_SIOF1_SYNC),
+	PINMUX_DATA(SIUAOBT_MARK, PSC14_SIUAOBT, SIUAOBT_SIOF1_SCK),
+	PINMUX_DATA(SIUAISLD_MARK, PSC15_SIUAISLD, SIUAISLD_SIOF1_RXD),
+	PINMUX_DATA(SIUAILR_MARK, PSC11_SIUAILR, SIUAILR_SIOF1_SS2),
+	PINMUX_DATA(SIUAIBT_MARK, PSC12_SIUAIBT, SIUAIBT_SIOF1_SS1),
+	PINMUX_DATA(SIUAOSLD_MARK, PSB0_SIUAOSLD, SIUAOSLD_SIOF1_TXD),
+	PINMUX_DATA(SIUMCKA_MARK, PSE11_SIUMCKA_SIOF1_MCK, PSB1_SIUMCKA, PTK0),
+	PINMUX_DATA(SIUFCKA_MARK, PSE11_SIUFCKA, PTK0),
+
+	/* SIU - Port B */
+	PINMUX_DATA(SIUBOLR_MARK, PSB11_SIUBOLR, SIOSTRB1_SIUBOLR),
+	PINMUX_DATA(SIUBOBT_MARK, PSB10_SIUBOBT, SIOSCK_SIUBOBT),
+	PINMUX_DATA(SIUBISLD_MARK, PSB14_SIUBISLD, SIORXD_SIUBISLD),
+	PINMUX_DATA(SIUBILR_MARK, PSB13_SIUBILR, SIOD_SIUBILR),
+	PINMUX_DATA(SIUBIBT_MARK, PSB12_SIUBIBT, SIOSTRB0_SIUBIBT),
+	PINMUX_DATA(SIUBOSLD_MARK, PSB15_SIUBOSLD, SIOTXD_SIUBOSLD),
+	PINMUX_DATA(SIUMCKB_MARK, PSD9_SIOMCK_SIUMCKB, PSB9_SIUMCKB, PTF6),
+	PINMUX_DATA(SIUFCKB_MARK, PSD9_SIUFCKB, PTF6),
+
+	/* AUD */
+	PINMUX_DATA(AUDSYNC_MARK, AUDSYNC),
+	PINMUX_DATA(AUDATA3_MARK, AUDATA3),
+	PINMUX_DATA(AUDATA2_MARK, AUDATA2),
+	PINMUX_DATA(AUDATA1_MARK, AUDATA1),
+	PINMUX_DATA(AUDATA0_MARK, AUDATA0),
+
+	/* DMAC */
+	PINMUX_DATA(DACK_MARK, PSE12_DACK, LCDVSYN2_DACK),
+	PINMUX_DATA(DREQ0_MARK, DREQ0),
+
+	/* VOU */
+	PINMUX_DATA(DV_CLKI_MARK, PSD0_DV, LCDD19_DV_CLKI),
+	PINMUX_DATA(DV_CLK_MARK, PSD0_DV, LCDD18_DV_CLK),
+	PINMUX_DATA(DV_HSYNC_MARK, PSD0_DV, LCDD17_DV_HSYNC),
+	PINMUX_DATA(DV_VSYNC_MARK, PSD0_DV, LCDD16_DV_VSYNC),
+	PINMUX_DATA(DV_D15_MARK, PSD0_DV, LCDD15_DV_D15),
+	PINMUX_DATA(DV_D14_MARK, PSD0_DV, LCDD14_DV_D14),
+	PINMUX_DATA(DV_D13_MARK, PSD0_DV, LCDD13_DV_D13),
+	PINMUX_DATA(DV_D12_MARK, PSD0_DV, LCDD12_DV_D12),
+	PINMUX_DATA(DV_D11_MARK, PSD0_DV, LCDD11_DV_D11),
+	PINMUX_DATA(DV_D10_MARK, PSD0_DV, LCDD10_DV_D10),
+	PINMUX_DATA(DV_D9_MARK, PSD0_DV, LCDD9_DV_D9),
+	PINMUX_DATA(DV_D8_MARK, PSD0_DV, LCDD8_DV_D8),
+	PINMUX_DATA(DV_D7_MARK, PSD0_DV, LCDD7_DV_D7),
+	PINMUX_DATA(DV_D6_MARK, PSD0_DV, LCDD6_DV_D6),
+	PINMUX_DATA(DV_D5_MARK, PSD0_DV, LCDD5_DV_D5),
+	PINMUX_DATA(DV_D4_MARK, PSD0_DV, LCDD4_DV_D4),
+	PINMUX_DATA(DV_D3_MARK, PSD0_DV, LCDD3_DV_D3),
+	PINMUX_DATA(DV_D2_MARK, PSD0_DV, LCDD2_DV_D2),
+	PINMUX_DATA(DV_D1_MARK, PSD0_DV, LCDD1_DV_D1),
+	PINMUX_DATA(DV_D0_MARK, PSD0_DV, LCDD0_DV_D0),
+
+	/* CPG */
+	PINMUX_DATA(STATUS0_MARK, STATUS0),
+	PINMUX_DATA(PDSTATUS_MARK, PDSTATUS),
+
+	/* SIOF0 */
+	PINMUX_DATA(SIOF0_MCK_MARK, PSE15_SIOF0_MCK_IRQ3, PSB8_SIOF0_MCK, PTQ0),
+	PINMUX_DATA(SIOF0_SCK_MARK, PSB5_SIOF0_SCK, SIOF0_SCK_TS_SCK),
+	PINMUX_DATA(SIOF0_SYNC_MARK, PSB4_SIOF0_SYNC, SIOF0_SYNC_TS_SDEN),
+	PINMUX_DATA(SIOF0_SS1_MARK, PSB3_SIOF0_SS1, SIOF0_SS1_TS_SPSYNC),
+	PINMUX_DATA(SIOF0_SS2_MARK, PSB2_SIOF0_SS2, SIOF0_SS2_SIM_RST),
+	PINMUX_DATA(SIOF0_TXD_MARK, PSE14_SIOF0_TXD_IRDA_OUT,
+		    PSB7_SIOF0_TXD, PTQ1),
+	PINMUX_DATA(SIOF0_RXD_MARK, PSE13_SIOF0_RXD_IRDA_IN,
+		    PSB6_SIOF0_RXD, PTQ2),
+
+	/* SIOF1 */
+	PINMUX_DATA(SIOF1_MCK_MARK, PSE11_SIUMCKA_SIOF1_MCK,
+		    PSB1_SIOF1_MCK, PTK0),
+	PINMUX_DATA(SIOF1_SCK_MARK, PSC14_SIOF1_SCK, SIUAOBT_SIOF1_SCK),
+	PINMUX_DATA(SIOF1_SYNC_MARK, PSC13_SIOF1_SYNC, SIUAOLR_SIOF1_SYNC),
+	PINMUX_DATA(SIOF1_SS1_MARK, PSC12_SIOF1_SS1, SIUAIBT_SIOF1_SS1),
+	PINMUX_DATA(SIOF1_SS2_MARK, PSC11_SIOF1_SS2, SIUAILR_SIOF1_SS2),
+	PINMUX_DATA(SIOF1_TXD_MARK, PSB0_SIOF1_TXD, SIUAOSLD_SIOF1_TXD),
+	PINMUX_DATA(SIOF1_RXD_MARK, PSC15_SIOF1_RXD, SIUAISLD_SIOF1_RXD),
+
+	/* SIM */
+	PINMUX_DATA(SIM_D_MARK, PSE15_SIM_D, PTQ0),
+	PINMUX_DATA(SIM_CLK_MARK, PSE14_SIM_CLK, PTQ1),
+	PINMUX_DATA(SIM_RST_MARK, PSB2_SIM_RST, SIOF0_SS2_SIM_RST),
+
+	/* TSIF */
+	PINMUX_DATA(TS_SDAT_MARK, PSE13_TS_SDAT, PTQ2),
+	PINMUX_DATA(TS_SCK_MARK, PSB5_TS_SCK, SIOF0_SCK_TS_SCK),
+	PINMUX_DATA(TS_SDEN_MARK, PSB4_TS_SDEN, SIOF0_SYNC_TS_SDEN),
+	PINMUX_DATA(TS_SPSYNC_MARK, PSB3_TS_SPSYNC, SIOF0_SS1_TS_SPSYNC),
+
+	/* IRDA */
+	PINMUX_DATA(IRDA_IN_MARK, PSE13_SIOF0_RXD_IRDA_IN, PSB6_IRDA_IN, PTQ2),
+	PINMUX_DATA(IRDA_OUT_MARK, PSE14_SIOF0_TXD_IRDA_OUT,
+		    PSB7_IRDA_OUT, PTQ1),
+
+	/* TPU */
+	PINMUX_DATA(TPUTO_MARK, PSD8_TPUTO, SCIF0_SCK_TPUTO),
+
+	/* FLCTL */
+	PINMUX_DATA(FCE_MARK, PSE3_FLCTL, FCE_VIO_HD2),
+	PINMUX_DATA(NAF7_MARK, PSC0_NAF, HIZA10_NAF, NAF7_VIO_D15),
+	PINMUX_DATA(NAF6_MARK, PSC0_NAF, HIZA10_NAF, NAF6_VIO_D14),
+	PINMUX_DATA(NAF5_MARK, PSC0_NAF, HIZA10_NAF, NAF5_VIO_D13),
+	PINMUX_DATA(NAF4_MARK, PSC0_NAF, HIZA10_NAF, NAF4_VIO_D12),
+	PINMUX_DATA(NAF3_MARK, PSC0_NAF, HIZA10_NAF, NAF3_VIO_D11),
+	PINMUX_DATA(NAF2_MARK, PSE2_NAF2, HIZB0_VIO, NAF2_VIO_D10),
+	PINMUX_DATA(NAF1_MARK, PSE1_NAF1, HIZB0_VIO, NAF1_VIO_D9),
+	PINMUX_DATA(NAF0_MARK, PSE0_NAF0, HIZB0_VIO, NAF0_VIO_D8),
+	PINMUX_DATA(FCDE_MARK, FCDE),
+	PINMUX_DATA(FOE_MARK, PSE3_FLCTL, HIZB0_VIO, FOE_VIO_VD2),
+	PINMUX_DATA(FSC_MARK, FSC),
+	PINMUX_DATA(FWE_MARK, FWE),
+	PINMUX_DATA(FRB_MARK, PSE3_FLCTL, FRB_VIO_CLK2),
+
+	/* KEYSC */
+	PINMUX_DATA(KEYIN0_MARK, PSA15_KEYIN0, HIZC14_IRQ6, KEYIN0_IRQ6),
+	PINMUX_DATA(KEYIN1_MARK, HIZA14_KEYSC, KEYIN1),
+	PINMUX_DATA(KEYIN2_MARK, HIZA14_KEYSC, KEYIN2),
+	PINMUX_DATA(KEYIN3_MARK, HIZA14_KEYSC, KEYIN3),
+	PINMUX_DATA(KEYIN4_MARK, PSA14_KEYIN4, HIZC15_IRQ7, KEYIN4_IRQ7),
+	PINMUX_DATA(KEYOUT0_MARK, HIZA14_KEYSC, KEYOUT0),
+	PINMUX_DATA(KEYOUT1_MARK, HIZA14_KEYSC, KEYOUT1),
+	PINMUX_DATA(KEYOUT2_MARK, HIZA14_KEYSC, KEYOUT2),
+	PINMUX_DATA(KEYOUT3_MARK, HIZA14_KEYSC, KEYOUT3),
+	PINMUX_DATA(KEYOUT4_IN6_MARK, HIZA14_KEYSC, KEYOUT4_IN6),
+	PINMUX_DATA(KEYOUT5_IN5_MARK, HIZA14_KEYSC, KEYOUT5_IN5),
+};
+
+static struct pinmux_gpio pinmux_gpios[] = {
+	/* PTA */
+	PINMUX_GPIO(GPIO_PTA7, PTA7_DATA),
+	PINMUX_GPIO(GPIO_PTA6, PTA6_DATA),
+	PINMUX_GPIO(GPIO_PTA5, PTA5_DATA),
+	PINMUX_GPIO(GPIO_PTA4, PTA4_DATA),
+	PINMUX_GPIO(GPIO_PTA3, PTA3_DATA),
+	PINMUX_GPIO(GPIO_PTA2, PTA2_DATA),
+	PINMUX_GPIO(GPIO_PTA1, PTA1_DATA),
+	PINMUX_GPIO(GPIO_PTA0, PTA0_DATA),
+
+	/* PTB */
+	PINMUX_GPIO(GPIO_PTB7, PTB7_DATA),
+	PINMUX_GPIO(GPIO_PTB6, PTB6_DATA),
+	PINMUX_GPIO(GPIO_PTB5, PTB5_DATA),
+	PINMUX_GPIO(GPIO_PTB4, PTB4_DATA),
+	PINMUX_GPIO(GPIO_PTB3, PTB3_DATA),
+	PINMUX_GPIO(GPIO_PTB2, PTB2_DATA),
+	PINMUX_GPIO(GPIO_PTB1, PTB1_DATA),
+	PINMUX_GPIO(GPIO_PTB0, PTB0_DATA),
+
+	/* PTC */
+	PINMUX_GPIO(GPIO_PTC7, PTC7_DATA),
+	PINMUX_GPIO(GPIO_PTC5, PTC5_DATA),
+	PINMUX_GPIO(GPIO_PTC4, PTC4_DATA),
+	PINMUX_GPIO(GPIO_PTC3, PTC3_DATA),
+	PINMUX_GPIO(GPIO_PTC2, PTC2_DATA),
+	PINMUX_GPIO(GPIO_PTC0, PTC0_DATA),
+
+	/* PTD */
+	PINMUX_GPIO(GPIO_PTD7, PTD7_DATA),
+	PINMUX_GPIO(GPIO_PTD6, PTD6_DATA),
+	PINMUX_GPIO(GPIO_PTD5, PTD5_DATA),
+	PINMUX_GPIO(GPIO_PTD4, PTD4_DATA),
+	PINMUX_GPIO(GPIO_PTD3, PTD3_DATA),
+	PINMUX_GPIO(GPIO_PTD2, PTD2_DATA),
+	PINMUX_GPIO(GPIO_PTD1, PTD1_DATA),
+	PINMUX_GPIO(GPIO_PTD0, PTD0_DATA),
+
+	/* PTE */
+	PINMUX_GPIO(GPIO_PTE7, PTE7_DATA),
+	PINMUX_GPIO(GPIO_PTE6, PTE6_DATA),
+	PINMUX_GPIO(GPIO_PTE5, PTE5_DATA),
+	PINMUX_GPIO(GPIO_PTE4, PTE4_DATA),
+	PINMUX_GPIO(GPIO_PTE1, PTE1_DATA),
+	PINMUX_GPIO(GPIO_PTE0, PTE0_DATA),
+
+	/* PTF */
+	PINMUX_GPIO(GPIO_PTF6, PTF6_DATA),
+	PINMUX_GPIO(GPIO_PTF5, PTF5_DATA),
+	PINMUX_GPIO(GPIO_PTF4, PTF4_DATA),
+	PINMUX_GPIO(GPIO_PTF3, PTF3_DATA),
+	PINMUX_GPIO(GPIO_PTF2, PTF2_DATA),
+	PINMUX_GPIO(GPIO_PTF1, PTF1_DATA),
+	PINMUX_GPIO(GPIO_PTF0, PTF0_DATA),
+
+	/* PTG */
+	PINMUX_GPIO(GPIO_PTG4, PTG4_DATA),
+	PINMUX_GPIO(GPIO_PTG3, PTG3_DATA),
+	PINMUX_GPIO(GPIO_PTG2, PTG2_DATA),
+	PINMUX_GPIO(GPIO_PTG1, PTG1_DATA),
+	PINMUX_GPIO(GPIO_PTG0, PTG0_DATA),
+
+	/* PTH */
+	PINMUX_GPIO(GPIO_PTH7, PTH7_DATA),
+	PINMUX_GPIO(GPIO_PTH6, PTH6_DATA),
+	PINMUX_GPIO(GPIO_PTH5, PTH5_DATA),
+	PINMUX_GPIO(GPIO_PTH4, PTH4_DATA),
+	PINMUX_GPIO(GPIO_PTH3, PTH3_DATA),
+	PINMUX_GPIO(GPIO_PTH2, PTH2_DATA),
+	PINMUX_GPIO(GPIO_PTH1, PTH1_DATA),
+	PINMUX_GPIO(GPIO_PTH0, PTH0_DATA),
+
+	/* PTJ */
+	PINMUX_GPIO(GPIO_PTJ7, PTJ7_DATA),
+	PINMUX_GPIO(GPIO_PTJ6, PTJ6_DATA),
+	PINMUX_GPIO(GPIO_PTJ5, PTJ5_DATA),
+	PINMUX_GPIO(GPIO_PTJ1, PTJ1_DATA),
+	PINMUX_GPIO(GPIO_PTJ0, PTJ0_DATA),
+
+	/* PTK */
+	PINMUX_GPIO(GPIO_PTK6, PTK6_DATA),
+	PINMUX_GPIO(GPIO_PTK5, PTK5_DATA),
+	PINMUX_GPIO(GPIO_PTK4, PTK4_DATA),
+	PINMUX_GPIO(GPIO_PTK3, PTK3_DATA),
+	PINMUX_GPIO(GPIO_PTK2, PTK2_DATA),
+	PINMUX_GPIO(GPIO_PTK1, PTK1_DATA),
+	PINMUX_GPIO(GPIO_PTK0, PTK0_DATA),
+
+	/* PTL */
+	PINMUX_GPIO(GPIO_PTL7, PTL7_DATA),
+	PINMUX_GPIO(GPIO_PTL6, PTL6_DATA),
+	PINMUX_GPIO(GPIO_PTL5, PTL5_DATA),
+	PINMUX_GPIO(GPIO_PTL4, PTL4_DATA),
+	PINMUX_GPIO(GPIO_PTL3, PTL3_DATA),
+	PINMUX_GPIO(GPIO_PTL2, PTL2_DATA),
+	PINMUX_GPIO(GPIO_PTL1, PTL1_DATA),
+	PINMUX_GPIO(GPIO_PTL0, PTL0_DATA),
+
+	/* PTM */
+	PINMUX_GPIO(GPIO_PTM7, PTM7_DATA),
+	PINMUX_GPIO(GPIO_PTM6, PTM6_DATA),
+	PINMUX_GPIO(GPIO_PTM5, PTM5_DATA),
+	PINMUX_GPIO(GPIO_PTM4, PTM4_DATA),
+	PINMUX_GPIO(GPIO_PTM3, PTM3_DATA),
+	PINMUX_GPIO(GPIO_PTM2, PTM2_DATA),
+	PINMUX_GPIO(GPIO_PTM1, PTM1_DATA),
+	PINMUX_GPIO(GPIO_PTM0, PTM0_DATA),
+
+	/* PTN */
+	PINMUX_GPIO(GPIO_PTN7, PTN7_DATA),
+	PINMUX_GPIO(GPIO_PTN6, PTN6_DATA),
+	PINMUX_GPIO(GPIO_PTN5, PTN5_DATA),
+	PINMUX_GPIO(GPIO_PTN4, PTN4_DATA),
+	PINMUX_GPIO(GPIO_PTN3, PTN3_DATA),
+	PINMUX_GPIO(GPIO_PTN2, PTN2_DATA),
+	PINMUX_GPIO(GPIO_PTN1, PTN1_DATA),
+	PINMUX_GPIO(GPIO_PTN0, PTN0_DATA),
+
+	/* PTQ */
+	PINMUX_GPIO(GPIO_PTQ6, PTQ6_DATA),
+	PINMUX_GPIO(GPIO_PTQ5, PTQ5_DATA),
+	PINMUX_GPIO(GPIO_PTQ4, PTQ4_DATA),
+	PINMUX_GPIO(GPIO_PTQ3, PTQ3_DATA),
+	PINMUX_GPIO(GPIO_PTQ2, PTQ2_DATA),
+	PINMUX_GPIO(GPIO_PTQ1, PTQ1_DATA),
+	PINMUX_GPIO(GPIO_PTQ0, PTQ0_DATA),
+
+	/* PTR */
+	PINMUX_GPIO(GPIO_PTR4, PTR4_DATA),
+	PINMUX_GPIO(GPIO_PTR3, PTR3_DATA),
+	PINMUX_GPIO(GPIO_PTR2, PTR2_DATA),
+	PINMUX_GPIO(GPIO_PTR1, PTR1_DATA),
+	PINMUX_GPIO(GPIO_PTR0, PTR0_DATA),
+
+	/* PTS */
+	PINMUX_GPIO(GPIO_PTS4, PTS4_DATA),
+	PINMUX_GPIO(GPIO_PTS3, PTS3_DATA),
+	PINMUX_GPIO(GPIO_PTS2, PTS2_DATA),
+	PINMUX_GPIO(GPIO_PTS1, PTS1_DATA),
+	PINMUX_GPIO(GPIO_PTS0, PTS0_DATA),
+
+	/* PTT */
+	PINMUX_GPIO(GPIO_PTT4, PTT4_DATA),
+	PINMUX_GPIO(GPIO_PTT3, PTT3_DATA),
+	PINMUX_GPIO(GPIO_PTT2, PTT2_DATA),
+	PINMUX_GPIO(GPIO_PTT1, PTT1_DATA),
+	PINMUX_GPIO(GPIO_PTT0, PTT0_DATA),
+
+	/* PTU */
+	PINMUX_GPIO(GPIO_PTU4, PTU4_DATA),
+	PINMUX_GPIO(GPIO_PTU3, PTU3_DATA),
+	PINMUX_GPIO(GPIO_PTU2, PTU2_DATA),
+	PINMUX_GPIO(GPIO_PTU1, PTU1_DATA),
+	PINMUX_GPIO(GPIO_PTU0, PTU0_DATA),
+
+	/* PTV */
+	PINMUX_GPIO(GPIO_PTV4, PTV4_DATA),
+	PINMUX_GPIO(GPIO_PTV3, PTV3_DATA),
+	PINMUX_GPIO(GPIO_PTV2, PTV2_DATA),
+	PINMUX_GPIO(GPIO_PTV1, PTV1_DATA),
+	PINMUX_GPIO(GPIO_PTV0, PTV0_DATA),
+
+	/* PTW */
+	PINMUX_GPIO(GPIO_PTW6, PTW6_DATA),
+	PINMUX_GPIO(GPIO_PTW5, PTW5_DATA),
+	PINMUX_GPIO(GPIO_PTW4, PTW4_DATA),
+	PINMUX_GPIO(GPIO_PTW3, PTW3_DATA),
+	PINMUX_GPIO(GPIO_PTW2, PTW2_DATA),
+	PINMUX_GPIO(GPIO_PTW1, PTW1_DATA),
+	PINMUX_GPIO(GPIO_PTW0, PTW0_DATA),
+
+	/* PTX */
+	PINMUX_GPIO(GPIO_PTX6, PTX6_DATA),
+	PINMUX_GPIO(GPIO_PTX5, PTX5_DATA),
+	PINMUX_GPIO(GPIO_PTX4, PTX4_DATA),
+	PINMUX_GPIO(GPIO_PTX3, PTX3_DATA),
+	PINMUX_GPIO(GPIO_PTX2, PTX2_DATA),
+	PINMUX_GPIO(GPIO_PTX1, PTX1_DATA),
+	PINMUX_GPIO(GPIO_PTX0, PTX0_DATA),
+
+	/* PTY */
+	PINMUX_GPIO(GPIO_PTY5, PTY5_DATA),
+	PINMUX_GPIO(GPIO_PTY4, PTY4_DATA),
+	PINMUX_GPIO(GPIO_PTY3, PTY3_DATA),
+	PINMUX_GPIO(GPIO_PTY2, PTY2_DATA),
+	PINMUX_GPIO(GPIO_PTY1, PTY1_DATA),
+	PINMUX_GPIO(GPIO_PTY0, PTY0_DATA),
+
+	/* PTZ */
+	PINMUX_GPIO(GPIO_PTZ5, PTZ5_DATA),
+	PINMUX_GPIO(GPIO_PTZ4, PTZ4_DATA),
+	PINMUX_GPIO(GPIO_PTZ3, PTZ3_DATA),
+	PINMUX_GPIO(GPIO_PTZ2, PTZ2_DATA),
+	PINMUX_GPIO(GPIO_PTZ1, PTZ1_DATA),
+
+	/* SCIF0 */
+	PINMUX_GPIO(GPIO_FN_SCIF0_TXD, SCIF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_RXD, SCIF0_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_RTS, SCIF0_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_CTS, SCIF0_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_SCK, SCIF0_SCK_MARK),
+
+	/* SCIF1 */
+	PINMUX_GPIO(GPIO_FN_SCIF1_TXD, SCIF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_RXD, SCIF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_RTS, SCIF1_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_CTS, SCIF1_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_SCK, SCIF1_SCK_MARK),
+
+	/* SCIF2 */
+	PINMUX_GPIO(GPIO_FN_SCIF2_TXD, SCIF2_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_RXD, SCIF2_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_RTS, SCIF2_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_CTS, SCIF2_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_SCK, SCIF2_SCK_MARK),
+
+	/* SIO */
+	PINMUX_GPIO(GPIO_FN_SIOTXD, SIOTXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIORXD, SIORXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOD, SIOD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOSTRB0, SIOSTRB0_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOSTRB1, SIOSTRB1_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOSCK, SIOSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOMCK, SIOMCK_MARK),
+
+	/* CEU */
+	PINMUX_GPIO(GPIO_FN_VIO_D15, VIO_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D14, VIO_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D13, VIO_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D12, VIO_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D11, VIO_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D10, VIO_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D9, VIO_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D8, VIO_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D7, VIO_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D6, VIO_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D5, VIO_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D4, VIO_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D3, VIO_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D2, VIO_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D1, VIO_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D0, VIO_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CLK, VIO_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_VD, VIO_VD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_HD, VIO_HD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_FLD, VIO_FLD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CKO, VIO_CKO_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_STEX, VIO_STEX_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_STEM, VIO_STEM_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_VD2, VIO_VD2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_HD2, VIO_HD2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CLK2, VIO_CLK2_MARK),
+
+	/* LCDC */
+	PINMUX_GPIO(GPIO_FN_LCDD23, LCDD23_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD22, LCDD22_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD21, LCDD21_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD20, LCDD20_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD19, LCDD19_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD18, LCDD18_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD17, LCDD17_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD16, LCDD16_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD15, LCDD15_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD14, LCDD14_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD13, LCDD13_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD12, LCDD12_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD11, LCDD11_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD10, LCDD10_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD9, LCDD9_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD8, LCDD8_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD7, LCDD7_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD6, LCDD6_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD5, LCDD5_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD4, LCDD4_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD3, LCDD3_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD2, LCDD2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD1, LCDD1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD0, LCDD0_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDLCLK, LCDLCLK_MARK),
+	/* Main LCD */
+	PINMUX_GPIO(GPIO_FN_LCDDON, LCDDON_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVCPWC, LCDVCPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVEPWC, LCDVEPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVSYN, LCDVSYN_MARK),
+	/* Main LCD - RGB Mode */
+	PINMUX_GPIO(GPIO_FN_LCDDCK, LCDDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDHSYN, LCDHSYN_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDDISP, LCDDISP_MARK),
+	/* Main LCD - SYS Mode */
+	PINMUX_GPIO(GPIO_FN_LCDRS, LCDRS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDCS, LCDCS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDWR, LCDWR_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDRD, LCDRD_MARK),
+	/* Sub LCD - SYS Mode */
+	PINMUX_GPIO(GPIO_FN_LCDDON2, LCDDON2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVCPWC2, LCDVCPWC2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVEPWC2, LCDVEPWC2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVSYN2, LCDVSYN2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDCS2, LCDCS2_MARK),
+
+	/* BSC */
+	PINMUX_GPIO(GPIO_FN_IOIS16, IOIS16_MARK),
+	PINMUX_GPIO(GPIO_FN_A25, A25_MARK),
+	PINMUX_GPIO(GPIO_FN_A24, A24_MARK),
+	PINMUX_GPIO(GPIO_FN_A23, A23_MARK),
+	PINMUX_GPIO(GPIO_FN_A22, A22_MARK),
+	PINMUX_GPIO(GPIO_FN_BS, BS_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6B_CE1B, CS6B_CE1B_MARK),
+	PINMUX_GPIO(GPIO_FN_WAIT, WAIT_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6A_CE2B, CS6A_CE2B_MARK),
+
+	/* SBSC */
+	PINMUX_GPIO(GPIO_FN_HPD63, HPD63_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD62, HPD62_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD61, HPD61_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD60, HPD60_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD59, HPD59_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD58, HPD58_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD57, HPD57_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD56, HPD56_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD55, HPD55_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD54, HPD54_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD53, HPD53_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD52, HPD52_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD51, HPD51_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD50, HPD50_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD49, HPD49_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD48, HPD48_MARK),
+	PINMUX_GPIO(GPIO_FN_HPDQM7, HPDQM7_MARK),
+	PINMUX_GPIO(GPIO_FN_HPDQM6, HPDQM6_MARK),
+	PINMUX_GPIO(GPIO_FN_HPDQM5, HPDQM5_MARK),
+	PINMUX_GPIO(GPIO_FN_HPDQM4, HPDQM4_MARK),
+
+	/* IRQ */
+	PINMUX_GPIO(GPIO_FN_IRQ0, IRQ0_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1, IRQ1_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2, IRQ2_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3, IRQ3_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4, IRQ4_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ5, IRQ5_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ6, IRQ6_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ7, IRQ7_MARK),
+
+	/* SDHI */
+	PINMUX_GPIO(GPIO_FN_SDHICD, SDHICD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHIWP, SDHIWP_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHID3, SDHID3_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHID2, SDHID2_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHID1, SDHID1_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHID0, SDHID0_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHICMD, SDHICMD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHICLK, SDHICLK_MARK),
+
+	/* SIU - Port A */
+	PINMUX_GPIO(GPIO_FN_SIUAOLR, SIUAOLR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAOBT, SIUAOBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAISLD, SIUAISLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAILR, SIUAILR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAIBT, SIUAIBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAOSLD, SIUAOSLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUMCKA, SIUMCKA_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUFCKA, SIUFCKA_MARK),
+
+	/* SIU - Port B */
+	PINMUX_GPIO(GPIO_FN_SIUBOLR, SIUBOLR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBOBT, SIUBOBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBISLD, SIUBISLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBILR, SIUBILR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBIBT, SIUBIBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBOSLD, SIUBOSLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUMCKB, SIUMCKB_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUFCKB, SIUFCKB_MARK),
+
+	/* AUD */
+	PINMUX_GPIO(GPIO_FN_AUDSYNC, AUDSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA3, AUDATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA2, AUDATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA1, AUDATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA0, AUDATA0_MARK),
+
+	/* DMAC */
+	PINMUX_GPIO(GPIO_FN_DACK, DACK_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ0, DREQ0_MARK),
+
+	/* VOU */
+	PINMUX_GPIO(GPIO_FN_DV_CLKI, DV_CLKI_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_CLK, DV_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_HSYNC, DV_HSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_VSYNC, DV_VSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D15, DV_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D14, DV_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D13, DV_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D12, DV_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D11, DV_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D10, DV_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D9, DV_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D8, DV_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D7, DV_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D6, DV_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D5, DV_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D4, DV_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D3, DV_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D2, DV_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D1, DV_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D0, DV_D0_MARK),
+
+	/* CPG */
+	PINMUX_GPIO(GPIO_FN_STATUS0, STATUS0_MARK),
+	PINMUX_GPIO(GPIO_FN_PDSTATUS, PDSTATUS_MARK),
+
+	/* SIOF0 */
+	PINMUX_GPIO(GPIO_FN_SIOF0_MCK, SIOF0_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_SCK, SIOF0_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_SYNC, SIOF0_SYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_SS1, SIOF0_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_SS2, SIOF0_SS2_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_TXD, SIOF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_RXD, SIOF0_RXD_MARK),
+
+	/* SIOF1 */
+	PINMUX_GPIO(GPIO_FN_SIOF1_MCK, SIOF1_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_SCK, SIOF1_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_SYNC, SIOF1_SYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_SS1, SIOF1_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_SS2, SIOF1_SS2_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_TXD, SIOF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_RXD, SIOF1_RXD_MARK),
+
+	/* SIM */
+	PINMUX_GPIO(GPIO_FN_SIM_D, SIM_D_MARK),
+	PINMUX_GPIO(GPIO_FN_SIM_CLK, SIM_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIM_RST, SIM_RST_MARK),
+
+	/* TSIF */
+	PINMUX_GPIO(GPIO_FN_TS_SDAT, TS_SDAT_MARK),
+	PINMUX_GPIO(GPIO_FN_TS_SCK, TS_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_TS_SDEN, TS_SDEN_MARK),
+	PINMUX_GPIO(GPIO_FN_TS_SPSYNC, TS_SPSYNC_MARK),
+
+	/* IRDA */
+	PINMUX_GPIO(GPIO_FN_IRDA_IN, IRDA_IN_MARK),
+	PINMUX_GPIO(GPIO_FN_IRDA_OUT, IRDA_OUT_MARK),
+
+	/* TPU */
+	PINMUX_GPIO(GPIO_FN_TPUTO, TPUTO_MARK),
+
+	/* FLCTL */
+	PINMUX_GPIO(GPIO_FN_FCE, FCE_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF7, NAF7_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF6, NAF6_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF5, NAF5_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF4, NAF4_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF3, NAF3_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF2, NAF2_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF1, NAF1_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF0, NAF0_MARK),
+	PINMUX_GPIO(GPIO_FN_FCDE, FCDE_MARK),
+	PINMUX_GPIO(GPIO_FN_FOE, FOE_MARK),
+	PINMUX_GPIO(GPIO_FN_FSC, FSC_MARK),
+	PINMUX_GPIO(GPIO_FN_FWE, FWE_MARK),
+	PINMUX_GPIO(GPIO_FN_FRB, FRB_MARK),
+
+	/* KEYSC */
+	PINMUX_GPIO(GPIO_FN_KEYIN0, KEYIN0_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN1, KEYIN1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN2, KEYIN2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN3, KEYIN3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN4, KEYIN4_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT0, KEYOUT0_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT1, KEYOUT1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT2, KEYOUT2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT3, KEYOUT3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT4_IN6, KEYOUT4_IN6_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT5_IN5, KEYOUT5_IN5_MARK),
+};
+
+static struct pinmux_cfg_reg pinmux_config_regs[] = {
+	{ PINMUX_CFG_REG("PACR", 0xa4050100, 16, 2) {
+		VIO_D7_SCIF1_SCK, PTA7_OUT, PTA7_IN_PD, PTA7_IN,
+		VIO_D6_SCIF1_RXD, 0, PTA6_IN_PD, PTA6_IN,
+		VIO_D5_SCIF1_TXD, PTA5_OUT, PTA5_IN_PD, PTA5_IN,
+		VIO_D4, 0, PTA4_IN_PD, PTA4_IN,
+		VIO_D3, 0, PTA3_IN_PD, PTA3_IN,
+		VIO_D2, 0, PTA2_IN_PD, PTA2_IN,
+		VIO_D1, 0, PTA1_IN_PD, PTA1_IN,
+		VIO_D0_LCDLCLK, 0, PTA0_IN_PD, PTA0_IN }
+	},
+	{ PINMUX_CFG_REG("PBCR", 0xa4050102, 16, 2) {
+		HPD55, PTB7_OUT, 0, PTB7_IN,
+		HPD54, PTB6_OUT, 0, PTB6_IN,
+		HPD53, PTB5_OUT, 0, PTB5_IN,
+		HPD52, PTB4_OUT, 0, PTB4_IN,
+		HPD51, PTB3_OUT, 0, PTB3_IN,
+		HPD50, PTB2_OUT, 0, PTB2_IN,
+		HPD49, PTB1_OUT, 0, PTB1_IN,
+		HPD48, PTB0_OUT, 0, PTB0_IN }
+	},
+	{ PINMUX_CFG_REG("PCCR", 0xa4050104, 16, 2) {
+		0, 0, PTC7_IN_PU, PTC7_IN,
+		0, 0, 0, 0,
+		IOIS16, 0, PTC5_IN_PU, PTC5_IN,
+		HPDQM7, PTC4_OUT, 0, PTC4_IN,
+		HPDQM6, PTC3_OUT, 0, PTC3_IN,
+		HPDQM5, PTC2_OUT, 0, PTC2_IN,
+		0, 0, 0, 0,
+		HPDQM4, PTC0_OUT, 0, PTC0_IN }
+	},
+	{ PINMUX_CFG_REG("PDCR", 0xa4050106, 16, 2) {
+		SDHICD, 0, PTD7_IN_PU, PTD7_IN,
+		SDHIWP, PTD6_OUT, PTD6_IN_PU, PTD6_IN,
+		SDHID3, PTD5_OUT, PTD5_IN_PU, PTD5_IN,
+		IRQ2_SDHID2, PTD4_OUT, PTD4_IN_PU, PTD4_IN,
+		SDHID1, PTD3_OUT, PTD3_IN_PU, PTD3_IN,
+		SDHID0, PTD2_OUT, PTD2_IN_PU, PTD2_IN,
+		SDHICMD, PTD1_OUT, PTD1_IN_PU, PTD1_IN,
+		SDHICLK, PTD0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PECR", 0xa4050108, 16, 2) {
+		A25, PTE7_OUT, PTE7_IN_PD, PTE7_IN,
+		A24, PTE6_OUT, PTE6_IN_PD, PTE6_IN,
+		A23, PTE5_OUT, PTE5_IN_PD, PTE5_IN,
+		A22, PTE4_OUT, PTE4_IN_PD, PTE4_IN,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		IRQ5, PTE1_OUT, PTE1_IN_PD, PTE1_IN,
+		IRQ4_BS, PTE0_OUT, PTE0_IN_PD, PTE0_IN }
+	},
+	{ PINMUX_CFG_REG("PFCR", 0xa405010a, 16, 2) {
+		0, 0, 0, 0,
+		PTF6, PTF6_OUT, PTF6_IN_PD, PTF6_IN,
+		SIOSCK_SIUBOBT, PTF5_OUT, PTF5_IN_PD, PTF5_IN,
+		SIOSTRB1_SIUBOLR, PTF4_OUT, PTF4_IN_PD, PTF4_IN,
+		SIOSTRB0_SIUBIBT, PTF3_OUT, PTF3_IN_PD, PTF3_IN,
+		SIOD_SIUBILR, PTF2_OUT, PTF2_IN_PD, PTF2_IN,
+		SIORXD_SIUBISLD, 0, PTF1_IN_PD, PTF1_IN,
+		SIOTXD_SIUBOSLD, PTF0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PGCR", 0xa405010c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		AUDSYNC, PTG4_OUT, 0, 0,
+		AUDATA3, PTG3_OUT, 0, 0,
+		AUDATA2, PTG2_OUT, 0, 0,
+		AUDATA1, PTG1_OUT, 0, 0,
+		AUDATA0, PTG0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PHCR", 0xa405010e, 16, 2) {
+		LCDVCPWC_LCDVCPWC2, PTH7_OUT, 0, 0,
+		LCDVSYN2_DACK, PTH6_OUT, PTH6_IN_PD, PTH6_IN,
+		LCDVSYN, PTH5_OUT, PTH5_IN_PD, PTH5_IN,
+		LCDDISP_LCDRS, PTH4_OUT, 0, 0,
+		LCDHSYN_LCDCS, PTH3_OUT, 0, 0,
+		LCDDON_LCDDON2, PTH2_OUT, 0, 0,
+		LCDD17_DV_HSYNC, PTH1_OUT, PTH1_IN_PD, PTH1_IN,
+		LCDD16_DV_VSYNC, PTH0_OUT, PTH0_IN_PD, PTH0_IN }
+	},
+	{ PINMUX_CFG_REG("PJCR", 0xa4050110, 16, 2) {
+		STATUS0, PTJ7_OUT, 0, 0,
+		0, PTJ6_OUT, 0, 0,
+		PDSTATUS, PTJ5_OUT, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		IRQ1, PTJ1_OUT, PTJ1_IN_PU, PTJ1_IN,
+		IRQ0, PTJ0_OUT, PTJ0_IN_PU, PTJ0_IN }
+	},
+	{ PINMUX_CFG_REG("PKCR", 0xa4050112, 16, 2) {
+		0, 0, 0, 0,
+		SIUAILR_SIOF1_SS2, PTK6_OUT, PTK6_IN_PD, PTK6_IN,
+		SIUAIBT_SIOF1_SS1, PTK5_OUT, PTK5_IN_PD, PTK5_IN,
+		SIUAOLR_SIOF1_SYNC, PTK4_OUT, PTK4_IN_PD, PTK4_IN,
+		SIUAOBT_SIOF1_SCK, PTK3_OUT, PTK3_IN_PD, PTK3_IN,
+		SIUAISLD_SIOF1_RXD, 0, PTK2_IN_PD, PTK2_IN,
+		SIUAOSLD_SIOF1_TXD, PTK1_OUT, 0, 0,
+		PTK0, PTK0_OUT, PTK0_IN_PD, PTK0_IN }
+	},
+	{ PINMUX_CFG_REG("PLCR", 0xa4050114, 16, 2) {
+		LCDD15_DV_D15, PTL7_OUT, PTL7_IN_PD, PTL7_IN,
+		LCDD14_DV_D14, PTL6_OUT, PTL6_IN_PD, PTL6_IN,
+		LCDD13_DV_D13, PTL5_OUT, PTL5_IN_PD, PTL5_IN,
+		LCDD12_DV_D12, PTL4_OUT, PTL4_IN_PD, PTL4_IN,
+		LCDD11_DV_D11, PTL3_OUT, PTL3_IN_PD, PTL3_IN,
+		LCDD10_DV_D10, PTL2_OUT, PTL2_IN_PD, PTL2_IN,
+		LCDD9_DV_D9, PTL1_OUT, PTL1_IN_PD, PTL1_IN,
+		LCDD8_DV_D8, PTL0_OUT, PTL0_IN_PD, PTL0_IN }
+	},
+	{ PINMUX_CFG_REG("PMCR", 0xa4050116, 16, 2) {
+		LCDD7_DV_D7, PTM7_OUT, PTM7_IN_PD, PTM7_IN,
+		LCDD6_DV_D6, PTM6_OUT, PTM6_IN_PD, PTM6_IN,
+		LCDD5_DV_D5, PTM5_OUT, PTM5_IN_PD, PTM5_IN,
+		LCDD4_DV_D4, PTM4_OUT, PTM4_IN_PD, PTM4_IN,
+		LCDD3_DV_D3, PTM3_OUT, PTM3_IN_PD, PTM3_IN,
+		LCDD2_DV_D2, PTM2_OUT, PTM2_IN_PD, PTM2_IN,
+		LCDD1_DV_D1, PTM1_OUT, PTM1_IN_PD, PTM1_IN,
+		LCDD0_DV_D0, PTM0_OUT, PTM0_IN_PD, PTM0_IN }
+	},
+	{ PINMUX_CFG_REG("PNCR", 0xa4050118, 16, 2) {
+		HPD63, PTN7_OUT, 0, PTN7_IN,
+		HPD62, PTN6_OUT, 0, PTN6_IN,
+		HPD61, PTN5_OUT, 0, PTN5_IN,
+		HPD60, PTN4_OUT, 0, PTN4_IN,
+		HPD59, PTN3_OUT, 0, PTN3_IN,
+		HPD58, PTN2_OUT, 0, PTN2_IN,
+		HPD57, PTN1_OUT, 0, PTN1_IN,
+		HPD56, PTN0_OUT, 0, PTN0_IN }
+	},
+	{ PINMUX_CFG_REG("PQCR", 0xa405011a, 16, 2) {
+		0, 0, 0, 0,
+		SIOF0_SS2_SIM_RST, PTQ6_OUT, 0, 0,
+		SIOF0_SS1_TS_SPSYNC, PTQ5_OUT, PTQ5_IN_PD, PTQ5_IN,
+		SIOF0_SYNC_TS_SDEN, PTQ4_OUT, PTQ4_IN_PD, PTQ4_IN,
+		SIOF0_SCK_TS_SCK, PTQ3_OUT, PTQ3_IN_PD, PTQ3_IN,
+		PTQ2, 0, PTQ2_IN_PD, PTQ2_IN,
+		PTQ1, PTQ1_OUT, 0, 0,
+		PTQ0, PTQ0_OUT, PTQ0_IN_PU, PTQ0_IN }
+	},
+	{ PINMUX_CFG_REG("PRCR", 0xa405011c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		LCDRD, PTR4_OUT, 0, 0,
+		CS6B_CE1B_LCDCS2, PTR3_OUT, 0, 0,
+		WAIT, 0, PTR2_IN_PU, PTR2_IN,
+		LCDDCK_LCDWR, PTR1_OUT, 0, 0,
+		LCDVEPWC_LCDVEPWC2, PTR0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSCR", 0xa405011e, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		SCIF0_CTS_SIUAISPD, 0, PTS4_IN_PD, PTS4_IN,
+		SCIF0_RTS_SIUAOSPD, PTS3_OUT, 0, 0,
+		SCIF0_SCK_TPUTO, PTS2_OUT, PTS2_IN_PD, PTS2_IN,
+		SCIF0_RXD, 0, PTS1_IN_PD, PTS1_IN,
+		SCIF0_TXD, PTS0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PTCR", 0xa4050140, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		FOE_VIO_VD2, PTT4_OUT, PTT4_IN_PD, PTT4_IN,
+		FWE, PTT3_OUT, PTT3_IN_PD, PTT3_IN,
+		FSC, PTT2_OUT, PTT2_IN_PD, PTT2_IN,
+		DREQ0, 0, PTT1_IN_PD, PTT1_IN,
+		FCDE, PTT0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PUCR", 0xa4050142, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		NAF2_VIO_D10, PTU4_OUT, PTU4_IN_PD, PTU4_IN,
+		NAF1_VIO_D9, PTU3_OUT, PTU3_IN_PD, PTU3_IN,
+		NAF0_VIO_D8, PTU2_OUT, PTU2_IN_PD, PTU2_IN,
+		FRB_VIO_CLK2, 0, PTU1_IN_PD, PTU1_IN,
+		FCE_VIO_HD2, PTU0_OUT, PTU0_IN_PD, PTU0_IN }
+	},
+	{ PINMUX_CFG_REG("PVCR", 0xa4050144, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		NAF7_VIO_D15, PTV4_OUT, PTV4_IN_PD, PTV4_IN,
+		NAF6_VIO_D14, PTV3_OUT, PTV3_IN_PD, PTV3_IN,
+		NAF5_VIO_D13, PTV2_OUT, PTV2_IN_PD, PTV2_IN,
+		NAF4_VIO_D12, PTV1_OUT, PTV1_IN_PD, PTV1_IN,
+		NAF3_VIO_D11, PTV0_OUT, PTV0_IN_PD, PTV0_IN }
+	},
+	{ PINMUX_CFG_REG("PWCR", 0xa4050146, 16, 2) {
+		0, 0, 0, 0,
+		VIO_FLD_SCIF2_CTS, 0, PTW6_IN_PD, PTW6_IN,
+		VIO_CKO_SCIF2_RTS, PTW5_OUT, 0, 0,
+		VIO_STEX_SCIF2_SCK, PTW4_OUT, PTW4_IN_PD, PTW4_IN,
+		VIO_STEM_SCIF2_TXD, PTW3_OUT, PTW3_IN_PD, PTW3_IN,
+		VIO_HD_SCIF2_RXD, PTW2_OUT, PTW2_IN_PD, PTW2_IN,
+		VIO_VD_SCIF1_CTS, PTW1_OUT, PTW1_IN_PD, PTW1_IN,
+		VIO_CLK_SCIF1_RTS, PTW0_OUT, PTW0_IN_PD, PTW0_IN }
+	},
+	{ PINMUX_CFG_REG("PXCR", 0xa4050148, 16, 2) {
+		0, 0, 0, 0,
+		CS6A_CE2B, PTX6_OUT, PTX6_IN_PU, PTX6_IN,
+		LCDD23, PTX5_OUT, PTX5_IN_PD, PTX5_IN,
+		LCDD22, PTX4_OUT, PTX4_IN_PD, PTX4_IN,
+		LCDD21, PTX3_OUT, PTX3_IN_PD, PTX3_IN,
+		LCDD20, PTX2_OUT, PTX2_IN_PD, PTX2_IN,
+		LCDD19_DV_CLKI, PTX1_OUT, PTX1_IN_PD, PTX1_IN,
+		LCDD18_DV_CLK, PTX0_OUT, PTX0_IN_PD, PTX0_IN }
+	},
+	{ PINMUX_CFG_REG("PYCR", 0xa405014a, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		KEYOUT5_IN5, PTY5_OUT, PTY5_IN_PU, PTY5_IN,
+		KEYOUT4_IN6, PTY4_OUT, PTY4_IN_PU, PTY4_IN,
+		KEYOUT3, PTY3_OUT, PTY3_IN_PU, PTY3_IN,
+		KEYOUT2, PTY2_OUT, PTY2_IN_PU, PTY2_IN,
+		KEYOUT1, PTY1_OUT, 0, 0,
+		KEYOUT0, PTY0_OUT, PTY0_IN_PU, PTY0_IN }
+	},
+	{ PINMUX_CFG_REG("PZCR", 0xa405014c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		KEYIN4_IRQ7, 0, PTZ5_IN_PU, PTZ5_IN,
+		KEYIN3, 0, PTZ4_IN_PU, PTZ4_IN,
+		KEYIN2, 0, PTZ3_IN_PU, PTZ3_IN,
+		KEYIN1, 0, PTZ2_IN_PU, PTZ2_IN,
+		KEYIN0_IRQ6, 0, PTZ1_IN_PU, PTZ1_IN,
+		0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSELA", 0xa405014e, 16, 1) {
+		PSA15_KEYIN0, PSA15_IRQ6,
+		PSA14_KEYIN4, PSA14_IRQ7,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		PSA9_IRQ4, PSA9_BS,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		PSA4_IRQ2, PSA4_SDHID2,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSELB", 0xa4050150, 16, 1) {
+		PSB15_SIOTXD, PSB15_SIUBOSLD,
+		PSB14_SIORXD, PSB14_SIUBISLD,
+		PSB13_SIOD, PSB13_SIUBILR,
+		PSB12_SIOSTRB0, PSB12_SIUBIBT,
+		PSB11_SIOSTRB1, PSB11_SIUBOLR,
+		PSB10_SIOSCK, PSB10_SIUBOBT,
+		PSB9_SIOMCK, PSB9_SIUMCKB,
+		PSB8_SIOF0_MCK, PSB8_IRQ3,
+		PSB7_SIOF0_TXD, PSB7_IRDA_OUT,
+		PSB6_SIOF0_RXD, PSB6_IRDA_IN,
+		PSB5_SIOF0_SCK, PSB5_TS_SCK,
+		PSB4_SIOF0_SYNC, PSB4_TS_SDEN,
+		PSB3_SIOF0_SS1, PSB3_TS_SPSYNC,
+		PSB2_SIOF0_SS2, PSB2_SIM_RST,
+		PSB1_SIUMCKA, PSB1_SIOF1_MCK,
+		PSB0_SIUAOSLD, PSB0_SIOF1_TXD }
+	},
+	{ PINMUX_CFG_REG("PSELC", 0xa4050152, 16, 1) {
+		PSC15_SIUAISLD, PSC15_SIOF1_RXD,
+		PSC14_SIUAOBT, PSC14_SIOF1_SCK,
+		PSC13_SIUAOLR, PSC13_SIOF1_SYNC,
+		PSC12_SIUAIBT, PSC12_SIOF1_SS1,
+		PSC11_SIUAILR, PSC11_SIOF1_SS2,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		PSC0_NAF, PSC0_VIO }
+	},
+	{ PINMUX_CFG_REG("PSELD", 0xa4050154, 16, 1) {
+		0, 0,
+		0, 0,
+		PSD13_VIO, PSD13_SCIF2,
+		PSD12_VIO, PSD12_SCIF1,
+		PSD11_VIO, PSD11_SCIF1,
+		PSD10_VIO_D0, PSD10_LCDLCLK,
+		PSD9_SIOMCK_SIUMCKB, PSD9_SIUFCKB,
+		PSD8_SCIF0_SCK, PSD8_TPUTO,
+		PSD7_SCIF0_RTS, PSD7_SIUAOSPD,
+		PSD6_SCIF0_CTS, PSD6_SIUAISPD,
+		PSD5_CS6B_CE1B, PSD5_LCDCS2,
+		0, 0,
+		PSD3_LCDVEPWC_LCDVCPWC, PSD3_LCDVEPWC2_LCDVCPWC2,
+		PSD2_LCDDON, PSD2_LCDDON2,
+		0, 0,
+		PSD0_LCDD19_LCDD0, PSD0_DV }
+	},
+	{ PINMUX_CFG_REG("PSELE", 0xa4050156, 16, 1) {
+		PSE15_SIOF0_MCK_IRQ3, PSE15_SIM_D,
+		PSE14_SIOF0_TXD_IRDA_OUT, PSE14_SIM_CLK,
+		PSE13_SIOF0_RXD_IRDA_IN, PSE13_TS_SDAT,
+		PSE12_LCDVSYN2, PSE12_DACK,
+		PSE11_SIUMCKA_SIOF1_MCK, PSE11_SIUFCKA,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		PSE3_FLCTL, PSE3_VIO,
+		PSE2_NAF2, PSE2_VIO_D10,
+		PSE1_NAF1, PSE1_VIO_D9,
+		PSE0_NAF0, PSE0_VIO_D8 }
+	},
+	{ PINMUX_CFG_REG("HIZCRA", 0xa4050158, 16, 1) {
+		0, 0,
+		HIZA14_KEYSC, HIZA14_HIZ,
+		0, 0,
+		0, 0,
+		0, 0,
+		HIZA10_NAF, HIZA10_HIZ,
+		HIZA9_VIO, HIZA9_HIZ,
+		HIZA8_LCDC, HIZA8_HIZ,
+		HIZA7_LCDC, HIZA7_HIZ,
+		HIZA6_LCDC, HIZA6_HIZ,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0 }
+	},
+	{ PINMUX_CFG_REG("HIZCRB", 0xa405015a, 16, 1) {
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		HIZB1_VIO, HIZB1_HIZ,
+		HIZB0_VIO, HIZB0_HIZ }
+	},
+	{ PINMUX_CFG_REG("HIZCRC", 0xa405015c, 16, 1) {
+		HIZC15_IRQ7, HIZC15_HIZ,
+		HIZC14_IRQ6, HIZC14_HIZ,
+		HIZC13_IRQ5, HIZC13_HIZ,
+		HIZC12_IRQ4, HIZC12_HIZ,
+		HIZC11_IRQ3, HIZC11_HIZ,
+		HIZC10_IRQ2, HIZC10_HIZ,
+		HIZC9_IRQ1, HIZC9_HIZ,
+		HIZC8_IRQ0, HIZC8_HIZ,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0 }
+	},
+	{ PINMUX_CFG_REG("MSELCRB", 0xa4050182, 16, 1) {
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		MSELB9_VIO, MSELB9_VIO2,
+		MSELB8_RGB, MSELB8_SYS,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0 }
+	},
+	{}
+};
+
+static struct pinmux_data_reg pinmux_data_regs[] = {
+	{ PINMUX_DATA_REG("PADR", 0xa4050120, 8) {
+		PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+		PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA }
+	},
+	{ PINMUX_DATA_REG("PBDR", 0xa4050122, 8) {
+		PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+		PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA }
+	},
+	{ PINMUX_DATA_REG("PCDR", 0xa4050124, 8) {
+		PTC7_DATA, 0, PTC5_DATA, PTC4_DATA,
+		PTC3_DATA, PTC2_DATA, 0, PTC0_DATA }
+	},
+	{ PINMUX_DATA_REG("PDDR", 0xa4050126, 8) {
+		PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+		PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA }
+	},
+	{ PINMUX_DATA_REG("PEDR", 0xa4050128, 8) {
+		PTE7_DATA, PTE6_DATA, PTE5_DATA, PTE4_DATA,
+		0, 0, PTE1_DATA, PTE0_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDR", 0xa405012a, 8) {
+		0, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+		PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA }
+	},
+	{ PINMUX_DATA_REG("PGDR", 0xa405012c, 8) {
+		0, 0, 0, PTG4_DATA,
+		PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA }
+	},
+	{ PINMUX_DATA_REG("PHDR", 0xa405012e, 8) {
+		PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+		PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA }
+	},
+	{ PINMUX_DATA_REG("PJDR", 0xa4050130, 8) {
+		PTJ7_DATA, PTJ6_DATA, PTJ5_DATA, 0,
+		0, 0, PTJ1_DATA, PTJ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PKDR", 0xa4050132, 8) {
+		0, PTK6_DATA, PTK5_DATA, PTK4_DATA,
+		PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA }
+	},
+	{ PINMUX_DATA_REG("PLDR", 0xa4050134, 8) {
+		PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+		PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA }
+	},
+	{ PINMUX_DATA_REG("PMDR", 0xa4050136, 8) {
+		PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+		PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA }
+	},
+	{ PINMUX_DATA_REG("PNDR", 0xa4050138, 8) {
+		PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+		PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA }
+	},
+	{ PINMUX_DATA_REG("PQDR", 0xa405013a, 8) {
+		0, PTQ6_DATA, PTQ5_DATA, PTQ4_DATA,
+		PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PRDR", 0xa405013c, 8) {
+		0, 0, 0, PTR4_DATA,
+		PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA }
+	},
+	{ PINMUX_DATA_REG("PSDR", 0xa405013e, 8) {
+		0, 0, 0, PTS4_DATA,
+		PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA }
+	},
+	{ PINMUX_DATA_REG("PTDR", 0xa4050160, 8) {
+		0, 0, 0, PTT4_DATA,
+		PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA }
+	},
+	{ PINMUX_DATA_REG("PUDR", 0xa4050162, 8) {
+		0, 0, 0, PTU4_DATA,
+		PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA }
+	},
+	{ PINMUX_DATA_REG("PVDR", 0xa4050164, 8) {
+		0, 0, 0, PTV4_DATA,
+		PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA }
+	},
+	{ PINMUX_DATA_REG("PWDR", 0xa4050166, 8) {
+		0, PTW6_DATA, PTW5_DATA, PTW4_DATA,
+		PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA }
+	},
+	{ PINMUX_DATA_REG("PXDR", 0xa4050168, 8) {
+		0, PTX6_DATA, PTX5_DATA, PTX4_DATA,
+		PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA }
+	},
+	{ PINMUX_DATA_REG("PYDR", 0xa405016a, 8) {
+		0, PTY6_DATA, PTY5_DATA, PTY4_DATA,
+		PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA }
+	},
+	{ PINMUX_DATA_REG("PZDR", 0xa405016c, 8) {
+		0, 0, PTZ5_DATA, PTZ4_DATA,
+		PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA }
+	},
+	{ },
+};
+
+static struct pinmux_info sh7722_pinmux_info = {
+	.name = "sh7722_pfc",
+	.reserved_id = PINMUX_RESERVED,
+	.data = { PINMUX_DATA_BEGIN, PINMUX_DATA_END },
+	.input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END },
+	.input_pd = { PINMUX_INPUT_PULLDOWN_BEGIN, PINMUX_INPUT_PULLDOWN_END },
+	.input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END },
+	.output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END },
+	.mark = { PINMUX_MARK_BEGIN, PINMUX_MARK_END },
+	.function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END },
+
+	.first_gpio = GPIO_PTA7,
+	.last_gpio = GPIO_FN_KEYOUT5_IN5,
+
+	.gpios = pinmux_gpios,
+	.cfg_regs = pinmux_config_regs,
+	.data_regs = pinmux_data_regs,
+
+	.gpio_data = pinmux_data,
+	.gpio_data_size = ARRAY_SIZE(pinmux_data),
+};
+
+static int __init plat_pinmux_setup(void)
+{
+	return register_pinmux(&sh7722_pinmux_info);
+}
+
+arch_initcall(plat_pinmux_setup);
-- 
GitLab


From 91b6f3c5252937f85e56c71215b37024827ae924 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 8 Oct 2008 20:42:01 +0900
Subject: [PATCH 644/895] sh: Use sh7722 GPIO on Migo-R board

This patch enables the GPIO code on Migo-R and converts the code from
register based pinmux configuration to GPIO based pin by pin setup.
Fix whitespace damage while at it and add 2 LEDs and export them to
user space.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/Kconfig               |   1 +
 arch/sh/boards/mach-migor/lcd_qvga.c |   6 +-
 arch/sh/boards/mach-migor/setup.c    | 187 +++++++++++++++++----------
 3 files changed, 125 insertions(+), 69 deletions(-)

diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index c6b21e82755e..4e37f5c02158 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -162,6 +162,7 @@ config SH_SH7785LCR_29BIT_PHYSMAPS
 config SH_MIGOR
 	bool "Migo-R"
 	depends on CPU_SUBTYPE_SH7722
+	select GENERIC_GPIO
 	help
 	  Select Migo-R if configuring for the SH7722 Migo-R platform
           by Renesas System Solutions Asia Pte. Ltd.
diff --git a/arch/sh/boards/mach-migor/lcd_qvga.c b/arch/sh/boards/mach-migor/lcd_qvga.c
index 735326c04497..c283cfc2a5c3 100644
--- a/arch/sh/boards/mach-migor/lcd_qvga.c
+++ b/arch/sh/boards/mach-migor/lcd_qvga.c
@@ -17,7 +17,9 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/gpio.h>
 #include <video/sh_mobile_lcdc.h>
+#include <asm/sh7722.h>
 #include <asm/migor.h>
 
 /* LCD Module is a PH240320T according to board schematics. This module
@@ -30,9 +32,9 @@
 
 static void reset_lcd_module(void)
 {
-	ctrl_outb(ctrl_inb(PORT_PHDR) & ~0x04, PORT_PHDR);
+	gpio_set_value(GPIO_PTH2, 0);
 	mdelay(2);
-	ctrl_outb(ctrl_inb(PORT_PHDR) | 0x04, PORT_PHDR);
+	gpio_set_value(GPIO_PTH2, 1);
 	mdelay(1);
 }
 
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index f27475d2fa0c..003ce837734d 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -17,6 +17,7 @@
 #include <linux/smc91x.h>
 #include <linux/delay.h>
 #include <linux/clk.h>
+#include <linux/gpio.h>
 #include <media/soc_camera_platform.h>
 #include <media/sh_mobile_ceu.h>
 #include <video/sh_mobile_lcdc.h>
@@ -25,6 +26,7 @@
 #include <asm/io.h>
 #include <asm/sh_keysc.h>
 #include <asm/migor.h>
+#include <asm/sh7722.h>
 
 /* Address     IRQ  Size  Bus  Description
  * 0x00000000       64MB  16   NOR Flash (SP29PL256N)
@@ -169,7 +171,7 @@ static void migor_nand_flash_cmd_ctl(struct mtd_info *mtd, int cmd,
 
 static int migor_nand_flash_ready(struct mtd_info *mtd)
 {
-	return ctrl_inb(PORT_PADR) & 0x02; /* PTA1 */
+	return gpio_get_value(GPIO_PTA1); /* NAND_RBn */
 }
 
 struct platform_nand_data migor_nand_flash_data = {
@@ -286,22 +288,15 @@ static struct clk *camera_clk;
 
 static void camera_power_on(void)
 {
-	unsigned char value;
-
 	camera_clk = clk_get(NULL, "video_clk");
 	clk_set_rate(camera_clk, 24000000);
 	clk_enable(camera_clk);	/* start VIO_CKO */
 
+	/* use VIO_RST to take camera out of reset */
 	mdelay(10);
-	value = ctrl_inb(PORT_PTDR);
-	value &= ~0x09;
-#ifndef CONFIG_SH_MIGOR_RTA_WVGA
-	value |= 0x01;
-#endif
-	ctrl_outb(value, PORT_PTDR);
+	gpio_set_value(GPIO_PTT3, 0);
 	mdelay(10);
-
-	ctrl_outb(value | 8, PORT_PTDR);
+	gpio_set_value(GPIO_PTT3, 1);
 }
 
 static void camera_power_off(void)
@@ -309,7 +304,7 @@ static void camera_power_off(void)
 	clk_disable(camera_clk); /* stop VIO_CKO */
 	clk_put(camera_clk);
 
-	ctrl_outb(ctrl_inb(PORT_PTDR) & ~0x08, PORT_PTDR);
+	gpio_set_value(GPIO_PTT3, 0);
 }
 
 #ifdef CONFIG_I2C
@@ -458,75 +453,133 @@ static struct i2c_board_info migor_i2c_devices[] = {
 
 static int __init migor_devices_setup(void)
 {
+	/* Lit D11 LED */
+	gpio_request(GPIO_PTJ7, NULL);
+	gpio_direction_output(GPIO_PTJ7, 1);
+	gpio_export(GPIO_PTJ7, 0);
+
+	/* Lit D12 LED */
+	gpio_request(GPIO_PTJ5, NULL);
+	gpio_direction_output(GPIO_PTJ5, 1);
+	gpio_export(GPIO_PTJ5, 0);
+
+	/* SMC91C111 */
+	gpio_request(GPIO_FN_IRQ0, NULL);
+
+	/* KEYSC */
 	clk_always_enable("mstp214"); /* KEYSC */
+	gpio_request(GPIO_FN_KEYOUT0, NULL);
+	gpio_request(GPIO_FN_KEYOUT1, NULL);
+	gpio_request(GPIO_FN_KEYOUT2, NULL);
+	gpio_request(GPIO_FN_KEYOUT3, NULL);
+	gpio_request(GPIO_FN_KEYOUT4_IN6, NULL);
+	gpio_request(GPIO_FN_KEYIN1, NULL);
+	gpio_request(GPIO_FN_KEYIN2, NULL);
+	gpio_request(GPIO_FN_KEYIN3, NULL);
+	gpio_request(GPIO_FN_KEYIN4, NULL);
+	gpio_request(GPIO_FN_KEYOUT5_IN5, NULL);
+
+	/* NAND Flash */
+	gpio_request(GPIO_FN_CS6A_CE2B, NULL);
+	ctrl_outl((ctrl_inl(BSC_CS6ABCR) & ~0x0600) | 0x0200, BSC_CS6ABCR);
+	gpio_request(GPIO_PTA1, NULL);
+	gpio_direction_input(GPIO_PTA1);
+
+	/* Touch Panel */
+	gpio_request(GPIO_FN_IRQ6, NULL);
+
+	/* LCD Panel */
 	clk_always_enable("mstp200"); /* LCDC */
+#ifdef CONFIG_SH_MIGOR_QVGA /* LCDC - QVGA - Enable SYS Interface signals */
+	gpio_request(GPIO_FN_LCDD17, NULL);
+	gpio_request(GPIO_FN_LCDD16, NULL);
+	gpio_request(GPIO_FN_LCDD15, NULL);
+	gpio_request(GPIO_FN_LCDD14, NULL);
+	gpio_request(GPIO_FN_LCDD13, NULL);
+	gpio_request(GPIO_FN_LCDD12, NULL);
+	gpio_request(GPIO_FN_LCDD11, NULL);
+	gpio_request(GPIO_FN_LCDD10, NULL);
+	gpio_request(GPIO_FN_LCDD8, NULL);
+	gpio_request(GPIO_FN_LCDD7, NULL);
+	gpio_request(GPIO_FN_LCDD6, NULL);
+	gpio_request(GPIO_FN_LCDD5, NULL);
+	gpio_request(GPIO_FN_LCDD4, NULL);
+	gpio_request(GPIO_FN_LCDD3, NULL);
+	gpio_request(GPIO_FN_LCDD2, NULL);
+	gpio_request(GPIO_FN_LCDD1, NULL);
+	gpio_request(GPIO_FN_LCDRS, NULL);
+	gpio_request(GPIO_FN_LCDCS, NULL);
+	gpio_request(GPIO_FN_LCDRD, NULL);
+	gpio_request(GPIO_FN_LCDWR, NULL);
+	gpio_request(GPIO_PTH2, NULL); /* LCD_DON */
+	gpio_direction_output(GPIO_PTH2, 1);
+#endif
+#ifdef CONFIG_SH_MIGOR_RTA_WVGA /* LCDC - WVGA - Enable RGB Interface signals */
+	gpio_request(GPIO_FN_LCDD15, NULL);
+	gpio_request(GPIO_FN_LCDD14, NULL);
+	gpio_request(GPIO_FN_LCDD13, NULL);
+	gpio_request(GPIO_FN_LCDD12, NULL);
+	gpio_request(GPIO_FN_LCDD11, NULL);
+	gpio_request(GPIO_FN_LCDD10, NULL);
+	gpio_request(GPIO_FN_LCDD9, NULL);
+	gpio_request(GPIO_FN_LCDD8, NULL);
+	gpio_request(GPIO_FN_LCDD7, NULL);
+	gpio_request(GPIO_FN_LCDD6, NULL);
+	gpio_request(GPIO_FN_LCDD5, NULL);
+	gpio_request(GPIO_FN_LCDD4, NULL);
+	gpio_request(GPIO_FN_LCDD3, NULL);
+	gpio_request(GPIO_FN_LCDD2, NULL);
+	gpio_request(GPIO_FN_LCDD1, NULL);
+	gpio_request(GPIO_FN_LCDD0, NULL);
+	gpio_request(GPIO_FN_LCDLCLK, NULL);
+	gpio_request(GPIO_FN_LCDDCK, NULL);
+	gpio_request(GPIO_FN_LCDVEPWC, NULL);
+	gpio_request(GPIO_FN_LCDVCPWC, NULL);
+	gpio_request(GPIO_FN_LCDVSYN, NULL);
+	gpio_request(GPIO_FN_LCDHSYN, NULL);
+	gpio_request(GPIO_FN_LCDDISP, NULL);
+	gpio_request(GPIO_FN_LCDDON, NULL);
+#endif
+
+	/* CEU */
 	clk_always_enable("mstp203"); /* CEU */
+	gpio_request(GPIO_FN_VIO_CLK2, NULL);
+	gpio_request(GPIO_FN_VIO_VD2, NULL);
+	gpio_request(GPIO_FN_VIO_HD2, NULL);
+	gpio_request(GPIO_FN_VIO_FLD, NULL);
+	gpio_request(GPIO_FN_VIO_CKO, NULL);
+	gpio_request(GPIO_FN_VIO_D15, NULL);
+	gpio_request(GPIO_FN_VIO_D14, NULL);
+	gpio_request(GPIO_FN_VIO_D13, NULL);
+	gpio_request(GPIO_FN_VIO_D12, NULL);
+	gpio_request(GPIO_FN_VIO_D11, NULL);
+	gpio_request(GPIO_FN_VIO_D10, NULL);
+	gpio_request(GPIO_FN_VIO_D9, NULL);
+	gpio_request(GPIO_FN_VIO_D8, NULL);
+
+	gpio_request(GPIO_PTT3, NULL); /* VIO_RST */
+	gpio_direction_output(GPIO_PTT3, 0);
+	gpio_request(GPIO_PTT2, NULL); /* TV_IN_EN */
+	gpio_direction_output(GPIO_PTT2, 1);
+	gpio_request(GPIO_PTT0, NULL); /* CAM_EN */
+#ifdef CONFIG_SH_MIGOR_RTA_WVGA
+	gpio_direction_output(GPIO_PTT0, 0);
+#else
+	gpio_direction_output(GPIO_PTT0, 1);
+#endif
+	ctrl_outw(ctrl_inw(PORT_MSELCRB) | 0x2000, PORT_MSELCRB); /* D15->D8 */
 
 	platform_resource_setup_memory(&migor_ceu_device, "ceu", 4 << 20);
 
 	i2c_register_board_info(0, migor_i2c_devices,
 				ARRAY_SIZE(migor_i2c_devices));
- 
+
 	return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices));
 }
 __initcall(migor_devices_setup);
 
 static void __init migor_setup(char **cmdline_p)
 {
-	/* SMC91C111 - Enable IRQ0 */
-	ctrl_outw(ctrl_inw(PORT_PJCR) & ~0x0003, PORT_PJCR);
-
-	/* KEYSC */
-	ctrl_outw(ctrl_inw(PORT_PYCR) & ~0x0fff, PORT_PYCR);
-	ctrl_outw(ctrl_inw(PORT_PZCR) & ~0x0ff0, PORT_PZCR);
-	ctrl_outw(ctrl_inw(PORT_PSELA) & ~0x4100, PORT_PSELA);
-	ctrl_outw(ctrl_inw(PORT_HIZCRA) & ~0x4000, PORT_HIZCRA);
-	ctrl_outw(ctrl_inw(PORT_HIZCRC) & ~0xc000, PORT_HIZCRC);
-
-	/* NAND Flash */
-	ctrl_outw(ctrl_inw(PORT_PXCR) & 0x0fff, PORT_PXCR);
-	ctrl_outl((ctrl_inl(BSC_CS6ABCR) & ~0x00000600) | 0x00000200,
-		  BSC_CS6ABCR);
-
-	/* Touch Panel - Enable IRQ6 */
-	ctrl_outw(ctrl_inw(PORT_PZCR) & ~0xc, PORT_PZCR);
-	ctrl_outw((ctrl_inw(PORT_PSELA) | 0x8000), PORT_PSELA);
-	ctrl_outw((ctrl_inw(PORT_HIZCRC) & ~0x4000), PORT_HIZCRC);
-
-#ifdef CONFIG_SH_MIGOR_RTA_WVGA
-	/* LCDC - WVGA - Enable RGB Interface signals */
-	ctrl_outw(ctrl_inw(PORT_PACR) & ~0x0003, PORT_PACR);
-	ctrl_outw(0x0000, PORT_PHCR);
-	ctrl_outw(0x0000, PORT_PLCR);
-	ctrl_outw(0x0000, PORT_PMCR);
-	ctrl_outw(ctrl_inw(PORT_PRCR) & ~0x000f, PORT_PRCR);
-	ctrl_outw((ctrl_inw(PORT_PSELD) & ~0x000d) | 0x0400, PORT_PSELD);
-	ctrl_outw(ctrl_inw(PORT_MSELCRB) & ~0x0100, PORT_MSELCRB);
-	ctrl_outw(ctrl_inw(PORT_HIZCRA) & ~0x01e0, PORT_HIZCRA);
-#endif
-#ifdef CONFIG_SH_MIGOR_QVGA
-	/* LCDC - QVGA - Enable SYS Interface signals */
-	ctrl_outw(ctrl_inw(PORT_PACR) & ~0x0003, PORT_PACR);
-	ctrl_outw((ctrl_inw(PORT_PHCR) & ~0xcfff) | 0x0010, PORT_PHCR);
-	ctrl_outw(0x0000, PORT_PLCR);
-	ctrl_outw(0x0000, PORT_PMCR);
-	ctrl_outw(ctrl_inw(PORT_PRCR) & ~0x030f, PORT_PRCR);
-	ctrl_outw((ctrl_inw(PORT_PSELD) & ~0x0001) | 0x0420, PORT_PSELD);
-	ctrl_outw(ctrl_inw(PORT_MSELCRB) | 0x0100, PORT_MSELCRB);
-	ctrl_outw(ctrl_inw(PORT_HIZCRA) & ~0x01e0, PORT_HIZCRA);
-#endif
-
-	/* CEU */
-	ctrl_outw((ctrl_inw(PORT_PTCR) & ~0x03c3) | 0x0051, PORT_PTCR);
-	ctrl_outw(ctrl_inw(PORT_PUCR) & ~0x03ff, PORT_PUCR);
-	ctrl_outw(ctrl_inw(PORT_PVCR) & ~0x03ff, PORT_PVCR);
-	ctrl_outw(ctrl_inw(PORT_PWCR) & ~0x3c00, PORT_PWCR);
-	ctrl_outw(ctrl_inw(PORT_PSELC) | 0x0001, PORT_PSELC);
-	ctrl_outw(ctrl_inw(PORT_PSELD) & ~0x2000, PORT_PSELD);
-	ctrl_outw(ctrl_inw(PORT_PSELE) | 0x000f, PORT_PSELE);
-	ctrl_outw(ctrl_inw(PORT_MSELCRB) | 0x2200, PORT_MSELCRB);
-	ctrl_outw(ctrl_inw(PORT_HIZCRA) & ~0x0a00, PORT_HIZCRA);
-	ctrl_outw(ctrl_inw(PORT_HIZCRB) & ~0x0003, PORT_HIZCRB);
 }
 
 static struct sh_machine_vector mv_migor __initmv = {
-- 
GitLab


From 0436ec15aeab7cceb0c1866fefb1edaa9aade28d Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 8 Oct 2008 20:42:10 +0900
Subject: [PATCH 645/895] sh: Add sh7723 pinmux code

This patch adds pinmux and gpio support for the sh7723 processor.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/sh7723.h            |  254 +++
 arch/sh/kernel/cpu/sh4a/Makefile        |    1 +
 arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c | 1909 +++++++++++++++++++++++
 3 files changed, 2164 insertions(+)
 create mode 100644 arch/sh/include/asm/sh7723.h
 create mode 100644 arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c

diff --git a/arch/sh/include/asm/sh7723.h b/arch/sh/include/asm/sh7723.h
new file mode 100644
index 000000000000..9d2f6d7aa938
--- /dev/null
+++ b/arch/sh/include/asm/sh7723.h
@@ -0,0 +1,254 @@
+#ifndef __ASM_SH7723_H__
+#define __ASM_SH7723_H__
+
+enum {
+	/* PTA */
+	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
+	GPIO_PTA3, GPIO_PTA2, GPIO_PTA1, GPIO_PTA0,
+
+	/* PTB */
+	GPIO_PTB7, GPIO_PTB6, GPIO_PTB5, GPIO_PTB4,
+	GPIO_PTB3, GPIO_PTB2, GPIO_PTB1, GPIO_PTB0,
+
+	/* PTC */
+	GPIO_PTC7, GPIO_PTC6, GPIO_PTC5, GPIO_PTC4,
+	GPIO_PTC3, GPIO_PTC2, GPIO_PTC1, GPIO_PTC0,
+
+	/* PTD */
+	GPIO_PTD7, GPIO_PTD6, GPIO_PTD5, GPIO_PTD4,
+	GPIO_PTD3, GPIO_PTD2, GPIO_PTD1, GPIO_PTD0,
+
+	/* PTE */
+	GPIO_PTE5, GPIO_PTE4, GPIO_PTE3, GPIO_PTE2,
+	GPIO_PTE1, GPIO_PTE0,
+
+	/* PTF */
+	GPIO_PTF7, GPIO_PTF6, GPIO_PTF5, GPIO_PTF4,
+	GPIO_PTF3, GPIO_PTF2, GPIO_PTF1, GPIO_PTF0,
+
+	/* PTG */
+	GPIO_PTG5, GPIO_PTG4, GPIO_PTG3, GPIO_PTG2,
+	GPIO_PTG1, GPIO_PTG0,
+
+	/* PTH */
+	GPIO_PTH7, GPIO_PTH6, GPIO_PTH5, GPIO_PTH4,
+	GPIO_PTH3, GPIO_PTH2, GPIO_PTH1, GPIO_PTH0,
+
+	/* PTJ */
+	GPIO_PTJ7, GPIO_PTJ5, GPIO_PTJ3, GPIO_PTJ2,
+	GPIO_PTJ1, GPIO_PTJ0,
+
+	/* PTK */
+	GPIO_PTK7, GPIO_PTK6, GPIO_PTK5, GPIO_PTK4,
+	GPIO_PTK3, GPIO_PTK2, GPIO_PTK1, GPIO_PTK0,
+
+	/* PTL */
+	GPIO_PTL7, GPIO_PTL6, GPIO_PTL5, GPIO_PTL4,
+	GPIO_PTL3, GPIO_PTL2, GPIO_PTL1, GPIO_PTL0,
+
+	/* PTM */
+	GPIO_PTM7, GPIO_PTM6, GPIO_PTM5, GPIO_PTM4,
+	GPIO_PTM3, GPIO_PTM2, GPIO_PTM1, GPIO_PTM0,
+
+	/* PTN */
+	GPIO_PTN7, GPIO_PTN6, GPIO_PTN5, GPIO_PTN4,
+	GPIO_PTN3, GPIO_PTN2, GPIO_PTN1, GPIO_PTN0,
+
+	/* PTQ */
+	GPIO_PTQ3, GPIO_PTQ2, GPIO_PTQ1, GPIO_PTQ0,
+
+	/* PTR */
+	GPIO_PTR7, GPIO_PTR6, GPIO_PTR5, GPIO_PTR4,
+	GPIO_PTR3, GPIO_PTR2, GPIO_PTR1, GPIO_PTR0,
+
+	/* PTS */
+	GPIO_PTS7, GPIO_PTS6, GPIO_PTS5, GPIO_PTS4,
+	GPIO_PTS3, GPIO_PTS2, GPIO_PTS1, GPIO_PTS0,
+
+	/* PTT */
+	GPIO_PTT5, GPIO_PTT4, GPIO_PTT3, GPIO_PTT2,
+	GPIO_PTT1, GPIO_PTT0,
+
+	/* PTU */
+	GPIO_PTU5, GPIO_PTU4, GPIO_PTU3, GPIO_PTU2,
+	GPIO_PTU1, GPIO_PTU0,
+
+	/* PTV */
+	GPIO_PTV7, GPIO_PTV6, GPIO_PTV5, GPIO_PTV4,
+	GPIO_PTV3, GPIO_PTV2, GPIO_PTV1, GPIO_PTV0,
+
+	/* PTW */
+	GPIO_PTW7, GPIO_PTW6, GPIO_PTW5, GPIO_PTW4,
+	GPIO_PTW3, GPIO_PTW2, GPIO_PTW1, GPIO_PTW0,
+
+	/* PTX */
+	GPIO_PTX7, GPIO_PTX6, GPIO_PTX5, GPIO_PTX4,
+	GPIO_PTX3, GPIO_PTX2, GPIO_PTX1, GPIO_PTX0,
+
+	/* PTY */
+	GPIO_PTY7, GPIO_PTY6, GPIO_PTY5, GPIO_PTY4,
+	GPIO_PTY3, GPIO_PTY2, GPIO_PTY1, GPIO_PTY0,
+
+	/* PTZ */
+	GPIO_PTZ7, GPIO_PTZ6, GPIO_PTZ5, GPIO_PTZ4,
+	GPIO_PTZ3, GPIO_PTZ2, GPIO_PTZ1, GPIO_PTZ0,
+
+	/* SCIF0 (SCIF: 3 pin PTT/PTU) */
+	GPIO_FN_SCIF0_PTT_TXD, GPIO_FN_SCIF0_PTT_RXD, GPIO_FN_SCIF0_PTT_SCK,
+	GPIO_FN_SCIF0_PTU_TXD, GPIO_FN_SCIF0_PTU_RXD, GPIO_FN_SCIF0_PTU_SCK,
+
+	/* SCIF1 (SCIF: 3 pin PTS/PTV) */
+	GPIO_FN_SCIF1_PTS_TXD, GPIO_FN_SCIF1_PTS_RXD, GPIO_FN_SCIF1_PTS_SCK,
+	GPIO_FN_SCIF1_PTV_TXD, GPIO_FN_SCIF1_PTV_RXD, GPIO_FN_SCIF1_PTV_SCK,
+
+	/* SCIF2 (SCIF: 3 pin PTT/PTU) */
+	GPIO_FN_SCIF2_PTT_TXD, GPIO_FN_SCIF2_PTT_RXD, GPIO_FN_SCIF2_PTT_SCK,
+	GPIO_FN_SCIF2_PTU_TXD, GPIO_FN_SCIF2_PTU_RXD, GPIO_FN_SCIF2_PTU_SCK,
+
+	/* SCIF3 (SCIFA: 5 pin PTS/PTV) */
+	GPIO_FN_SCIF3_PTS_TXD, GPIO_FN_SCIF3_PTS_RXD, GPIO_FN_SCIF3_PTS_SCK,
+	GPIO_FN_SCIF3_PTS_RTS, GPIO_FN_SCIF3_PTS_CTS,
+	GPIO_FN_SCIF3_PTV_TXD, GPIO_FN_SCIF3_PTV_RXD, GPIO_FN_SCIF3_PTV_SCK,
+	GPIO_FN_SCIF3_PTV_RTS, GPIO_FN_SCIF3_PTV_CTS,
+
+	/* SCIF4 (SCIFA: 3 pin PTE/PTN) */
+	GPIO_FN_SCIF4_PTE_TXD, GPIO_FN_SCIF4_PTE_RXD, GPIO_FN_SCIF4_PTE_SCK,
+	GPIO_FN_SCIF4_PTN_TXD, GPIO_FN_SCIF4_PTN_RXD, GPIO_FN_SCIF4_PTN_SCK,
+
+	/* SCIF5 (SCIFA: 3 pin PTE/PTN) */
+	GPIO_FN_SCIF5_PTE_TXD, GPIO_FN_SCIF5_PTE_RXD, GPIO_FN_SCIF5_PTE_SCK,
+	GPIO_FN_SCIF5_PTN_TXD, GPIO_FN_SCIF5_PTN_RXD, GPIO_FN_SCIF5_PTN_SCK,
+
+	/* CEU */
+	GPIO_FN_VIO_D15, GPIO_FN_VIO_D14, GPIO_FN_VIO_D13, GPIO_FN_VIO_D12,
+	GPIO_FN_VIO_D11, GPIO_FN_VIO_D10, GPIO_FN_VIO_D9, GPIO_FN_VIO_D8,
+	GPIO_FN_VIO_D7, GPIO_FN_VIO_D6, GPIO_FN_VIO_D5, GPIO_FN_VIO_D4,
+	GPIO_FN_VIO_D3, GPIO_FN_VIO_D2, GPIO_FN_VIO_D1, GPIO_FN_VIO_D0,
+	GPIO_FN_VIO_FLD, GPIO_FN_VIO_CKO,
+	GPIO_FN_VIO_VD1, GPIO_FN_VIO_HD1, GPIO_FN_VIO_CLK1,
+	GPIO_FN_VIO_VD2, GPIO_FN_VIO_HD2, GPIO_FN_VIO_CLK2,
+
+	/* LCDC */
+	GPIO_FN_LCDD23, GPIO_FN_LCDD22, GPIO_FN_LCDD21, GPIO_FN_LCDD20,
+	GPIO_FN_LCDD19, GPIO_FN_LCDD18, GPIO_FN_LCDD17, GPIO_FN_LCDD16,
+	GPIO_FN_LCDD15, GPIO_FN_LCDD14, GPIO_FN_LCDD13, GPIO_FN_LCDD12,
+	GPIO_FN_LCDD11, GPIO_FN_LCDD10, GPIO_FN_LCDD9, GPIO_FN_LCDD8,
+	GPIO_FN_LCDD7, GPIO_FN_LCDD6, GPIO_FN_LCDD5, GPIO_FN_LCDD4,
+	GPIO_FN_LCDD3, GPIO_FN_LCDD2, GPIO_FN_LCDD1, GPIO_FN_LCDD0,
+	GPIO_FN_LCDLCLK_PTR, GPIO_FN_LCDLCLK_PTW,
+	/* Main LCD */
+	GPIO_FN_LCDDON, GPIO_FN_LCDVCPWC, GPIO_FN_LCDVEPWC, GPIO_FN_LCDVSYN,
+	/* Main LCD - RGB Mode */
+	GPIO_FN_LCDDCK, GPIO_FN_LCDHSYN, GPIO_FN_LCDDISP,
+	/* Main LCD - SYS Mode */
+	GPIO_FN_LCDRS, GPIO_FN_LCDCS, GPIO_FN_LCDWR, GPIO_FN_LCDRD,
+
+	/* IRQ */
+	GPIO_FN_IRQ0, GPIO_FN_IRQ1, GPIO_FN_IRQ2, GPIO_FN_IRQ3,
+	GPIO_FN_IRQ4, GPIO_FN_IRQ5, GPIO_FN_IRQ6, GPIO_FN_IRQ7,
+
+	/* AUD */
+	GPIO_FN_AUDATA3, GPIO_FN_AUDATA2, GPIO_FN_AUDATA1, GPIO_FN_AUDATA0,
+	GPIO_FN_AUDCK, GPIO_FN_AUDSYNC,
+
+	/* SDHI0 (PTD) */
+	GPIO_FN_SDHI0CD_PTD, GPIO_FN_SDHI0WP_PTD,
+	GPIO_FN_SDHI0D3_PTD, GPIO_FN_SDHI0D2_PTD,
+	GPIO_FN_SDHI0D1_PTD, GPIO_FN_SDHI0D0_PTD,
+	GPIO_FN_SDHI0CMD_PTD, GPIO_FN_SDHI0CLK_PTD,
+
+	/* SDHI0 (PTS) */
+	GPIO_FN_SDHI0CD_PTS, GPIO_FN_SDHI0WP_PTS,
+	GPIO_FN_SDHI0D3_PTS, GPIO_FN_SDHI0D2_PTS,
+	GPIO_FN_SDHI0D1_PTS, GPIO_FN_SDHI0D0_PTS,
+	GPIO_FN_SDHI0CMD_PTS, GPIO_FN_SDHI0CLK_PTS,
+
+	/* SDHI1 */
+	GPIO_FN_SDHI1CD, GPIO_FN_SDHI1WP, GPIO_FN_SDHI1D3, GPIO_FN_SDHI1D2,
+	GPIO_FN_SDHI1D1, GPIO_FN_SDHI1D0, GPIO_FN_SDHI1CMD, GPIO_FN_SDHI1CLK,
+
+	/* SIUA */
+	GPIO_FN_SIUAFCK, GPIO_FN_SIUAILR, GPIO_FN_SIUAIBT, GPIO_FN_SIUAISLD,
+	GPIO_FN_SIUAOLR, GPIO_FN_SIUAOBT, GPIO_FN_SIUAOSLD, GPIO_FN_SIUAMCK,
+	GPIO_FN_SIUAISPD, GPIO_FN_SIUOSPD,
+
+	/* SIUB */
+	GPIO_FN_SIUBFCK, GPIO_FN_SIUBILR, GPIO_FN_SIUBIBT, GPIO_FN_SIUBISLD,
+	GPIO_FN_SIUBOLR, GPIO_FN_SIUBOBT, GPIO_FN_SIUBOSLD, GPIO_FN_SIUBMCK,
+
+	/* IRDA */
+	GPIO_FN_IRDA_IN, GPIO_FN_IRDA_OUT,
+
+	/* VOU */
+	GPIO_FN_DV_CLKI, GPIO_FN_DV_CLK, GPIO_FN_DV_HSYNC, GPIO_FN_DV_VSYNC,
+	GPIO_FN_DV_D15, GPIO_FN_DV_D14, GPIO_FN_DV_D13, GPIO_FN_DV_D12,
+	GPIO_FN_DV_D11, GPIO_FN_DV_D10, GPIO_FN_DV_D9, GPIO_FN_DV_D8,
+	GPIO_FN_DV_D7, GPIO_FN_DV_D6, GPIO_FN_DV_D5, GPIO_FN_DV_D4,
+	GPIO_FN_DV_D3, GPIO_FN_DV_D2, GPIO_FN_DV_D1, GPIO_FN_DV_D0,
+
+	/* KEYSC */
+	GPIO_FN_KEYIN0, GPIO_FN_KEYIN1, GPIO_FN_KEYIN2, GPIO_FN_KEYIN3,
+	GPIO_FN_KEYIN4, GPIO_FN_KEYOUT0, GPIO_FN_KEYOUT1, GPIO_FN_KEYOUT2,
+	GPIO_FN_KEYOUT3, GPIO_FN_KEYOUT4_IN6, GPIO_FN_KEYOUT5_IN5,
+
+	/* MSIOF0 (PTF) */
+	GPIO_FN_MSIOF0_PTF_TXD, GPIO_FN_MSIOF0_PTF_RXD, GPIO_FN_MSIOF0_PTF_MCK,
+	GPIO_FN_MSIOF0_PTF_TSYNC, GPIO_FN_MSIOF0_PTF_TSCK,
+	GPIO_FN_MSIOF0_PTF_RSYNC, GPIO_FN_MSIOF0_PTF_RSCK,
+	GPIO_FN_MSIOF0_PTF_SS1, GPIO_FN_MSIOF0_PTF_SS2,
+
+	/* MSIOF0 (PTT+PTX) */
+	GPIO_FN_MSIOF0_PTT_TXD, GPIO_FN_MSIOF0_PTT_RXD, GPIO_FN_MSIOF0_PTX_MCK,
+	GPIO_FN_MSIOF0_PTT_TSYNC, GPIO_FN_MSIOF0_PTT_TSCK,
+	GPIO_FN_MSIOF0_PTT_RSYNC, GPIO_FN_MSIOF0_PTT_RSCK,
+	GPIO_FN_MSIOF0_PTT_SS1, GPIO_FN_MSIOF0_PTT_SS2,
+
+	/* MSIOF1 */
+	GPIO_FN_MSIOF1_TXD, GPIO_FN_MSIOF1_RXD, GPIO_FN_MSIOF1_MCK,
+	GPIO_FN_MSIOF1_TSYNC, GPIO_FN_MSIOF1_TSCK,
+	GPIO_FN_MSIOF1_RSYNC, GPIO_FN_MSIOF1_RSCK,
+	GPIO_FN_MSIOF1_SS1, GPIO_FN_MSIOF1_SS2,
+
+	/* TSIF */
+	GPIO_FN_TS0_SDAT, GPIO_FN_TS0_SCK, GPIO_FN_TS0_SDEN, GPIO_FN_TS0_SPSYNC,
+
+	/* FLCTL */
+	GPIO_FN_FCE, GPIO_FN_NAF7, GPIO_FN_NAF6, GPIO_FN_NAF5, GPIO_FN_NAF4,
+	GPIO_FN_NAF3, GPIO_FN_NAF2, GPIO_FN_NAF1, GPIO_FN_NAF0, GPIO_FN_FCDE,
+	GPIO_FN_FOE, GPIO_FN_FSC, GPIO_FN_FWE, GPIO_FN_FRB,
+
+	/* DMAC */
+	GPIO_FN_DACK1, GPIO_FN_DREQ1, GPIO_FN_DACK0, GPIO_FN_DREQ0,
+
+	/* ADC */
+	GPIO_FN_AN3, GPIO_FN_AN2, GPIO_FN_AN1, GPIO_FN_AN0, GPIO_FN_ADTRG,
+
+	/* CPG */
+	GPIO_FN_STATUS0, GPIO_FN_PDSTATUS,
+
+	/* TPU */
+	GPIO_FN_TPUTO3, GPIO_FN_TPUTO2, GPIO_FN_TPUTO1, GPIO_FN_TPUTO0,
+
+	/* BSC */
+	GPIO_FN_D31, GPIO_FN_D30, GPIO_FN_D29, GPIO_FN_D28,
+	GPIO_FN_D27, GPIO_FN_D26, GPIO_FN_D25, GPIO_FN_D24,
+	GPIO_FN_D23, GPIO_FN_D22, GPIO_FN_D21, GPIO_FN_D20,
+	GPIO_FN_D19, GPIO_FN_D18, GPIO_FN_D17, GPIO_FN_D16,
+	GPIO_FN_IOIS16, GPIO_FN_WAIT, GPIO_FN_BS,
+	GPIO_FN_A25, GPIO_FN_A24, GPIO_FN_A23, GPIO_FN_A22,
+	GPIO_FN_CS6B_CE1B, GPIO_FN_CS6A_CE2B,
+	GPIO_FN_CS5B_CE1A, GPIO_FN_CS5A_CE2A,
+	GPIO_FN_WE3_ICIOWR, GPIO_FN_WE2_ICIORD,
+
+	/* ATAPI */
+	GPIO_FN_IDED15, GPIO_FN_IDED14, GPIO_FN_IDED13, GPIO_FN_IDED12,
+	GPIO_FN_IDED11, GPIO_FN_IDED10, GPIO_FN_IDED9, GPIO_FN_IDED8,
+	GPIO_FN_IDED7, GPIO_FN_IDED6, GPIO_FN_IDED5, GPIO_FN_IDED4,
+	GPIO_FN_IDED3, GPIO_FN_IDED2, GPIO_FN_IDED1, GPIO_FN_IDED0,
+	GPIO_FN_DIRECTION, GPIO_FN_EXBUF_ENB, GPIO_FN_IDERST, GPIO_FN_IODACK,
+	GPIO_FN_IODREQ, GPIO_FN_IDEIORDY, GPIO_FN_IDEINT, GPIO_FN_IDEIOWR,
+	GPIO_FN_IDEIORD, GPIO_FN_IDECS1, GPIO_FN_IDECS0, GPIO_FN_IDEA2,
+	GPIO_FN_IDEA1, GPIO_FN_IDEA0,
+};
+
+#endif /* __ASM_SH7723_H__ */
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index aced813b03ca..be9a0c185958 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -29,6 +29,7 @@ clock-$(CONFIG_CPU_SUBTYPE_SHX3)	:= clock-shx3.o
 
 # Pinmux setup
 pinmux-$(CONFIG_CPU_SUBTYPE_SH7722)	:= pinmux-sh7722.o
+pinmux-$(CONFIG_CPU_SUBTYPE_SH7723)	:= pinmux-sh7723.o
 
 obj-y			+= $(clock-y)
 obj-$(CONFIG_SMP)	+= $(smp-y)
diff --git a/arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c b/arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c
new file mode 100644
index 000000000000..92a9bc91015f
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c
@@ -0,0 +1,1909 @@
+/*
+ * SH7723 Pinmux
+ *
+ *  Copyright (C) 2008  Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <asm/sh7723.h>
+
+enum {
+	PINMUX_RESERVED = 0,
+
+	PINMUX_DATA_BEGIN,
+	PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+	PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA,
+	PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+	PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA,
+	PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+	PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA,
+	PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+	PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA,
+	PTE5_DATA, PTE4_DATA, PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA,
+	PTF7_DATA, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+	PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA,
+	PTG5_DATA, PTG4_DATA, PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA,
+	PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+	PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA,
+	PTJ7_DATA, PTJ5_DATA, PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA,
+	PTK7_DATA, PTK6_DATA, PTK5_DATA, PTK4_DATA,
+	PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA,
+	PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+	PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA,
+	PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+	PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA,
+	PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+	PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA,
+	PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA,
+	PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+	PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA,
+	PTS7_DATA, PTS6_DATA, PTS5_DATA, PTS4_DATA,
+	PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA,
+	PTT5_DATA, PTT4_DATA, PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA,
+	PTU5_DATA, PTU4_DATA, PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA,
+	PTV7_DATA, PTV6_DATA, PTV5_DATA, PTV4_DATA,
+	PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA,
+	PTW7_DATA, PTW6_DATA, PTW5_DATA, PTW4_DATA,
+	PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA,
+	PTX7_DATA, PTX6_DATA, PTX5_DATA, PTX4_DATA,
+	PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA,
+	PTY7_DATA, PTY6_DATA, PTY5_DATA, PTY4_DATA,
+	PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA,
+	PTZ7_DATA, PTZ6_DATA, PTZ5_DATA, PTZ4_DATA,
+	PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA,
+	PINMUX_DATA_END,
+
+	PINMUX_INPUT_BEGIN,
+	PTA7_IN, PTA6_IN, PTA5_IN, PTA4_IN,
+	PTA3_IN, PTA2_IN, PTA1_IN, PTA0_IN,
+	PTB7_IN, PTB6_IN, PTB5_IN, PTB4_IN,
+	PTB3_IN, PTB2_IN, PTB1_IN, PTB0_IN,
+	PTC7_IN, PTC6_IN, PTC5_IN, PTC4_IN,
+	PTC3_IN, PTC2_IN, PTC1_IN, PTC0_IN,
+	PTD7_IN, PTD6_IN, PTD5_IN, PTD4_IN,
+	PTD3_IN, PTD2_IN, PTD1_IN, PTD0_IN,
+	PTE5_IN, PTE4_IN, PTE3_IN, PTE2_IN, PTE1_IN, PTE0_IN,
+	PTF7_IN, PTF6_IN, PTF5_IN, PTF4_IN,
+	PTF3_IN, PTF2_IN, PTF1_IN, PTF0_IN,
+	PTH7_IN, PTH6_IN, PTH5_IN, PTH4_IN,
+	PTH3_IN, PTH2_IN, PTH1_IN, PTH0_IN,
+	PTJ3_IN, PTJ2_IN, PTJ1_IN, PTJ0_IN,
+	PTK7_IN, PTK6_IN, PTK5_IN, PTK4_IN,
+	PTK3_IN, PTK2_IN, PTK1_IN, PTK0_IN,
+	PTL7_IN, PTL6_IN, PTL5_IN, PTL4_IN,
+	PTL3_IN, PTL2_IN, PTL1_IN, PTL0_IN,
+	PTM7_IN, PTM6_IN, PTM5_IN, PTM4_IN,
+	PTM3_IN, PTM2_IN, PTM1_IN, PTM0_IN,
+	PTN7_IN, PTN6_IN, PTN5_IN, PTN4_IN,
+	PTN3_IN, PTN2_IN, PTN1_IN, PTN0_IN,
+	PTQ3_IN, PTQ2_IN, PTQ1_IN, PTQ0_IN,
+	PTR7_IN, PTR6_IN, PTR5_IN, PTR4_IN,
+	PTR3_IN, PTR2_IN, PTR1_IN, PTR0_IN,
+	PTS7_IN, PTS6_IN, PTS5_IN, PTS4_IN,
+	PTS3_IN, PTS2_IN, PTS1_IN, PTS0_IN,
+	PTT5_IN, PTT4_IN, PTT3_IN, PTT2_IN, PTT1_IN, PTT0_IN,
+	PTU5_IN, PTU4_IN, PTU3_IN, PTU2_IN, PTU1_IN, PTU0_IN,
+	PTV7_IN, PTV6_IN, PTV5_IN, PTV4_IN,
+	PTV3_IN, PTV2_IN, PTV1_IN, PTV0_IN,
+	PTW7_IN, PTW6_IN, PTW5_IN, PTW4_IN,
+	PTW3_IN, PTW2_IN, PTW1_IN, PTW0_IN,
+	PTX7_IN, PTX6_IN, PTX5_IN, PTX4_IN,
+	PTX3_IN, PTX2_IN, PTX1_IN, PTX0_IN,
+	PTY7_IN, PTY6_IN, PTY5_IN, PTY4_IN,
+	PTY3_IN, PTY2_IN, PTY1_IN, PTY0_IN,
+	PTZ7_IN, PTZ6_IN, PTZ5_IN, PTZ4_IN,
+	PTZ3_IN, PTZ2_IN, PTZ1_IN, PTZ0_IN,
+	PINMUX_INPUT_END,
+
+	PINMUX_INPUT_PULLUP_BEGIN,
+	PTA4_IN_PU, PTA3_IN_PU, PTA2_IN_PU, PTA1_IN_PU, PTA0_IN_PU,
+	PTB2_IN_PU, PTB1_IN_PU,
+	PTR2_IN_PU,
+	PINMUX_INPUT_PULLUP_END,
+
+	PINMUX_OUTPUT_BEGIN,
+	PTA7_OUT, PTA6_OUT, PTA5_OUT, PTA4_OUT,
+	PTA3_OUT, PTA2_OUT, PTA1_OUT, PTA0_OUT,
+	PTB7_OUT, PTB6_OUT, PTB5_OUT, PTB4_OUT,
+	PTB3_OUT, PTB2_OUT, PTB1_OUT, PTB0_OUT,
+	PTC7_OUT, PTC6_OUT, PTC5_OUT, PTC4_OUT,
+	PTC3_OUT, PTC2_OUT, PTC1_OUT, PTC0_OUT,
+	PTD7_OUT, PTD6_OUT, PTD5_OUT, PTD4_OUT,
+	PTD3_OUT, PTD2_OUT, PTD1_OUT, PTD0_OUT,
+	PTE5_OUT, PTE4_OUT, PTE3_OUT, PTE2_OUT, PTE1_OUT, PTE0_OUT,
+	PTF7_OUT, PTF6_OUT, PTF5_OUT, PTF4_OUT,
+	PTF3_OUT, PTF2_OUT, PTF1_OUT, PTF0_OUT,
+	PTG5_OUT, PTG4_OUT, PTG3_OUT, PTG2_OUT, PTG1_OUT, PTG0_OUT,
+	PTH7_OUT, PTH6_OUT, PTH5_OUT, PTH4_OUT,
+	PTH3_OUT, PTH2_OUT, PTH1_OUT, PTH0_OUT,
+	PTJ7_OUT, PTJ5_OUT, PTJ3_OUT, PTJ2_OUT, PTJ1_OUT, PTJ0_OUT,
+	PTK7_OUT, PTK6_OUT, PTK5_OUT, PTK4_OUT,
+	PTK3_OUT, PTK2_OUT, PTK1_OUT, PTK0_OUT,
+	PTL7_OUT, PTL6_OUT, PTL5_OUT, PTL4_OUT,
+	PTL3_OUT, PTL2_OUT, PTL1_OUT, PTL0_OUT,
+	PTM7_OUT, PTM6_OUT, PTM5_OUT, PTM4_OUT,
+	PTM3_OUT, PTM2_OUT, PTM1_OUT, PTM0_OUT,
+	PTN7_OUT, PTN6_OUT, PTN5_OUT, PTN4_OUT,
+	PTN3_OUT, PTN2_OUT, PTN1_OUT, PTN0_OUT,
+	PTR7_OUT, PTR6_OUT, PTR5_OUT, PTR4_OUT,
+	PTR1_OUT, PTR0_OUT,
+	PTS7_OUT, PTS6_OUT, PTS5_OUT, PTS4_OUT,
+	PTS3_OUT, PTS2_OUT, PTS1_OUT, PTS0_OUT,
+	PTT5_OUT, PTT4_OUT, PTT3_OUT, PTT2_OUT, PTT1_OUT, PTT0_OUT,
+	PTU5_OUT, PTU4_OUT, PTU3_OUT, PTU2_OUT, PTU1_OUT, PTU0_OUT,
+	PTV7_OUT, PTV6_OUT, PTV5_OUT, PTV4_OUT,
+	PTV3_OUT, PTV2_OUT, PTV1_OUT, PTV0_OUT,
+	PTW7_OUT, PTW6_OUT, PTW5_OUT, PTW4_OUT,
+	PTW3_OUT, PTW2_OUT, PTW1_OUT, PTW0_OUT,
+	PTX7_OUT, PTX6_OUT, PTX5_OUT, PTX4_OUT,
+	PTX3_OUT, PTX2_OUT, PTX1_OUT, PTX0_OUT,
+	PTY7_OUT, PTY6_OUT, PTY5_OUT, PTY4_OUT,
+	PTY3_OUT, PTY2_OUT, PTY1_OUT, PTY0_OUT,
+	PTZ7_OUT, PTZ6_OUT, PTZ5_OUT, PTZ4_OUT,
+	PTZ3_OUT, PTZ2_OUT, PTZ1_OUT, PTZ0_OUT,
+	PINMUX_OUTPUT_END,
+
+	PINMUX_FUNCTION_BEGIN,
+	PTA7_FN, PTA6_FN, PTA5_FN, PTA4_FN,
+	PTA3_FN, PTA2_FN, PTA1_FN, PTA0_FN,
+	PTB7_FN, PTB6_FN, PTB5_FN, PTB4_FN,
+	PTB3_FN, PTB2_FN, PTB1_FN, PTB0_FN,
+	PTC7_FN, PTC6_FN, PTC5_FN, PTC4_FN,
+	PTC3_FN, PTC2_FN, PTC1_FN, PTC0_FN,
+	PTD7_FN, PTD6_FN, PTD5_FN, PTD4_FN,
+	PTD3_FN, PTD2_FN, PTD1_FN, PTD0_FN,
+	PTE5_FN, PTE4_FN, PTE3_FN, PTE2_FN, PTE1_FN, PTE0_FN,
+	PTF7_FN, PTF6_FN, PTF5_FN, PTF4_FN,
+	PTF3_FN, PTF2_FN, PTF1_FN, PTF0_FN,
+	PTG5_FN, PTG4_FN, PTG3_FN, PTG2_FN, PTG1_FN, PTG0_FN,
+	PTH7_FN, PTH6_FN, PTH5_FN, PTH4_FN,
+	PTH3_FN, PTH2_FN, PTH1_FN, PTH0_FN,
+	PTJ7_FN, PTJ5_FN, PTJ3_FN, PTJ2_FN, PTJ1_FN, PTJ0_FN,
+	PTK7_FN, PTK6_FN, PTK5_FN, PTK4_FN,
+	PTK3_FN, PTK2_FN, PTK1_FN, PTK0_FN,
+	PTL7_FN, PTL6_FN, PTL5_FN, PTL4_FN,
+	PTL3_FN, PTL2_FN, PTL1_FN, PTL0_FN,
+	PTM7_FN, PTM6_FN, PTM5_FN, PTM4_FN,
+	PTM3_FN, PTM2_FN, PTM1_FN, PTM0_FN,
+	PTN7_FN, PTN6_FN, PTN5_FN, PTN4_FN,
+	PTN3_FN, PTN2_FN, PTN1_FN, PTN0_FN,
+	PTQ3_FN, PTQ2_FN, PTQ1_FN, PTQ0_FN,
+	PTR7_FN, PTR6_FN, PTR5_FN, PTR4_FN,
+	PTR3_FN, PTR2_FN, PTR1_FN, PTR0_FN,
+	PTS7_FN, PTS6_FN, PTS5_FN, PTS4_FN,
+	PTS3_FN, PTS2_FN, PTS1_FN, PTS0_FN,
+	PTT5_FN, PTT4_FN, PTT3_FN, PTT2_FN, PTT1_FN, PTT0_FN,
+	PTU5_FN, PTU4_FN, PTU3_FN, PTU2_FN, PTU1_FN, PTU0_FN,
+	PTV7_FN, PTV6_FN, PTV5_FN, PTV4_FN,
+	PTV3_FN, PTV2_FN, PTV1_FN, PTV0_FN,
+	PTW7_FN, PTW6_FN, PTW5_FN, PTW4_FN,
+	PTW3_FN, PTW2_FN, PTW1_FN, PTW0_FN,
+	PTX7_FN, PTX6_FN, PTX5_FN, PTX4_FN,
+	PTX3_FN, PTX2_FN, PTX1_FN, PTX0_FN,
+	PTY7_FN, PTY6_FN, PTY5_FN, PTY4_FN,
+	PTY3_FN, PTY2_FN, PTY1_FN, PTY0_FN,
+	PTZ7_FN, PTZ6_FN, PTZ5_FN, PTZ4_FN,
+	PTZ3_FN, PTZ2_FN, PTZ1_FN, PTZ0_FN,
+
+
+	PSA15_PSA14_FN1, PSA15_PSA14_FN2,
+	PSA13_PSA12_FN1, PSA13_PSA12_FN2,
+	PSA11_PSA10_FN1, PSA11_PSA10_FN2,
+	PSA5_PSA4_FN1, PSA5_PSA4_FN2, PSA5_PSA4_FN3,
+	PSA3_PSA2_FN1, PSA3_PSA2_FN2,
+	PSB15_PSB14_FN1, PSB15_PSB14_FN2,
+	PSB13_PSB12_LCDC_RGB, PSB13_PSB12_LCDC_SYS,
+	PSB9_PSB8_FN1, PSB9_PSB8_FN2, PSB9_PSB8_FN3,
+	PSB7_PSB6_FN1, PSB7_PSB6_FN2,
+	PSB5_PSB4_FN1, PSB5_PSB4_FN2,
+	PSB3_PSB2_FN1, PSB3_PSB2_FN2,
+	PSC15_PSC14_FN1, PSC15_PSC14_FN2,
+	PSC13_PSC12_FN1, PSC13_PSC12_FN2,
+	PSC11_PSC10_FN1, PSC11_PSC10_FN2, PSC11_PSC10_FN3,
+	PSC9_PSC8_FN1, PSC9_PSC8_FN2,
+	PSC7_PSC6_FN1, PSC7_PSC6_FN2, PSC7_PSC6_FN3,
+	PSD15_PSD14_FN1, PSD15_PSD14_FN2,
+	PSD13_PSD12_FN1, PSD13_PSD12_FN2,
+	PSD11_PSD10_FN1, PSD11_PSD10_FN2, PSD11_PSD10_FN3,
+	PSD9_PSD8_FN1, PSD9_PSD8_FN2,
+	PSD7_PSD6_FN1, PSD7_PSD6_FN2,
+	PSD5_PSD4_FN1, PSD5_PSD4_FN2,
+	PSD3_PSD2_FN1, PSD3_PSD2_FN2,
+	PSD1_PSD0_FN1, PSD1_PSD0_FN2,
+	PINMUX_FUNCTION_END,
+
+	PINMUX_MARK_BEGIN,
+	SCIF0_PTT_TXD_MARK, SCIF0_PTT_RXD_MARK,
+	SCIF0_PTT_SCK_MARK, SCIF0_PTU_TXD_MARK,
+	SCIF0_PTU_RXD_MARK, SCIF0_PTU_SCK_MARK,
+
+	SCIF1_PTS_TXD_MARK, SCIF1_PTS_RXD_MARK,
+	SCIF1_PTS_SCK_MARK, SCIF1_PTV_TXD_MARK,
+	SCIF1_PTV_RXD_MARK, SCIF1_PTV_SCK_MARK,
+
+	SCIF2_PTT_TXD_MARK, SCIF2_PTT_RXD_MARK,
+	SCIF2_PTT_SCK_MARK, SCIF2_PTU_TXD_MARK,
+	SCIF2_PTU_RXD_MARK, SCIF2_PTU_SCK_MARK,
+
+	SCIF3_PTS_TXD_MARK, SCIF3_PTS_RXD_MARK,
+	SCIF3_PTS_SCK_MARK, SCIF3_PTS_RTS_MARK,
+	SCIF3_PTS_CTS_MARK, SCIF3_PTV_TXD_MARK,
+	SCIF3_PTV_RXD_MARK, SCIF3_PTV_SCK_MARK,
+	SCIF3_PTV_RTS_MARK, SCIF3_PTV_CTS_MARK,
+
+	SCIF4_PTE_TXD_MARK, SCIF4_PTE_RXD_MARK,
+	SCIF4_PTE_SCK_MARK, SCIF4_PTN_TXD_MARK,
+	SCIF4_PTN_RXD_MARK, SCIF4_PTN_SCK_MARK,
+
+	SCIF5_PTE_TXD_MARK, SCIF5_PTE_RXD_MARK,
+	SCIF5_PTE_SCK_MARK, SCIF5_PTN_TXD_MARK,
+	SCIF5_PTN_RXD_MARK, SCIF5_PTN_SCK_MARK,
+
+	VIO_D15_MARK, VIO_D14_MARK, VIO_D13_MARK, VIO_D12_MARK,
+	VIO_D11_MARK, VIO_D10_MARK, VIO_D9_MARK, VIO_D8_MARK,
+	VIO_D7_MARK, VIO_D6_MARK, VIO_D5_MARK, VIO_D4_MARK,
+	VIO_D3_MARK, VIO_D2_MARK, VIO_D1_MARK, VIO_D0_MARK,
+	VIO_FLD_MARK, VIO_CKO_MARK,
+	VIO_VD1_MARK, VIO_HD1_MARK, VIO_CLK1_MARK,
+	VIO_HD2_MARK, VIO_VD2_MARK, VIO_CLK2_MARK,
+
+	LCDD23_MARK, LCDD22_MARK, LCDD21_MARK, LCDD20_MARK,
+	LCDD19_MARK, LCDD18_MARK, LCDD17_MARK, LCDD16_MARK,
+	LCDD15_MARK, LCDD14_MARK, LCDD13_MARK, LCDD12_MARK,
+	LCDD11_MARK, LCDD10_MARK, LCDD9_MARK, LCDD8_MARK,
+	LCDD7_MARK, LCDD6_MARK, LCDD5_MARK, LCDD4_MARK,
+	LCDD3_MARK, LCDD2_MARK, LCDD1_MARK, LCDD0_MARK,
+	LCDDON_MARK, LCDVCPWC_MARK, LCDVEPWC_MARK,
+	LCDVSYN_MARK, LCDDCK_MARK, LCDHSYN_MARK, LCDDISP_MARK,
+	LCDRS_MARK, LCDCS_MARK, LCDWR_MARK, LCDRD_MARK,
+	LCDLCLK_PTR_MARK, LCDLCLK_PTW_MARK,
+
+	IRQ0_MARK, IRQ1_MARK, IRQ2_MARK, IRQ3_MARK,
+	IRQ4_MARK, IRQ5_MARK, IRQ6_MARK, IRQ7_MARK,
+
+	AUDATA3_MARK, AUDATA2_MARK, AUDATA1_MARK, AUDATA0_MARK,
+	AUDCK_MARK, AUDSYNC_MARK,
+
+	SDHI0CD_PTD_MARK, SDHI0WP_PTD_MARK,
+	SDHI0D3_PTD_MARK, SDHI0D2_PTD_MARK,
+	SDHI0D1_PTD_MARK, SDHI0D0_PTD_MARK,
+	SDHI0CMD_PTD_MARK, SDHI0CLK_PTD_MARK,
+
+	SDHI0CD_PTS_MARK, SDHI0WP_PTS_MARK,
+	SDHI0D3_PTS_MARK, SDHI0D2_PTS_MARK,
+	SDHI0D1_PTS_MARK, SDHI0D0_PTS_MARK,
+	SDHI0CMD_PTS_MARK, SDHI0CLK_PTS_MARK,
+
+	SDHI1CD_MARK, SDHI1WP_MARK, SDHI1D3_MARK, SDHI1D2_MARK,
+	SDHI1D1_MARK, SDHI1D0_MARK, SDHI1CMD_MARK, SDHI1CLK_MARK,
+
+	SIUAFCK_MARK, SIUAILR_MARK, SIUAIBT_MARK, SIUAISLD_MARK,
+	SIUAOLR_MARK, SIUAOBT_MARK, SIUAOSLD_MARK, SIUAMCK_MARK,
+	SIUAISPD_MARK, SIUAOSPD_MARK,
+
+	SIUBFCK_MARK, SIUBILR_MARK, SIUBIBT_MARK, SIUBISLD_MARK,
+	SIUBOLR_MARK, SIUBOBT_MARK, SIUBOSLD_MARK, SIUBMCK_MARK,
+
+	IRDA_IN_MARK, IRDA_OUT_MARK,
+
+	DV_CLKI_MARK, DV_CLK_MARK, DV_HSYNC_MARK, DV_VSYNC_MARK,
+	DV_D15_MARK, DV_D14_MARK, DV_D13_MARK, DV_D12_MARK,
+	DV_D11_MARK, DV_D10_MARK, DV_D9_MARK, DV_D8_MARK,
+	DV_D7_MARK, DV_D6_MARK, DV_D5_MARK, DV_D4_MARK,
+	DV_D3_MARK, DV_D2_MARK, DV_D1_MARK, DV_D0_MARK,
+
+	KEYIN0_MARK, KEYIN1_MARK, KEYIN2_MARK, KEYIN3_MARK, KEYIN4_MARK,
+	KEYOUT0_MARK, KEYOUT1_MARK, KEYOUT2_MARK, KEYOUT3_MARK,
+	KEYOUT4_IN6_MARK, KEYOUT5_IN5_MARK,
+
+	MSIOF0_PTF_TXD_MARK, MSIOF0_PTF_RXD_MARK, MSIOF0_PTF_MCK_MARK,
+	MSIOF0_PTF_TSYNC_MARK, MSIOF0_PTF_TSCK_MARK, MSIOF0_PTF_RSYNC_MARK,
+	MSIOF0_PTF_RSCK_MARK, MSIOF0_PTF_SS1_MARK, MSIOF0_PTF_SS2_MARK,
+
+	MSIOF0_PTT_TXD_MARK, MSIOF0_PTT_RXD_MARK, MSIOF0_PTX_MCK_MARK,
+	MSIOF0_PTT_TSYNC_MARK, MSIOF0_PTT_TSCK_MARK, MSIOF0_PTT_RSYNC_MARK,
+	MSIOF0_PTT_RSCK_MARK, MSIOF0_PTT_SS1_MARK, MSIOF0_PTT_SS2_MARK,
+
+	MSIOF1_TXD_MARK, MSIOF1_RXD_MARK, MSIOF1_MCK_MARK,
+	MSIOF1_TSYNC_MARK, MSIOF1_TSCK_MARK, MSIOF1_RSYNC_MARK,
+	MSIOF1_RSCK_MARK, MSIOF1_SS1_MARK, MSIOF1_SS2_MARK,
+
+	TS0_SDAT_MARK, TS0_SCK_MARK, TS0_SDEN_MARK, TS0_SPSYNC_MARK,
+
+	FCE_MARK, NAF7_MARK, NAF6_MARK, NAF5_MARK, NAF4_MARK,
+	NAF3_MARK, NAF2_MARK, NAF1_MARK, NAF0_MARK, FCDE_MARK,
+	FOE_MARK, FSC_MARK, FWE_MARK, FRB_MARK,
+
+	DACK1_MARK, DREQ1_MARK, DACK0_MARK, DREQ0_MARK,
+
+	AN3_MARK, AN2_MARK, AN1_MARK, AN0_MARK, ADTRG_MARK,
+
+	STATUS0_MARK, PDSTATUS_MARK,
+
+	TPUTO3_MARK, TPUTO2_MARK, TPUTO1_MARK, TPUTO0_MARK,
+
+	D31_MARK, D30_MARK, D29_MARK, D28_MARK,
+	D27_MARK, D26_MARK, D25_MARK, D24_MARK,
+	D23_MARK, D22_MARK, D21_MARK, D20_MARK,
+	D19_MARK, D18_MARK, D17_MARK, D16_MARK,
+	IOIS16_MARK, WAIT_MARK, BS_MARK,
+	A25_MARK, A24_MARK, A23_MARK, A22_MARK,
+	CS6B_CE1B_MARK, CS6A_CE2B_MARK,
+	CS5B_CE1A_MARK, CS5A_CE2A_MARK,
+	WE3_ICIOWR_MARK, WE2_ICIORD_MARK,
+
+	IDED15_MARK, IDED14_MARK, IDED13_MARK, IDED12_MARK,
+	IDED11_MARK, IDED10_MARK, IDED9_MARK, IDED8_MARK,
+	IDED7_MARK, IDED6_MARK, IDED5_MARK, IDED4_MARK,
+	IDED3_MARK, IDED2_MARK, IDED1_MARK, IDED0_MARK,
+	DIRECTION_MARK, EXBUF_ENB_MARK, IDERST_MARK, IODACK_MARK,
+	IODREQ_MARK, IDEIORDY_MARK, IDEINT_MARK, IDEIOWR_MARK,
+	IDEIORD_MARK, IDECS1_MARK, IDECS0_MARK, IDEA2_MARK,
+	IDEA1_MARK, IDEA0_MARK,
+	PINMUX_MARK_END,
+};
+
+static pinmux_enum_t pinmux_data[] = {
+	/* PTA GPIO */
+	PINMUX_DATA(PTA7_DATA, PTA7_IN, PTA7_OUT),
+	PINMUX_DATA(PTA6_DATA, PTA6_IN, PTA6_OUT),
+	PINMUX_DATA(PTA5_DATA, PTA5_IN, PTA5_OUT),
+	PINMUX_DATA(PTA4_DATA, PTA4_IN, PTA4_OUT, PTA4_IN_PU),
+	PINMUX_DATA(PTA3_DATA, PTA3_IN, PTA3_OUT, PTA3_IN_PU),
+	PINMUX_DATA(PTA2_DATA, PTA2_IN, PTA2_OUT, PTA2_IN_PU),
+	PINMUX_DATA(PTA1_DATA, PTA1_IN, PTA1_OUT, PTA1_IN_PU),
+	PINMUX_DATA(PTA0_DATA, PTA0_IN, PTA0_OUT, PTA0_IN_PU),
+
+	/* PTB GPIO */
+	PINMUX_DATA(PTB7_DATA, PTB7_IN, PTB7_OUT),
+	PINMUX_DATA(PTB6_DATA, PTB6_IN, PTB6_OUT),
+	PINMUX_DATA(PTB5_DATA, PTB5_IN, PTB5_OUT),
+	PINMUX_DATA(PTB4_DATA, PTB4_IN, PTB4_OUT),
+	PINMUX_DATA(PTB3_DATA, PTB3_IN, PTB3_OUT),
+	PINMUX_DATA(PTB2_DATA, PTB2_IN, PTB2_OUT, PTB2_IN_PU),
+	PINMUX_DATA(PTB1_DATA, PTB1_IN, PTB1_OUT, PTB1_IN_PU),
+	PINMUX_DATA(PTB0_DATA, PTB0_IN, PTB0_OUT),
+
+	/* PTC GPIO */
+	PINMUX_DATA(PTC7_DATA, PTC7_IN, PTC7_OUT),
+	PINMUX_DATA(PTC6_DATA, PTC6_IN, PTC6_OUT),
+	PINMUX_DATA(PTC5_DATA, PTC5_IN, PTC5_OUT),
+	PINMUX_DATA(PTC4_DATA, PTC4_IN, PTC4_OUT),
+	PINMUX_DATA(PTC3_DATA, PTC3_IN, PTC3_OUT),
+	PINMUX_DATA(PTC2_DATA, PTC2_IN, PTC2_OUT),
+	PINMUX_DATA(PTC1_DATA, PTC1_IN, PTC1_OUT),
+	PINMUX_DATA(PTC0_DATA, PTC0_IN, PTC0_OUT),
+
+	/* PTD GPIO */
+	PINMUX_DATA(PTD7_DATA, PTD7_IN, PTD7_OUT),
+	PINMUX_DATA(PTD6_DATA, PTD6_IN, PTD6_OUT),
+	PINMUX_DATA(PTD5_DATA, PTD5_IN, PTD5_OUT),
+	PINMUX_DATA(PTD4_DATA, PTD4_IN, PTD4_OUT),
+	PINMUX_DATA(PTD3_DATA, PTD3_IN, PTD3_OUT),
+	PINMUX_DATA(PTD2_DATA, PTD2_IN, PTD2_OUT),
+	PINMUX_DATA(PTD1_DATA, PTD1_IN, PTD1_OUT),
+	PINMUX_DATA(PTD0_DATA, PTD0_IN, PTD0_OUT),
+
+	/* PTE GPIO */
+	PINMUX_DATA(PTE5_DATA, PTE5_IN, PTE5_OUT),
+	PINMUX_DATA(PTE4_DATA, PTE4_IN, PTE4_OUT),
+	PINMUX_DATA(PTE3_DATA, PTE3_IN, PTE3_OUT),
+	PINMUX_DATA(PTE2_DATA, PTE2_IN, PTE2_OUT),
+	PINMUX_DATA(PTE1_DATA, PTE1_IN, PTE1_OUT),
+	PINMUX_DATA(PTE0_DATA, PTE0_IN, PTE0_OUT),
+
+	/* PTF GPIO */
+	PINMUX_DATA(PTF7_DATA, PTF7_IN, PTF7_OUT),
+	PINMUX_DATA(PTF6_DATA, PTF6_IN, PTF6_OUT),
+	PINMUX_DATA(PTF5_DATA, PTF5_IN, PTF5_OUT),
+	PINMUX_DATA(PTF4_DATA, PTF4_IN, PTF4_OUT),
+	PINMUX_DATA(PTF3_DATA, PTF3_IN, PTF3_OUT),
+	PINMUX_DATA(PTF2_DATA, PTF2_IN, PTF2_OUT),
+	PINMUX_DATA(PTF1_DATA, PTF1_IN, PTF1_OUT),
+	PINMUX_DATA(PTF0_DATA, PTF0_IN, PTF0_OUT),
+
+	/* PTG GPIO */
+	PINMUX_DATA(PTG5_DATA, PTG5_OUT),
+	PINMUX_DATA(PTG4_DATA, PTG4_OUT),
+	PINMUX_DATA(PTG3_DATA, PTG3_OUT),
+	PINMUX_DATA(PTG2_DATA, PTG2_OUT),
+	PINMUX_DATA(PTG1_DATA, PTG1_OUT),
+	PINMUX_DATA(PTG0_DATA, PTG0_OUT),
+
+	/* PTH GPIO */
+	PINMUX_DATA(PTH7_DATA, PTH7_IN, PTH7_OUT),
+	PINMUX_DATA(PTH6_DATA, PTH6_IN, PTH6_OUT),
+	PINMUX_DATA(PTH5_DATA, PTH5_IN, PTH5_OUT),
+	PINMUX_DATA(PTH4_DATA, PTH4_IN, PTH4_OUT),
+	PINMUX_DATA(PTH3_DATA, PTH3_IN, PTH3_OUT),
+	PINMUX_DATA(PTH2_DATA, PTH2_IN, PTH2_OUT),
+	PINMUX_DATA(PTH1_DATA, PTH1_IN, PTH1_OUT),
+	PINMUX_DATA(PTH0_DATA, PTH0_IN, PTH0_OUT),
+
+	/* PTJ GPIO */
+	PINMUX_DATA(PTJ7_DATA, PTJ7_OUT),
+	PINMUX_DATA(PTJ5_DATA, PTJ5_OUT),
+	PINMUX_DATA(PTJ3_DATA, PTJ3_IN, PTJ3_OUT),
+	PINMUX_DATA(PTJ2_DATA, PTJ2_IN, PTJ2_OUT),
+	PINMUX_DATA(PTJ1_DATA, PTJ1_IN, PTJ1_OUT),
+	PINMUX_DATA(PTJ0_DATA, PTJ0_IN, PTJ0_OUT),
+
+	/* PTK GPIO */
+	PINMUX_DATA(PTK7_DATA, PTK7_IN, PTK7_OUT),
+	PINMUX_DATA(PTK6_DATA, PTK6_IN, PTK6_OUT),
+	PINMUX_DATA(PTK5_DATA, PTK5_IN, PTK5_OUT),
+	PINMUX_DATA(PTK4_DATA, PTK4_IN, PTK4_OUT),
+	PINMUX_DATA(PTK3_DATA, PTK3_IN, PTK3_OUT),
+	PINMUX_DATA(PTK2_DATA, PTK2_IN, PTK2_OUT),
+	PINMUX_DATA(PTK1_DATA, PTK1_IN, PTK1_OUT),
+	PINMUX_DATA(PTK0_DATA, PTK0_IN, PTK0_OUT),
+
+	/* PTL GPIO */
+	PINMUX_DATA(PTL7_DATA, PTL7_IN, PTL7_OUT),
+	PINMUX_DATA(PTL6_DATA, PTL6_IN, PTL6_OUT),
+	PINMUX_DATA(PTL5_DATA, PTL5_IN, PTL5_OUT),
+	PINMUX_DATA(PTL4_DATA, PTL4_IN, PTL4_OUT),
+	PINMUX_DATA(PTL3_DATA, PTL3_IN, PTL3_OUT),
+	PINMUX_DATA(PTL2_DATA, PTL2_IN, PTL2_OUT),
+	PINMUX_DATA(PTL1_DATA, PTL1_IN, PTL1_OUT),
+	PINMUX_DATA(PTL0_DATA, PTL0_IN, PTL0_OUT),
+
+	/* PTM GPIO */
+	PINMUX_DATA(PTM7_DATA, PTM7_IN, PTM7_OUT),
+	PINMUX_DATA(PTM6_DATA, PTM6_IN, PTM6_OUT),
+	PINMUX_DATA(PTM5_DATA, PTM5_IN, PTM5_OUT),
+	PINMUX_DATA(PTM4_DATA, PTM4_IN, PTM4_OUT),
+	PINMUX_DATA(PTM3_DATA, PTM3_IN, PTM3_OUT),
+	PINMUX_DATA(PTM2_DATA, PTM2_IN, PTM2_OUT),
+	PINMUX_DATA(PTM1_DATA, PTM1_IN, PTM1_OUT),
+	PINMUX_DATA(PTM0_DATA, PTM0_IN, PTM0_OUT),
+
+	/* PTN GPIO */
+	PINMUX_DATA(PTN7_DATA, PTN7_IN, PTN7_OUT),
+	PINMUX_DATA(PTN6_DATA, PTN6_IN, PTN6_OUT),
+	PINMUX_DATA(PTN5_DATA, PTN5_IN, PTN5_OUT),
+	PINMUX_DATA(PTN4_DATA, PTN4_IN, PTN4_OUT),
+	PINMUX_DATA(PTN3_DATA, PTN3_IN, PTN3_OUT),
+	PINMUX_DATA(PTN2_DATA, PTN2_IN, PTN2_OUT),
+	PINMUX_DATA(PTN1_DATA, PTN1_IN, PTN1_OUT),
+	PINMUX_DATA(PTN0_DATA, PTN0_IN, PTN0_OUT),
+
+	/* PTQ GPIO */
+	PINMUX_DATA(PTQ3_DATA, PTQ3_IN),
+	PINMUX_DATA(PTQ2_DATA, PTQ2_IN),
+	PINMUX_DATA(PTQ1_DATA, PTQ1_IN),
+	PINMUX_DATA(PTQ0_DATA, PTQ0_IN),
+
+	/* PTR GPIO */
+	PINMUX_DATA(PTR7_DATA, PTR7_IN, PTR7_OUT),
+	PINMUX_DATA(PTR6_DATA, PTR6_IN, PTR6_OUT),
+	PINMUX_DATA(PTR5_DATA, PTR5_IN, PTR5_OUT),
+	PINMUX_DATA(PTR4_DATA, PTR4_IN, PTR4_OUT),
+	PINMUX_DATA(PTR3_DATA, PTR3_IN),
+	PINMUX_DATA(PTR2_DATA, PTR2_IN, PTR2_IN_PU),
+	PINMUX_DATA(PTR1_DATA, PTR1_IN, PTR1_OUT),
+	PINMUX_DATA(PTR0_DATA, PTR0_IN, PTR0_OUT),
+
+	/* PTS GPIO */
+	PINMUX_DATA(PTS7_DATA, PTS7_IN, PTS7_OUT),
+	PINMUX_DATA(PTS6_DATA, PTS6_IN, PTS6_OUT),
+	PINMUX_DATA(PTS5_DATA, PTS5_IN, PTS5_OUT),
+	PINMUX_DATA(PTS4_DATA, PTS4_IN, PTS4_OUT),
+	PINMUX_DATA(PTS3_DATA, PTS3_IN, PTS3_OUT),
+	PINMUX_DATA(PTS2_DATA, PTS2_IN, PTS2_OUT),
+	PINMUX_DATA(PTS1_DATA, PTS1_IN, PTS1_OUT),
+	PINMUX_DATA(PTS0_DATA, PTS0_IN, PTS0_OUT),
+
+	/* PTT GPIO */
+	PINMUX_DATA(PTT5_DATA, PTT5_IN, PTT5_OUT),
+	PINMUX_DATA(PTT4_DATA, PTT4_IN, PTT4_OUT),
+	PINMUX_DATA(PTT3_DATA, PTT3_IN, PTT3_OUT),
+	PINMUX_DATA(PTT2_DATA, PTT2_IN, PTT2_OUT),
+	PINMUX_DATA(PTT1_DATA, PTT1_IN, PTT1_OUT),
+	PINMUX_DATA(PTT0_DATA, PTT0_IN, PTT0_OUT),
+
+	/* PTU GPIO */
+	PINMUX_DATA(PTU5_DATA, PTU5_IN, PTU5_OUT),
+	PINMUX_DATA(PTU4_DATA, PTU4_IN, PTU4_OUT),
+	PINMUX_DATA(PTU3_DATA, PTU3_IN, PTU3_OUT),
+	PINMUX_DATA(PTU2_DATA, PTU2_IN, PTU2_OUT),
+	PINMUX_DATA(PTU1_DATA, PTU1_IN, PTU1_OUT),
+	PINMUX_DATA(PTU0_DATA, PTU0_IN, PTU0_OUT),
+
+	/* PTV GPIO */
+	PINMUX_DATA(PTV7_DATA, PTV7_IN, PTV7_OUT),
+	PINMUX_DATA(PTV6_DATA, PTV6_IN, PTV6_OUT),
+	PINMUX_DATA(PTV5_DATA, PTV5_IN, PTV5_OUT),
+	PINMUX_DATA(PTV4_DATA, PTV4_IN, PTV4_OUT),
+	PINMUX_DATA(PTV3_DATA, PTV3_IN, PTV3_OUT),
+	PINMUX_DATA(PTV2_DATA, PTV2_IN, PTV2_OUT),
+	PINMUX_DATA(PTV1_DATA, PTV1_IN, PTV1_OUT),
+	PINMUX_DATA(PTV0_DATA, PTV0_IN, PTV0_OUT),
+
+	/* PTW GPIO */
+	PINMUX_DATA(PTW7_DATA, PTW7_IN, PTW7_OUT),
+	PINMUX_DATA(PTW6_DATA, PTW6_IN, PTW6_OUT),
+	PINMUX_DATA(PTW5_DATA, PTW5_IN, PTW5_OUT),
+	PINMUX_DATA(PTW4_DATA, PTW4_IN, PTW4_OUT),
+	PINMUX_DATA(PTW3_DATA, PTW3_IN, PTW3_OUT),
+	PINMUX_DATA(PTW2_DATA, PTW2_IN, PTW2_OUT),
+	PINMUX_DATA(PTW1_DATA, PTW1_IN, PTW1_OUT),
+	PINMUX_DATA(PTW0_DATA, PTW0_IN, PTW0_OUT),
+
+	/* PTX GPIO */
+	PINMUX_DATA(PTX7_DATA, PTX7_IN, PTX7_OUT),
+	PINMUX_DATA(PTX6_DATA, PTX6_IN, PTX6_OUT),
+	PINMUX_DATA(PTX5_DATA, PTX5_IN, PTX5_OUT),
+	PINMUX_DATA(PTX4_DATA, PTX4_IN, PTX4_OUT),
+	PINMUX_DATA(PTX3_DATA, PTX3_IN, PTX3_OUT),
+	PINMUX_DATA(PTX2_DATA, PTX2_IN, PTX2_OUT),
+	PINMUX_DATA(PTX1_DATA, PTX1_IN, PTX1_OUT),
+	PINMUX_DATA(PTX0_DATA, PTX0_IN, PTX0_OUT),
+
+	/* PTY GPIO */
+	PINMUX_DATA(PTY7_DATA, PTY7_IN, PTY7_OUT),
+	PINMUX_DATA(PTY6_DATA, PTY6_IN, PTY6_OUT),
+	PINMUX_DATA(PTY5_DATA, PTY5_IN, PTY5_OUT),
+	PINMUX_DATA(PTY4_DATA, PTY4_IN, PTY4_OUT),
+	PINMUX_DATA(PTY3_DATA, PTY3_IN, PTY3_OUT),
+	PINMUX_DATA(PTY2_DATA, PTY2_IN, PTY2_OUT),
+	PINMUX_DATA(PTY1_DATA, PTY1_IN, PTY1_OUT),
+	PINMUX_DATA(PTY0_DATA, PTY0_IN, PTY0_OUT),
+
+	/* PTZ GPIO */
+	PINMUX_DATA(PTZ7_DATA, PTZ7_IN, PTZ7_OUT),
+	PINMUX_DATA(PTZ6_DATA, PTZ6_IN, PTZ6_OUT),
+	PINMUX_DATA(PTZ5_DATA, PTZ5_IN, PTZ5_OUT),
+	PINMUX_DATA(PTZ4_DATA, PTZ4_IN, PTZ4_OUT),
+	PINMUX_DATA(PTZ3_DATA, PTZ3_IN, PTZ3_OUT),
+	PINMUX_DATA(PTZ2_DATA, PTZ2_IN, PTZ2_OUT),
+	PINMUX_DATA(PTZ1_DATA, PTZ1_IN, PTZ1_OUT),
+	PINMUX_DATA(PTZ0_DATA, PTZ0_IN, PTZ0_OUT),
+
+	/* PTA FN */
+	PINMUX_DATA(D23_MARK, PSA15_PSA14_FN1, PTA7_FN),
+	PINMUX_DATA(KEYOUT2_MARK, PSA15_PSA14_FN2, PTA7_FN),
+	PINMUX_DATA(D22_MARK, PSA15_PSA14_FN1, PTA6_FN),
+	PINMUX_DATA(KEYOUT1_MARK, PSA15_PSA14_FN2, PTA6_FN),
+	PINMUX_DATA(D21_MARK, PSA15_PSA14_FN1, PTA5_FN),
+	PINMUX_DATA(KEYOUT0_MARK, PSA15_PSA14_FN2, PTA5_FN),
+	PINMUX_DATA(D20_MARK, PSA15_PSA14_FN1, PTA4_FN),
+	PINMUX_DATA(KEYIN4_MARK, PSA15_PSA14_FN2, PTA4_FN),
+	PINMUX_DATA(D19_MARK, PSA15_PSA14_FN1, PTA3_FN),
+	PINMUX_DATA(KEYIN3_MARK, PSA15_PSA14_FN2, PTA3_FN),
+	PINMUX_DATA(D18_MARK, PSA15_PSA14_FN1, PTA2_FN),
+	PINMUX_DATA(KEYIN2_MARK, PSA15_PSA14_FN2, PTA2_FN),
+	PINMUX_DATA(D17_MARK, PSA15_PSA14_FN1, PTA1_FN),
+	PINMUX_DATA(KEYIN1_MARK, PSA15_PSA14_FN2, PTA1_FN),
+	PINMUX_DATA(D16_MARK, PSA15_PSA14_FN1, PTA0_FN),
+	PINMUX_DATA(KEYIN0_MARK, PSA15_PSA14_FN2, PTA0_FN),
+
+	/* PTB FN */
+	PINMUX_DATA(D31_MARK, PTB7_FN),
+	PINMUX_DATA(D30_MARK, PTB6_FN),
+	PINMUX_DATA(D29_MARK, PTB5_FN),
+	PINMUX_DATA(D28_MARK, PTB4_FN),
+	PINMUX_DATA(D27_MARK, PTB3_FN),
+	PINMUX_DATA(D26_MARK, PSA15_PSA14_FN1, PTB2_FN),
+	PINMUX_DATA(KEYOUT5_IN5_MARK, PSA15_PSA14_FN2, PTB2_FN),
+	PINMUX_DATA(D25_MARK, PSA15_PSA14_FN1, PTB1_FN),
+	PINMUX_DATA(KEYOUT4_IN6_MARK, PSA15_PSA14_FN2, PTB1_FN),
+	PINMUX_DATA(D24_MARK, PSA15_PSA14_FN1, PTB0_FN),
+	PINMUX_DATA(KEYOUT3_MARK, PSA15_PSA14_FN2, PTB0_FN),
+
+	/* PTC FN */
+	PINMUX_DATA(IDED15_MARK, PSA11_PSA10_FN1, PTC7_FN),
+	PINMUX_DATA(SDHI1CD_MARK, PSA11_PSA10_FN2, PTC7_FN),
+	PINMUX_DATA(IDED14_MARK, PSA11_PSA10_FN1, PTC6_FN),
+	PINMUX_DATA(SDHI1WP_MARK, PSA11_PSA10_FN2, PTC6_FN),
+	PINMUX_DATA(IDED13_MARK, PSA11_PSA10_FN1, PTC5_FN),
+	PINMUX_DATA(SDHI1D3_MARK, PSA11_PSA10_FN2, PTC5_FN),
+	PINMUX_DATA(IDED12_MARK, PSA11_PSA10_FN1, PTC4_FN),
+	PINMUX_DATA(SDHI1D2_MARK, PSA11_PSA10_FN2, PTC4_FN),
+	PINMUX_DATA(IDED11_MARK, PSA11_PSA10_FN1, PTC3_FN),
+	PINMUX_DATA(SDHI1D1_MARK, PSA11_PSA10_FN2, PTC3_FN),
+	PINMUX_DATA(IDED10_MARK, PSA11_PSA10_FN1, PTC2_FN),
+	PINMUX_DATA(SDHI1D0_MARK, PSA11_PSA10_FN2, PTC2_FN),
+	PINMUX_DATA(IDED9_MARK, PSA11_PSA10_FN1, PTC1_FN),
+	PINMUX_DATA(SDHI1CMD_MARK, PSA11_PSA10_FN2, PTC1_FN),
+	PINMUX_DATA(IDED8_MARK, PSA11_PSA10_FN1, PTC0_FN),
+	PINMUX_DATA(SDHI1CLK_MARK, PSA11_PSA10_FN2, PTC0_FN),
+
+	/* PTD FN */
+	PINMUX_DATA(IDED7_MARK, PSA11_PSA10_FN1, PTD7_FN),
+	PINMUX_DATA(SDHI0CD_PTD_MARK, PSA11_PSA10_FN2, PTD7_FN),
+	PINMUX_DATA(IDED6_MARK, PSA11_PSA10_FN1, PTD6_FN),
+	PINMUX_DATA(SDHI0WP_PTD_MARK, PSA11_PSA10_FN2, PTD6_FN),
+	PINMUX_DATA(IDED5_MARK, PSA11_PSA10_FN1, PTD5_FN),
+	PINMUX_DATA(SDHI0D3_PTD_MARK, PSA11_PSA10_FN2, PTD5_FN),
+	PINMUX_DATA(IDED4_MARK, PSA11_PSA10_FN1, PTD4_FN),
+	PINMUX_DATA(SDHI0D2_PTD_MARK, PSA11_PSA10_FN2, PTD4_FN),
+	PINMUX_DATA(IDED3_MARK, PSA11_PSA10_FN1, PTD3_FN),
+	PINMUX_DATA(SDHI0D1_PTD_MARK, PSA11_PSA10_FN2, PTD3_FN),
+	PINMUX_DATA(IDED2_MARK, PSA11_PSA10_FN1, PTD2_FN),
+	PINMUX_DATA(SDHI0D0_PTD_MARK, PSA11_PSA10_FN2, PTD2_FN),
+	PINMUX_DATA(IDED1_MARK, PSA11_PSA10_FN1, PTD1_FN),
+	PINMUX_DATA(SDHI0CMD_PTD_MARK, PSA11_PSA10_FN2, PTD1_FN),
+	PINMUX_DATA(IDED0_MARK, PSA11_PSA10_FN1, PTD0_FN),
+	PINMUX_DATA(SDHI0CLK_PTD_MARK, PSA11_PSA10_FN2, PTD0_FN),
+
+	/* PTE FN */
+	PINMUX_DATA(DIRECTION_MARK, PSA11_PSA10_FN1, PTE5_FN),
+	PINMUX_DATA(SCIF5_PTE_SCK_MARK, PSA11_PSA10_FN2, PTE5_FN),
+	PINMUX_DATA(EXBUF_ENB_MARK, PSA11_PSA10_FN1, PTE4_FN),
+	PINMUX_DATA(SCIF5_PTE_RXD_MARK, PSA11_PSA10_FN2, PTE4_FN),
+	PINMUX_DATA(IDERST_MARK, PSA11_PSA10_FN1, PTE3_FN),
+	PINMUX_DATA(SCIF5_PTE_TXD_MARK, PSA11_PSA10_FN2, PTE3_FN),
+	PINMUX_DATA(IODACK_MARK, PSA11_PSA10_FN1, PTE2_FN),
+	PINMUX_DATA(SCIF4_PTE_SCK_MARK, PSA11_PSA10_FN2, PTE2_FN),
+	PINMUX_DATA(IODREQ_MARK, PSA11_PSA10_FN1, PTE1_FN),
+	PINMUX_DATA(SCIF4_PTE_RXD_MARK, PSA11_PSA10_FN2, PTE1_FN),
+	PINMUX_DATA(IDEIORDY_MARK, PSA11_PSA10_FN1, PTE0_FN),
+	PINMUX_DATA(SCIF4_PTE_TXD_MARK, PSA11_PSA10_FN2, PTE0_FN),
+
+	/* PTF FN */
+	PINMUX_DATA(IDEINT_MARK, PTF7_FN),
+	PINMUX_DATA(IDEIOWR_MARK, PSA5_PSA4_FN1, PTF6_FN),
+	PINMUX_DATA(MSIOF0_PTF_SS2_MARK, PSA5_PSA4_FN2, PTF6_FN),
+	PINMUX_DATA(MSIOF0_PTF_RSYNC_MARK, PSA5_PSA4_FN3, PTF6_FN),
+	PINMUX_DATA(IDEIORD_MARK, PSA5_PSA4_FN1, PTF5_FN),
+	PINMUX_DATA(MSIOF0_PTF_SS1_MARK, PSA5_PSA4_FN2, PTF5_FN),
+	PINMUX_DATA(MSIOF0_PTF_RSCK_MARK, PSA5_PSA4_FN3, PTF5_FN),
+	PINMUX_DATA(IDECS1_MARK, PSA11_PSA10_FN1, PTF4_FN),
+	PINMUX_DATA(MSIOF0_PTF_TSYNC_MARK, PSA11_PSA10_FN2, PTF4_FN),
+	PINMUX_DATA(IDECS0_MARK, PSA11_PSA10_FN1, PTF3_FN),
+	PINMUX_DATA(MSIOF0_PTF_TSCK_MARK, PSA11_PSA10_FN2, PTF3_FN),
+	PINMUX_DATA(IDEA2_MARK, PSA11_PSA10_FN1, PTF2_FN),
+	PINMUX_DATA(MSIOF0_PTF_RXD_MARK, PSA11_PSA10_FN2, PTF2_FN),
+	PINMUX_DATA(IDEA1_MARK, PSA11_PSA10_FN1, PTF1_FN),
+	PINMUX_DATA(MSIOF0_PTF_TXD_MARK, PSA11_PSA10_FN2, PTF1_FN),
+	PINMUX_DATA(IDEA0_MARK, PSA11_PSA10_FN1, PTF0_FN),
+	PINMUX_DATA(MSIOF0_PTF_MCK_MARK, PSA11_PSA10_FN2, PTF0_FN),
+
+	/* PTG FN */
+	PINMUX_DATA(AUDCK_MARK, PTG5_FN),
+	PINMUX_DATA(AUDSYNC_MARK, PTG4_FN),
+	PINMUX_DATA(AUDATA3_MARK, PSA3_PSA2_FN1, PTG3_FN),
+	PINMUX_DATA(TPUTO3_MARK, PSA3_PSA2_FN2, PTG3_FN),
+	PINMUX_DATA(AUDATA2_MARK, PSA3_PSA2_FN1, PTG2_FN),
+	PINMUX_DATA(TPUTO2_MARK, PSA3_PSA2_FN2, PTG2_FN),
+	PINMUX_DATA(AUDATA1_MARK, PSA3_PSA2_FN1, PTG1_FN),
+	PINMUX_DATA(TPUTO1_MARK, PSA3_PSA2_FN2, PTG1_FN),
+	PINMUX_DATA(AUDATA0_MARK, PSA3_PSA2_FN1, PTG0_FN),
+	PINMUX_DATA(TPUTO0_MARK, PSA3_PSA2_FN2, PTG0_FN),
+
+	/* PTG FN */
+	PINMUX_DATA(LCDVCPWC_MARK, PTH7_FN),
+	PINMUX_DATA(LCDRD_MARK, PSB15_PSB14_FN1, PTH6_FN),
+	PINMUX_DATA(DV_CLKI_MARK, PSB15_PSB14_FN2, PTH6_FN),
+	PINMUX_DATA(LCDVSYN_MARK, PSB15_PSB14_FN1, PTH5_FN),
+	PINMUX_DATA(DV_CLK_MARK, PSB15_PSB14_FN2, PTH5_FN),
+	PINMUX_DATA(LCDDISP_MARK, PSB13_PSB12_LCDC_RGB, PTH4_FN),
+	PINMUX_DATA(LCDRS_MARK, PSB13_PSB12_LCDC_SYS, PTH4_FN),
+	PINMUX_DATA(LCDHSYN_MARK, PSB13_PSB12_LCDC_RGB, PTH3_FN),
+	PINMUX_DATA(LCDCS_MARK, PSB13_PSB12_LCDC_SYS, PTH3_FN),
+	PINMUX_DATA(LCDDON_MARK, PTH2_FN),
+	PINMUX_DATA(LCDDCK_MARK, PSB13_PSB12_LCDC_RGB, PTH1_FN),
+	PINMUX_DATA(LCDWR_MARK, PSB13_PSB12_LCDC_SYS, PTH1_FN),
+	PINMUX_DATA(LCDVEPWC_MARK, PTH0_FN),
+
+	/* PTJ FN */
+	PINMUX_DATA(STATUS0_MARK, PTJ7_FN),
+	PINMUX_DATA(PDSTATUS_MARK, PTJ5_FN),
+	PINMUX_DATA(A25_MARK, PTJ3_FN),
+	PINMUX_DATA(A24_MARK, PTJ2_FN),
+	PINMUX_DATA(A23_MARK, PTJ1_FN),
+	PINMUX_DATA(A22_MARK, PTJ0_FN),
+
+	/* PTK FN */
+	PINMUX_DATA(SIUAFCK_MARK, PTK7_FN),
+	PINMUX_DATA(SIUAILR_MARK, PSB9_PSB8_FN1, PTK6_FN),
+	PINMUX_DATA(MSIOF1_SS2_MARK, PSB9_PSB8_FN2, PTK6_FN),
+	PINMUX_DATA(MSIOF1_RSYNC_MARK, PSB9_PSB8_FN3, PTK6_FN),
+	PINMUX_DATA(SIUAIBT_MARK, PSB9_PSB8_FN1, PTK5_FN),
+	PINMUX_DATA(MSIOF1_SS1_MARK, PSB9_PSB8_FN2, PTK5_FN),
+	PINMUX_DATA(MSIOF1_RSCK_MARK, PSB9_PSB8_FN3, PTK5_FN),
+	PINMUX_DATA(SIUAISLD_MARK, PSB7_PSB6_FN1, PTK4_FN),
+	PINMUX_DATA(MSIOF1_RXD_MARK, PSB7_PSB6_FN2, PTK4_FN),
+	PINMUX_DATA(SIUAOLR_MARK, PSB7_PSB6_FN1, PTK3_FN),
+	PINMUX_DATA(MSIOF1_TSYNC_MARK, PSB7_PSB6_FN2, PTK3_FN),
+	PINMUX_DATA(SIUAOBT_MARK, PSB7_PSB6_FN1, PTK2_FN),
+	PINMUX_DATA(MSIOF1_TSCK_MARK, PSB7_PSB6_FN2, PTK2_FN),
+	PINMUX_DATA(SIUAOSLD_MARK, PSB7_PSB6_FN1, PTK1_FN),
+	PINMUX_DATA(MSIOF1_RXD_MARK, PSB7_PSB6_FN2, PTK1_FN),
+	PINMUX_DATA(SIUAMCK_MARK, PSB7_PSB6_FN1, PTK0_FN),
+	PINMUX_DATA(MSIOF1_MCK_MARK, PSB7_PSB6_FN2, PTK0_FN),
+
+	/* PTL FN */
+	PINMUX_DATA(LCDD15_MARK, PSB5_PSB4_FN1, PTL7_FN),
+	PINMUX_DATA(DV_D15_MARK, PSB5_PSB4_FN2, PTL7_FN),
+	PINMUX_DATA(LCDD14_MARK, PSB5_PSB4_FN1, PTL6_FN),
+	PINMUX_DATA(DV_D14_MARK, PSB5_PSB4_FN2, PTL6_FN),
+	PINMUX_DATA(LCDD13_MARK, PSB5_PSB4_FN1, PTL5_FN),
+	PINMUX_DATA(DV_D13_MARK, PSB5_PSB4_FN2, PTL5_FN),
+	PINMUX_DATA(LCDD12_MARK, PSB5_PSB4_FN1, PTL4_FN),
+	PINMUX_DATA(DV_D12_MARK, PSB5_PSB4_FN2, PTL4_FN),
+	PINMUX_DATA(LCDD11_MARK, PSB5_PSB4_FN1, PTL3_FN),
+	PINMUX_DATA(DV_D11_MARK, PSB5_PSB4_FN2, PTL3_FN),
+	PINMUX_DATA(LCDD10_MARK, PSB5_PSB4_FN1, PTL2_FN),
+	PINMUX_DATA(DV_D10_MARK, PSB5_PSB4_FN2, PTL2_FN),
+	PINMUX_DATA(LCDD9_MARK, PSB5_PSB4_FN1, PTL1_FN),
+	PINMUX_DATA(DV_D9_MARK, PSB5_PSB4_FN2, PTL1_FN),
+	PINMUX_DATA(LCDD8_MARK, PSB5_PSB4_FN1, PTL0_FN),
+	PINMUX_DATA(DV_D8_MARK, PSB5_PSB4_FN2, PTL0_FN),
+
+	/* PTM FN */
+	PINMUX_DATA(LCDD7_MARK, PSB5_PSB4_FN1, PTM7_FN),
+	PINMUX_DATA(DV_D7_MARK, PSB5_PSB4_FN2, PTM7_FN),
+	PINMUX_DATA(LCDD6_MARK, PSB5_PSB4_FN1, PTM6_FN),
+	PINMUX_DATA(DV_D6_MARK, PSB5_PSB4_FN2, PTM6_FN),
+	PINMUX_DATA(LCDD5_MARK, PSB5_PSB4_FN1, PTM5_FN),
+	PINMUX_DATA(DV_D5_MARK, PSB5_PSB4_FN2, PTM5_FN),
+	PINMUX_DATA(LCDD4_MARK, PSB5_PSB4_FN1, PTM4_FN),
+	PINMUX_DATA(DV_D4_MARK, PSB5_PSB4_FN2, PTM4_FN),
+	PINMUX_DATA(LCDD3_MARK, PSB5_PSB4_FN1, PTM3_FN),
+	PINMUX_DATA(DV_D3_MARK, PSB5_PSB4_FN2, PTM3_FN),
+	PINMUX_DATA(LCDD2_MARK, PSB5_PSB4_FN1, PTM2_FN),
+	PINMUX_DATA(DV_D2_MARK, PSB5_PSB4_FN2, PTM2_FN),
+	PINMUX_DATA(LCDD1_MARK, PSB5_PSB4_FN1, PTM1_FN),
+	PINMUX_DATA(DV_D1_MARK, PSB5_PSB4_FN2, PTM1_FN),
+	PINMUX_DATA(LCDD0_MARK, PSB5_PSB4_FN1, PTM0_FN),
+	PINMUX_DATA(DV_D0_MARK, PSB5_PSB4_FN2, PTM0_FN),
+
+	/* PTN FN */
+	PINMUX_DATA(LCDD23_MARK, PSB3_PSB2_FN1, PTN7_FN),
+	PINMUX_DATA(SCIF5_PTN_SCK_MARK, PSB3_PSB2_FN2, PTN7_FN),
+	PINMUX_DATA(LCDD22_MARK, PSB3_PSB2_FN1, PTN6_FN),
+	PINMUX_DATA(SCIF5_PTN_RXD_MARK, PSB3_PSB2_FN2, PTN6_FN),
+	PINMUX_DATA(LCDD21_MARK, PSB3_PSB2_FN1, PTN5_FN),
+	PINMUX_DATA(SCIF5_PTN_TXD_MARK, PSB3_PSB2_FN2, PTN5_FN),
+	PINMUX_DATA(LCDD20_MARK, PSB3_PSB2_FN1, PTN4_FN),
+	PINMUX_DATA(SCIF4_PTN_SCK_MARK, PSB3_PSB2_FN2, PTN4_FN),
+	PINMUX_DATA(LCDD19_MARK, PSB3_PSB2_FN1, PTN3_FN),
+	PINMUX_DATA(SCIF4_PTN_RXD_MARK, PSB3_PSB2_FN2, PTN3_FN),
+	PINMUX_DATA(LCDD18_MARK, PSB3_PSB2_FN1, PTN2_FN),
+	PINMUX_DATA(SCIF4_PTN_TXD_MARK, PSB3_PSB2_FN2, PTN2_FN),
+	PINMUX_DATA(LCDD17_MARK, PSB5_PSB4_FN1, PTN1_FN),
+	PINMUX_DATA(DV_VSYNC_MARK, PSB5_PSB4_FN2, PTN1_FN),
+	PINMUX_DATA(LCDD16_MARK, PSB5_PSB4_FN1, PTN0_FN),
+	PINMUX_DATA(DV_HSYNC_MARK, PSB5_PSB4_FN2, PTN0_FN),
+
+	/* PTQ FN */
+	PINMUX_DATA(AN3_MARK, PTQ3_FN),
+	PINMUX_DATA(AN2_MARK, PTQ2_FN),
+	PINMUX_DATA(AN1_MARK, PTQ1_FN),
+	PINMUX_DATA(AN0_MARK, PTQ0_FN),
+
+	/* PTR FN */
+	PINMUX_DATA(CS6B_CE1B_MARK, PTR7_FN),
+	PINMUX_DATA(CS6A_CE2B_MARK, PTR6_FN),
+	PINMUX_DATA(CS5B_CE1A_MARK, PTR5_FN),
+	PINMUX_DATA(CS5A_CE2A_MARK, PTR4_FN),
+	PINMUX_DATA(IOIS16_MARK, PSA13_PSA12_FN1, PTR3_FN),
+	PINMUX_DATA(LCDLCLK_PTR_MARK, PSA13_PSA12_FN2, PTR3_FN),
+	PINMUX_DATA(WAIT_MARK, PTR2_FN),
+	PINMUX_DATA(WE3_ICIOWR_MARK, PTR1_FN),
+	PINMUX_DATA(WE2_ICIORD_MARK, PTR0_FN),
+
+	/* PTS FN */
+	PINMUX_DATA(SCIF1_PTS_SCK_MARK, PSC15_PSC14_FN1, PTS7_FN),
+	PINMUX_DATA(SDHI0CD_PTS_MARK, PSC15_PSC14_FN2, PTS7_FN),
+	PINMUX_DATA(SCIF1_PTS_RXD_MARK, PSC15_PSC14_FN1, PTS6_FN),
+	PINMUX_DATA(SDHI0WP_PTS_MARK, PSC15_PSC14_FN2, PTS6_FN),
+	PINMUX_DATA(SCIF1_PTS_TXD_MARK, PSC15_PSC14_FN1, PTS5_FN),
+	PINMUX_DATA(SDHI0D3_PTS_MARK, PSC15_PSC14_FN2, PTS5_FN),
+	PINMUX_DATA(SCIF3_PTS_CTS_MARK, PSC15_PSC14_FN1, PTS4_FN),
+	PINMUX_DATA(SDHI0D2_PTS_MARK, PSC15_PSC14_FN2, PTS4_FN),
+	PINMUX_DATA(SCIF3_PTS_RTS_MARK, PSC15_PSC14_FN1, PTS3_FN),
+	PINMUX_DATA(SDHI0D1_PTS_MARK, PSC15_PSC14_FN2, PTS3_FN),
+	PINMUX_DATA(SCIF3_PTS_SCK_MARK, PSC15_PSC14_FN1, PTS2_FN),
+	PINMUX_DATA(SDHI0D0_PTS_MARK, PSC15_PSC14_FN2, PTS2_FN),
+	PINMUX_DATA(SCIF3_PTS_RXD_MARK, PSC15_PSC14_FN1, PTS1_FN),
+	PINMUX_DATA(SDHI0CMD_PTS_MARK, PSC15_PSC14_FN2, PTS1_FN),
+	PINMUX_DATA(SCIF3_PTS_TXD_MARK, PSC15_PSC14_FN1, PTS0_FN),
+	PINMUX_DATA(SDHI0CLK_PTS_MARK, PSC15_PSC14_FN2, PTS0_FN),
+
+	/* PTT FN */
+	PINMUX_DATA(SCIF0_PTT_SCK_MARK, PSC13_PSC12_FN1, PTT5_FN),
+	PINMUX_DATA(MSIOF0_PTT_TSCK_MARK, PSC13_PSC12_FN2, PTT5_FN),
+	PINMUX_DATA(SCIF0_PTT_RXD_MARK, PSC13_PSC12_FN1, PTT4_FN),
+	PINMUX_DATA(MSIOF0_PTT_RXD_MARK, PSC13_PSC12_FN2, PTT4_FN),
+	PINMUX_DATA(SCIF0_PTT_TXD_MARK, PSC13_PSC12_FN1, PTT3_FN),
+	PINMUX_DATA(MSIOF0_PTT_TXD_MARK, PSC13_PSC12_FN2, PTT3_FN),
+	PINMUX_DATA(SCIF2_PTT_SCK_MARK, PSC11_PSC10_FN1, PTT2_FN),
+	PINMUX_DATA(MSIOF0_PTT_TSYNC_MARK, PSC11_PSC10_FN2, PTT2_FN),
+	PINMUX_DATA(SCIF2_PTT_RXD_MARK, PSC11_PSC10_FN1, PTT1_FN),
+	PINMUX_DATA(MSIOF0_PTT_SS1_MARK, PSC11_PSC10_FN2, PTT1_FN),
+	PINMUX_DATA(MSIOF0_PTT_RSCK_MARK, PSC11_PSC10_FN3, PTT1_FN),
+	PINMUX_DATA(SCIF2_PTT_TXD_MARK, PSC11_PSC10_FN1, PTT0_FN),
+	PINMUX_DATA(MSIOF0_PTT_SS2_MARK, PSC11_PSC10_FN2, PTT0_FN),
+	PINMUX_DATA(MSIOF0_PTT_RSYNC_MARK, PSC11_PSC10_FN3, PTT0_FN),
+
+	/* PTU FN */
+	PINMUX_DATA(FCDE_MARK, PSC9_PSC8_FN1, PTU5_FN),
+	PINMUX_DATA(SCIF0_PTU_SCK_MARK, PSC9_PSC8_FN2, PTU5_FN),
+	PINMUX_DATA(FSC_MARK, PSC9_PSC8_FN1, PTU4_FN),
+	PINMUX_DATA(SCIF0_PTU_RXD_MARK, PSC9_PSC8_FN2, PTU4_FN),
+	PINMUX_DATA(FWE_MARK, PSC9_PSC8_FN1, PTU3_FN),
+	PINMUX_DATA(SCIF0_PTU_TXD_MARK, PSC9_PSC8_FN2, PTU3_FN),
+	PINMUX_DATA(FOE_MARK, PSC7_PSC6_FN1, PTU2_FN),
+	PINMUX_DATA(SCIF2_PTU_SCK_MARK, PSC7_PSC6_FN2, PTU2_FN),
+	PINMUX_DATA(VIO_VD2_MARK, PSC7_PSC6_FN3, PTU2_FN),
+	PINMUX_DATA(FRB_MARK, PSC7_PSC6_FN1, PTU1_FN),
+	PINMUX_DATA(SCIF2_PTU_RXD_MARK, PSC7_PSC6_FN2, PTU1_FN),
+	PINMUX_DATA(VIO_CLK2_MARK, PSC7_PSC6_FN3, PTU1_FN),
+	PINMUX_DATA(FCE_MARK, PSC7_PSC6_FN1, PTU0_FN),
+	PINMUX_DATA(SCIF2_PTU_TXD_MARK, PSC7_PSC6_FN2, PTU0_FN),
+	PINMUX_DATA(VIO_HD2_MARK, PSC7_PSC6_FN3, PTU0_FN),
+
+	/* PTV FN */
+	PINMUX_DATA(NAF7_MARK, PSC7_PSC6_FN1, PTV7_FN),
+	PINMUX_DATA(SCIF1_PTV_SCK_MARK, PSC7_PSC6_FN2, PTV7_FN),
+	PINMUX_DATA(VIO_D15_MARK, PSC7_PSC6_FN3, PTV7_FN),
+	PINMUX_DATA(NAF6_MARK, PSC7_PSC6_FN1, PTV6_FN),
+	PINMUX_DATA(SCIF1_PTV_RXD_MARK, PSC7_PSC6_FN2, PTV6_FN),
+	PINMUX_DATA(VIO_D14_MARK, PSC7_PSC6_FN3, PTV6_FN),
+	PINMUX_DATA(NAF5_MARK, PSC7_PSC6_FN1, PTV5_FN),
+	PINMUX_DATA(SCIF1_PTV_TXD_MARK, PSC7_PSC6_FN2, PTV5_FN),
+	PINMUX_DATA(VIO_D13_MARK, PSC7_PSC6_FN3, PTV5_FN),
+	PINMUX_DATA(NAF4_MARK, PSC7_PSC6_FN1, PTV4_FN),
+	PINMUX_DATA(SCIF3_PTV_CTS_MARK, PSC7_PSC6_FN2, PTV4_FN),
+	PINMUX_DATA(VIO_D12_MARK, PSC7_PSC6_FN3, PTV4_FN),
+	PINMUX_DATA(NAF3_MARK, PSC7_PSC6_FN1, PTV3_FN),
+	PINMUX_DATA(SCIF3_PTV_RTS_MARK, PSC7_PSC6_FN2, PTV3_FN),
+	PINMUX_DATA(VIO_D11_MARK, PSC7_PSC6_FN3, PTV3_FN),
+	PINMUX_DATA(NAF2_MARK, PSC7_PSC6_FN1, PTV2_FN),
+	PINMUX_DATA(SCIF3_PTV_SCK_MARK, PSC7_PSC6_FN2, PTV2_FN),
+	PINMUX_DATA(VIO_D10_MARK, PSC7_PSC6_FN3, PTV2_FN),
+	PINMUX_DATA(NAF1_MARK, PSC7_PSC6_FN1, PTV1_FN),
+	PINMUX_DATA(SCIF3_PTV_RXD_MARK, PSC7_PSC6_FN2, PTV1_FN),
+	PINMUX_DATA(VIO_D9_MARK, PSC7_PSC6_FN3, PTV1_FN),
+	PINMUX_DATA(NAF0_MARK, PSC7_PSC6_FN1, PTV0_FN),
+	PINMUX_DATA(SCIF3_PTV_TXD_MARK, PSC7_PSC6_FN2, PTV0_FN),
+	PINMUX_DATA(VIO_D8_MARK, PSC7_PSC6_FN3, PTV0_FN),
+
+	/* PTW FN */
+	PINMUX_DATA(IRQ7_MARK, PTW7_FN),
+	PINMUX_DATA(IRQ6_MARK, PTW6_FN),
+	PINMUX_DATA(IRQ5_MARK, PTW5_FN),
+	PINMUX_DATA(IRQ4_MARK, PSD15_PSD14_FN1, PTW4_FN),
+	PINMUX_DATA(LCDLCLK_PTW_MARK, PSD15_PSD14_FN2, PTW4_FN),
+	PINMUX_DATA(IRQ3_MARK, PSD13_PSD12_FN1, PTW3_FN),
+	PINMUX_DATA(ADTRG_MARK, PSD13_PSD12_FN2, PTW3_FN),
+	PINMUX_DATA(IRQ2_MARK, PSD11_PSD10_FN1, PTW2_FN),
+	PINMUX_DATA(BS_MARK, PSD11_PSD10_FN2, PTW2_FN),
+	PINMUX_DATA(VIO_CKO_MARK, PSD11_PSD10_FN3, PTW2_FN),
+	PINMUX_DATA(IRQ1_MARK, PSD9_PSD8_FN1, PTW1_FN),
+	PINMUX_DATA(SIUAISPD_MARK, PSD9_PSD8_FN2, PTW1_FN),
+	PINMUX_DATA(IRQ0_MARK, PSD7_PSD6_FN1, PTW0_FN),
+	PINMUX_DATA(SIUAOSPD_MARK, PSD7_PSD6_FN2, PTW0_FN),
+
+	/* PTX FN */
+	PINMUX_DATA(DACK1_MARK, PTX7_FN),
+	PINMUX_DATA(DREQ1_MARK, PSD3_PSD2_FN1, PTX6_FN),
+	PINMUX_DATA(MSIOF0_PTX_MCK_MARK, PSD3_PSD2_FN2, PTX6_FN),
+	PINMUX_DATA(DACK1_MARK, PTX5_FN),
+	PINMUX_DATA(IRDA_OUT_MARK, PSD5_PSD4_FN2, PTX5_FN),
+	PINMUX_DATA(DREQ1_MARK, PTX4_FN),
+	PINMUX_DATA(IRDA_IN_MARK, PSD5_PSD4_FN2, PTX4_FN),
+	PINMUX_DATA(TS0_SDAT_MARK, PTX3_FN),
+	PINMUX_DATA(TS0_SCK_MARK, PTX2_FN),
+	PINMUX_DATA(TS0_SDEN_MARK, PTX1_FN),
+	PINMUX_DATA(TS0_SPSYNC_MARK, PTX0_FN),
+
+	/* PTY FN */
+	PINMUX_DATA(VIO_D7_MARK, PTY7_FN),
+	PINMUX_DATA(VIO_D6_MARK, PTY6_FN),
+	PINMUX_DATA(VIO_D5_MARK, PTY5_FN),
+	PINMUX_DATA(VIO_D4_MARK, PTY4_FN),
+	PINMUX_DATA(VIO_D3_MARK, PTY3_FN),
+	PINMUX_DATA(VIO_D2_MARK, PTY2_FN),
+	PINMUX_DATA(VIO_D1_MARK, PTY1_FN),
+	PINMUX_DATA(VIO_D0_MARK, PTY0_FN),
+
+	/* PTZ FN */
+	PINMUX_DATA(SIUBOBT_MARK, PTZ7_FN),
+	PINMUX_DATA(SIUBOLR_MARK, PTZ6_FN),
+	PINMUX_DATA(SIUBOSLD_MARK, PTZ5_FN),
+	PINMUX_DATA(SIUBMCK_MARK, PTZ4_FN),
+	PINMUX_DATA(VIO_FLD_MARK, PSD1_PSD0_FN1, PTZ3_FN),
+	PINMUX_DATA(SIUBFCK_MARK, PSD1_PSD0_FN2, PTZ3_FN),
+	PINMUX_DATA(VIO_HD1_MARK, PSD1_PSD0_FN1, PTZ2_FN),
+	PINMUX_DATA(SIUBILR_MARK, PSD1_PSD0_FN2, PTZ2_FN),
+	PINMUX_DATA(VIO_VD1_MARK, PSD1_PSD0_FN1, PTZ1_FN),
+	PINMUX_DATA(SIUBIBT_MARK, PSD1_PSD0_FN2, PTZ1_FN),
+	PINMUX_DATA(VIO_CLK1_MARK, PSD1_PSD0_FN1, PTZ0_FN),
+	PINMUX_DATA(SIUBISLD_MARK, PSD1_PSD0_FN2, PTZ0_FN),
+};
+
+static struct pinmux_gpio pinmux_gpios[] = {
+	/* PTA */
+	PINMUX_GPIO(GPIO_PTA7, PTA7_DATA),
+	PINMUX_GPIO(GPIO_PTA6, PTA6_DATA),
+	PINMUX_GPIO(GPIO_PTA5, PTA5_DATA),
+	PINMUX_GPIO(GPIO_PTA4, PTA4_DATA),
+	PINMUX_GPIO(GPIO_PTA3, PTA3_DATA),
+	PINMUX_GPIO(GPIO_PTA2, PTA2_DATA),
+	PINMUX_GPIO(GPIO_PTA1, PTA1_DATA),
+	PINMUX_GPIO(GPIO_PTA0, PTA0_DATA),
+
+	/* PTB */
+	PINMUX_GPIO(GPIO_PTB7, PTB7_DATA),
+	PINMUX_GPIO(GPIO_PTB6, PTB6_DATA),
+	PINMUX_GPIO(GPIO_PTB5, PTB5_DATA),
+	PINMUX_GPIO(GPIO_PTB4, PTB4_DATA),
+	PINMUX_GPIO(GPIO_PTB3, PTB3_DATA),
+	PINMUX_GPIO(GPIO_PTB2, PTB2_DATA),
+	PINMUX_GPIO(GPIO_PTB1, PTB1_DATA),
+	PINMUX_GPIO(GPIO_PTB0, PTB0_DATA),
+
+	/* PTC */
+	PINMUX_GPIO(GPIO_PTC7, PTC7_DATA),
+	PINMUX_GPIO(GPIO_PTC6, PTC6_DATA),
+	PINMUX_GPIO(GPIO_PTC5, PTC5_DATA),
+	PINMUX_GPIO(GPIO_PTC4, PTC4_DATA),
+	PINMUX_GPIO(GPIO_PTC3, PTC3_DATA),
+	PINMUX_GPIO(GPIO_PTC2, PTC2_DATA),
+	PINMUX_GPIO(GPIO_PTC1, PTC1_DATA),
+	PINMUX_GPIO(GPIO_PTC0, PTC0_DATA),
+
+	/* PTD */
+	PINMUX_GPIO(GPIO_PTD7, PTD7_DATA),
+	PINMUX_GPIO(GPIO_PTD6, PTD6_DATA),
+	PINMUX_GPIO(GPIO_PTD5, PTD5_DATA),
+	PINMUX_GPIO(GPIO_PTD4, PTD4_DATA),
+	PINMUX_GPIO(GPIO_PTD3, PTD3_DATA),
+	PINMUX_GPIO(GPIO_PTD2, PTD2_DATA),
+	PINMUX_GPIO(GPIO_PTD1, PTD1_DATA),
+	PINMUX_GPIO(GPIO_PTD0, PTD0_DATA),
+
+	/* PTE */
+	PINMUX_GPIO(GPIO_PTE5, PTE5_DATA),
+	PINMUX_GPIO(GPIO_PTE4, PTE4_DATA),
+	PINMUX_GPIO(GPIO_PTE3, PTE3_DATA),
+	PINMUX_GPIO(GPIO_PTE2, PTE2_DATA),
+	PINMUX_GPIO(GPIO_PTE1, PTE1_DATA),
+	PINMUX_GPIO(GPIO_PTE0, PTE0_DATA),
+
+	/* PTF */
+	PINMUX_GPIO(GPIO_PTF7, PTF7_DATA),
+	PINMUX_GPIO(GPIO_PTF6, PTF6_DATA),
+	PINMUX_GPIO(GPIO_PTF5, PTF5_DATA),
+	PINMUX_GPIO(GPIO_PTF4, PTF4_DATA),
+	PINMUX_GPIO(GPIO_PTF3, PTF3_DATA),
+	PINMUX_GPIO(GPIO_PTF2, PTF2_DATA),
+	PINMUX_GPIO(GPIO_PTF1, PTF1_DATA),
+	PINMUX_GPIO(GPIO_PTF0, PTF0_DATA),
+
+	/* PTG */
+	PINMUX_GPIO(GPIO_PTG5, PTG5_DATA),
+	PINMUX_GPIO(GPIO_PTG4, PTG4_DATA),
+	PINMUX_GPIO(GPIO_PTG3, PTG3_DATA),
+	PINMUX_GPIO(GPIO_PTG2, PTG2_DATA),
+	PINMUX_GPIO(GPIO_PTG1, PTG1_DATA),
+	PINMUX_GPIO(GPIO_PTG0, PTG0_DATA),
+
+	/* PTH */
+	PINMUX_GPIO(GPIO_PTH7, PTH7_DATA),
+	PINMUX_GPIO(GPIO_PTH6, PTH6_DATA),
+	PINMUX_GPIO(GPIO_PTH5, PTH5_DATA),
+	PINMUX_GPIO(GPIO_PTH4, PTH4_DATA),
+	PINMUX_GPIO(GPIO_PTH3, PTH3_DATA),
+	PINMUX_GPIO(GPIO_PTH2, PTH2_DATA),
+	PINMUX_GPIO(GPIO_PTH1, PTH1_DATA),
+	PINMUX_GPIO(GPIO_PTH0, PTH0_DATA),
+
+	/* PTJ */
+	PINMUX_GPIO(GPIO_PTJ7, PTJ7_DATA),
+	PINMUX_GPIO(GPIO_PTJ5, PTJ5_DATA),
+	PINMUX_GPIO(GPIO_PTJ3, PTJ3_DATA),
+	PINMUX_GPIO(GPIO_PTJ2, PTJ2_DATA),
+	PINMUX_GPIO(GPIO_PTJ1, PTJ1_DATA),
+	PINMUX_GPIO(GPIO_PTJ0, PTJ0_DATA),
+
+	/* PTK */
+	PINMUX_GPIO(GPIO_PTK7, PTK7_DATA),
+	PINMUX_GPIO(GPIO_PTK6, PTK6_DATA),
+	PINMUX_GPIO(GPIO_PTK5, PTK5_DATA),
+	PINMUX_GPIO(GPIO_PTK4, PTK4_DATA),
+	PINMUX_GPIO(GPIO_PTK3, PTK3_DATA),
+	PINMUX_GPIO(GPIO_PTK2, PTK2_DATA),
+	PINMUX_GPIO(GPIO_PTK1, PTK1_DATA),
+	PINMUX_GPIO(GPIO_PTK0, PTK0_DATA),
+
+	/* PTL */
+	PINMUX_GPIO(GPIO_PTL7, PTL7_DATA),
+	PINMUX_GPIO(GPIO_PTL6, PTL6_DATA),
+	PINMUX_GPIO(GPIO_PTL5, PTL5_DATA),
+	PINMUX_GPIO(GPIO_PTL4, PTL4_DATA),
+	PINMUX_GPIO(GPIO_PTL3, PTL3_DATA),
+	PINMUX_GPIO(GPIO_PTL2, PTL2_DATA),
+	PINMUX_GPIO(GPIO_PTL1, PTL1_DATA),
+	PINMUX_GPIO(GPIO_PTL0, PTL0_DATA),
+
+	/* PTM */
+	PINMUX_GPIO(GPIO_PTM7, PTM7_DATA),
+	PINMUX_GPIO(GPIO_PTM6, PTM6_DATA),
+	PINMUX_GPIO(GPIO_PTM5, PTM5_DATA),
+	PINMUX_GPIO(GPIO_PTM4, PTM4_DATA),
+	PINMUX_GPIO(GPIO_PTM3, PTM3_DATA),
+	PINMUX_GPIO(GPIO_PTM2, PTM2_DATA),
+	PINMUX_GPIO(GPIO_PTM1, PTM1_DATA),
+	PINMUX_GPIO(GPIO_PTM0, PTM0_DATA),
+
+	/* PTN */
+	PINMUX_GPIO(GPIO_PTN7, PTN7_DATA),
+	PINMUX_GPIO(GPIO_PTN6, PTN6_DATA),
+	PINMUX_GPIO(GPIO_PTN5, PTN5_DATA),
+	PINMUX_GPIO(GPIO_PTN4, PTN4_DATA),
+	PINMUX_GPIO(GPIO_PTN3, PTN3_DATA),
+	PINMUX_GPIO(GPIO_PTN2, PTN2_DATA),
+	PINMUX_GPIO(GPIO_PTN1, PTN1_DATA),
+	PINMUX_GPIO(GPIO_PTN0, PTN0_DATA),
+
+	/* PTQ */
+	PINMUX_GPIO(GPIO_PTQ3, PTQ3_DATA),
+	PINMUX_GPIO(GPIO_PTQ2, PTQ2_DATA),
+	PINMUX_GPIO(GPIO_PTQ1, PTQ1_DATA),
+	PINMUX_GPIO(GPIO_PTQ0, PTQ0_DATA),
+
+	/* PTR */
+	PINMUX_GPIO(GPIO_PTR7, PTR7_DATA),
+	PINMUX_GPIO(GPIO_PTR6, PTR6_DATA),
+	PINMUX_GPIO(GPIO_PTR5, PTR5_DATA),
+	PINMUX_GPIO(GPIO_PTR4, PTR4_DATA),
+	PINMUX_GPIO(GPIO_PTR3, PTR3_DATA),
+	PINMUX_GPIO(GPIO_PTR2, PTR2_DATA),
+	PINMUX_GPIO(GPIO_PTR1, PTR1_DATA),
+	PINMUX_GPIO(GPIO_PTR0, PTR0_DATA),
+
+	/* PTS */
+	PINMUX_GPIO(GPIO_PTS7, PTS7_DATA),
+	PINMUX_GPIO(GPIO_PTS6, PTS6_DATA),
+	PINMUX_GPIO(GPIO_PTS5, PTS5_DATA),
+	PINMUX_GPIO(GPIO_PTS4, PTS4_DATA),
+	PINMUX_GPIO(GPIO_PTS3, PTS3_DATA),
+	PINMUX_GPIO(GPIO_PTS2, PTS2_DATA),
+	PINMUX_GPIO(GPIO_PTS1, PTS1_DATA),
+	PINMUX_GPIO(GPIO_PTS0, PTS0_DATA),
+
+	/* PTT */
+	PINMUX_GPIO(GPIO_PTT5, PTT5_DATA),
+	PINMUX_GPIO(GPIO_PTT4, PTT4_DATA),
+	PINMUX_GPIO(GPIO_PTT3, PTT3_DATA),
+	PINMUX_GPIO(GPIO_PTT2, PTT2_DATA),
+	PINMUX_GPIO(GPIO_PTT1, PTT1_DATA),
+	PINMUX_GPIO(GPIO_PTT0, PTT0_DATA),
+
+	/* PTU */
+	PINMUX_GPIO(GPIO_PTU5, PTU5_DATA),
+	PINMUX_GPIO(GPIO_PTU4, PTU4_DATA),
+	PINMUX_GPIO(GPIO_PTU3, PTU3_DATA),
+	PINMUX_GPIO(GPIO_PTU2, PTU2_DATA),
+	PINMUX_GPIO(GPIO_PTU1, PTU1_DATA),
+	PINMUX_GPIO(GPIO_PTU0, PTU0_DATA),
+
+	/* PTV */
+	PINMUX_GPIO(GPIO_PTV7, PTV7_DATA),
+	PINMUX_GPIO(GPIO_PTV6, PTV6_DATA),
+	PINMUX_GPIO(GPIO_PTV5, PTV5_DATA),
+	PINMUX_GPIO(GPIO_PTV4, PTV4_DATA),
+	PINMUX_GPIO(GPIO_PTV3, PTV3_DATA),
+	PINMUX_GPIO(GPIO_PTV2, PTV2_DATA),
+	PINMUX_GPIO(GPIO_PTV1, PTV1_DATA),
+	PINMUX_GPIO(GPIO_PTV0, PTV0_DATA),
+
+	/* PTW */
+	PINMUX_GPIO(GPIO_PTW7, PTW7_DATA),
+	PINMUX_GPIO(GPIO_PTW6, PTW6_DATA),
+	PINMUX_GPIO(GPIO_PTW5, PTW5_DATA),
+	PINMUX_GPIO(GPIO_PTW4, PTW4_DATA),
+	PINMUX_GPIO(GPIO_PTW3, PTW3_DATA),
+	PINMUX_GPIO(GPIO_PTW2, PTW2_DATA),
+	PINMUX_GPIO(GPIO_PTW1, PTW1_DATA),
+	PINMUX_GPIO(GPIO_PTW0, PTW0_DATA),
+
+	/* PTX */
+	PINMUX_GPIO(GPIO_PTX7, PTX7_DATA),
+	PINMUX_GPIO(GPIO_PTX6, PTX6_DATA),
+	PINMUX_GPIO(GPIO_PTX5, PTX5_DATA),
+	PINMUX_GPIO(GPIO_PTX4, PTX4_DATA),
+	PINMUX_GPIO(GPIO_PTX3, PTX3_DATA),
+	PINMUX_GPIO(GPIO_PTX2, PTX2_DATA),
+	PINMUX_GPIO(GPIO_PTX1, PTX1_DATA),
+	PINMUX_GPIO(GPIO_PTX0, PTX0_DATA),
+
+	/* PTY */
+	PINMUX_GPIO(GPIO_PTY7, PTY7_DATA),
+	PINMUX_GPIO(GPIO_PTY6, PTY6_DATA),
+	PINMUX_GPIO(GPIO_PTY5, PTY5_DATA),
+	PINMUX_GPIO(GPIO_PTY4, PTY4_DATA),
+	PINMUX_GPIO(GPIO_PTY3, PTY3_DATA),
+	PINMUX_GPIO(GPIO_PTY2, PTY2_DATA),
+	PINMUX_GPIO(GPIO_PTY1, PTY1_DATA),
+	PINMUX_GPIO(GPIO_PTY0, PTY0_DATA),
+
+	/* PTZ */
+	PINMUX_GPIO(GPIO_PTZ7, PTZ7_DATA),
+	PINMUX_GPIO(GPIO_PTZ6, PTZ6_DATA),
+	PINMUX_GPIO(GPIO_PTZ5, PTZ5_DATA),
+	PINMUX_GPIO(GPIO_PTZ4, PTZ4_DATA),
+	PINMUX_GPIO(GPIO_PTZ3, PTZ3_DATA),
+	PINMUX_GPIO(GPIO_PTZ2, PTZ2_DATA),
+	PINMUX_GPIO(GPIO_PTZ1, PTZ1_DATA),
+	PINMUX_GPIO(GPIO_PTZ0, PTZ0_DATA),
+
+	/* SCIF0 */
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTT_TXD, SCIF0_PTT_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTT_RXD, SCIF0_PTT_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTT_SCK, SCIF0_PTT_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTU_TXD, SCIF0_PTU_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTU_RXD, SCIF0_PTU_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTU_SCK, SCIF0_PTU_SCK_MARK),
+
+	/* SCIF1 */
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTS_TXD, SCIF1_PTS_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTS_RXD, SCIF1_PTS_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTS_SCK, SCIF1_PTS_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTV_TXD, SCIF1_PTV_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTV_RXD, SCIF1_PTV_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTV_SCK, SCIF1_PTV_SCK_MARK),
+
+	/* SCIF2 */
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTT_TXD, SCIF2_PTT_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTT_RXD, SCIF2_PTT_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTT_SCK, SCIF2_PTT_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTU_TXD, SCIF2_PTU_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTU_RXD, SCIF2_PTU_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTU_SCK, SCIF2_PTU_SCK_MARK),
+
+	/* SCIF3 */
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTS_TXD, SCIF3_PTS_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTS_RXD, SCIF3_PTS_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTS_SCK, SCIF3_PTS_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTS_RTS, SCIF3_PTS_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTS_CTS, SCIF3_PTS_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTV_TXD, SCIF3_PTV_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTV_RXD, SCIF3_PTV_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTV_SCK, SCIF3_PTV_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTV_RTS, SCIF3_PTV_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTV_CTS, SCIF3_PTV_CTS_MARK),
+
+	/* SCIF4 */
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTE_TXD, SCIF4_PTE_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTE_RXD, SCIF4_PTE_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTE_SCK, SCIF4_PTE_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTN_TXD, SCIF4_PTN_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTN_RXD, SCIF4_PTN_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTN_SCK, SCIF4_PTN_SCK_MARK),
+
+	/* SCIF5 */
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTE_TXD, SCIF5_PTE_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTE_RXD, SCIF5_PTE_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTE_SCK, SCIF5_PTE_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTN_TXD, SCIF5_PTN_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTN_RXD, SCIF5_PTN_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTN_SCK, SCIF5_PTN_SCK_MARK),
+
+	/* CEU */
+	PINMUX_GPIO(GPIO_FN_VIO_D15, VIO_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D14, VIO_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D13, VIO_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D12, VIO_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D11, VIO_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D10, VIO_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D9, VIO_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D8, VIO_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D7, VIO_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D6, VIO_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D5, VIO_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D4, VIO_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D3, VIO_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D2, VIO_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D1, VIO_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D0, VIO_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CLK1, VIO_CLK1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_VD1, VIO_VD1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_HD1, VIO_HD1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_FLD, VIO_FLD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CKO, VIO_CKO_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_VD2, VIO_VD2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_HD2, VIO_HD2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CLK2, VIO_CLK2_MARK),
+
+	/* LCDC */
+	PINMUX_GPIO(GPIO_FN_LCDD23, LCDD23_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD22, LCDD22_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD21, LCDD21_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD20, LCDD20_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD19, LCDD19_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD18, LCDD18_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD17, LCDD17_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD16, LCDD16_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD15, LCDD15_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD14, LCDD14_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD13, LCDD13_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD12, LCDD12_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD11, LCDD11_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD10, LCDD10_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD9, LCDD9_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD8, LCDD8_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD7, LCDD7_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD6, LCDD6_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD5, LCDD5_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD4, LCDD4_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD3, LCDD3_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD2, LCDD2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD1, LCDD1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD0, LCDD0_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDLCLK_PTR, LCDLCLK_PTR_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDLCLK_PTW, LCDLCLK_PTW_MARK),
+	/* Main LCD */
+	PINMUX_GPIO(GPIO_FN_LCDDON, LCDDON_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVCPWC, LCDVCPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVEPWC, LCDVEPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVSYN, LCDVSYN_MARK),
+	/* Main LCD - RGB Mode */
+	PINMUX_GPIO(GPIO_FN_LCDDCK, LCDDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDHSYN, LCDHSYN_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDDISP, LCDDISP_MARK),
+	/* Main LCD - SYS Mode */
+	PINMUX_GPIO(GPIO_FN_LCDRS, LCDRS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDCS, LCDCS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDWR, LCDWR_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDRD, LCDRD_MARK),
+
+	/* IRQ */
+	PINMUX_GPIO(GPIO_FN_IRQ0, IRQ0_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1, IRQ1_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2, IRQ2_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3, IRQ3_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4, IRQ4_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ5, IRQ5_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ6, IRQ6_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ7, IRQ7_MARK),
+
+	/* AUD */
+	PINMUX_GPIO(GPIO_FN_AUDCK, AUDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDSYNC, AUDSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA3, AUDATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA2, AUDATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA1, AUDATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA0, AUDATA0_MARK),
+
+	/* SDHI0 (PTD) */
+	PINMUX_GPIO(GPIO_FN_SDHI0CD_PTD, SDHI0CD_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0WP_PTD, SDHI0WP_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D3_PTD, SDHI0D3_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D2_PTD, SDHI0D2_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D1_PTD, SDHI0D1_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D0_PTD, SDHI0D0_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CMD_PTD, SDHI0CMD_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CLK_PTD, SDHI0CLK_PTD_MARK),
+
+	/* SDHI0 (PTS) */
+	PINMUX_GPIO(GPIO_FN_SDHI0CD_PTS, SDHI0CD_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0WP_PTS, SDHI0WP_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D3_PTS, SDHI0D3_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D2_PTS, SDHI0D2_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D1_PTS, SDHI0D1_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D0_PTS, SDHI0D0_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CMD_PTS, SDHI0CMD_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CLK_PTS, SDHI0CLK_PTS_MARK),
+
+	/* SDHI1 */
+	PINMUX_GPIO(GPIO_FN_SDHI1CD, SDHI1CD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1WP, SDHI1WP_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D3, SDHI1D3_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D2, SDHI1D2_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D1, SDHI1D1_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D0, SDHI1D0_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1CMD, SDHI1CMD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1CLK, SDHI1CLK_MARK),
+
+	/* SIUA */
+	PINMUX_GPIO(GPIO_FN_SIUAFCK, SIUAFCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAILR, SIUAILR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAIBT, SIUAIBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAISLD, SIUAISLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAOLR, SIUAOLR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAOBT, SIUAOBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAOSLD, SIUAOSLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAMCK, SIUAMCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAISPD, SIUAISPD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUOSPD, SIUAOSPD_MARK),
+
+	/* SIUB */
+	PINMUX_GPIO(GPIO_FN_SIUBFCK, SIUBFCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBILR, SIUBILR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBIBT, SIUBIBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBISLD, SIUBISLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBOLR, SIUBOLR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBOBT, SIUBOBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBOSLD, SIUBOSLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBMCK, SIUBMCK_MARK),
+
+	/* IRDA */
+	PINMUX_GPIO(GPIO_FN_IRDA_IN, IRDA_IN_MARK),
+	PINMUX_GPIO(GPIO_FN_IRDA_OUT, IRDA_OUT_MARK),
+
+	/* VOU */
+	PINMUX_GPIO(GPIO_FN_DV_CLKI, DV_CLKI_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_CLK, DV_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_HSYNC, DV_HSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_VSYNC, DV_VSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D15, DV_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D14, DV_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D13, DV_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D12, DV_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D11, DV_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D10, DV_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D9, DV_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D8, DV_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D7, DV_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D6, DV_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D5, DV_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D4, DV_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D3, DV_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D2, DV_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D1, DV_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D0, DV_D0_MARK),
+
+	/* KEYSC */
+	PINMUX_GPIO(GPIO_FN_KEYIN0, KEYIN0_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN1, KEYIN1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN2, KEYIN2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN3, KEYIN3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN4, KEYIN4_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT0, KEYOUT0_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT1, KEYOUT1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT2, KEYOUT2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT3, KEYOUT3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT4_IN6, KEYOUT4_IN6_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT5_IN5, KEYOUT5_IN5_MARK),
+
+	/* MSIOF0 (PTF) */
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_TXD, MSIOF0_PTF_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_RXD, MSIOF0_PTF_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_MCK, MSIOF0_PTF_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_TSYNC, MSIOF0_PTF_TSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_TSCK, MSIOF0_PTF_TSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_RSYNC, MSIOF0_PTF_RSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_RSCK, MSIOF0_PTF_RSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_SS1, MSIOF0_PTF_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_SS2, MSIOF0_PTF_SS2_MARK),
+
+	/* MSIOF0 (PTT+PTX) */
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_TXD, MSIOF0_PTT_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_RXD, MSIOF0_PTT_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTX_MCK, MSIOF0_PTX_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_TSYNC, MSIOF0_PTT_TSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_TSCK, MSIOF0_PTT_TSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_RSYNC, MSIOF0_PTT_RSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_RSCK, MSIOF0_PTT_RSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_SS1, MSIOF0_PTT_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_SS2, MSIOF0_PTT_SS2_MARK),
+
+	/* MSIOF1 */
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TXD, MSIOF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RXD, MSIOF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_MCK, MSIOF1_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TSYNC, MSIOF1_TSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TSCK, MSIOF1_TSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RSYNC, MSIOF1_RSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RSCK, MSIOF1_RSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_SS1, MSIOF1_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_SS2, MSIOF1_SS2_MARK),
+
+	/* TSIF */
+	PINMUX_GPIO(GPIO_FN_TS0_SDAT, TS0_SDAT_MARK),
+	PINMUX_GPIO(GPIO_FN_TS0_SCK, TS0_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_TS0_SDEN, TS0_SDEN_MARK),
+	PINMUX_GPIO(GPIO_FN_TS0_SPSYNC, TS0_SPSYNC_MARK),
+
+	/* FLCTL */
+	PINMUX_GPIO(GPIO_FN_FCE, FCE_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF7, NAF7_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF6, NAF6_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF5, NAF5_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF4, NAF4_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF3, NAF3_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF2, NAF2_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF1, NAF1_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF0, NAF0_MARK),
+	PINMUX_GPIO(GPIO_FN_FCDE, FCDE_MARK),
+	PINMUX_GPIO(GPIO_FN_FOE, FOE_MARK),
+	PINMUX_GPIO(GPIO_FN_FSC, FSC_MARK),
+	PINMUX_GPIO(GPIO_FN_FWE, FWE_MARK),
+	PINMUX_GPIO(GPIO_FN_FRB, FRB_MARK),
+
+	/* DMAC */
+	PINMUX_GPIO(GPIO_FN_DACK1, DACK1_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ1, DREQ1_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK0, DACK0_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ0, DREQ0_MARK),
+
+	/* ADC */
+	PINMUX_GPIO(GPIO_FN_AN3, AN3_MARK),
+	PINMUX_GPIO(GPIO_FN_AN2, AN2_MARK),
+	PINMUX_GPIO(GPIO_FN_AN1, AN1_MARK),
+	PINMUX_GPIO(GPIO_FN_AN0, AN0_MARK),
+	PINMUX_GPIO(GPIO_FN_ADTRG, ADTRG_MARK),
+
+	/* CPG */
+	PINMUX_GPIO(GPIO_FN_STATUS0, STATUS0_MARK),
+	PINMUX_GPIO(GPIO_FN_PDSTATUS, PDSTATUS_MARK),
+
+	/* TPU */
+	PINMUX_GPIO(GPIO_FN_TPUTO0, TPUTO0_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO1, TPUTO1_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO2, TPUTO2_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO3, TPUTO3_MARK),
+
+	/* BSC */
+	PINMUX_GPIO(GPIO_FN_D31, D31_MARK),
+	PINMUX_GPIO(GPIO_FN_D30, D30_MARK),
+	PINMUX_GPIO(GPIO_FN_D29, D29_MARK),
+	PINMUX_GPIO(GPIO_FN_D28, D28_MARK),
+	PINMUX_GPIO(GPIO_FN_D27, D27_MARK),
+	PINMUX_GPIO(GPIO_FN_D26, D26_MARK),
+	PINMUX_GPIO(GPIO_FN_D25, D25_MARK),
+	PINMUX_GPIO(GPIO_FN_D24, D24_MARK),
+	PINMUX_GPIO(GPIO_FN_D23, D23_MARK),
+	PINMUX_GPIO(GPIO_FN_D22, D22_MARK),
+	PINMUX_GPIO(GPIO_FN_D21, D21_MARK),
+	PINMUX_GPIO(GPIO_FN_D20, D20_MARK),
+	PINMUX_GPIO(GPIO_FN_D19, D19_MARK),
+	PINMUX_GPIO(GPIO_FN_D18, D18_MARK),
+	PINMUX_GPIO(GPIO_FN_D17, D17_MARK),
+	PINMUX_GPIO(GPIO_FN_D16, D16_MARK),
+	PINMUX_GPIO(GPIO_FN_IOIS16, IOIS16_MARK),
+	PINMUX_GPIO(GPIO_FN_WAIT, WAIT_MARK),
+	PINMUX_GPIO(GPIO_FN_BS, BS_MARK),
+	PINMUX_GPIO(GPIO_FN_A25, A25_MARK),
+	PINMUX_GPIO(GPIO_FN_A24, A24_MARK),
+	PINMUX_GPIO(GPIO_FN_A23, A23_MARK),
+	PINMUX_GPIO(GPIO_FN_A22, A22_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6B_CE1B, CS6B_CE1B_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6A_CE2B, CS6A_CE2B_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5B_CE1A, CS5B_CE1A_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5A_CE2A, CS5A_CE2A_MARK),
+	PINMUX_GPIO(GPIO_FN_WE3_ICIOWR, WE3_ICIOWR_MARK),
+	PINMUX_GPIO(GPIO_FN_WE2_ICIORD, WE2_ICIORD_MARK),
+
+	/* ATAPI */
+	PINMUX_GPIO(GPIO_FN_IDED15, IDED15_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED14, IDED14_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED13, IDED13_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED12, IDED12_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED11, IDED11_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED10, IDED10_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED9, IDED9_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED8, IDED8_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED7, IDED7_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED6, IDED6_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED5, IDED5_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED4, IDED4_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED3, IDED3_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED2, IDED2_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED1, IDED1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED0, IDED0_MARK),
+	PINMUX_GPIO(GPIO_FN_DIRECTION, DIRECTION_MARK),
+	PINMUX_GPIO(GPIO_FN_EXBUF_ENB, EXBUF_ENB_MARK),
+	PINMUX_GPIO(GPIO_FN_IDERST, IDERST_MARK),
+	PINMUX_GPIO(GPIO_FN_IODACK, IODACK_MARK),
+	PINMUX_GPIO(GPIO_FN_IODREQ, IODREQ_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIORDY, IDEIORDY_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEINT, IDEINT_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIOWR, IDEIOWR_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIORD, IDEIORD_MARK),
+	PINMUX_GPIO(GPIO_FN_IDECS1, IDECS1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDECS0, IDECS0_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA2, IDEA2_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA1, IDEA1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA0, IDEA0_MARK),
+ };
+
+static struct pinmux_cfg_reg pinmux_config_regs[] = {
+	{ PINMUX_CFG_REG("PACR", 0xa4050100, 16, 2) {
+		PTA7_FN, PTA7_OUT, 0, PTA7_IN,
+		PTA6_FN, PTA6_OUT, 0, PTA6_IN,
+		PTA5_FN, PTA5_OUT, 0, PTA5_IN,
+		PTA4_FN, PTA4_OUT, PTA4_IN_PU, PTA4_IN,
+		PTA3_FN, PTA3_OUT, PTA3_IN_PU, PTA3_IN,
+		PTA2_FN, PTA2_OUT, PTA2_IN_PU, PTA2_IN,
+		PTA1_FN, PTA1_OUT, PTA1_IN_PU, PTA1_IN,
+		PTA0_FN, PTA0_OUT, PTA0_IN_PU, PTA0_IN }
+	},
+	{ PINMUX_CFG_REG("PBCR", 0xa4050102, 16, 2) {
+		PTB7_FN, PTB7_OUT, 0, PTB7_IN,
+		PTB6_FN, PTB6_OUT, 0, PTB6_IN,
+		PTB5_FN, PTB5_OUT, 0, PTB5_IN,
+		PTB4_FN, PTB4_OUT, 0, PTB4_IN,
+		PTB3_FN, PTB3_OUT, 0, PTB3_IN,
+		PTB2_FN, PTB2_OUT, PTB2_IN_PU, PTB2_IN,
+		PTB1_FN, PTB1_OUT, PTB1_IN_PU, PTB1_IN,
+		PTB0_FN, PTB0_OUT, 0, PTB0_IN }
+	},
+	{ PINMUX_CFG_REG("PCCR", 0xa4050104, 16, 2) {
+		PTC7_FN, PTC7_OUT, 0, PTC7_IN,
+		PTC6_FN, PTC6_OUT, 0, PTC6_IN,
+		PTC5_FN, PTC5_OUT, 0, PTC5_IN,
+		PTC4_FN, PTC4_OUT, 0, PTC4_IN,
+		PTC3_FN, PTC3_OUT, 0, PTC3_IN,
+		PTC2_FN, PTC2_OUT, 0, PTC2_IN,
+		PTC1_FN, PTC1_OUT, 0, PTC1_IN,
+		PTC0_FN, PTC0_OUT, 0, PTC0_IN }
+	},
+	{ PINMUX_CFG_REG("PDCR", 0xa4050106, 16, 2) {
+		PTD7_FN, PTD7_OUT, 0, PTD7_IN,
+		PTD6_FN, PTD6_OUT, 0, PTD6_IN,
+		PTD5_FN, PTD5_OUT, 0, PTD5_IN,
+		PTD4_FN, PTD4_OUT, 0, PTD4_IN,
+		PTD3_FN, PTD3_OUT, 0, PTD3_IN,
+		PTD2_FN, PTD2_OUT, 0, PTD2_IN,
+		PTD1_FN, PTD1_OUT, 0, PTD1_IN,
+		PTD0_FN, PTD0_OUT, 0, PTD0_IN }
+	},
+	{ PINMUX_CFG_REG("PECR", 0xa4050108, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTE5_FN, PTE5_OUT, 0, PTE5_IN,
+		PTE4_FN, PTE4_OUT, 0, PTE4_IN,
+		PTE3_FN, PTE3_OUT, 0, PTE3_IN,
+		PTE2_FN, PTE2_OUT, 0, PTE2_IN,
+		PTE1_FN, PTE1_OUT, 0, PTE1_IN,
+		PTE0_FN, PTE0_OUT, 0, PTE0_IN }
+	},
+	{ PINMUX_CFG_REG("PFCR", 0xa405010a, 16, 2) {
+		PTF7_FN, PTF7_OUT, 0, PTF7_IN,
+		PTF6_FN, PTF6_OUT, 0, PTF6_IN,
+		PTF5_FN, PTF5_OUT, 0, PTF5_IN,
+		PTF4_FN, PTF4_OUT, 0, PTF4_IN,
+		PTF3_FN, PTF3_OUT, 0, PTF3_IN,
+		PTF2_FN, PTF2_OUT, 0, PTF2_IN,
+		PTF1_FN, PTF1_OUT, 0, PTF1_IN,
+		PTF0_FN, PTF0_OUT, 0, PTF0_IN }
+	},
+	{ PINMUX_CFG_REG("PGCR", 0xa405010c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTG5_FN, PTG5_OUT, 0, 0,
+		PTG4_FN, PTG4_OUT, 0, 0,
+		PTG3_FN, PTG3_OUT, 0, 0,
+		PTG2_FN, PTG2_OUT, 0, 0,
+		PTG1_FN, PTG1_OUT, 0, 0,
+		PTG0_FN, PTG0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PHCR", 0xa405010e, 16, 2) {
+		PTH7_FN, PTH7_OUT, 0, PTH7_IN,
+		PTH6_FN, PTH6_OUT, 0, PTH6_IN,
+		PTH5_FN, PTH5_OUT, 0, PTH5_IN,
+		PTH4_FN, PTH4_OUT, 0, PTH4_IN,
+		PTH3_FN, PTH3_OUT, 0, PTH3_IN,
+		PTH2_FN, PTH2_OUT, 0, PTH2_IN,
+		PTH1_FN, PTH1_OUT, 0, PTH1_IN,
+		PTH0_FN, PTH0_OUT, 0, PTH0_IN }
+	},
+	{ PINMUX_CFG_REG("PJCR", 0xa4050110, 16, 2) {
+		PTJ7_FN, PTJ7_OUT, 0, 0,
+		0, 0, 0, 0,
+		PTJ5_FN, PTJ5_OUT, 0, 0,
+		0, 0, 0, 0,
+		PTJ3_FN, PTJ3_OUT, 0, PTJ3_IN,
+		PTJ2_FN, PTJ2_OUT, 0, PTJ2_IN,
+		PTJ1_FN, PTJ1_OUT, 0, PTJ1_IN,
+		PTJ0_FN, PTJ0_OUT, 0, PTJ0_IN }
+	},
+	{ PINMUX_CFG_REG("PKCR", 0xa4050112, 16, 2) {
+		PTK7_FN, PTK7_OUT, 0, PTK7_IN,
+		PTK6_FN, PTK6_OUT, 0, PTK6_IN,
+		PTK5_FN, PTK5_OUT, 0, PTK5_IN,
+		PTK4_FN, PTK4_OUT, 0, PTK4_IN,
+		PTK3_FN, PTK3_OUT, 0, PTK3_IN,
+		PTK2_FN, PTK2_OUT, 0, PTK2_IN,
+		PTK1_FN, PTK1_OUT, 0, PTK1_IN,
+		PTK0_FN, PTK0_OUT, 0, PTK0_IN }
+	},
+	{ PINMUX_CFG_REG("PLCR", 0xa4050114, 16, 2) {
+		PTL7_FN, PTL7_OUT, 0, PTL7_IN,
+		PTL6_FN, PTL6_OUT, 0, PTL6_IN,
+		PTL5_FN, PTL5_OUT, 0, PTL5_IN,
+		PTL4_FN, PTL4_OUT, 0, PTL4_IN,
+		PTL3_FN, PTL3_OUT, 0, PTL3_IN,
+		PTL2_FN, PTL2_OUT, 0, PTL2_IN,
+		PTL1_FN, PTL1_OUT, 0, PTL1_IN,
+		PTL0_FN, PTL0_OUT, 0, PTL0_IN }
+	},
+	{ PINMUX_CFG_REG("PMCR", 0xa4050116, 16, 2) {
+		PTM7_FN, PTM7_OUT, 0, PTM7_IN,
+		PTM6_FN, PTM6_OUT, 0, PTM6_IN,
+		PTM5_FN, PTM5_OUT, 0, PTM5_IN,
+		PTM4_FN, PTM4_OUT, 0, PTM4_IN,
+		PTM3_FN, PTM3_OUT, 0, PTM3_IN,
+		PTM2_FN, PTM2_OUT, 0, PTM2_IN,
+		PTM1_FN, PTM1_OUT, 0, PTM1_IN,
+		PTM0_FN, PTM0_OUT, 0, PTM0_IN }
+	},
+	{ PINMUX_CFG_REG("PNCR", 0xa4050118, 16, 2) {
+		PTN7_FN, PTN7_OUT, 0, PTN7_IN,
+		PTN6_FN, PTN6_OUT, 0, PTN6_IN,
+		PTN5_FN, PTN5_OUT, 0, PTN5_IN,
+		PTN4_FN, PTN4_OUT, 0, PTN4_IN,
+		PTN3_FN, PTN3_OUT, 0, PTN3_IN,
+		PTN2_FN, PTN2_OUT, 0, PTN2_IN,
+		PTN1_FN, PTN1_OUT, 0, PTN1_IN,
+		PTN0_FN, PTN0_OUT, 0, PTN0_IN }
+	},
+	{ PINMUX_CFG_REG("PQCR", 0xa405011a, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTQ3_FN, 0, 0, PTQ3_IN,
+		PTQ2_FN, 0, 0, PTQ2_IN,
+		PTQ1_FN, 0, 0, PTQ1_IN,
+		PTQ0_FN, 0, 0, PTQ0_IN }
+	},
+	{ PINMUX_CFG_REG("PRCR", 0xa405011c, 16, 2) {
+		PTR7_FN, PTR7_OUT, 0, PTR7_IN,
+		PTR6_FN, PTR6_OUT, 0, PTR6_IN,
+		PTR5_FN, PTR5_OUT, 0, PTR5_IN,
+		PTR4_FN, PTR4_OUT, 0, PTR4_IN,
+		PTR3_FN, 0, 0, PTR3_IN,
+		PTR2_FN, 0, PTR2_IN_PU, PTR2_IN,
+		PTR1_FN, PTR1_OUT, 0, PTR1_IN,
+		PTR0_FN, PTR0_OUT, 0, PTR0_IN }
+	},
+	{ PINMUX_CFG_REG("PSCR", 0xa405011e, 16, 2) {
+		PTS7_FN, PTS7_OUT, 0, PTS7_IN,
+		PTS6_FN, PTS6_OUT, 0, PTS6_IN,
+		PTS5_FN, PTS5_OUT, 0, PTS5_IN,
+		PTS4_FN, PTS4_OUT, 0, PTS4_IN,
+		PTS3_FN, PTS3_OUT, 0, PTS3_IN,
+		PTS2_FN, PTS2_OUT, 0, PTS2_IN,
+		PTS1_FN, PTS1_OUT, 0, PTS1_IN,
+		PTS0_FN, PTS0_OUT, 0, PTS0_IN }
+	},
+	{ PINMUX_CFG_REG("PTCR", 0xa4050140, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTT5_FN, PTT5_OUT, 0, PTT5_IN,
+		PTT4_FN, PTT4_OUT, 0, PTT4_IN,
+		PTT3_FN, PTT3_OUT, 0, PTT3_IN,
+		PTT2_FN, PTT2_OUT, 0, PTT2_IN,
+		PTT1_FN, PTT1_OUT, 0, PTT1_IN,
+		PTT0_FN, PTT0_OUT, 0, PTT0_IN }
+	},
+	{ PINMUX_CFG_REG("PUCR", 0xa4050142, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTU5_FN, PTU5_OUT, 0, PTU5_IN,
+		PTU4_FN, PTU4_OUT, 0, PTU4_IN,
+		PTU3_FN, PTU3_OUT, 0, PTU3_IN,
+		PTU2_FN, PTU2_OUT, 0, PTU2_IN,
+		PTU1_FN, PTU1_OUT, 0, PTU1_IN,
+		PTU0_FN, PTU0_OUT, 0, PTU0_IN }
+	},
+	{ PINMUX_CFG_REG("PVCR", 0xa4050144, 16, 2) {
+		PTV7_FN, PTV7_OUT, 0, PTV7_IN,
+		PTV6_FN, PTV6_OUT, 0, PTV6_IN,
+		PTV5_FN, PTV5_OUT, 0, PTV5_IN,
+		PTV4_FN, PTV4_OUT, 0, PTV4_IN,
+		PTV3_FN, PTV3_OUT, 0, PTV3_IN,
+		PTV2_FN, PTV2_OUT, 0, PTV2_IN,
+		PTV1_FN, PTV1_OUT, 0, PTV1_IN,
+		PTV0_FN, PTV0_OUT, 0, PTV0_IN }
+	},
+	{ PINMUX_CFG_REG("PWCR", 0xa4050146, 16, 2) {
+		PTW7_FN, PTW7_OUT, 0, PTW7_IN,
+		PTW6_FN, PTW6_OUT, 0, PTW6_IN,
+		PTW5_FN, PTW5_OUT, 0, PTW5_IN,
+		PTW4_FN, PTW4_OUT, 0, PTW4_IN,
+		PTW3_FN, PTW3_OUT, 0, PTW3_IN,
+		PTW2_FN, PTW2_OUT, 0, PTW2_IN,
+		PTW1_FN, PTW1_OUT, 0, PTW1_IN,
+		PTW0_FN, PTW0_OUT, 0, PTW0_IN }
+	},
+	{ PINMUX_CFG_REG("PXCR", 0xa4050148, 16, 2) {
+		PTX7_FN, PTX7_OUT, 0, PTX7_IN,
+		PTX6_FN, PTX6_OUT, 0, PTX6_IN,
+		PTX5_FN, PTX5_OUT, 0, PTX5_IN,
+		PTX4_FN, PTX4_OUT, 0, PTX4_IN,
+		PTX3_FN, PTX3_OUT, 0, PTX3_IN,
+		PTX2_FN, PTX2_OUT, 0, PTX2_IN,
+		PTX1_FN, PTX1_OUT, 0, PTX1_IN,
+		PTX0_FN, PTX0_OUT, 0, PTX0_IN }
+	},
+	{ PINMUX_CFG_REG("PYCR", 0xa405014a, 16, 2) {
+		PTY7_FN, PTY7_OUT, 0, PTY7_IN,
+		PTY6_FN, PTY6_OUT, 0, PTY6_IN,
+		PTY5_FN, PTY5_OUT, 0, PTY5_IN,
+		PTY4_FN, PTY4_OUT, 0, PTY4_IN,
+		PTY3_FN, PTY3_OUT, 0, PTY3_IN,
+		PTY2_FN, PTY2_OUT, 0, PTY2_IN,
+		PTY1_FN, PTY1_OUT, 0, PTY1_IN,
+		PTY0_FN, PTY0_OUT, 0, PTY0_IN }
+	},
+	{ PINMUX_CFG_REG("PZCR", 0xa405014c, 16, 2) {
+		PTZ7_FN, PTZ7_OUT, 0, PTZ7_IN,
+		PTZ6_FN, PTZ6_OUT, 0, PTZ6_IN,
+		PTZ5_FN, PTZ5_OUT, 0, PTZ5_IN,
+		PTZ4_FN, PTZ4_OUT, 0, PTZ4_IN,
+		PTZ3_FN, PTZ3_OUT, 0, PTZ3_IN,
+		PTZ2_FN, PTZ2_OUT, 0, PTZ2_IN,
+		PTZ1_FN, PTZ1_OUT, 0, PTZ1_IN,
+		PTZ0_FN, PTZ0_OUT, 0, PTZ0_IN }
+	},
+	{ PINMUX_CFG_REG("PSELA", 0xa405014e, 16, 2) {
+		PSA15_PSA14_FN1, PSA15_PSA14_FN2, 0, 0,
+		PSA13_PSA12_FN1, PSA13_PSA12_FN2, 0, 0,
+		PSA11_PSA10_FN1, PSA11_PSA10_FN2, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PSA5_PSA4_FN1, PSA5_PSA4_FN2, PSA5_PSA4_FN3, 0,
+		PSA3_PSA2_FN1, PSA3_PSA2_FN2, 0, 0,
+		0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSELB", 0xa4050150, 16, 2) {
+		PSB15_PSB14_FN1, PSB15_PSB14_FN2, 0, 0,
+		PSB13_PSB12_LCDC_RGB, PSB13_PSB12_LCDC_SYS, 0, 0,
+		0, 0, 0, 0,
+		PSB9_PSB8_FN1, PSB9_PSB8_FN2, PSB9_PSB8_FN3, 0,
+		PSB7_PSB6_FN1, PSB7_PSB6_FN2, 0, 0,
+		PSB5_PSB4_FN1, PSB5_PSB4_FN2, 0, 0,
+		PSB3_PSB2_FN1, PSB3_PSB2_FN2, 0, 0,
+		0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSELC", 0xa4050152, 16, 2) {
+		PSC15_PSC14_FN1, PSC15_PSC14_FN2, 0, 0,
+		PSC13_PSC12_FN1, PSC13_PSC12_FN2, 0, 0,
+		PSC11_PSC10_FN1, PSC11_PSC10_FN2, PSC11_PSC10_FN3, 0,
+		PSC9_PSC8_FN1, PSC9_PSC8_FN2, 0, 0,
+		PSC7_PSC6_FN1, PSC7_PSC6_FN2, PSC7_PSC6_FN3, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSELD", 0xa4050154, 16, 2) {
+		PSD15_PSD14_FN1, PSD15_PSD14_FN2, 0, 0,
+		PSD13_PSD12_FN1, PSD13_PSD12_FN2, 0, 0,
+		PSD11_PSD10_FN1, PSD11_PSD10_FN2, PSD11_PSD10_FN3, 0,
+		PSD9_PSD8_FN1, PSD9_PSD8_FN2, 0, 0,
+		PSD7_PSD6_FN1, PSD7_PSD6_FN2, 0, 0,
+		PSD5_PSD4_FN1, PSD5_PSD4_FN2, 0, 0,
+		PSD3_PSD2_FN1, PSD3_PSD2_FN2, 0, 0,
+		PSD1_PSD0_FN1, PSD1_PSD0_FN2, 0, 0 }
+	},
+	{}
+};
+
+static struct pinmux_data_reg pinmux_data_regs[] = {
+	{ PINMUX_DATA_REG("PADR", 0xa4050120, 8) {
+		PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+		PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA }
+	},
+	{ PINMUX_DATA_REG("PBDR", 0xa4050122, 8) {
+		PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+		PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA }
+	},
+	{ PINMUX_DATA_REG("PCDR", 0xa4050124, 8) {
+		PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+		PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA }
+	},
+	{ PINMUX_DATA_REG("PDDR", 0xa4050126, 8) {
+		PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+		PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA }
+	},
+	{ PINMUX_DATA_REG("PEDR", 0xa4050128, 8) {
+		0, 0, PTE5_DATA, PTE4_DATA,
+		PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDR", 0xa405012a, 8) {
+		PTF7_DATA, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+		PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA }
+	},
+	{ PINMUX_DATA_REG("PGDR", 0xa405012c, 8) {
+		0, 0, PTG5_DATA, PTG4_DATA,
+		PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA }
+	},
+	{ PINMUX_DATA_REG("PHDR", 0xa405012e, 8) {
+		PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+		PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA }
+	},
+	{ PINMUX_DATA_REG("PJDR", 0xa4050130, 8) {
+		PTJ7_DATA, 0, PTJ5_DATA, 0,
+		PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PKDR", 0xa4050132, 8) {
+		PTK7_DATA, PTK6_DATA, PTK5_DATA, PTK4_DATA,
+		PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA }
+	},
+	{ PINMUX_DATA_REG("PLDR", 0xa4050134, 8) {
+		PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+		PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA }
+	},
+	{ PINMUX_DATA_REG("PMDR", 0xa4050136, 8) {
+		PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+		PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA }
+	},
+	{ PINMUX_DATA_REG("PNDR", 0xa4050138, 8) {
+		PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+		PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA }
+	},
+	{ PINMUX_DATA_REG("PQDR", 0xa405013a, 8) {
+		0, 0, 0, 0,
+		PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PRDR", 0xa405013c, 8) {
+		PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+		PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA }
+	},
+	{ PINMUX_DATA_REG("PSDR", 0xa405013e, 8) {
+		PTS7_DATA, PTS6_DATA, PTS5_DATA, PTS4_DATA,
+		PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA }
+	},
+	{ PINMUX_DATA_REG("PTDR", 0xa4050160, 8) {
+		0, 0, PTT5_DATA, PTT4_DATA,
+		PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA }
+	},
+	{ PINMUX_DATA_REG("PUDR", 0xa4050162, 8) {
+		0, 0, PTU5_DATA, PTU4_DATA,
+		PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA }
+	},
+	{ PINMUX_DATA_REG("PVDR", 0xa4050164, 8) {
+		PTV7_DATA, PTV6_DATA, PTV5_DATA, PTV4_DATA,
+		PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA }
+	},
+	{ PINMUX_DATA_REG("PWDR", 0xa4050166, 8) {
+		PTW7_DATA, PTW6_DATA, PTW5_DATA, PTW4_DATA,
+		PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA }
+	},
+	{ PINMUX_DATA_REG("PXDR", 0xa4050168, 8) {
+		PTX7_DATA, PTX6_DATA, PTX5_DATA, PTX4_DATA,
+		PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA }
+	},
+	{ PINMUX_DATA_REG("PYDR", 0xa405016a, 8) {
+		PTY7_DATA, PTY6_DATA, PTY5_DATA, PTY4_DATA,
+		PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA }
+	},
+	{ PINMUX_DATA_REG("PZDR", 0xa405016c, 8) {
+		PTZ7_DATA, PTZ6_DATA, PTZ5_DATA, PTZ4_DATA,
+		PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA }
+	},
+	{ },
+};
+
+static struct pinmux_info sh7723_pinmux_info = {
+	.name = "sh7723_pfc",
+	.reserved_id = PINMUX_RESERVED,
+	.data = { PINMUX_DATA_BEGIN, PINMUX_DATA_END },
+	.input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END },
+	.input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END },
+	.output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END },
+	.mark = { PINMUX_MARK_BEGIN, PINMUX_MARK_END },
+	.function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END },
+
+	.first_gpio = GPIO_PTA7,
+	.last_gpio = GPIO_FN_IDEA0,
+
+	.gpios = pinmux_gpios,
+	.cfg_regs = pinmux_config_regs,
+	.data_regs = pinmux_data_regs,
+
+	.gpio_data = pinmux_data,
+	.gpio_data_size = ARRAY_SIZE(pinmux_data),
+};
+
+static int __init plat_pinmux_setup(void)
+{
+	return register_pinmux(&sh7723_pinmux_info);
+}
+
+arch_initcall(plat_pinmux_setup);
-- 
GitLab


From 16587c453b90b761c6b883f1d6cc06fe3d455eee Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 8 Oct 2008 20:42:20 +0900
Subject: [PATCH 646/895] sh: Use sh7723 GPIO on AP325RXA board

This patch enables the GPIO code on AP325RXA and converts the code from
register based pinmux configuration to GPIO based pin by pin setup.
While at it 2 LEDs and one switch are added and exported to user space.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/Kconfig          |   1 +
 arch/sh/boards/board-ap325rxa.c | 100 +++++++++++++++++++++++---------
 2 files changed, 75 insertions(+), 26 deletions(-)

diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index 4e37f5c02158..2a059191efef 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -170,6 +170,7 @@ config SH_MIGOR
 config SH_AP325RXA
 	bool "AP-325RXA"
 	depends on CPU_SUBTYPE_SH7723
+	select GENERIC_GPIO
 	help
 	  Renesas "AP-325RXA" support.
 	  Compatible with ALGO SYSTEM CO.,LTD. "AP-320A"
diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index 7ae8dcddfeb4..1e6daa36e5a7 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -18,11 +18,13 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/smc911x.h>
+#include <linux/gpio.h>
 #include <media/soc_camera_platform.h>
 #include <media/sh_mobile_ceu.h>
 #include <video/sh_mobile_lcdc.h>
 #include <asm/io.h>
 #include <asm/clock.h>
+#include <asm/sh7723.h>
 
 static struct smc911x_platdata smc911x_info = {
 	.flags = SMC911X_USE_32BIT,
@@ -109,17 +111,7 @@ static struct platform_device ap325rxa_nor_flash_device = {
 #define FPGA_LCDREG	0xB4100180
 #define FPGA_BKLREG	0xB4100212
 #define FPGA_LCDREG_VAL	0x0018
-#define PORT_PHCR	0xA405010E
-#define PORT_PLCR	0xA4050114
-#define PORT_PMCR	0xA4050116
-#define PORT_PRCR	0xA405011C
-#define PORT_PSCR	0xA405011E
-#define PORT_PZCR	0xA405014C
-#define PORT_HIZCRA	0xA4050158
 #define PORT_MSELCRB	0xA4050182
-#define PORT_PSDR	0xA405013E
-#define PORT_PZDR	0xA405016C
-#define PORT_PSELD	0xA4050154
 
 static void ap320_wvga_power_on(void *board_data)
 {
@@ -129,8 +121,7 @@ static void ap320_wvga_power_on(void *board_data)
 	ctrl_outw(FPGA_LCDREG_VAL, FPGA_LCDREG);
 
 	/* backlight */
-	ctrl_outw((ctrl_inw(PORT_PSCR) & ~0x00C0) | 0x40, PORT_PSCR);
-	ctrl_outb(ctrl_inb(PORT_PSDR) & ~0x08, PORT_PSDR);
+	gpio_set_value(GPIO_PTS3, 0);
 	ctrl_outw(0x100, FPGA_BKLREG);
 }
 
@@ -298,8 +289,77 @@ static struct i2c_board_info __initdata ap325rxa_i2c_devices[] = {
 
 static int __init ap325rxa_devices_setup(void)
 {
-	clk_always_enable("mstp200"); /* LCDC */
-	clk_always_enable("mstp203"); /* CEU */
+	/* LD3 and LD4 LEDs */
+	gpio_request(GPIO_PTX5, NULL); /* RUN */
+	gpio_direction_output(GPIO_PTX5, 1);
+	gpio_export(GPIO_PTX5, 0);
+
+	gpio_request(GPIO_PTX4, NULL); /* INDICATOR */
+	gpio_direction_output(GPIO_PTX4, 0);
+	gpio_export(GPIO_PTX4, 0);
+
+	/* SW1 input */
+	gpio_request(GPIO_PTF7, NULL); /* MODE */
+	gpio_direction_input(GPIO_PTF7);
+	gpio_export(GPIO_PTF7, 0);
+
+	/* LCDC */
+	clk_always_enable("mstp200");
+	gpio_request(GPIO_FN_LCDD15, NULL);
+	gpio_request(GPIO_FN_LCDD14, NULL);
+	gpio_request(GPIO_FN_LCDD13, NULL);
+	gpio_request(GPIO_FN_LCDD12, NULL);
+	gpio_request(GPIO_FN_LCDD11, NULL);
+	gpio_request(GPIO_FN_LCDD10, NULL);
+	gpio_request(GPIO_FN_LCDD9, NULL);
+	gpio_request(GPIO_FN_LCDD8, NULL);
+	gpio_request(GPIO_FN_LCDD7, NULL);
+	gpio_request(GPIO_FN_LCDD6, NULL);
+	gpio_request(GPIO_FN_LCDD5, NULL);
+	gpio_request(GPIO_FN_LCDD4, NULL);
+	gpio_request(GPIO_FN_LCDD3, NULL);
+	gpio_request(GPIO_FN_LCDD2, NULL);
+	gpio_request(GPIO_FN_LCDD1, NULL);
+	gpio_request(GPIO_FN_LCDD0, NULL);
+	gpio_request(GPIO_FN_LCDLCLK_PTR, NULL);
+	gpio_request(GPIO_FN_LCDDCK, NULL);
+	gpio_request(GPIO_FN_LCDVEPWC, NULL);
+	gpio_request(GPIO_FN_LCDVCPWC, NULL);
+	gpio_request(GPIO_FN_LCDVSYN, NULL);
+	gpio_request(GPIO_FN_LCDHSYN, NULL);
+	gpio_request(GPIO_FN_LCDDISP, NULL);
+	gpio_request(GPIO_FN_LCDDON, NULL);
+
+	/* LCD backlight */
+	gpio_request(GPIO_PTS3, NULL);
+	gpio_direction_output(GPIO_PTS3, 1);
+
+	/* CEU */
+	clk_always_enable("mstp203");
+	gpio_request(GPIO_FN_VIO_CLK2, NULL);
+	gpio_request(GPIO_FN_VIO_VD2, NULL);
+	gpio_request(GPIO_FN_VIO_HD2, NULL);
+	gpio_request(GPIO_FN_VIO_FLD, NULL);
+	gpio_request(GPIO_FN_VIO_CKO, NULL);
+	gpio_request(GPIO_FN_VIO_D15, NULL);
+	gpio_request(GPIO_FN_VIO_D14, NULL);
+	gpio_request(GPIO_FN_VIO_D13, NULL);
+	gpio_request(GPIO_FN_VIO_D12, NULL);
+	gpio_request(GPIO_FN_VIO_D11, NULL);
+	gpio_request(GPIO_FN_VIO_D10, NULL);
+	gpio_request(GPIO_FN_VIO_D9, NULL);
+	gpio_request(GPIO_FN_VIO_D8, NULL);
+
+	gpio_request(GPIO_PTZ7, NULL);
+	gpio_direction_output(GPIO_PTZ7, 0); /* OE_CAM */
+	gpio_request(GPIO_PTZ6, NULL);
+	gpio_direction_output(GPIO_PTZ6, 0); /* STBY_CAM */
+	gpio_request(GPIO_PTZ5, NULL);
+	gpio_direction_output(GPIO_PTZ5, 1); /* RST_CAM */
+	gpio_request(GPIO_PTZ4, NULL);
+	gpio_direction_output(GPIO_PTZ4, 0); /* SADDR */
+
+	ctrl_outw(ctrl_inw(PORT_MSELCRB) & ~0x0001, PORT_MSELCRB);
 
 	platform_resource_setup_memory(&ceu_device, "ceu", 4 << 20);
 
@@ -313,18 +373,6 @@ device_initcall(ap325rxa_devices_setup);
 
 static void __init ap325rxa_setup(char **cmdline_p)
 {
-	/* LCDC configuration */
-	ctrl_outw(ctrl_inw(PORT_PHCR) & ~0xffff, PORT_PHCR);
-	ctrl_outw(ctrl_inw(PORT_PLCR) & ~0xffff, PORT_PLCR);
-	ctrl_outw(ctrl_inw(PORT_PMCR) & ~0xffff, PORT_PMCR);
-	ctrl_outw(ctrl_inw(PORT_PRCR) & ~0x03ff, PORT_PRCR);
-	ctrl_outw(ctrl_inw(PORT_HIZCRA) & ~0x01C0, PORT_HIZCRA);
-
-	/* CEU */
-	ctrl_outw(ctrl_inw(PORT_MSELCRB) & ~0x0001, PORT_MSELCRB);
-	ctrl_outw(ctrl_inw(PORT_PSELD) & ~0x0003, PORT_PSELD);
-	ctrl_outw((ctrl_inw(PORT_PZCR) & ~0xff00) | 0x5500, PORT_PZCR);
-	ctrl_outb((ctrl_inb(PORT_PZDR) & ~0xf0) | 0x20, PORT_PZDR);
 }
 
 static struct sh_machine_vector mv_ap325rxa __initmv = {
-- 
GitLab


From 41e4a9a5e2fc5a43b2b81aec402720226de29f05 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 8 Oct 2008 20:42:29 +0900
Subject: [PATCH 647/895] sh: Add sh7203 pinmux code

This patch adds pinmux and gpio support for the sh7203 processor.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/sh7203.h            |  143 ++
 arch/sh/kernel/cpu/sh2a/Makefile        |    5 +
 arch/sh/kernel/cpu/sh2a/pinmux-sh7203.c | 1599 +++++++++++++++++++++++
 3 files changed, 1747 insertions(+)
 create mode 100644 arch/sh/include/asm/sh7203.h
 create mode 100644 arch/sh/kernel/cpu/sh2a/pinmux-sh7203.c

diff --git a/arch/sh/include/asm/sh7203.h b/arch/sh/include/asm/sh7203.h
new file mode 100644
index 000000000000..79f93159018d
--- /dev/null
+++ b/arch/sh/include/asm/sh7203.h
@@ -0,0 +1,143 @@
+#ifndef __ASM_SH7203_H__
+#define __ASM_SH7203_H__
+
+enum {
+	/* PA */
+	GPIO_PA7, GPIO_PA6, GPIO_PA5, GPIO_PA4,
+	GPIO_PA3, GPIO_PA2, GPIO_PA1, GPIO_PA0,
+
+	/* PB */
+	GPIO_PB12,
+	GPIO_PB11, GPIO_PB10, GPIO_PB9, GPIO_PB8,
+	GPIO_PB7, GPIO_PB6, GPIO_PB5, GPIO_PB4,
+	GPIO_PB3, GPIO_PB2, GPIO_PB1, GPIO_PB0,
+
+	/* PC */
+	GPIO_PC14, GPIO_PC13, GPIO_PC12,
+	GPIO_PC11, GPIO_PC10, GPIO_PC9, GPIO_PC8,
+	GPIO_PC7, GPIO_PC6, GPIO_PC5, GPIO_PC4,
+	GPIO_PC3, GPIO_PC2, GPIO_PC1, GPIO_PC0,
+
+	/* PD */
+	GPIO_PD15, GPIO_PD14, GPIO_PD13, GPIO_PD12,
+	GPIO_PD11, GPIO_PD10, GPIO_PD9, GPIO_PD8,
+	GPIO_PD7, GPIO_PD6, GPIO_PD5, GPIO_PD4,
+	GPIO_PD3, GPIO_PD2, GPIO_PD1, GPIO_PD0,
+
+	/* PE */
+	GPIO_PE15, GPIO_PE14, GPIO_PE13, GPIO_PE12,
+	GPIO_PE11, GPIO_PE10, GPIO_PE9, GPIO_PE8,
+	GPIO_PE7, GPIO_PE6, GPIO_PE5, GPIO_PE4,
+	GPIO_PE3, GPIO_PE2, GPIO_PE1, GPIO_PE0,
+
+	/* PF */
+	GPIO_PF30, GPIO_PF29, GPIO_PF28,
+	GPIO_PF27, GPIO_PF26, GPIO_PF25, GPIO_PF24,
+	GPIO_PF23, GPIO_PF22, GPIO_PF21, GPIO_PF20,
+	GPIO_PF19, GPIO_PF18, GPIO_PF17, GPIO_PF16,
+	GPIO_PF15, GPIO_PF14, GPIO_PF13, GPIO_PF12,
+	GPIO_PF11, GPIO_PF10, GPIO_PF9, GPIO_PF8,
+	GPIO_PF7, GPIO_PF6, GPIO_PF5, GPIO_PF4,
+	GPIO_PF3, GPIO_PF2, GPIO_PF1, GPIO_PF0,
+
+	/* INTC: IRQ and PINT on PB/PD/PE */
+	GPIO_FN_PINT7_PB, GPIO_FN_PINT6_PB, GPIO_FN_PINT5_PB, GPIO_FN_PINT4_PB,
+	GPIO_FN_PINT3_PB, GPIO_FN_PINT2_PB, GPIO_FN_PINT1_PB, GPIO_FN_PINT0_PB,
+	GPIO_FN_PINT7_PD, GPIO_FN_PINT6_PD, GPIO_FN_PINT5_PD, GPIO_FN_PINT4_PD,
+	GPIO_FN_PINT3_PD, GPIO_FN_PINT2_PD, GPIO_FN_PINT1_PD, GPIO_FN_PINT0_PD,
+	GPIO_FN_IRQ7_PB, GPIO_FN_IRQ6_PB, GPIO_FN_IRQ5_PB, GPIO_FN_IRQ4_PB,
+	GPIO_FN_IRQ3_PB, GPIO_FN_IRQ2_PB, GPIO_FN_IRQ1_PB, GPIO_FN_IRQ0_PB,
+	GPIO_FN_IRQ7_PD, GPIO_FN_IRQ6_PD, GPIO_FN_IRQ5_PD, GPIO_FN_IRQ4_PD,
+	GPIO_FN_IRQ3_PD, GPIO_FN_IRQ2_PD, GPIO_FN_IRQ1_PD, GPIO_FN_IRQ0_PD,
+	GPIO_FN_IRQ7_PE, GPIO_FN_IRQ6_PE, GPIO_FN_IRQ5_PE, GPIO_FN_IRQ4_PE,
+	GPIO_FN_IRQ3_PE, GPIO_FN_IRQ2_PE, GPIO_FN_IRQ1_PE, GPIO_FN_IRQ0_PE,
+
+	GPIO_FN_WDTOVF, GPIO_FN_IRQOUT, GPIO_FN_REFOUT, GPIO_FN_IRQOUT_REFOUT,
+	GPIO_FN_UBCTRG,
+
+	/* CAN */
+	GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1,
+	GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1,
+
+	/* IIC3 */
+	GPIO_FN_SDA3, GPIO_FN_SCL3,
+	GPIO_FN_SDA2, GPIO_FN_SCL2,
+	GPIO_FN_SDA1, GPIO_FN_SCL1,
+	GPIO_FN_SDA0, GPIO_FN_SCL0,
+
+	/* DMAC */
+	GPIO_FN_TEND0_PD, GPIO_FN_TEND0_PE, GPIO_FN_DACK0_PD,
+	GPIO_FN_DACK0_PE, GPIO_FN_DREQ0_PD, GPIO_FN_DREQ0_PE,
+	GPIO_FN_TEND1_PD, GPIO_FN_TEND1_PE, GPIO_FN_DACK1_PD,
+	GPIO_FN_DACK1_PE, GPIO_FN_DREQ1_PD, GPIO_FN_DREQ1_PE,
+	GPIO_FN_DACK2, GPIO_FN_DREQ2,
+	GPIO_FN_DACK3, GPIO_FN_DREQ3,
+
+	/* ADC */
+	GPIO_FN_ADTRG_PD, GPIO_FN_ADTRG_PE,
+
+	/* BSC */
+	GPIO_FN_D31, GPIO_FN_D30, GPIO_FN_D29, GPIO_FN_D28,
+	GPIO_FN_D27, GPIO_FN_D26, GPIO_FN_D25, GPIO_FN_D24,
+	GPIO_FN_D23, GPIO_FN_D22, GPIO_FN_D21, GPIO_FN_D20,
+	GPIO_FN_D19, GPIO_FN_D18, GPIO_FN_D17, GPIO_FN_D16,
+	GPIO_FN_A25, GPIO_FN_A24, GPIO_FN_A23, GPIO_FN_A22,
+	GPIO_FN_A21, GPIO_FN_CS4, GPIO_FN_MRES, GPIO_FN_BS,
+	GPIO_FN_IOIS16, GPIO_FN_CS1, GPIO_FN_CS6_CE1B,
+	GPIO_FN_CE2B, GPIO_FN_CS5_CE1A, GPIO_FN_CE2A,
+	GPIO_FN_FRAME, GPIO_FN_WAIT, GPIO_FN_RDWR,
+	GPIO_FN_CKE, GPIO_FN_CASU, GPIO_FN_BREQ, GPIO_FN_RASU,
+	GPIO_FN_BACK, GPIO_FN_CASL, GPIO_FN_RASL,
+	GPIO_FN_WE3_DQMUU_AH_ICIO_WR, GPIO_FN_WE2_DQMUL_ICIORD,
+	GPIO_FN_WE1_DQMLU_WE, GPIO_FN_WE0_DQMLL,
+	GPIO_FN_CS3, GPIO_FN_CS2, GPIO_FN_A1, GPIO_FN_A0, GPIO_FN_CS7,
+
+	/* TMU */
+	GPIO_FN_TIOC4D, GPIO_FN_TIOC4C, GPIO_FN_TIOC4B, GPIO_FN_TIOC4A,
+	GPIO_FN_TIOC3D, GPIO_FN_TIOC3C, GPIO_FN_TIOC3B, GPIO_FN_TIOC3A,
+	GPIO_FN_TIOC2B, GPIO_FN_TIOC1B, GPIO_FN_TIOC2A, GPIO_FN_TIOC1A,
+	GPIO_FN_TIOC0D, GPIO_FN_TIOC0C, GPIO_FN_TIOC0B, GPIO_FN_TIOC0A,
+	GPIO_FN_TCLKD_PD, GPIO_FN_TCLKC_PD, GPIO_FN_TCLKB_PD, GPIO_FN_TCLKA_PD,
+	GPIO_FN_TCLKD_PF, GPIO_FN_TCLKC_PF, GPIO_FN_TCLKB_PF, GPIO_FN_TCLKA_PF,
+
+	/* SSU */
+	GPIO_FN_SCS0_PD, GPIO_FN_SSO0_PD, GPIO_FN_SSI0_PD, GPIO_FN_SSCK0_PD,
+	GPIO_FN_SCS0_PF, GPIO_FN_SSO0_PF, GPIO_FN_SSI0_PF, GPIO_FN_SSCK0_PF,
+	GPIO_FN_SCS1_PD, GPIO_FN_SSO1_PD, GPIO_FN_SSI1_PD, GPIO_FN_SSCK1_PD,
+	GPIO_FN_SCS1_PF, GPIO_FN_SSO1_PF, GPIO_FN_SSI1_PF, GPIO_FN_SSCK1_PF,
+
+	/* SCIF */
+	GPIO_FN_TXD0, GPIO_FN_RXD0, GPIO_FN_SCK0,
+	GPIO_FN_TXD1, GPIO_FN_RXD1, GPIO_FN_SCK1,
+	GPIO_FN_TXD2, GPIO_FN_RXD2, GPIO_FN_SCK2,
+	GPIO_FN_RTS3, GPIO_FN_CTS3, GPIO_FN_TXD3, GPIO_FN_RXD3, GPIO_FN_SCK3,
+
+	/* SSI */
+	GPIO_FN_AUDIO_CLK,
+	GPIO_FN_SSIDATA3, GPIO_FN_SSIWS3, GPIO_FN_SSISCK3,
+	GPIO_FN_SSIDATA2, GPIO_FN_SSIWS2, GPIO_FN_SSISCK2,
+	GPIO_FN_SSIDATA1, GPIO_FN_SSIWS1, GPIO_FN_SSISCK1,
+	GPIO_FN_SSIDATA0, GPIO_FN_SSIWS0, GPIO_FN_SSISCK0,
+
+	/* FLCTL */
+	GPIO_FN_FCE, GPIO_FN_FRB,
+	GPIO_FN_NAF7, GPIO_FN_NAF6, GPIO_FN_NAF5, GPIO_FN_NAF4,
+	GPIO_FN_NAF3, GPIO_FN_NAF2, GPIO_FN_NAF1, GPIO_FN_NAF0,
+	GPIO_FN_FSC, GPIO_FN_FOE, GPIO_FN_FCDE, GPIO_FN_FWE,
+
+	/* LCDC */
+	GPIO_FN_LCD_VEPWC, GPIO_FN_LCD_VCPWC,
+	GPIO_FN_LCD_CLK, GPIO_FN_LCD_FLM,
+	GPIO_FN_LCD_M_DISP, GPIO_FN_LCD_CL2,
+	GPIO_FN_LCD_CL1, GPIO_FN_LCD_DON,
+	GPIO_FN_LCD_DATA15, GPIO_FN_LCD_DATA14,
+	GPIO_FN_LCD_DATA13, GPIO_FN_LCD_DATA12,
+	GPIO_FN_LCD_DATA11, GPIO_FN_LCD_DATA10,
+	GPIO_FN_LCD_DATA9, GPIO_FN_LCD_DATA8,
+	GPIO_FN_LCD_DATA7, GPIO_FN_LCD_DATA6,
+	GPIO_FN_LCD_DATA5, GPIO_FN_LCD_DATA4,
+	GPIO_FN_LCD_DATA3, GPIO_FN_LCD_DATA2,
+	GPIO_FN_LCD_DATA1, GPIO_FN_LCD_DATA0,
+};
+
+#endif /* __ASM_SH7203_H__ */
diff --git a/arch/sh/kernel/cpu/sh2a/Makefile b/arch/sh/kernel/cpu/sh2a/Makefile
index 1ab1ecf4c768..428450cc0809 100644
--- a/arch/sh/kernel/cpu/sh2a/Makefile
+++ b/arch/sh/kernel/cpu/sh2a/Makefile
@@ -12,3 +12,8 @@ obj-$(CONFIG_CPU_SUBTYPE_SH7206)	+= setup-sh7206.o clock-sh7206.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7203)	+= setup-sh7203.o clock-sh7203.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7263)	+= setup-sh7203.o clock-sh7203.o
 obj-$(CONFIG_CPU_SUBTYPE_MXG)		+= setup-mxg.o clock-sh7206.o
+
+# Pinmux setup
+pinmux-$(CONFIG_CPU_SUBTYPE_SH7203)	:= pinmux-sh7203.o
+
+obj-$(CONFIG_GENERIC_GPIO)	+= $(pinmux-y)
diff --git a/arch/sh/kernel/cpu/sh2a/pinmux-sh7203.c b/arch/sh/kernel/cpu/sh2a/pinmux-sh7203.c
new file mode 100644
index 000000000000..39a5b880418f
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh2a/pinmux-sh7203.c
@@ -0,0 +1,1599 @@
+/*
+ * SH7203 Pinmux
+ *
+ *  Copyright (C) 2008  Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <asm/sh7203.h>
+
+enum {
+	PINMUX_RESERVED = 0,
+
+	PINMUX_DATA_BEGIN,
+	PA7_DATA, PA6_DATA, PA5_DATA, PA4_DATA,
+	PA3_DATA, PA2_DATA, PA1_DATA, PA0_DATA,
+	PB12_DATA,
+	PB11_DATA, PB10_DATA, PB9_DATA, PB8_DATA,
+	PB7_DATA, PB6_DATA, PB5_DATA, PB4_DATA,
+	PB3_DATA, PB2_DATA, PB1_DATA, PB0_DATA,
+	PC14_DATA, PC13_DATA, PC12_DATA,
+	PC11_DATA, PC10_DATA, PC9_DATA, PC8_DATA,
+	PC7_DATA, PC6_DATA, PC5_DATA, PC4_DATA,
+	PC3_DATA, PC2_DATA, PC1_DATA, PC0_DATA,
+	PD15_DATA, PD14_DATA, PD13_DATA, PD12_DATA,
+	PD11_DATA, PD10_DATA, PD9_DATA, PD8_DATA,
+	PD7_DATA, PD6_DATA, PD5_DATA, PD4_DATA,
+	PD3_DATA, PD2_DATA, PD1_DATA, PD0_DATA,
+	PE15_DATA, PE14_DATA, PE13_DATA, PE12_DATA,
+	PE11_DATA, PE10_DATA, PE9_DATA, PE8_DATA,
+	PE7_DATA, PE6_DATA, PE5_DATA, PE4_DATA,
+	PE3_DATA, PE2_DATA, PE1_DATA, PE0_DATA,
+	PF30_DATA, PF29_DATA, PF28_DATA,
+	PF27_DATA, PF26_DATA, PF25_DATA, PF24_DATA,
+	PF23_DATA, PF22_DATA, PF21_DATA, PF20_DATA,
+	PF19_DATA, PF18_DATA, PF17_DATA, PF16_DATA,
+	PF15_DATA, PF14_DATA, PF13_DATA, PF12_DATA,
+	PF11_DATA, PF10_DATA, PF9_DATA, PF8_DATA,
+	PF7_DATA, PF6_DATA, PF5_DATA, PF4_DATA,
+	PF3_DATA, PF2_DATA, PF1_DATA, PF0_DATA,
+	PINMUX_DATA_END,
+
+	PINMUX_INPUT_BEGIN,
+	PA7_IN, PA6_IN, PA5_IN, PA4_IN,
+	PA3_IN, PA2_IN, PA1_IN, PA0_IN,
+	PB11_IN, PB10_IN, PB9_IN, PB8_IN,
+	PB7_IN, PB6_IN, PB5_IN, PB4_IN,
+	PB3_IN, PB2_IN, PB1_IN, PB0_IN,
+	PC14_IN, PC13_IN, PC12_IN,
+	PC11_IN, PC10_IN, PC9_IN, PC8_IN,
+	PC7_IN, PC6_IN, PC5_IN, PC4_IN,
+	PC3_IN, PC2_IN, PC1_IN, PC0_IN,
+	PD15_IN, PD14_IN, PD13_IN, PD12_IN,
+	PD11_IN, PD10_IN, PD9_IN, PD8_IN,
+	PD7_IN, PD6_IN, PD5_IN, PD4_IN,
+	PD3_IN, PD2_IN, PD1_IN, PD0_IN,
+	PE15_IN, PE14_IN, PE13_IN, PE12_IN,
+	PE11_IN, PE10_IN, PE9_IN, PE8_IN,
+	PE7_IN, PE6_IN, PE5_IN, PE4_IN,
+	PE3_IN, PE2_IN, PE1_IN, PE0_IN,
+	PF30_IN, PF29_IN, PF28_IN,
+	PF27_IN, PF26_IN, PF25_IN, PF24_IN,
+	PF23_IN, PF22_IN, PF21_IN, PF20_IN,
+	PF19_IN, PF18_IN, PF17_IN, PF16_IN,
+	PF15_IN, PF14_IN, PF13_IN, PF12_IN,
+	PF11_IN, PF10_IN, PF9_IN, PF8_IN,
+	PF7_IN, PF6_IN, PF5_IN, PF4_IN,
+	PF3_IN, PF2_IN, PF1_IN, PF0_IN,
+	PINMUX_INPUT_END,
+
+	PINMUX_OUTPUT_BEGIN,
+	PB12_OUT,
+	PB11_OUT, PB10_OUT, PB9_OUT, PB8_OUT,
+	PC14_OUT, PC13_OUT, PC12_OUT,
+	PC11_OUT, PC10_OUT, PC9_OUT, PC8_OUT,
+	PC7_OUT, PC6_OUT, PC5_OUT, PC4_OUT,
+	PC3_OUT, PC2_OUT, PC1_OUT, PC0_OUT,
+	PD15_OUT, PD14_OUT, PD13_OUT, PD12_OUT,
+	PD11_OUT, PD10_OUT, PD9_OUT, PD8_OUT,
+	PD7_OUT, PD6_OUT, PD5_OUT, PD4_OUT,
+	PD3_OUT, PD2_OUT, PD1_OUT, PD0_OUT,
+	PE15_OUT, PE14_OUT, PE13_OUT, PE12_OUT,
+	PE11_OUT, PE10_OUT, PE9_OUT, PE8_OUT,
+	PE7_OUT, PE6_OUT, PE5_OUT, PE4_OUT,
+	PE3_OUT, PE2_OUT, PE1_OUT, PE0_OUT,
+	PF30_OUT, PF29_OUT, PF28_OUT,
+	PF27_OUT, PF26_OUT, PF25_OUT, PF24_OUT,
+	PF23_OUT, PF22_OUT, PF21_OUT, PF20_OUT,
+	PF19_OUT, PF18_OUT, PF17_OUT, PF16_OUT,
+	PF15_OUT, PF14_OUT, PF13_OUT, PF12_OUT,
+	PF11_OUT, PF10_OUT, PF9_OUT, PF8_OUT,
+	PF7_OUT, PF6_OUT, PF5_OUT, PF4_OUT,
+	PF3_OUT, PF2_OUT, PF1_OUT, PF0_OUT,
+	PINMUX_OUTPUT_END,
+
+	PINMUX_FUNCTION_BEGIN,
+	PB11_IOR_IN, PB11_IOR_OUT,
+	PB10_IOR_IN, PB10_IOR_OUT,
+	PB9_IOR_IN, PB9_IOR_OUT,
+	PB8_IOR_IN, PB8_IOR_OUT,
+	PB12MD_00, PB12MD_01, PB12MD_10, PB12MD_11,
+	PB11MD_0, PB11MD_1,
+	PB10MD_0, PB10MD_1,
+	PB9MD_00, PB9MD_01, PB9MD_10,
+	PB8MD_00, PB8MD_01, PB8MD_10,
+	PB7MD_00, PB7MD_01, PB7MD_10, PB7MD_11,
+	PB6MD_00, PB6MD_01, PB6MD_10, PB6MD_11,
+	PB5MD_00, PB5MD_01, PB5MD_10, PB5MD_11,
+	PB4MD_00, PB4MD_01, PB4MD_10, PB4MD_11,
+	PB3MD_00, PB3MD_01, PB3MD_10, PB3MD_11,
+	PB2MD_00, PB2MD_01, PB2MD_10, PB2MD_11,
+	PB1MD_00, PB1MD_01, PB1MD_10, PB1MD_11,
+	PB0MD_00, PB0MD_01, PB0MD_10, PB0MD_11,
+
+	PB12IRQ_00, PB12IRQ_01, PB12IRQ_10,
+
+	PC14MD_0, PC14MD_1,
+	PC13MD_0, PC13MD_1,
+	PC12MD_0, PC12MD_1,
+	PC11MD_00, PC11MD_01, PC11MD_10,
+	PC10MD_00, PC10MD_01, PC10MD_10,
+	PC9MD_0, PC9MD_1,
+	PC8MD_0, PC8MD_1,
+	PC7MD_0, PC7MD_1,
+	PC6MD_0, PC6MD_1,
+	PC5MD_0, PC5MD_1,
+	PC4MD_0, PC4MD_1,
+	PC3MD_0, PC3MD_1,
+	PC2MD_0, PC2MD_1,
+	PC1MD_0, PC1MD_1,
+	PC0MD_00, PC0MD_01, PC0MD_10,
+
+	PD15MD_000, PD15MD_001, PD15MD_010, PD15MD_100, PD15MD_101,
+	PD14MD_000, PD14MD_001, PD14MD_010, PD14MD_101,
+	PD13MD_000, PD13MD_001, PD13MD_010, PD13MD_100, PD13MD_101,
+	PD12MD_000, PD12MD_001, PD12MD_010, PD12MD_100, PD12MD_101,
+	PD11MD_000, PD11MD_001, PD11MD_010, PD11MD_100, PD11MD_101,
+	PD10MD_000, PD10MD_001, PD10MD_010, PD10MD_100, PD10MD_101,
+	PD9MD_000, PD9MD_001, PD9MD_010, PD9MD_100, PD9MD_101,
+	PD8MD_000, PD8MD_001, PD8MD_010, PD8MD_100, PD8MD_101,
+	PD7MD_000, PD7MD_001, PD7MD_010, PD7MD_011, PD7MD_100, PD7MD_101,
+	PD6MD_000, PD6MD_001, PD6MD_010, PD6MD_011, PD6MD_100, PD6MD_101,
+	PD5MD_000, PD5MD_001, PD5MD_010, PD5MD_011, PD5MD_100, PD5MD_101,
+	PD4MD_000, PD4MD_001, PD4MD_010, PD4MD_011, PD4MD_100, PD4MD_101,
+	PD3MD_000, PD3MD_001, PD3MD_010, PD3MD_011, PD3MD_100, PD3MD_101,
+	PD2MD_000, PD2MD_001, PD2MD_010, PD2MD_011, PD2MD_100, PD2MD_101,
+	PD1MD_000, PD1MD_001, PD1MD_010, PD1MD_011, PD1MD_100, PD1MD_101,
+	PD0MD_000, PD0MD_001, PD0MD_010, PD0MD_011, PD0MD_100, PD0MD_101,
+
+	PE15MD_00, PE15MD_01, PE15MD_11,
+	PE14MD_00, PE14MD_01, PE14MD_11,
+	PE13MD_00, PE13MD_11,
+	PE12MD_00, PE12MD_11,
+	PE11MD_000, PE11MD_001, PE11MD_010, PE11MD_100,
+	PE10MD_000, PE10MD_001, PE10MD_010, PE10MD_100,
+	PE9MD_00, PE9MD_01, PE9MD_10, PE9MD_11,
+	PE8MD_00, PE8MD_01, PE8MD_10, PE8MD_11,
+	PE7MD_000, PE7MD_001, PE7MD_010, PE7MD_011, PE7MD_100,
+	PE6MD_000, PE6MD_001, PE6MD_010, PE6MD_011, PE6MD_100,
+	PE5MD_000, PE5MD_001, PE5MD_010, PE5MD_011, PE5MD_100,
+	PE4MD_000, PE4MD_001, PE4MD_010, PE4MD_011, PE4MD_100,
+	PE3MD_00, PE3MD_01, PE3MD_11,
+	PE2MD_00, PE2MD_01, PE2MD_11,
+	PE1MD_00, PE1MD_01, PE1MD_10, PE1MD_11,
+	PE0MD_000, PE0MD_001, PE0MD_011, PE0MD_100,
+
+	PF30MD_0, PF30MD_1,
+	PF29MD_0, PF29MD_1,
+	PF28MD_0, PF28MD_1,
+	PF27MD_0, PF27MD_1,
+	PF26MD_0, PF26MD_1,
+	PF25MD_0, PF25MD_1,
+	PF24MD_0, PF24MD_1,
+	PF23MD_00, PF23MD_01, PF23MD_10,
+	PF22MD_00, PF22MD_01, PF22MD_10,
+	PF21MD_00, PF21MD_01, PF21MD_10,
+	PF20MD_00, PF20MD_01, PF20MD_10,
+	PF19MD_00, PF19MD_01, PF19MD_10,
+	PF18MD_00, PF18MD_01, PF18MD_10,
+	PF17MD_00, PF17MD_01, PF17MD_10,
+	PF16MD_00, PF16MD_01, PF16MD_10,
+	PF15MD_00, PF15MD_01, PF15MD_10,
+	PF14MD_00, PF14MD_01, PF14MD_10,
+	PF13MD_00, PF13MD_01, PF13MD_10,
+	PF12MD_00, PF12MD_01, PF12MD_10,
+	PF11MD_00, PF11MD_01, PF11MD_10,
+	PF10MD_00, PF10MD_01, PF10MD_10,
+	PF9MD_00, PF9MD_01, PF9MD_10,
+	PF8MD_00, PF8MD_01, PF8MD_10,
+	PF7MD_00, PF7MD_01, PF7MD_10, PF7MD_11,
+	PF6MD_00, PF6MD_01, PF6MD_10, PF6MD_11,
+	PF5MD_00, PF5MD_01, PF5MD_10, PF5MD_11,
+	PF4MD_00, PF4MD_01, PF4MD_10, PF4MD_11,
+	PF3MD_00, PF3MD_01, PF3MD_10, PF3MD_11,
+	PF2MD_00, PF2MD_01, PF2MD_10, PF2MD_11,
+	PF1MD_00, PF1MD_01, PF1MD_10, PF1MD_11,
+	PF0MD_00, PF0MD_01, PF0MD_10, PF0MD_11,
+	PINMUX_FUNCTION_END,
+
+	PINMUX_MARK_BEGIN,
+	PINT7_PB_MARK, PINT6_PB_MARK, PINT5_PB_MARK, PINT4_PB_MARK,
+	PINT3_PB_MARK, PINT2_PB_MARK, PINT1_PB_MARK, PINT0_PB_MARK,
+	PINT7_PD_MARK, PINT6_PD_MARK, PINT5_PD_MARK, PINT4_PD_MARK,
+	PINT3_PD_MARK, PINT2_PD_MARK, PINT1_PD_MARK, PINT0_PD_MARK,
+	IRQ7_PB_MARK, IRQ6_PB_MARK, IRQ5_PB_MARK, IRQ4_PB_MARK,
+	IRQ3_PB_MARK, IRQ2_PB_MARK, IRQ1_PB_MARK, IRQ0_PB_MARK,
+	IRQ7_PD_MARK, IRQ6_PD_MARK, IRQ5_PD_MARK, IRQ4_PD_MARK,
+	IRQ3_PD_MARK, IRQ2_PD_MARK, IRQ1_PD_MARK, IRQ0_PD_MARK,
+	IRQ7_PE_MARK, IRQ6_PE_MARK, IRQ5_PE_MARK, IRQ4_PE_MARK,
+	IRQ3_PE_MARK, IRQ2_PE_MARK, IRQ1_PE_MARK, IRQ0_PE_MARK,
+	WDTOVF_MARK, IRQOUT_MARK, REFOUT_MARK, IRQOUT_REFOUT_MARK,
+	UBCTRG_MARK,
+	CTX1_MARK, CRX1_MARK, CTX0_MARK, CTX0_CTX1_MARK,
+	CRX0_MARK, CRX0_CRX1_MARK,
+	SDA3_MARK, SCL3_MARK,
+	SDA2_MARK, SCL2_MARK,
+	SDA1_MARK, SCL1_MARK,
+	SDA0_MARK, SCL0_MARK,
+	TEND0_PD_MARK, TEND0_PE_MARK, DACK0_PD_MARK, DACK0_PE_MARK,
+	DREQ0_PD_MARK, DREQ0_PE_MARK, TEND1_PD_MARK, TEND1_PE_MARK,
+	DACK1_PD_MARK, DACK1_PE_MARK, DREQ1_PD_MARK, DREQ1_PE_MARK,
+	DACK2_MARK, DREQ2_MARK, DACK3_MARK, DREQ3_MARK,
+	ADTRG_PD_MARK, ADTRG_PE_MARK,
+	D31_MARK, D30_MARK, D29_MARK, D28_MARK,
+	D27_MARK, D26_MARK, D25_MARK, D24_MARK,
+	D23_MARK, D22_MARK, D21_MARK, D20_MARK,
+	D19_MARK, D18_MARK, D17_MARK, D16_MARK,
+	A25_MARK, A24_MARK, A23_MARK, A22_MARK,
+	A21_MARK, CS4_MARK, MRES_MARK, BS_MARK,
+	IOIS16_MARK, CS1_MARK, CS6_CE1B_MARK, CE2B_MARK,
+	CS5_CE1A_MARK, CE2A_MARK, FRAME_MARK, WAIT_MARK,
+	RDWR_MARK, CKE_MARK, CASU_MARK,	BREQ_MARK,
+	RASU_MARK, BACK_MARK, CASL_MARK, RASL_MARK,
+	WE3_DQMUU_AH_ICIO_WR_MARK, WE2_DQMUL_ICIORD_MARK,
+	WE1_DQMLU_WE_MARK, WE0_DQMLL_MARK,
+	CS3_MARK, CS2_MARK, A1_MARK, A0_MARK, CS7_MARK,
+	TIOC4D_MARK, TIOC4C_MARK, TIOC4B_MARK, TIOC4A_MARK,
+	TIOC3D_MARK, TIOC3C_MARK, TIOC3B_MARK, TIOC3A_MARK,
+	TIOC2B_MARK, TIOC1B_MARK, TIOC2A_MARK, TIOC1A_MARK,
+	TIOC0D_MARK, TIOC0C_MARK, TIOC0B_MARK, TIOC0A_MARK,
+	TCLKD_PD_MARK, TCLKC_PD_MARK, TCLKB_PD_MARK, TCLKA_PD_MARK,
+	TCLKD_PF_MARK, TCLKC_PF_MARK, TCLKB_PF_MARK, TCLKA_PF_MARK,
+	SCS0_PD_MARK, SSO0_PD_MARK, SSI0_PD_MARK, SSCK0_PD_MARK,
+	SCS0_PF_MARK, SSO0_PF_MARK, SSI0_PF_MARK, SSCK0_PF_MARK,
+	SCS1_PD_MARK, SSO1_PD_MARK, SSI1_PD_MARK, SSCK1_PD_MARK,
+	SCS1_PF_MARK, SSO1_PF_MARK, SSI1_PF_MARK, SSCK1_PF_MARK,
+	TXD0_MARK, RXD0_MARK, SCK0_MARK,
+	TXD1_MARK, RXD1_MARK, SCK1_MARK,
+	TXD2_MARK, RXD2_MARK, SCK2_MARK,
+	RTS3_MARK, CTS3_MARK, TXD3_MARK,
+	RXD3_MARK, SCK3_MARK,
+	AUDIO_CLK_MARK,
+	SSIDATA3_MARK, SSIWS3_MARK, SSISCK3_MARK,
+	SSIDATA2_MARK, SSIWS2_MARK, SSISCK2_MARK,
+	SSIDATA1_MARK, SSIWS1_MARK, SSISCK1_MARK,
+	SSIDATA0_MARK, SSIWS0_MARK, SSISCK0_MARK,
+	FCE_MARK, FRB_MARK,
+	NAF7_MARK, NAF6_MARK, NAF5_MARK, NAF4_MARK,
+	NAF3_MARK, NAF2_MARK, NAF1_MARK, NAF0_MARK,
+	FSC_MARK, FOE_MARK, FCDE_MARK, FWE_MARK,
+	LCD_VEPWC_MARK, LCD_VCPWC_MARK,	LCD_CLK_MARK, LCD_FLM_MARK,
+	LCD_M_DISP_MARK, LCD_CL2_MARK, LCD_CL1_MARK, LCD_DON_MARK,
+	LCD_DATA15_MARK, LCD_DATA14_MARK, LCD_DATA13_MARK, LCD_DATA12_MARK,
+	LCD_DATA11_MARK, LCD_DATA10_MARK, LCD_DATA9_MARK, LCD_DATA8_MARK,
+	LCD_DATA7_MARK, LCD_DATA6_MARK, LCD_DATA5_MARK, LCD_DATA4_MARK,
+	LCD_DATA3_MARK, LCD_DATA2_MARK, LCD_DATA1_MARK, LCD_DATA0_MARK,
+	PINMUX_MARK_END,
+};
+
+static pinmux_enum_t pinmux_data[] = {
+
+	/* PA */
+	PINMUX_DATA(PA7_DATA, PA7_IN),
+	PINMUX_DATA(PA6_DATA, PA6_IN),
+	PINMUX_DATA(PA5_DATA, PA5_IN),
+	PINMUX_DATA(PA4_DATA, PA4_IN),
+	PINMUX_DATA(PA3_DATA, PA3_IN),
+	PINMUX_DATA(PA2_DATA, PA2_IN),
+	PINMUX_DATA(PA1_DATA, PA1_IN),
+	PINMUX_DATA(PA0_DATA, PA0_IN),
+
+	/* PB */
+	PINMUX_DATA(PB12_DATA, PB12MD_00, PB12_OUT),
+	PINMUX_DATA(WDTOVF_MARK, PB12MD_01),
+	PINMUX_DATA(IRQOUT_MARK, PB12MD_10, PB12IRQ_00),
+	PINMUX_DATA(REFOUT_MARK, PB12MD_10, PB12IRQ_01),
+	PINMUX_DATA(IRQOUT_REFOUT_MARK, PB12MD_10, PB12IRQ_10),
+	PINMUX_DATA(UBCTRG_MARK, PB12MD_11),
+
+	PINMUX_DATA(PB11_DATA, PB11MD_0, PB11_IN, PB11_OUT),
+	PINMUX_DATA(CTX1_MARK, PB11MD_1),
+
+	PINMUX_DATA(PB10_DATA, PB10MD_0, PB10_IN, PB10_OUT),
+	PINMUX_DATA(CRX1_MARK, PB10MD_1),
+
+	PINMUX_DATA(PB9_DATA, PB9MD_00, PB9_IN, PB9_OUT),
+	PINMUX_DATA(CTX0_MARK, PB9MD_01),
+	PINMUX_DATA(CTX0_CTX1_MARK, PB9MD_10),
+
+	PINMUX_DATA(PB8_DATA, PB8MD_00, PB8_IN, PB8_OUT),
+	PINMUX_DATA(CRX0_MARK, PB8MD_01),
+	PINMUX_DATA(CRX0_CRX1_MARK, PB8MD_10),
+
+	PINMUX_DATA(PB7_DATA, PB7MD_00, PB7_IN),
+	PINMUX_DATA(SDA3_MARK, PB7MD_01),
+	PINMUX_DATA(PINT7_PB_MARK, PB7MD_10),
+	PINMUX_DATA(IRQ7_PB_MARK, PB7MD_11),
+
+	PINMUX_DATA(PB6_DATA, PB6MD_00, PB6_IN),
+	PINMUX_DATA(SCL3_MARK, PB6MD_01),
+	PINMUX_DATA(PINT6_PB_MARK, PB6MD_10),
+	PINMUX_DATA(IRQ6_PB_MARK, PB6MD_11),
+
+	PINMUX_DATA(PB5_DATA, PB5MD_00, PB5_IN),
+	PINMUX_DATA(SDA2_MARK, PB6MD_01),
+	PINMUX_DATA(PINT5_PB_MARK, PB6MD_10),
+	PINMUX_DATA(IRQ5_PB_MARK, PB6MD_11),
+
+	PINMUX_DATA(PB4_DATA, PB4MD_00, PB4_IN),
+	PINMUX_DATA(SCL2_MARK, PB4MD_01),
+	PINMUX_DATA(PINT4_PB_MARK, PB4MD_10),
+	PINMUX_DATA(IRQ4_PB_MARK, PB4MD_11),
+
+	PINMUX_DATA(PB3_DATA, PB3MD_00, PB3_IN),
+	PINMUX_DATA(SDA1_MARK, PB3MD_01),
+	PINMUX_DATA(PINT3_PB_MARK, PB3MD_10),
+	PINMUX_DATA(IRQ3_PB_MARK, PB3MD_11),
+
+	PINMUX_DATA(PB2_DATA, PB2MD_00, PB2_IN),
+	PINMUX_DATA(SCL1_MARK, PB2MD_01),
+	PINMUX_DATA(PINT2_PB_MARK, PB2MD_10),
+	PINMUX_DATA(IRQ2_PB_MARK, PB2MD_11),
+
+	PINMUX_DATA(PB1_DATA, PB1MD_00, PB1_IN),
+	PINMUX_DATA(SDA0_MARK, PB1MD_01),
+	PINMUX_DATA(PINT1_PB_MARK, PB1MD_10),
+	PINMUX_DATA(IRQ1_PB_MARK, PB1MD_11),
+
+	PINMUX_DATA(PB0_DATA, PB0MD_00, PB0_IN),
+	PINMUX_DATA(SCL0_MARK, PB0MD_01),
+	PINMUX_DATA(PINT0_PB_MARK, PB0MD_10),
+	PINMUX_DATA(IRQ0_PB_MARK, PB0MD_11),
+
+	/* PC */
+	PINMUX_DATA(PC14_DATA, PC14MD_0, PC14_IN, PC14_OUT),
+	PINMUX_DATA(WAIT_MARK, PC14MD_1),
+
+	PINMUX_DATA(PC13_DATA, PC13MD_0, PC13_IN, PC13_OUT),
+	PINMUX_DATA(RDWR_MARK, PC13MD_1),
+
+	PINMUX_DATA(PC12_DATA, PC12MD_0, PC12_IN, PC12_OUT),
+	PINMUX_DATA(CKE_MARK, PC12MD_1),
+
+	PINMUX_DATA(PC11_DATA, PC11MD_00, PC11_IN, PC11_OUT),
+	PINMUX_DATA(CASU_MARK, PC11MD_01),
+	PINMUX_DATA(BREQ_MARK, PC11MD_10),
+
+	PINMUX_DATA(PC10_DATA, PC10MD_00, PC10_IN, PC10_OUT),
+	PINMUX_DATA(RASU_MARK, PC10MD_01),
+	PINMUX_DATA(BACK_MARK, PC10MD_10),
+
+	PINMUX_DATA(PC9_DATA, PC9MD_0, PC9_IN, PC9_OUT),
+	PINMUX_DATA(CASL_MARK, PC9MD_1),
+
+	PINMUX_DATA(PC8_DATA, PC8MD_0, PC8_IN, PC8_OUT),
+	PINMUX_DATA(RASL_MARK, PC8MD_1),
+
+	PINMUX_DATA(PC7_DATA, PC7MD_0, PC7_IN, PC7_OUT),
+	PINMUX_DATA(WE3_DQMUU_AH_ICIO_WR_MARK, PC7MD_1),
+
+	PINMUX_DATA(PC6_DATA, PC6MD_0, PC6_IN, PC6_OUT),
+	PINMUX_DATA(WE2_DQMUL_ICIORD_MARK, PC6MD_1),
+
+	PINMUX_DATA(PC5_DATA, PC5MD_0, PC5_IN, PC5_OUT),
+	PINMUX_DATA(WE1_DQMLU_WE_MARK, PC5MD_1),
+
+	PINMUX_DATA(PC4_DATA, PC4MD_0, PC4_IN, PC4_OUT),
+	PINMUX_DATA(WE0_DQMLL_MARK, PC4MD_1),
+
+	PINMUX_DATA(PC3_DATA, PC3MD_0, PC3_IN, PC3_OUT),
+	PINMUX_DATA(CS3_MARK, PC3MD_1),
+
+	PINMUX_DATA(PC2_DATA, PC2MD_0, PC2_IN, PC2_OUT),
+	PINMUX_DATA(CS2_MARK, PC2MD_1),
+
+	PINMUX_DATA(PC1_DATA, PC1MD_0, PC1_IN, PC1_OUT),
+	PINMUX_DATA(A1_MARK, PC1MD_1),
+
+	PINMUX_DATA(PC0_DATA, PC0MD_00, PC0_IN, PC0_OUT),
+	PINMUX_DATA(A0_MARK, PC0MD_01),
+	PINMUX_DATA(CS7_MARK, PC0MD_10),
+
+	/* PD */
+	PINMUX_DATA(PD15_DATA, PD15MD_000, PD15_IN, PD15_OUT),
+	PINMUX_DATA(D31_MARK, PD15MD_001),
+	PINMUX_DATA(PINT7_PD_MARK, PD15MD_010),
+	PINMUX_DATA(ADTRG_PD_MARK, PD15MD_100),
+	PINMUX_DATA(TIOC4D_MARK, PD15MD_101),
+
+	PINMUX_DATA(PD14_DATA, PD14MD_000, PD14_IN, PD14_OUT),
+	PINMUX_DATA(D30_MARK, PD14MD_001),
+	PINMUX_DATA(PINT6_PD_MARK, PD14MD_010),
+	PINMUX_DATA(TIOC4C_MARK, PD14MD_101),
+
+	PINMUX_DATA(PD13_DATA, PD13MD_000, PD13_IN, PD13_OUT),
+	PINMUX_DATA(D29_MARK, PD13MD_001),
+	PINMUX_DATA(PINT5_PD_MARK, PD13MD_010),
+	PINMUX_DATA(TEND1_PD_MARK, PD13MD_100),
+	PINMUX_DATA(TIOC4B_MARK, PD13MD_101),
+
+	PINMUX_DATA(PD12_DATA, PD12MD_000, PD12_IN, PD12_OUT),
+	PINMUX_DATA(D28_MARK, PD12MD_001),
+	PINMUX_DATA(PINT4_PD_MARK, PD12MD_010),
+	PINMUX_DATA(DACK1_PD_MARK, PD12MD_100),
+	PINMUX_DATA(TIOC4A_MARK, PD12MD_101),
+
+	PINMUX_DATA(PD11_DATA, PD11MD_000, PD11_IN, PD11_OUT),
+	PINMUX_DATA(D27_MARK, PD11MD_001),
+	PINMUX_DATA(PINT3_PD_MARK, PD11MD_010),
+	PINMUX_DATA(DREQ1_PD_MARK, PD11MD_100),
+	PINMUX_DATA(TIOC3D_MARK, PD11MD_101),
+
+	PINMUX_DATA(PD10_DATA, PD10MD_000, PD10_IN, PD10_OUT),
+	PINMUX_DATA(D26_MARK, PD10MD_001),
+	PINMUX_DATA(PINT2_PD_MARK, PD10MD_010),
+	PINMUX_DATA(TEND0_PD_MARK, PD10MD_100),
+	PINMUX_DATA(TIOC3C_MARK, PD10MD_101),
+
+	PINMUX_DATA(PD9_DATA, PD9MD_000, PD9_IN, PD9_OUT),
+	PINMUX_DATA(D25_MARK, PD9MD_001),
+	PINMUX_DATA(PINT1_PD_MARK, PD9MD_010),
+	PINMUX_DATA(DACK0_PD_MARK, PD9MD_100),
+	PINMUX_DATA(TIOC3B_MARK, PD9MD_101),
+
+	PINMUX_DATA(PD8_DATA, PD8MD_000, PD8_IN, PD8_OUT),
+	PINMUX_DATA(D24_MARK, PD8MD_001),
+	PINMUX_DATA(PINT0_PD_MARK, PD8MD_010),
+	PINMUX_DATA(DREQ0_PD_MARK, PD8MD_100),
+	PINMUX_DATA(TIOC3A_MARK, PD8MD_101),
+
+	PINMUX_DATA(PD7_DATA, PD7MD_000, PD7_IN, PD7_OUT),
+	PINMUX_DATA(D23_MARK, PD7MD_001),
+	PINMUX_DATA(IRQ7_PD_MARK, PD7MD_010),
+	PINMUX_DATA(SCS1_PD_MARK, PD7MD_011),
+	PINMUX_DATA(TCLKD_PD_MARK, PD7MD_100),
+	PINMUX_DATA(TIOC2B_MARK, PD7MD_101),
+
+	PINMUX_DATA(PD6_DATA, PD6MD_000, PD6_IN, PD6_OUT),
+	PINMUX_DATA(D22_MARK, PD6MD_001),
+	PINMUX_DATA(IRQ6_PD_MARK, PD6MD_010),
+	PINMUX_DATA(SSO1_PD_MARK, PD6MD_011),
+	PINMUX_DATA(TCLKC_PD_MARK, PD6MD_100),
+	PINMUX_DATA(TIOC2A_MARK, PD6MD_101),
+
+	PINMUX_DATA(PD5_DATA, PD5MD_000, PD5_IN, PD5_OUT),
+	PINMUX_DATA(D21_MARK, PD5MD_001),
+	PINMUX_DATA(IRQ5_PD_MARK, PD5MD_010),
+	PINMUX_DATA(SSI1_PD_MARK, PD5MD_011),
+	PINMUX_DATA(TCLKB_PD_MARK, PD5MD_100),
+	PINMUX_DATA(TIOC1B_MARK, PD5MD_101),
+
+	PINMUX_DATA(PD4_DATA, PD4MD_000, PD4_IN, PD4_OUT),
+	PINMUX_DATA(D20_MARK, PD4MD_001),
+	PINMUX_DATA(IRQ4_PD_MARK, PD4MD_010),
+	PINMUX_DATA(SSCK1_PD_MARK, PD4MD_011),
+	PINMUX_DATA(TCLKA_PD_MARK, PD4MD_100),
+	PINMUX_DATA(TIOC1A_MARK, PD4MD_101),
+
+	PINMUX_DATA(PD3_DATA, PD3MD_000, PD3_IN, PD3_OUT),
+	PINMUX_DATA(D19_MARK, PD3MD_001),
+	PINMUX_DATA(IRQ3_PD_MARK, PD3MD_010),
+	PINMUX_DATA(SCS0_PD_MARK, PD3MD_011),
+	PINMUX_DATA(DACK3_MARK, PD3MD_100),
+	PINMUX_DATA(TIOC0D_MARK, PD3MD_101),
+
+	PINMUX_DATA(PD2_DATA, PD2MD_000, PD2_IN, PD2_OUT),
+	PINMUX_DATA(D18_MARK, PD2MD_001),
+	PINMUX_DATA(IRQ2_PD_MARK, PD2MD_010),
+	PINMUX_DATA(SSO0_PD_MARK, PD2MD_011),
+	PINMUX_DATA(DREQ3_MARK, PD2MD_100),
+	PINMUX_DATA(TIOC0C_MARK, PD2MD_101),
+
+	PINMUX_DATA(PD1_DATA, PD1MD_000, PD1_IN, PD1_OUT),
+	PINMUX_DATA(D17_MARK, PD1MD_001),
+	PINMUX_DATA(IRQ1_PD_MARK, PD1MD_010),
+	PINMUX_DATA(SSI0_PD_MARK, PD1MD_011),
+	PINMUX_DATA(DACK2_MARK, PD1MD_100),
+	PINMUX_DATA(TIOC0B_MARK, PD1MD_101),
+
+	PINMUX_DATA(PD0_DATA, PD0MD_000, PD0_IN, PD0_OUT),
+	PINMUX_DATA(D16_MARK, PD0MD_001),
+	PINMUX_DATA(IRQ0_PD_MARK, PD0MD_010),
+	PINMUX_DATA(SSCK0_PD_MARK, PD0MD_011),
+	PINMUX_DATA(DREQ2_MARK, PD0MD_100),
+	PINMUX_DATA(TIOC0A_MARK, PD0MD_101),
+
+	/* PE */
+	PINMUX_DATA(PE15_DATA, PE15MD_00, PE15_IN, PE15_OUT),
+	PINMUX_DATA(IOIS16_MARK, PE15MD_01),
+	PINMUX_DATA(RTS3_MARK, PE15MD_11),
+
+	PINMUX_DATA(PE14_DATA, PE14MD_00, PE14_IN, PE14_OUT),
+	PINMUX_DATA(CS1_MARK, PE14MD_01),
+	PINMUX_DATA(CTS3_MARK, PE14MD_11),
+
+	PINMUX_DATA(PE13_DATA, PE13MD_00, PE13_IN, PE13_OUT),
+	PINMUX_DATA(TXD3_MARK, PE13MD_11),
+
+	PINMUX_DATA(PE12_DATA, PE12MD_00, PE12_IN, PE12_OUT),
+	PINMUX_DATA(RXD3_MARK, PE12MD_11),
+
+	PINMUX_DATA(PE11_DATA, PE11MD_000, PE11_IN, PE11_OUT),
+	PINMUX_DATA(CS6_CE1B_MARK, PE11MD_001),
+	PINMUX_DATA(IRQ7_PE_MARK, PE11MD_010),
+	PINMUX_DATA(TEND1_PE_MARK, PE11MD_100),
+
+	PINMUX_DATA(PE10_DATA, PE10MD_000, PE10_IN, PE10_OUT),
+	PINMUX_DATA(CE2B_MARK, PE10MD_001),
+	PINMUX_DATA(IRQ6_PE_MARK, PE10MD_010),
+	PINMUX_DATA(TEND0_PE_MARK, PE10MD_100),
+
+	PINMUX_DATA(PE9_DATA, PE9MD_00, PE9_IN, PE9_OUT),
+	PINMUX_DATA(CS5_CE1A_MARK, PE9MD_01),
+	PINMUX_DATA(IRQ5_PE_MARK, PE9MD_10),
+	PINMUX_DATA(SCK3_MARK, PE9MD_11),
+
+	PINMUX_DATA(PE8_DATA, PE8MD_00, PE8_IN, PE8_OUT),
+	PINMUX_DATA(CE2A_MARK, PE8MD_01),
+	PINMUX_DATA(IRQ4_PE_MARK, PE8MD_10),
+	PINMUX_DATA(SCK2_MARK, PE8MD_11),
+
+	PINMUX_DATA(PE7_DATA, PE7MD_000, PE7_IN, PE7_OUT),
+	PINMUX_DATA(FRAME_MARK, PE7MD_001),
+	PINMUX_DATA(IRQ3_PE_MARK, PE7MD_010),
+	PINMUX_DATA(TXD2_MARK, PE7MD_011),
+	PINMUX_DATA(DACK1_PE_MARK, PE7MD_100),
+
+	PINMUX_DATA(PE6_DATA, PE6MD_000, PE6_IN, PE6_OUT),
+	PINMUX_DATA(A25_MARK, PE6MD_001),
+	PINMUX_DATA(IRQ2_PE_MARK, PE6MD_010),
+	PINMUX_DATA(RXD2_MARK, PE6MD_011),
+	PINMUX_DATA(DREQ1_PE_MARK, PE6MD_100),
+
+	PINMUX_DATA(PE5_DATA, PE5MD_000, PE5_IN, PE5_OUT),
+	PINMUX_DATA(A24_MARK, PE5MD_001),
+	PINMUX_DATA(IRQ1_PE_MARK, PE5MD_010),
+	PINMUX_DATA(TXD1_MARK, PE5MD_011),
+	PINMUX_DATA(DACK0_PE_MARK, PE5MD_100),
+
+	PINMUX_DATA(PE4_DATA, PE4MD_000, PE4_IN, PE4_OUT),
+	PINMUX_DATA(A23_MARK, PE4MD_001),
+	PINMUX_DATA(IRQ0_PE_MARK, PE4MD_010),
+	PINMUX_DATA(RXD1_MARK, PE4MD_011),
+	PINMUX_DATA(DREQ0_PE_MARK, PE4MD_100),
+
+	PINMUX_DATA(PE3_DATA, PE3MD_00, PE3_IN, PE3_OUT),
+	PINMUX_DATA(A22_MARK, PE3MD_01),
+	PINMUX_DATA(SCK1_MARK, PE3MD_11),
+
+	PINMUX_DATA(PE2_DATA, PE2MD_00, PE2_IN, PE2_OUT),
+	PINMUX_DATA(A21_MARK, PE2MD_01),
+	PINMUX_DATA(SCK0_MARK, PE2MD_11),
+
+	PINMUX_DATA(PE1_DATA, PE1MD_00, PE1_IN, PE1_OUT),
+	PINMUX_DATA(CS4_MARK, PE1MD_01),
+	PINMUX_DATA(MRES_MARK, PE1MD_10),
+	PINMUX_DATA(TXD0_MARK, PE1MD_11),
+
+	PINMUX_DATA(PE0_DATA, PE0MD_000, PE0_IN, PE0_OUT),
+	PINMUX_DATA(BS_MARK, PE0MD_001),
+	PINMUX_DATA(RXD0_MARK, PE0MD_011),
+	PINMUX_DATA(ADTRG_PE_MARK, PE0MD_100),
+
+	/* PF */
+	PINMUX_DATA(PF30_DATA, PF30MD_0, PF30_IN, PF30_OUT),
+	PINMUX_DATA(AUDIO_CLK_MARK, PF30MD_1),
+
+	PINMUX_DATA(PF29_DATA, PF29MD_0, PF29_IN, PF29_OUT),
+	PINMUX_DATA(SSIDATA3_MARK, PF29MD_1),
+
+	PINMUX_DATA(PF28_DATA, PF28MD_0, PF28_IN, PF28_OUT),
+	PINMUX_DATA(SSIWS3_MARK, PF28MD_1),
+
+	PINMUX_DATA(PF27_DATA, PF27MD_0, PF27_IN, PF27_OUT),
+	PINMUX_DATA(SSISCK3_MARK, PF27MD_1),
+
+	PINMUX_DATA(PF26_DATA, PF26MD_0, PF26_IN, PF26_OUT),
+	PINMUX_DATA(SSIDATA2_MARK, PF26MD_1),
+
+	PINMUX_DATA(PF25_DATA, PF25MD_0, PF25_IN, PF25_OUT),
+	PINMUX_DATA(SSIWS2_MARK, PF25MD_1),
+
+	PINMUX_DATA(PF24_DATA, PF24MD_0, PF24_IN, PF24_OUT),
+	PINMUX_DATA(SSISCK2_MARK, PF24MD_1),
+
+	PINMUX_DATA(PF23_DATA, PF23MD_00, PF23_IN, PF23_OUT),
+	PINMUX_DATA(SSIDATA1_MARK, PF23MD_01),
+	PINMUX_DATA(LCD_VEPWC_MARK, PF23MD_10),
+
+	PINMUX_DATA(PF22_DATA, PF22MD_00, PF22_IN, PF22_OUT),
+	PINMUX_DATA(SSIWS1_MARK, PF22MD_01),
+	PINMUX_DATA(LCD_VCPWC_MARK, PF22MD_10),
+
+	PINMUX_DATA(PF21_DATA, PF21MD_00, PF21_IN, PF21_OUT),
+	PINMUX_DATA(SSISCK1_MARK, PF21MD_01),
+	PINMUX_DATA(LCD_CLK_MARK, PF21MD_10),
+
+	PINMUX_DATA(PF20_DATA, PF20MD_00, PF20_IN, PF20_OUT),
+	PINMUX_DATA(SSIDATA0_MARK, PF20MD_01),
+	PINMUX_DATA(LCD_FLM_MARK, PF20MD_10),
+
+	PINMUX_DATA(PF19_DATA, PF19MD_00, PF19_IN, PF19_OUT),
+	PINMUX_DATA(SSIWS0_MARK, PF19MD_01),
+	PINMUX_DATA(LCD_M_DISP_MARK, PF19MD_10),
+
+	PINMUX_DATA(PF18_DATA, PF18MD_00, PF18_IN, PF18_OUT),
+	PINMUX_DATA(SSISCK0_MARK, PF18MD_01),
+	PINMUX_DATA(LCD_CL2_MARK, PF18MD_10),
+
+	PINMUX_DATA(PF17_DATA, PF17MD_00, PF17_IN, PF17_OUT),
+	PINMUX_DATA(FCE_MARK, PF17MD_01),
+	PINMUX_DATA(LCD_CL1_MARK, PF17MD_10),
+
+	PINMUX_DATA(PF16_DATA, PF16MD_00, PF16_IN, PF16_OUT),
+	PINMUX_DATA(FRB_MARK, PF16MD_01),
+	PINMUX_DATA(LCD_DON_MARK, PF16MD_10),
+
+	PINMUX_DATA(PF15_DATA, PF15MD_00, PF15_IN, PF15_OUT),
+	PINMUX_DATA(NAF7_MARK, PF15MD_01),
+	PINMUX_DATA(LCD_DATA15_MARK, PF15MD_10),
+
+	PINMUX_DATA(PF14_DATA, PF14MD_00, PF14_IN, PF14_OUT),
+	PINMUX_DATA(NAF6_MARK, PF14MD_01),
+	PINMUX_DATA(LCD_DATA14_MARK, PF14MD_10),
+
+	PINMUX_DATA(PF13_DATA, PF13MD_00, PF13_IN, PF13_OUT),
+	PINMUX_DATA(NAF5_MARK, PF13MD_01),
+	PINMUX_DATA(LCD_DATA13_MARK, PF13MD_10),
+
+	PINMUX_DATA(PF12_DATA, PF12MD_00, PF12_IN, PF12_OUT),
+	PINMUX_DATA(NAF4_MARK, PF12MD_01),
+	PINMUX_DATA(LCD_DATA12_MARK, PF12MD_10),
+
+	PINMUX_DATA(PF11_DATA, PF11MD_00, PF11_IN, PF11_OUT),
+	PINMUX_DATA(NAF3_MARK, PF11MD_01),
+	PINMUX_DATA(LCD_DATA11_MARK, PF11MD_10),
+
+	PINMUX_DATA(PF10_DATA, PF10MD_00, PF10_IN, PF10_OUT),
+	PINMUX_DATA(NAF2_MARK, PF10MD_01),
+	PINMUX_DATA(LCD_DATA10_MARK, PF10MD_10),
+
+	PINMUX_DATA(PF9_DATA, PF9MD_00, PF9_IN, PF9_OUT),
+	PINMUX_DATA(NAF1_MARK, PF9MD_01),
+	PINMUX_DATA(LCD_DATA9_MARK, PF9MD_10),
+
+	PINMUX_DATA(PF8_DATA, PF8MD_00, PF8_IN, PF8_OUT),
+	PINMUX_DATA(NAF0_MARK, PF8MD_01),
+	PINMUX_DATA(LCD_DATA8_MARK, PF8MD_10),
+
+	PINMUX_DATA(PF7_DATA, PF7MD_00, PF7_IN, PF7_OUT),
+	PINMUX_DATA(FSC_MARK, PF7MD_01),
+	PINMUX_DATA(LCD_DATA7_MARK, PF7MD_10),
+	PINMUX_DATA(SCS1_PF_MARK, PF7MD_11),
+
+	PINMUX_DATA(PF6_DATA, PF6MD_00, PF6_IN, PF6_OUT),
+	PINMUX_DATA(FOE_MARK, PF6MD_01),
+	PINMUX_DATA(LCD_DATA6_MARK, PF6MD_10),
+	PINMUX_DATA(SSO1_PF_MARK, PF6MD_11),
+
+	PINMUX_DATA(PF5_DATA, PF5MD_00, PF5_IN, PF5_OUT),
+	PINMUX_DATA(FCDE_MARK, PF5MD_01),
+	PINMUX_DATA(LCD_DATA5_MARK, PF5MD_10),
+	PINMUX_DATA(SSI1_PF_MARK, PF5MD_11),
+
+	PINMUX_DATA(PF4_DATA, PF4MD_00, PF4_IN, PF4_OUT),
+	PINMUX_DATA(FWE_MARK, PF4MD_01),
+	PINMUX_DATA(LCD_DATA4_MARK, PF4MD_10),
+	PINMUX_DATA(SSCK1_PF_MARK, PF4MD_11),
+
+	PINMUX_DATA(PF3_DATA, PF3MD_00, PF3_IN, PF3_OUT),
+	PINMUX_DATA(TCLKD_PF_MARK, PF3MD_01),
+	PINMUX_DATA(LCD_DATA3_MARK, PF3MD_10),
+	PINMUX_DATA(SCS0_PF_MARK, PF3MD_11),
+
+	PINMUX_DATA(PF2_DATA, PF2MD_00, PF2_IN, PF2_OUT),
+	PINMUX_DATA(TCLKC_PF_MARK, PF2MD_01),
+	PINMUX_DATA(LCD_DATA2_MARK, PF2MD_10),
+	PINMUX_DATA(SSO0_PF_MARK, PF2MD_11),
+
+	PINMUX_DATA(PF1_DATA, PF1MD_00, PF1_IN, PF1_OUT),
+	PINMUX_DATA(TCLKB_PF_MARK, PF1MD_01),
+	PINMUX_DATA(LCD_DATA1_MARK, PF1MD_10),
+	PINMUX_DATA(SSI0_PF_MARK, PF1MD_11),
+
+	PINMUX_DATA(PF0_DATA, PF0MD_00, PF0_IN, PF0_OUT),
+	PINMUX_DATA(TCLKA_PF_MARK, PF0MD_01),
+	PINMUX_DATA(LCD_DATA0_MARK, PF0MD_10),
+	PINMUX_DATA(SSCK0_PF_MARK, PF0MD_11),
+};
+
+static struct pinmux_gpio pinmux_gpios[] = {
+
+	/* PA */
+	PINMUX_GPIO(GPIO_PA7, PA7_DATA),
+	PINMUX_GPIO(GPIO_PA6, PA6_DATA),
+	PINMUX_GPIO(GPIO_PA5, PA5_DATA),
+	PINMUX_GPIO(GPIO_PA4, PA4_DATA),
+	PINMUX_GPIO(GPIO_PA3, PA3_DATA),
+	PINMUX_GPIO(GPIO_PA2, PA2_DATA),
+	PINMUX_GPIO(GPIO_PA1, PA1_DATA),
+	PINMUX_GPIO(GPIO_PA0, PA0_DATA),
+
+	/* PB */
+	PINMUX_GPIO(GPIO_PB12, PB12_DATA),
+	PINMUX_GPIO(GPIO_PB11, PB11_DATA),
+	PINMUX_GPIO(GPIO_PB10, PB10_DATA),
+	PINMUX_GPIO(GPIO_PB9, PB9_DATA),
+	PINMUX_GPIO(GPIO_PB8, PB8_DATA),
+	PINMUX_GPIO(GPIO_PB7, PB7_DATA),
+	PINMUX_GPIO(GPIO_PB6, PB6_DATA),
+	PINMUX_GPIO(GPIO_PB5, PB5_DATA),
+	PINMUX_GPIO(GPIO_PB4, PB4_DATA),
+	PINMUX_GPIO(GPIO_PB3, PB3_DATA),
+	PINMUX_GPIO(GPIO_PB2, PB2_DATA),
+	PINMUX_GPIO(GPIO_PB1, PB1_DATA),
+	PINMUX_GPIO(GPIO_PB0, PB0_DATA),
+
+	/* PC */
+	PINMUX_GPIO(GPIO_PC14, PC14_DATA),
+	PINMUX_GPIO(GPIO_PC13, PC13_DATA),
+	PINMUX_GPIO(GPIO_PC12, PC12_DATA),
+	PINMUX_GPIO(GPIO_PC11, PC11_DATA),
+	PINMUX_GPIO(GPIO_PC10, PC10_DATA),
+	PINMUX_GPIO(GPIO_PC9, PC9_DATA),
+	PINMUX_GPIO(GPIO_PC8, PC8_DATA),
+	PINMUX_GPIO(GPIO_PC7, PC7_DATA),
+	PINMUX_GPIO(GPIO_PC6, PC6_DATA),
+	PINMUX_GPIO(GPIO_PC5, PC5_DATA),
+	PINMUX_GPIO(GPIO_PC4, PC4_DATA),
+	PINMUX_GPIO(GPIO_PC3, PC3_DATA),
+	PINMUX_GPIO(GPIO_PC2, PC2_DATA),
+	PINMUX_GPIO(GPIO_PC1, PC1_DATA),
+	PINMUX_GPIO(GPIO_PC0, PC0_DATA),
+
+	/* PD */
+	PINMUX_GPIO(GPIO_PD15, PD15_DATA),
+	PINMUX_GPIO(GPIO_PD14, PD14_DATA),
+	PINMUX_GPIO(GPIO_PD13, PD13_DATA),
+	PINMUX_GPIO(GPIO_PD12, PD12_DATA),
+	PINMUX_GPIO(GPIO_PD11, PD11_DATA),
+	PINMUX_GPIO(GPIO_PD10, PD10_DATA),
+	PINMUX_GPIO(GPIO_PD9, PD9_DATA),
+	PINMUX_GPIO(GPIO_PD8, PD8_DATA),
+	PINMUX_GPIO(GPIO_PD7, PD7_DATA),
+	PINMUX_GPIO(GPIO_PD6, PD6_DATA),
+	PINMUX_GPIO(GPIO_PD5, PD5_DATA),
+	PINMUX_GPIO(GPIO_PD4, PD4_DATA),
+	PINMUX_GPIO(GPIO_PD3, PD3_DATA),
+	PINMUX_GPIO(GPIO_PD2, PD2_DATA),
+	PINMUX_GPIO(GPIO_PD1, PD1_DATA),
+	PINMUX_GPIO(GPIO_PD0, PD0_DATA),
+
+	/* PE */
+	PINMUX_GPIO(GPIO_PE15, PE15_DATA),
+	PINMUX_GPIO(GPIO_PE14, PE14_DATA),
+	PINMUX_GPIO(GPIO_PE13, PE13_DATA),
+	PINMUX_GPIO(GPIO_PE12, PE12_DATA),
+	PINMUX_GPIO(GPIO_PE11, PE11_DATA),
+	PINMUX_GPIO(GPIO_PE10, PE10_DATA),
+	PINMUX_GPIO(GPIO_PE9, PE9_DATA),
+	PINMUX_GPIO(GPIO_PE8, PE8_DATA),
+	PINMUX_GPIO(GPIO_PE7, PE7_DATA),
+	PINMUX_GPIO(GPIO_PE6, PE6_DATA),
+	PINMUX_GPIO(GPIO_PE5, PE5_DATA),
+	PINMUX_GPIO(GPIO_PE4, PE4_DATA),
+	PINMUX_GPIO(GPIO_PE3, PE3_DATA),
+	PINMUX_GPIO(GPIO_PE2, PE2_DATA),
+	PINMUX_GPIO(GPIO_PE1, PE1_DATA),
+	PINMUX_GPIO(GPIO_PE0, PE0_DATA),
+
+	/* PF */
+	PINMUX_GPIO(GPIO_PF30, PF30_DATA),
+	PINMUX_GPIO(GPIO_PF29, PF29_DATA),
+	PINMUX_GPIO(GPIO_PF28, PF28_DATA),
+	PINMUX_GPIO(GPIO_PF27, PF27_DATA),
+	PINMUX_GPIO(GPIO_PF26, PF26_DATA),
+	PINMUX_GPIO(GPIO_PF25, PF25_DATA),
+	PINMUX_GPIO(GPIO_PF24, PF24_DATA),
+	PINMUX_GPIO(GPIO_PF23, PF23_DATA),
+	PINMUX_GPIO(GPIO_PF22, PF22_DATA),
+	PINMUX_GPIO(GPIO_PF21, PF21_DATA),
+	PINMUX_GPIO(GPIO_PF20, PF20_DATA),
+	PINMUX_GPIO(GPIO_PF19, PF19_DATA),
+	PINMUX_GPIO(GPIO_PF18, PF18_DATA),
+	PINMUX_GPIO(GPIO_PF17, PF17_DATA),
+	PINMUX_GPIO(GPIO_PF16, PF16_DATA),
+	PINMUX_GPIO(GPIO_PF15, PF15_DATA),
+	PINMUX_GPIO(GPIO_PF14, PF14_DATA),
+	PINMUX_GPIO(GPIO_PF13, PF13_DATA),
+	PINMUX_GPIO(GPIO_PF12, PF12_DATA),
+	PINMUX_GPIO(GPIO_PF11, PF11_DATA),
+	PINMUX_GPIO(GPIO_PF10, PF10_DATA),
+	PINMUX_GPIO(GPIO_PF9, PF9_DATA),
+	PINMUX_GPIO(GPIO_PF8, PF8_DATA),
+	PINMUX_GPIO(GPIO_PF7, PF7_DATA),
+	PINMUX_GPIO(GPIO_PF6, PF6_DATA),
+	PINMUX_GPIO(GPIO_PF5, PF5_DATA),
+	PINMUX_GPIO(GPIO_PF4, PF4_DATA),
+	PINMUX_GPIO(GPIO_PF3, PF3_DATA),
+	PINMUX_GPIO(GPIO_PF2, PF2_DATA),
+	PINMUX_GPIO(GPIO_PF1, PF1_DATA),
+	PINMUX_GPIO(GPIO_PF0, PF0_DATA),
+
+	/* INTC */
+	PINMUX_GPIO(GPIO_FN_PINT7_PB, PINT7_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT6_PB, PINT6_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT5_PB, PINT5_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT4_PB, PINT4_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT3_PB, PINT3_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT2_PB, PINT2_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT1_PB, PINT1_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT0_PB, PINT0_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT7_PD, PINT7_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT6_PD, PINT6_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT5_PD, PINT5_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT4_PD, PINT4_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT3_PD, PINT3_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT2_PD, PINT2_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT1_PD, PINT1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT0_PD, PINT0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ7_PB, IRQ7_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ6_PB, IRQ6_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ5_PB, IRQ5_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4_PB, IRQ4_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3_PB, IRQ3_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2_PB, IRQ2_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1_PB, IRQ1_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ0_PB, IRQ0_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ7_PD, IRQ7_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ6_PD, IRQ6_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ5_PD, IRQ5_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4_PD, IRQ4_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3_PD, IRQ3_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2_PD, IRQ2_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1_PD, IRQ1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ0_PD, IRQ0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ7_PE, IRQ7_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ6_PE, IRQ6_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ5_PE, IRQ5_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4_PE, IRQ4_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3_PE, IRQ3_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2_PE, IRQ2_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1_PE, IRQ1_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ0_PE, IRQ0_PE_MARK),
+
+	PINMUX_GPIO(GPIO_FN_WDTOVF, WDTOVF_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQOUT, IRQOUT_MARK),
+	PINMUX_GPIO(GPIO_FN_REFOUT, REFOUT_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQOUT_REFOUT, IRQOUT_REFOUT_MARK),
+	PINMUX_GPIO(GPIO_FN_UBCTRG, UBCTRG_MARK),
+
+	/* CAN */
+	PINMUX_GPIO(GPIO_FN_CTX1, CTX1_MARK),
+	PINMUX_GPIO(GPIO_FN_CRX1, CRX1_MARK),
+	PINMUX_GPIO(GPIO_FN_CTX0, CTX0_MARK),
+	PINMUX_GPIO(GPIO_FN_CTX0_CTX1, CTX0_CTX1_MARK),
+	PINMUX_GPIO(GPIO_FN_CRX0, CRX0_MARK),
+	PINMUX_GPIO(GPIO_FN_CRX0_CRX1, CRX0_CRX1_MARK),
+
+	/* IIC3 */
+	PINMUX_GPIO(GPIO_FN_SDA3, SDA3_MARK),
+	PINMUX_GPIO(GPIO_FN_SCL3, SCL3_MARK),
+	PINMUX_GPIO(GPIO_FN_SDA2, SDA2_MARK),
+	PINMUX_GPIO(GPIO_FN_SCL2, SCL2_MARK),
+	PINMUX_GPIO(GPIO_FN_SDA1, SDA1_MARK),
+	PINMUX_GPIO(GPIO_FN_SCL1, SCL1_MARK),
+	PINMUX_GPIO(GPIO_FN_SDA0, SDA0_MARK),
+	PINMUX_GPIO(GPIO_FN_SCL0, SCL0_MARK),
+
+	/* DMAC */
+	PINMUX_GPIO(GPIO_FN_TEND0_PD, TEND0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TEND0_PE, TEND0_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK0_PD, DACK0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK0_PE, DACK0_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ0_PD, DREQ0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ0_PE, DREQ0_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_TEND1_PD, TEND1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TEND1_PE, TEND1_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK1_PD, DACK1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK1_PE, DACK1_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ1_PD, DREQ1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ1_PE, DREQ1_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK2, DACK2_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ2, DREQ2_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK3, DACK3_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ3, DREQ3_MARK),
+
+	/* ADC */
+	PINMUX_GPIO(GPIO_FN_ADTRG_PD, ADTRG_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_ADTRG_PE, ADTRG_PE_MARK),
+
+	/* BSC */
+	PINMUX_GPIO(GPIO_FN_D31, D31_MARK),
+	PINMUX_GPIO(GPIO_FN_D30, D30_MARK),
+	PINMUX_GPIO(GPIO_FN_D29, D29_MARK),
+	PINMUX_GPIO(GPIO_FN_D28, D28_MARK),
+	PINMUX_GPIO(GPIO_FN_D27, D27_MARK),
+	PINMUX_GPIO(GPIO_FN_D26, D26_MARK),
+	PINMUX_GPIO(GPIO_FN_D25, D25_MARK),
+	PINMUX_GPIO(GPIO_FN_D24, D24_MARK),
+	PINMUX_GPIO(GPIO_FN_D23, D23_MARK),
+	PINMUX_GPIO(GPIO_FN_D22, D22_MARK),
+	PINMUX_GPIO(GPIO_FN_D21, D21_MARK),
+	PINMUX_GPIO(GPIO_FN_D20, D20_MARK),
+	PINMUX_GPIO(GPIO_FN_D19, D19_MARK),
+	PINMUX_GPIO(GPIO_FN_D18, D18_MARK),
+	PINMUX_GPIO(GPIO_FN_D17, D17_MARK),
+	PINMUX_GPIO(GPIO_FN_D16, D16_MARK),
+	PINMUX_GPIO(GPIO_FN_A25, A25_MARK),
+	PINMUX_GPIO(GPIO_FN_A24, A24_MARK),
+	PINMUX_GPIO(GPIO_FN_A23, A23_MARK),
+	PINMUX_GPIO(GPIO_FN_A22, A22_MARK),
+	PINMUX_GPIO(GPIO_FN_A21, A21_MARK),
+	PINMUX_GPIO(GPIO_FN_CS4, CS4_MARK),
+	PINMUX_GPIO(GPIO_FN_MRES, MRES_MARK),
+	PINMUX_GPIO(GPIO_FN_BS, BS_MARK),
+	PINMUX_GPIO(GPIO_FN_IOIS16, IOIS16_MARK),
+	PINMUX_GPIO(GPIO_FN_CS1, CS1_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6_CE1B, CS6_CE1B_MARK),
+	PINMUX_GPIO(GPIO_FN_CE2B, CE2B_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5_CE1A, CS5_CE1A_MARK),
+	PINMUX_GPIO(GPIO_FN_CE2A, CE2A_MARK),
+	PINMUX_GPIO(GPIO_FN_FRAME, FRAME_MARK),
+	PINMUX_GPIO(GPIO_FN_WAIT, WAIT_MARK),
+	PINMUX_GPIO(GPIO_FN_RDWR, RDWR_MARK),
+	PINMUX_GPIO(GPIO_FN_CKE, CKE_MARK),
+	PINMUX_GPIO(GPIO_FN_CASU, CASU_MARK),
+	PINMUX_GPIO(GPIO_FN_BREQ, BREQ_MARK),
+	PINMUX_GPIO(GPIO_FN_RASU, RASU_MARK),
+	PINMUX_GPIO(GPIO_FN_BACK, BACK_MARK),
+	PINMUX_GPIO(GPIO_FN_CASL, CASL_MARK),
+	PINMUX_GPIO(GPIO_FN_RASL, RASL_MARK),
+	PINMUX_GPIO(GPIO_FN_WE3_DQMUU_AH_ICIO_WR, WE3_DQMUU_AH_ICIO_WR_MARK),
+	PINMUX_GPIO(GPIO_FN_WE2_DQMUL_ICIORD, WE2_DQMUL_ICIORD_MARK),
+	PINMUX_GPIO(GPIO_FN_WE1_DQMLU_WE, WE1_DQMLU_WE_MARK),
+	PINMUX_GPIO(GPIO_FN_WE0_DQMLL, WE0_DQMLL_MARK),
+	PINMUX_GPIO(GPIO_FN_CS3, CS3_MARK),
+	PINMUX_GPIO(GPIO_FN_CS2, CS2_MARK),
+	PINMUX_GPIO(GPIO_FN_A1, A1_MARK),
+	PINMUX_GPIO(GPIO_FN_A0, A0_MARK),
+	PINMUX_GPIO(GPIO_FN_CS7, CS7_MARK),
+
+	/* TMU */
+	PINMUX_GPIO(GPIO_FN_TIOC4D, TIOC4D_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC4C, TIOC4C_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC4B, TIOC4B_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC4A, TIOC4A_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC3D, TIOC3D_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC3C, TIOC3C_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC3B, TIOC3B_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC3A, TIOC3A_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC2B, TIOC2B_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC1B, TIOC1B_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC2A, TIOC2A_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC1A, TIOC1A_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC0D, TIOC0D_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC0C, TIOC0C_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC0B, TIOC0B_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC0A, TIOC0A_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKD_PD, TCLKD_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKC_PD, TCLKC_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKB_PD, TCLKB_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKA_PD, TCLKA_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKD_PF, TCLKD_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKC_PF, TCLKC_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKB_PF, TCLKB_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKA_PF, TCLKA_PF_MARK),
+
+	/* SSU */
+	PINMUX_GPIO(GPIO_FN_SCS0_PD, SCS0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSO0_PD, SSO0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSI0_PD, SSI0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSCK0_PD, SSCK0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCS0_PF, SCS0_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSO0_PF, SSO0_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSI0_PF, SSI0_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSCK0_PF, SSCK0_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SCS1_PD, SCS1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSO1_PD, SSO1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSI1_PD, SSI1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSCK1_PD, SSCK1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCS1_PF, SCS1_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSO1_PF, SSO1_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSI1_PF, SSI1_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSCK1_PF, SSCK1_PF_MARK),
+
+	/* SCIF */
+	PINMUX_GPIO(GPIO_FN_TXD0, TXD0_MARK),
+	PINMUX_GPIO(GPIO_FN_RXD0, RXD0_MARK),
+	PINMUX_GPIO(GPIO_FN_SCK0, SCK0_MARK),
+	PINMUX_GPIO(GPIO_FN_TXD1, TXD1_MARK),
+	PINMUX_GPIO(GPIO_FN_RXD1, RXD1_MARK),
+	PINMUX_GPIO(GPIO_FN_SCK1, SCK1_MARK),
+	PINMUX_GPIO(GPIO_FN_TXD2, TXD2_MARK),
+	PINMUX_GPIO(GPIO_FN_RXD2, RXD2_MARK),
+	PINMUX_GPIO(GPIO_FN_SCK2, SCK2_MARK),
+	PINMUX_GPIO(GPIO_FN_RTS3, RTS3_MARK),
+	PINMUX_GPIO(GPIO_FN_CTS3, CTS3_MARK),
+	PINMUX_GPIO(GPIO_FN_TXD3, TXD3_MARK),
+	PINMUX_GPIO(GPIO_FN_RXD3, RXD3_MARK),
+	PINMUX_GPIO(GPIO_FN_SCK3, SCK3_MARK),
+
+	/* SSI */
+	PINMUX_GPIO(GPIO_FN_AUDIO_CLK, AUDIO_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIDATA3, SSIDATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIWS3, SSIWS3_MARK),
+	PINMUX_GPIO(GPIO_FN_SSISCK3, SSISCK3_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIDATA2, SSIDATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIWS2, SSIWS2_MARK),
+	PINMUX_GPIO(GPIO_FN_SSISCK2, SSISCK2_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIDATA1, SSIDATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIWS1, SSIWS1_MARK),
+	PINMUX_GPIO(GPIO_FN_SSISCK1, SSISCK1_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIDATA0, SSIDATA0_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIWS0, SSIWS0_MARK),
+	PINMUX_GPIO(GPIO_FN_SSISCK0, SSISCK0_MARK),
+
+	/* FLCTL */
+	PINMUX_GPIO(GPIO_FN_FCE, FCE_MARK),
+	PINMUX_GPIO(GPIO_FN_FRB, FRB_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF7, NAF7_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF6, NAF6_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF5, NAF5_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF4, NAF4_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF3, NAF3_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF2, NAF2_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF1, NAF1_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF0, NAF0_MARK),
+	PINMUX_GPIO(GPIO_FN_FSC, FSC_MARK),
+	PINMUX_GPIO(GPIO_FN_FOE, FOE_MARK),
+	PINMUX_GPIO(GPIO_FN_FCDE, FCDE_MARK),
+	PINMUX_GPIO(GPIO_FN_FWE, FWE_MARK),
+
+	/* LCDC */
+	PINMUX_GPIO(GPIO_FN_LCD_VEPWC, LCD_VEPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_VCPWC, LCD_VCPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_CLK, LCD_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_FLM, LCD_FLM_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_M_DISP, LCD_M_DISP_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_CL2, LCD_CL2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_CL1, LCD_CL1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DON, LCD_DON_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA15, LCD_DATA15_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA14, LCD_DATA14_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA13, LCD_DATA13_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA12, LCD_DATA12_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA11, LCD_DATA11_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA10, LCD_DATA10_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA9, LCD_DATA9_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA8, LCD_DATA8_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA7, LCD_DATA7_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA6, LCD_DATA6_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA5, LCD_DATA5_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA4, LCD_DATA4_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA3, LCD_DATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA2, LCD_DATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA1, LCD_DATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA0, LCD_DATA0_MARK),
+};
+
+static struct pinmux_cfg_reg pinmux_config_regs[] = {
+	{ PINMUX_CFG_REG("PBIORL", 0xfffe3886, 16, 1) {
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		PB11_IN, PB11_OUT,
+		PB10_IN, PB10_OUT,
+		PB9_IN, PB9_OUT,
+		PB8_IN, PB8_OUT,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0 }
+	},
+	{ PINMUX_CFG_REG("PBCRL4", 0xfffe3890, 16, 4) {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB12MD_00, PB12MD_01, PB12MD_10, PB12MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PBCRL3", 0xfffe3892, 16, 4) {
+		PB11MD_0, PB11MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB10MD_0, PB10MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB9MD_00, PB9MD_01, PB9MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB8MD_00, PB8MD_01, PB8MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PBCRL2", 0xfffe3894, 16, 4) {
+		PB7MD_00, PB7MD_01, PB7MD_10, PB7MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB6MD_00, PB6MD_01, PB6MD_10, PB6MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB5MD_00, PB5MD_01, PB5MD_10, PB5MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB4MD_00, PB4MD_01, PB4MD_10, PB4MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PBCRL1", 0xfffe3896, 16, 4) {
+		PB3MD_00, PB3MD_01, PB3MD_10, PB3MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB2MD_00, PB2MD_01, PB2MD_10, PB2MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB1MD_00, PB1MD_01, PB1MD_10, PB1MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB0MD_00, PB0MD_01, PB0MD_10, PB0MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("IFCR", 0xfffe38a2, 16, 4) {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB12IRQ_00, PB12IRQ_01, PB12IRQ_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PCIORL", 0xfffe3906, 16, 1) {
+		0, 0,
+		PC14_IN, PC14_OUT,
+		PC13_IN, PC13_OUT,
+		PC12_IN, PC12_OUT,
+		PC11_IN, PC11_OUT,
+		PC10_IN, PC10_OUT,
+		PC9_IN, PC9_OUT,
+		PC8_IN, PC8_OUT,
+		PC7_IN, PC7_OUT,
+		PC6_IN, PC6_OUT,
+		PC5_IN, PC5_OUT,
+		PC4_IN, PC4_OUT,
+		PC3_IN, PC3_OUT,
+		PC2_IN, PC2_OUT,
+		PC1_IN, PC1_OUT,
+		PC0_IN, PC0_OUT }
+	},
+	{ PINMUX_CFG_REG("PCCRL4", 0xfffe3910, 16, 4) {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC14MD_0, PC14MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC13MD_0, PC13MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC12MD_0, PC12MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PCCRL3", 0xfffe3912, 16, 4) {
+		PC11MD_00, PC11MD_01, PC11MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC10MD_00, PC10MD_01, PC10MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC9MD_0, PC9MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC8MD_0, PC8MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PCCRL2", 0xfffe3914, 16, 4) {
+		PC7MD_0, PC7MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC6MD_0, PC6MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC5MD_0, PC5MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC4MD_0, PC4MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PCCRL1", 0xfffe3916, 16, 4) {
+		PC3MD_0, PC3MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC2MD_0, PC2MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC1MD_0, PC1MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC0MD_00, PC0MD_01, PC0MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PDIORL", 0xfffe3986, 16, 1) {
+		PD15_IN, PD15_OUT,
+		PD14_IN, PD14_OUT,
+		PD13_IN, PD13_OUT,
+		PD12_IN, PD12_OUT,
+		PD11_IN, PD11_OUT,
+		PD10_IN, PD10_OUT,
+		PD9_IN, PD9_OUT,
+		PD8_IN, PD8_OUT,
+		PD7_IN, PD7_OUT,
+		PD6_IN, PD6_OUT,
+		PD5_IN, PD5_OUT,
+		PD4_IN, PD4_OUT,
+		PD3_IN, PD3_OUT,
+		PD2_IN, PD2_OUT,
+		PD1_IN, PD1_OUT,
+		PD0_IN, PD0_OUT }
+	},
+	{ PINMUX_CFG_REG("PDCRL4", 0xfffe3990, 16, 4) {
+		PD15MD_000, PD15MD_001, PD15MD_010, 0,
+		PD15MD_100, PD15MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD14MD_000, PD14MD_001, PD14MD_010, 0,
+		0, PD14MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD13MD_000, PD13MD_001, PD13MD_010, 0,
+		PD13MD_100, PD13MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD12MD_000, PD12MD_001, PD12MD_010, 0,
+		PD12MD_100, PD12MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PDCRL3", 0xfffe3992, 16, 4) {
+		PD11MD_000, PD11MD_001, PD11MD_010, 0,
+		PD11MD_100, PD11MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD10MD_000, PD10MD_001, PD10MD_010, 0,
+		PD10MD_100, PD10MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD9MD_000, PD9MD_001, PD9MD_010, 0,
+		PD9MD_100, PD9MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD8MD_000, PD8MD_001, PD8MD_010, 0,
+		PD8MD_100, PD8MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PDCRL2", 0xfffe3994, 16, 4) {
+		PD7MD_000, PD7MD_001, PD7MD_010, PD7MD_011,
+		PD7MD_100, PD7MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD6MD_000, PD6MD_001, PD6MD_010, PD6MD_011,
+		PD6MD_100, PD6MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD5MD_000, PD5MD_001, PD5MD_010, PD5MD_011,
+		PD5MD_100, PD5MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD4MD_000, PD4MD_001, PD4MD_010, PD4MD_011,
+		PD4MD_100, PD4MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PDCRL1", 0xfffe3996, 16, 4) {
+		PD3MD_000, PD3MD_001, PD3MD_010, PD3MD_011,
+		PD3MD_100, PD3MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD2MD_000, PD2MD_001, PD2MD_010, PD2MD_011,
+		PD2MD_100, PD2MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD1MD_000, PD1MD_001, PD1MD_010, PD1MD_011,
+		PD1MD_100, PD1MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD0MD_000, PD0MD_001, PD0MD_010, PD0MD_011,
+		PD0MD_100, PD0MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PEIORL", 0xfffe3a06, 16, 1) {
+		PE15_IN, PE15_OUT,
+		PE14_IN, PE14_OUT,
+		PE13_IN, PE13_OUT,
+		PE12_IN, PE12_OUT,
+		PE11_IN, PE11_OUT,
+		PE10_IN, PE10_OUT,
+		PE9_IN, PE9_OUT,
+		PE8_IN, PE8_OUT,
+		PE7_IN, PE7_OUT,
+		PE6_IN, PE6_OUT,
+		PE5_IN, PE5_OUT,
+		PE4_IN, PE4_OUT,
+		PE3_IN, PE3_OUT,
+		PE2_IN, PE2_OUT,
+		PE1_IN, PE1_OUT,
+		PE0_IN, PE0_OUT }
+	},
+	{ PINMUX_CFG_REG("PECRL4", 0xfffe3a10, 16, 4) {
+		PE15MD_00, PE15MD_01, 0, PE15MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE14MD_00, PE14MD_01, 0, PE14MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE13MD_00, 0, 0, PE13MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE12MD_00, 0, 0, PE12MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PECRL3", 0xfffe3a12, 16, 4) {
+		PE11MD_000, PE11MD_001, PE11MD_010, 0,
+		PE11MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE10MD_000, PE10MD_001, PE10MD_010, 0,
+		PE10MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE9MD_00, PE9MD_01, PE9MD_10, PE9MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE8MD_00, PE8MD_01, PE8MD_10, PE8MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PECRL2", 0xfffe3a14, 16, 4) {
+		PE7MD_000, PE7MD_001, PE7MD_010, PE7MD_011,
+		PE7MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE6MD_000, PE6MD_001, PE6MD_010, PE6MD_011,
+		PE6MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE5MD_000, PE5MD_001, PE5MD_010, PE5MD_011,
+		PE5MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE4MD_000, PE4MD_001, PE4MD_010, PE4MD_011,
+		PE4MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PECRL1", 0xfffe3a16, 16, 4) {
+		PE3MD_00, PE3MD_01, 0, PE3MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE2MD_00, PE2MD_01, 0, PE2MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE1MD_00, PE1MD_01, PE1MD_10, PE1MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE0MD_000, PE0MD_001, 0, PE0MD_011,
+		PE0MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFIORH", 0xfffe3a84, 16, 1) {
+		0, 0,
+		PF30_IN, PF30_OUT,
+		PF29_IN, PF29_OUT,
+		PF28_IN, PF28_OUT,
+		PF27_IN, PF27_OUT,
+		PF26_IN, PF26_OUT,
+		PF25_IN, PF25_OUT,
+		PF24_IN, PF24_OUT,
+		PF23_IN, PF23_OUT,
+		PF22_IN, PF22_OUT,
+		PF21_IN, PF21_OUT,
+		PF20_IN, PF20_OUT,
+		PF19_IN, PF19_OUT,
+		PF18_IN, PF18_OUT,
+		PF17_IN, PF17_OUT,
+		PF16_IN, PF16_OUT }
+	},
+	{ PINMUX_CFG_REG("PFIORL", 0xfffe3a86, 16, 1) {
+		PF15_IN, PF15_OUT,
+		PF14_IN, PF14_OUT,
+		PF13_IN, PF13_OUT,
+		PF12_IN, PF12_OUT,
+		PF11_IN, PF11_OUT,
+		PF10_IN, PF10_OUT,
+		PF9_IN, PF9_OUT,
+		PF8_IN, PF8_OUT,
+		PF7_IN, PF7_OUT,
+		PF6_IN, PF6_OUT,
+		PF5_IN, PF5_OUT,
+		PF4_IN, PF4_OUT,
+		PF3_IN, PF3_OUT,
+		PF2_IN, PF2_OUT,
+		PF1_IN, PF1_OUT,
+		PF0_IN, PF0_OUT }
+	},
+	{ PINMUX_CFG_REG("PFCRH4", 0xfffe3a88, 16, 4) {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF30MD_0, PF30MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF29MD_0, PF29MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF28MD_0, PF28MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRH3", 0xfffe3a8a, 16, 4) {
+		PF27MD_0, PF27MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF26MD_0, PF26MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF25MD_0, PF25MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF24MD_0, PF24MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRH2", 0xfffe3a8c, 16, 4) {
+		PF23MD_00, PF23MD_01, PF23MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF22MD_00, PF22MD_01, PF22MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF21MD_00, PF21MD_01, PF21MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF20MD_00, PF20MD_01, PF20MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRH1", 0xfffe3a8e, 16, 4) {
+		PF19MD_00, PF19MD_01, PF19MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF18MD_00, PF18MD_01, PF18MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF17MD_00, PF17MD_01, PF17MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF16MD_00, PF16MD_01, PF16MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRL4", 0xfffe3a90, 16, 4) {
+		PF15MD_00, PF15MD_01, PF15MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF14MD_00, PF14MD_01, PF14MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF13MD_00, PF13MD_01, PF13MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF12MD_00, PF12MD_01, PF12MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRL3", 0xfffe3a92, 16, 4) {
+		PF11MD_00, PF11MD_01, PF11MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF10MD_00, PF10MD_01, PF10MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF9MD_00, PF9MD_01, PF9MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF8MD_00, PF8MD_01, PF8MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRL2", 0xfffe3a94, 16, 4) {
+		PF7MD_00, PF7MD_01, PF7MD_10, PF7MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF6MD_00, PF6MD_01, PF6MD_10, PF6MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF5MD_00, PF5MD_01, PF5MD_10, PF5MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF4MD_00, PF4MD_01, PF4MD_10, PF4MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRL1", 0xfffe3a96, 16, 4) {
+		PF3MD_00, PF3MD_01, PF3MD_10, PF3MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF2MD_00, PF2MD_01, PF2MD_10, PF2MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF1MD_00, PF1MD_01, PF1MD_10, PF1MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF0MD_00, PF0MD_01, PF0MD_10, PF0MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{}
+};
+
+static struct pinmux_data_reg pinmux_data_regs[] = {
+	{ PINMUX_DATA_REG("PADRL", 0xfffe3802, 16) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PA7_DATA, PA6_DATA, PA5_DATA, PA4_DATA,
+		PA3_DATA, PA2_DATA, PA1_DATA, PA0_DATA }
+	},
+	{ PINMUX_DATA_REG("PBDRL", 0xfffe3882, 16) {
+		0, 0, 0, PB12_DATA,
+		PB11_DATA, PB10_DATA, PB9_DATA, PB8_DATA,
+		PB7_DATA, PB6_DATA, PB5_DATA, PB4_DATA,
+		PB3_DATA, PB2_DATA, PB1_DATA, PB0_DATA }
+	},
+	{ PINMUX_DATA_REG("PCDRL", 0xfffe3902, 16) {
+		0, PC14_DATA, PC13_DATA, PC12_DATA,
+		PC11_DATA, PC10_DATA, PC9_DATA, PC8_DATA,
+		PC7_DATA, PC6_DATA, PC5_DATA, PC4_DATA,
+		PC3_DATA, PC2_DATA, PC1_DATA, PC0_DATA }
+	},
+	{ PINMUX_DATA_REG("PDDRL", 0xfffe3982, 16) {
+		PD15_DATA, PD14_DATA, PD13_DATA, PD12_DATA,
+		PD11_DATA, PD10_DATA, PD9_DATA, PD8_DATA,
+		PD7_DATA, PD6_DATA, PD5_DATA, PD4_DATA,
+		PD3_DATA, PD2_DATA, PD1_DATA, PD0_DATA }
+	},
+	{ PINMUX_DATA_REG("PEDRL", 0xfffe3a02, 16) {
+		PE15_DATA, PE14_DATA, PE13_DATA, PE12_DATA,
+		PE11_DATA, PE10_DATA, PE9_DATA, PE8_DATA,
+		PE7_DATA, PE6_DATA, PE5_DATA, PE4_DATA,
+		PE3_DATA, PE2_DATA, PE1_DATA, PE0_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDRH", 0xfffe3a80, 16) {
+		0, PF30_DATA, PF29_DATA, PF28_DATA,
+		PF27_DATA, PF26_DATA, PF25_DATA, PF24_DATA,
+		PF23_DATA, PF22_DATA, PF21_DATA, PF20_DATA,
+		PF19_DATA, PF18_DATA, PF17_DATA, PF16_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDRL", 0xfffe3a82, 16) {
+		PF15_DATA, PF14_DATA, PF13_DATA, PF12_DATA,
+		PF11_DATA, PF10_DATA, PF9_DATA, PF8_DATA,
+		PF7_DATA, PF6_DATA, PF5_DATA, PF4_DATA,
+		PF3_DATA, PF2_DATA, PF1_DATA, PF0_DATA }
+	},
+	{ },
+};
+
+static struct pinmux_info sh7203_pinmux_info = {
+	.name = "sh7203_pfc",
+	.reserved_id = PINMUX_RESERVED,
+	.data = { PINMUX_DATA_BEGIN, PINMUX_DATA_END },
+	.input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END },
+	.output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END },
+	.mark = { PINMUX_MARK_BEGIN, PINMUX_MARK_END },
+	.function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END },
+
+	.first_gpio = GPIO_PA7,
+	.last_gpio = GPIO_FN_LCD_DATA0,
+
+	.gpios = pinmux_gpios,
+	.cfg_regs = pinmux_config_regs,
+	.data_regs = pinmux_data_regs,
+
+	.gpio_data = pinmux_data,
+	.gpio_data_size = ARRAY_SIZE(pinmux_data),
+};
+
+static int __init plat_pinmux_setup(void)
+{
+	return register_pinmux(&sh7203_pinmux_info);
+}
+
+arch_initcall(plat_pinmux_setup);
-- 
GitLab


From 7a5c679b52d06471b0b08a39ed5c9e843eab13dd Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 8 Oct 2008 20:42:38 +0900
Subject: [PATCH 648/895] sh: Use sh7203 GPIO on rsk7203 board

Make the rsk7203 board use the newly added sh7203 pinmux code.
Only a single LED plus the serial console pins for now.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/Kconfig         |  1 +
 arch/sh/boards/board-rsk7203.c | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index 2a059191efef..6c4cbf15ce88 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -128,6 +128,7 @@ config SH_RTS7751R2D
 
 config SH_RSK7203
 	bool "RSK7203"
+	select GENERIC_GPIO
 	depends on CPU_SUBTYPE_SH7203
 
 config SH_SDK7780
diff --git a/arch/sh/boards/board-rsk7203.c b/arch/sh/boards/board-rsk7203.c
index ffbedc59a973..ded799cf3eae 100644
--- a/arch/sh/boards/board-rsk7203.c
+++ b/arch/sh/boards/board-rsk7203.c
@@ -16,8 +16,10 @@
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/map.h>
 #include <linux/smc911x.h>
+#include <linux/gpio.h>
 #include <asm/machvec.h>
 #include <asm/io.h>
+#include <asm/sh7203.h>
 
 static struct smc911x_platdata smc911x_info = {
 	.flags		= SMC911X_USE_16BIT,
@@ -122,6 +124,15 @@ static struct platform_device *rsk7203_devices[] __initdata = {
 
 static int __init rsk7203_devices_setup(void)
 {
+	/* Select pins for SCIF0 */
+	gpio_request(GPIO_FN_TXD0, NULL);
+	gpio_request(GPIO_FN_RXD0, NULL);
+
+	/* Lit LED0 */
+	gpio_request(GPIO_PE10, NULL);
+	gpio_direction_output(GPIO_PE10, 0);
+	gpio_export(GPIO_PE10, 0);
+
 	set_mtd_partitions();
 	return platform_add_devices(rsk7203_devices,
 				    ARRAY_SIZE(rsk7203_devices));
-- 
GitLab


From c9c3c1b74d3fa304884b5f42bfaffcf71ac6a96c Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 8 Oct 2008 20:42:46 +0900
Subject: [PATCH 649/895] sh: Add sh7720 pinmux code

This patch adds pinmux and gpio support for the sh7720 processor.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/sh7720.h           |  174 ++++
 arch/sh/kernel/cpu/sh3/Makefile        |    4 +
 arch/sh/kernel/cpu/sh3/pinmux-sh7720.c | 1242 ++++++++++++++++++++++++
 3 files changed, 1420 insertions(+)
 create mode 100644 arch/sh/include/asm/sh7720.h
 create mode 100644 arch/sh/kernel/cpu/sh3/pinmux-sh7720.c

diff --git a/arch/sh/include/asm/sh7720.h b/arch/sh/include/asm/sh7720.h
new file mode 100644
index 000000000000..41c1406d6da2
--- /dev/null
+++ b/arch/sh/include/asm/sh7720.h
@@ -0,0 +1,174 @@
+#ifndef __ASM_SH7720_H__
+#define __ASM_SH7720_H__
+
+enum {
+	/* PTA */
+	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
+	GPIO_PTA3, GPIO_PTA2, GPIO_PTA1, GPIO_PTA0,
+
+	/* PTB */
+	GPIO_PTB7, GPIO_PTB6, GPIO_PTB5, GPIO_PTB4,
+	GPIO_PTB3, GPIO_PTB2, GPIO_PTB1, GPIO_PTB0,
+
+	/* PTC */
+	GPIO_PTC7, GPIO_PTC6, GPIO_PTC5, GPIO_PTC4,
+	GPIO_PTC3, GPIO_PTC2, GPIO_PTC1, GPIO_PTC0,
+
+	/* PTD */
+	GPIO_PTD7, GPIO_PTD6, GPIO_PTD5, GPIO_PTD4,
+	GPIO_PTD3, GPIO_PTD2, GPIO_PTD1, GPIO_PTD0,
+
+	/* PTE */
+	GPIO_PTE6, GPIO_PTE5, GPIO_PTE4, GPIO_PTE3,
+	GPIO_PTE2, GPIO_PTE1, GPIO_PTE0,
+
+	/* PTF */
+	GPIO_PTF6, GPIO_PTF5, GPIO_PTF4, GPIO_PTF3,
+	GPIO_PTF2, GPIO_PTF1, GPIO_PTF0, GPIO_PTG6,
+
+	/* PTG */
+	GPIO_PTG5, GPIO_PTG4, GPIO_PTG3, GPIO_PTG2,
+	GPIO_PTG1, GPIO_PTG0,
+
+	/* PTH */
+	GPIO_PTH6, GPIO_PTH5, GPIO_PTH4, GPIO_PTH3,
+	GPIO_PTH2, GPIO_PTH1, GPIO_PTH0,
+
+	/* PTJ */
+	GPIO_PTJ6, GPIO_PTJ5, GPIO_PTJ4, GPIO_PTJ3,
+	GPIO_PTJ2, GPIO_PTJ1, GPIO_PTJ0,
+
+	/* PTK */
+	GPIO_PTK3, GPIO_PTK2, GPIO_PTK1, GPIO_PTK0,
+
+	/* PTL */
+	GPIO_PTL7, GPIO_PTL6, GPIO_PTL5, GPIO_PTL4, GPIO_PTL3,
+
+	/* PTM */
+	GPIO_PTM7, GPIO_PTM6, GPIO_PTM5, GPIO_PTM4,
+	GPIO_PTM3, GPIO_PTM2, GPIO_PTM1, GPIO_PTM0,
+
+	/* PTP */
+	GPIO_PTP4, GPIO_PTP3, GPIO_PTP2, GPIO_PTP1, GPIO_PTP0,
+
+	/* PTR */
+	GPIO_PTR7, GPIO_PTR6, GPIO_PTR5, GPIO_PTR4,
+	GPIO_PTR3, GPIO_PTR2, GPIO_PTR1, GPIO_PTR0,
+
+	/* PTS */
+	GPIO_PTS4, GPIO_PTS3, GPIO_PTS2, GPIO_PTS1, GPIO_PTS0,
+
+	/* PTT */
+	GPIO_PTT4, GPIO_PTT3, GPIO_PTT2, GPIO_PTT1, GPIO_PTT0,
+
+	/* PTU */
+	GPIO_PTU4, GPIO_PTU3, GPIO_PTU2, GPIO_PTU1, GPIO_PTU0,
+
+	/* PTV */
+	GPIO_PTV4, GPIO_PTV3, GPIO_PTV2, GPIO_PTV1, GPIO_PTV0,
+
+	/* BSC */
+	GPIO_FN_D31, GPIO_FN_D30, GPIO_FN_D29, GPIO_FN_D28,
+	GPIO_FN_D27, GPIO_FN_D26, GPIO_FN_D25, GPIO_FN_D24,
+	GPIO_FN_D23, GPIO_FN_D22, GPIO_FN_D21, GPIO_FN_D20,
+	GPIO_FN_D19, GPIO_FN_D18, GPIO_FN_D17, GPIO_FN_D16,
+	GPIO_FN_IOIS16, GPIO_FN_RAS, GPIO_FN_CAS, GPIO_FN_CKE,
+	GPIO_FN_CS5B_CE1A, GPIO_FN_CS6B_CE1B,
+	GPIO_FN_A25, GPIO_FN_A24, GPIO_FN_A23, GPIO_FN_A22,
+	GPIO_FN_A21, GPIO_FN_A20, GPIO_FN_A19, GPIO_FN_A0,
+	GPIO_FN_REFOUT, GPIO_FN_IRQOUT,
+
+	/* LCDC */
+	GPIO_FN_LCD_DATA15, GPIO_FN_LCD_DATA14,
+	GPIO_FN_LCD_DATA13, GPIO_FN_LCD_DATA12,
+	GPIO_FN_LCD_DATA11, GPIO_FN_LCD_DATA10,
+	GPIO_FN_LCD_DATA9, GPIO_FN_LCD_DATA8,
+	GPIO_FN_LCD_DATA7, GPIO_FN_LCD_DATA6,
+	GPIO_FN_LCD_DATA5, GPIO_FN_LCD_DATA4,
+	GPIO_FN_LCD_DATA3, GPIO_FN_LCD_DATA2,
+	GPIO_FN_LCD_DATA1, GPIO_FN_LCD_DATA0,
+	GPIO_FN_LCD_M_DISP,
+	GPIO_FN_LCD_CL1, GPIO_FN_LCD_CL2,
+	GPIO_FN_LCD_DON, GPIO_FN_LCD_FLM,
+	GPIO_FN_LCD_VEPWC, GPIO_FN_LCD_VCPWC,
+
+	/* AFEIF */
+	GPIO_FN_AFE_RXIN, GPIO_FN_AFE_RDET,
+	GPIO_FN_AFE_FS, GPIO_FN_AFE_TXOUT,
+	GPIO_FN_AFE_SCLK, GPIO_FN_AFE_RLYCNT,
+	GPIO_FN_AFE_HC1,
+
+	/* IIC */
+	GPIO_FN_IIC_SCL, GPIO_FN_IIC_SDA,
+
+	/* DAC */
+	GPIO_FN_DA1, GPIO_FN_DA0,
+
+	/* ADC */
+	GPIO_FN_AN3, GPIO_FN_AN2, GPIO_FN_AN1, GPIO_FN_AN0, GPIO_FN_ADTRG,
+
+	/* USB */
+	GPIO_FN_USB1D_RCV, GPIO_FN_USB1D_TXSE0,
+	GPIO_FN_USB1D_TXDPLS, GPIO_FN_USB1D_DMNS,
+	GPIO_FN_USB1D_DPLS, GPIO_FN_USB1D_SPEED,
+	GPIO_FN_USB1D_TXENL, GPIO_FN_USB2_PWR_EN,
+	GPIO_FN_USB1_PWR_EN_USBF_UPLUP, GPIO_FN_USB1D_SUSPEND,
+
+	/* INTC */
+	GPIO_FN_IRQ5, GPIO_FN_IRQ4,
+	GPIO_FN_IRQ3_IRL3, GPIO_FN_IRQ2_IRL2,
+	GPIO_FN_IRQ1_IRL1, GPIO_FN_IRQ0_IRL0,
+
+	/* PCC */
+	GPIO_FN_PCC_REG, GPIO_FN_PCC_DRV,
+	GPIO_FN_PCC_BVD2, GPIO_FN_PCC_BVD1,
+	GPIO_FN_PCC_CD2, GPIO_FN_PCC_CD1,
+	GPIO_FN_PCC_RESET, GPIO_FN_PCC_RDY,
+	GPIO_FN_PCC_VS2, GPIO_FN_PCC_VS1,
+
+	/* HUDI */
+	GPIO_FN_AUDATA3, GPIO_FN_AUDATA2, GPIO_FN_AUDATA1, GPIO_FN_AUDATA0,
+	GPIO_FN_AUDCK, GPIO_FN_AUDSYNC, GPIO_FN_ASEBRKAK, GPIO_FN_TRST,
+	GPIO_FN_TMS, GPIO_FN_TDO, GPIO_FN_TDI, GPIO_FN_TCK,
+
+	/* DMAC */
+	GPIO_FN_DACK1, GPIO_FN_DREQ1, GPIO_FN_DACK0, GPIO_FN_DREQ0,
+	GPIO_FN_TEND1, GPIO_FN_TEND0,
+
+	/* SIOF0 */
+	GPIO_FN_SIOF0_SYNC, GPIO_FN_SIOF0_MCLK,
+	GPIO_FN_SIOF0_TXD, GPIO_FN_SIOF0_RXD,
+	GPIO_FN_SIOF0_SCK,
+
+	/* SIOF1 */
+	GPIO_FN_SIOF1_SYNC, GPIO_FN_SIOF1_MCLK,
+	GPIO_FN_SIOF1_TXD, GPIO_FN_SIOF1_RXD,
+	GPIO_FN_SIOF1_SCK,
+
+	/* SCIF0 */
+	GPIO_FN_SCIF0_TXD, GPIO_FN_SCIF0_RXD,
+	GPIO_FN_SCIF0_RTS, GPIO_FN_SCIF0_CTS, GPIO_FN_SCIF0_SCK,
+
+	/* SCIF1 */
+	GPIO_FN_SCIF1_TXD, GPIO_FN_SCIF1_RXD,
+	GPIO_FN_SCIF1_RTS, GPIO_FN_SCIF1_CTS, GPIO_FN_SCIF1_SCK,
+
+	/* TPU */
+	GPIO_FN_TPU_TO1, GPIO_FN_TPU_TO0,
+	GPIO_FN_TPU_TI3B, GPIO_FN_TPU_TI3A,
+	GPIO_FN_TPU_TI2B, GPIO_FN_TPU_TI2A,
+	GPIO_FN_TPU_TO3, GPIO_FN_TPU_TO2,
+
+	/* SIM */
+	GPIO_FN_SIM_D, GPIO_FN_SIM_CLK, GPIO_FN_SIM_RST,
+
+	/* MMC */
+	GPIO_FN_MMC_DAT, GPIO_FN_MMC_CMD,
+	GPIO_FN_MMC_CLK, GPIO_FN_MMC_VDDON,
+	GPIO_FN_MMC_ODMOD,
+
+	/* SYSC */
+	GPIO_FN_STATUS0, GPIO_FN_STATUS1,
+};
+
+#endif /* __ASM_SH7720_H__ */
diff --git a/arch/sh/kernel/cpu/sh3/Makefile b/arch/sh/kernel/cpu/sh3/Makefile
index 511de55af832..e07c69e16d9b 100644
--- a/arch/sh/kernel/cpu/sh3/Makefile
+++ b/arch/sh/kernel/cpu/sh3/Makefile
@@ -24,4 +24,8 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7710)	:= clock-sh7710.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7720)	:= clock-sh7710.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7712)	:= clock-sh7712.o
 
+# Pinmux setup
+pinmux-$(CONFIG_CPU_SUBTYPE_SH7720)	:= pinmux-sh7720.o
+
 obj-y	+= $(clock-y)
+obj-$(CONFIG_GENERIC_GPIO)	+= $(pinmux-y)
diff --git a/arch/sh/kernel/cpu/sh3/pinmux-sh7720.c b/arch/sh/kernel/cpu/sh3/pinmux-sh7720.c
new file mode 100644
index 000000000000..b66c239509a2
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh3/pinmux-sh7720.c
@@ -0,0 +1,1242 @@
+/*
+ * SH7720 Pinmux
+ *
+ *  Copyright (C) 2008  Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <asm/sh7720.h>
+
+enum {
+	PINMUX_RESERVED = 0,
+
+	PINMUX_DATA_BEGIN,
+	PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+	PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA,
+	PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+	PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA,
+	PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+	PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA,
+	PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+	PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA,
+	PTE6_DATA, PTE5_DATA, PTE4_DATA,
+	PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA,
+	PTF6_DATA, PTF5_DATA, PTF4_DATA,
+	PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA,
+	PTG6_DATA, PTG5_DATA, PTG4_DATA,
+	PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA,
+	PTH6_DATA, PTH5_DATA, PTH4_DATA,
+	PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA,
+	PTJ6_DATA, PTJ5_DATA, PTJ4_DATA,
+	PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA,
+	PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA,
+	PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA, PTL3_DATA,
+	PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+	PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA,
+	PTP4_DATA, PTP3_DATA, PTP2_DATA, PTP1_DATA, PTP0_DATA,
+	PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+	PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA,
+	PTS4_DATA, PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA,
+	PTT4_DATA, PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA,
+	PTU4_DATA, PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA,
+	PTV4_DATA, PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA,
+	PINMUX_DATA_END,
+
+	PINMUX_INPUT_BEGIN,
+	PTA7_IN, PTA6_IN, PTA5_IN, PTA4_IN,
+	PTA3_IN, PTA2_IN, PTA1_IN, PTA0_IN,
+	PTB7_IN, PTB6_IN, PTB5_IN, PTB4_IN,
+	PTB3_IN, PTB2_IN, PTB1_IN, PTB0_IN,
+	PTC7_IN, PTC6_IN, PTC5_IN, PTC4_IN,
+	PTC3_IN, PTC2_IN, PTC1_IN, PTC0_IN,
+	PTD7_IN, PTD6_IN, PTD5_IN, PTD4_IN,
+	PTD3_IN, PTD2_IN, PTD1_IN, PTD0_IN,
+	PTE6_IN, PTE5_IN, PTE4_IN,
+	PTE3_IN, PTE2_IN, PTE1_IN, PTE0_IN,
+	PTF6_IN, PTF5_IN, PTF4_IN,
+	PTF3_IN, PTF2_IN, PTF1_IN, PTF0_IN,
+	PTG6_IN, PTG5_IN, PTG4_IN,
+	PTG3_IN, PTG2_IN, PTG1_IN, PTG0_IN,
+	PTH6_IN, PTH5_IN, PTH4_IN,
+	PTH3_IN, PTH2_IN, PTH1_IN, PTH0_IN,
+	PTJ6_IN, PTJ5_IN, PTJ4_IN,
+	PTJ3_IN, PTJ2_IN, PTJ1_IN, PTJ0_IN,
+	PTK3_IN, PTK2_IN, PTK1_IN, PTK0_IN,
+	PTL7_IN, PTL6_IN, PTL5_IN, PTL4_IN, PTL3_IN,
+	PTM7_IN, PTM6_IN, PTM5_IN, PTM4_IN,
+	PTM3_IN, PTM2_IN, PTM1_IN, PTM0_IN,
+	PTP4_IN, PTP3_IN, PTP2_IN, PTP1_IN, PTP0_IN,
+	PTR7_IN, PTR6_IN, PTR5_IN, PTR4_IN,
+	PTR3_IN, PTR2_IN, PTR1_IN, PTR0_IN,
+	PTS4_IN, PTS3_IN, PTS2_IN, PTS1_IN, PTS0_IN,
+	PTT4_IN, PTT3_IN, PTT2_IN, PTT1_IN, PTT0_IN,
+	PTU4_IN, PTU3_IN, PTU2_IN, PTU1_IN, PTU0_IN,
+	PTV4_IN, PTV3_IN, PTV2_IN, PTV1_IN, PTV0_IN,
+	PINMUX_INPUT_END,
+
+	PINMUX_INPUT_PULLUP_BEGIN,
+	PTA7_IN_PU, PTA6_IN_PU, PTA5_IN_PU, PTA4_IN_PU,
+	PTA3_IN_PU, PTA2_IN_PU, PTA1_IN_PU, PTA0_IN_PU,
+	PTB7_IN_PU, PTB6_IN_PU, PTB5_IN_PU, PTB4_IN_PU,
+	PTB3_IN_PU, PTB2_IN_PU, PTB1_IN_PU, PTB0_IN_PU,
+	PTC7_IN_PU, PTC6_IN_PU, PTC5_IN_PU, PTC4_IN_PU,
+	PTC3_IN_PU, PTC2_IN_PU, PTC1_IN_PU, PTC0_IN_PU,
+	PTD7_IN_PU, PTD6_IN_PU, PTD5_IN_PU, PTD4_IN_PU,
+	PTD3_IN_PU, PTD2_IN_PU, PTD1_IN_PU, PTD0_IN_PU,
+	PTE4_IN_PU, PTE3_IN_PU, PTE2_IN_PU, PTE1_IN_PU, PTE0_IN_PU,
+	PTF0_IN_PU,
+	PTG6_IN_PU, PTG5_IN_PU, PTG4_IN_PU,
+	PTG3_IN_PU, PTG2_IN_PU, PTG1_IN_PU, PTG0_IN_PU,
+	PTH6_IN_PU, PTH5_IN_PU, PTH4_IN_PU,
+	PTH3_IN_PU, PTH2_IN_PU, PTH1_IN_PU, PTH0_IN_PU,
+	PTJ6_IN_PU, PTJ5_IN_PU, PTJ4_IN_PU,
+	PTJ3_IN_PU, PTJ2_IN_PU, PTJ1_IN_PU, PTJ0_IN_PU,
+	PTK3_IN_PU, PTK2_IN_PU, PTK1_IN_PU, PTK0_IN_PU,
+	PTL7_IN_PU, PTL6_IN_PU, PTL5_IN_PU, PTL4_IN_PU, PTL3_IN_PU,
+	PTM7_IN_PU, PTM6_IN_PU, PTM5_IN_PU, PTM4_IN_PU,
+	PTM3_IN_PU, PTM2_IN_PU, PTM1_IN_PU, PTM0_IN_PU,
+	PTP4_IN_PU, PTP3_IN_PU, PTP2_IN_PU, PTP1_IN_PU, PTP0_IN_PU,
+	PTR7_IN_PU, PTR6_IN_PU, PTR5_IN_PU, PTR4_IN_PU,
+	PTR3_IN_PU, PTR2_IN_PU, PTR1_IN_PU, PTR0_IN_PU,
+	PTS4_IN_PU, PTS3_IN_PU, PTS2_IN_PU, PTS1_IN_PU, PTS0_IN_PU,
+	PTT4_IN_PU, PTT3_IN_PU, PTT2_IN_PU, PTT1_IN_PU, PTT0_IN_PU,
+	PTU4_IN_PU, PTU3_IN_PU, PTU2_IN_PU, PTU1_IN_PU, PTU0_IN_PU,
+	PTV4_IN_PU, PTV3_IN_PU, PTV2_IN_PU, PTV1_IN_PU, PTV0_IN_PU,
+	PINMUX_INPUT_PULLUP_END,
+
+	PINMUX_OUTPUT_BEGIN,
+	PTA7_OUT, PTA6_OUT, PTA5_OUT, PTA4_OUT,
+	PTA3_OUT, PTA2_OUT, PTA1_OUT, PTA0_OUT,
+	PTB7_OUT, PTB6_OUT, PTB5_OUT, PTB4_OUT,
+	PTB3_OUT, PTB2_OUT, PTB1_OUT, PTB0_OUT,
+	PTC7_OUT, PTC6_OUT, PTC5_OUT, PTC4_OUT,
+	PTC3_OUT, PTC2_OUT, PTC1_OUT, PTC0_OUT,
+	PTD7_OUT, PTD6_OUT, PTD5_OUT, PTD4_OUT,
+	PTD3_OUT, PTD2_OUT, PTD1_OUT, PTD0_OUT,
+	PTE4_OUT, PTE3_OUT, PTE2_OUT, PTE1_OUT, PTE0_OUT,
+	PTF0_OUT,
+	PTG6_OUT, PTG5_OUT, PTG4_OUT,
+	PTG3_OUT, PTG2_OUT, PTG1_OUT, PTG0_OUT,
+	PTH6_OUT, PTH5_OUT, PTH4_OUT,
+	PTH3_OUT, PTH2_OUT, PTH1_OUT, PTH0_OUT,
+	PTJ6_OUT, PTJ5_OUT, PTJ4_OUT,
+	PTJ3_OUT, PTJ2_OUT, PTJ1_OUT, PTJ0_OUT,
+	PTK3_OUT, PTK2_OUT, PTK1_OUT, PTK0_OUT,
+	PTL7_OUT, PTL6_OUT, PTL5_OUT, PTL4_OUT, PTL3_OUT,
+	PTM7_OUT, PTM6_OUT, PTM5_OUT, PTM4_OUT,
+	PTM3_OUT, PTM2_OUT, PTM1_OUT, PTM0_OUT,
+	PTP4_OUT, PTP3_OUT, PTP2_OUT, PTP1_OUT, PTP0_OUT,
+	PTR7_OUT, PTR6_OUT, PTR5_OUT, PTR4_OUT,
+	PTR3_OUT, PTR2_OUT, PTR1_OUT, PTR0_OUT,
+	PTS4_OUT, PTS3_OUT, PTS2_OUT, PTS1_OUT, PTS0_OUT,
+	PTT4_OUT, PTT3_OUT, PTT2_OUT, PTT1_OUT, PTT0_OUT,
+	PTU4_OUT, PTU3_OUT, PTU2_OUT, PTU1_OUT, PTU0_OUT,
+	PTV4_OUT, PTV3_OUT, PTV2_OUT, PTV1_OUT, PTV0_OUT,
+	PINMUX_OUTPUT_END,
+
+	PINMUX_FUNCTION_BEGIN,
+	PTA7_FN, PTA6_FN, PTA5_FN, PTA4_FN,
+	PTA3_FN, PTA2_FN, PTA1_FN, PTA0_FN,
+	PTB7_FN, PTB6_FN, PTB5_FN, PTB4_FN,
+	PTB3_FN, PTB2_FN, PTB1_FN, PTB0_FN,
+	PTC7_FN, PTC6_FN, PTC5_FN, PTC4_FN,
+	PTC3_FN, PTC2_FN, PTC1_FN, PTC0_FN,
+	PTD7_FN, PTD6_FN, PTD5_FN, PTD4_FN,
+	PTD3_FN, PTD2_FN, PTD1_FN, PTD0_FN,
+	PTE6_FN, PTE5_FN, PTE4_FN,
+	PTE3_FN, PTE2_FN, PTE1_FN, PTE0_FN,
+	PTF6_FN, PTF5_FN, PTF4_FN,
+	PTF3_FN, PTF2_FN, PTF1_FN, PTF0_FN,
+	PTG6_FN, PTG5_FN, PTG4_FN,
+	PTG3_FN, PTG2_FN, PTG1_FN, PTG0_FN,
+	PTH6_FN, PTH5_FN, PTH4_FN,
+	PTH3_FN, PTH2_FN, PTH1_FN, PTH0_FN,
+	PTJ6_FN, PTJ5_FN, PTJ4_FN,
+	PTJ3_FN, PTJ2_FN, PTJ1_FN, PTJ0_FN,
+	PTK3_FN, PTK2_FN, PTK1_FN, PTK0_FN,
+	PTL7_FN, PTL6_FN, PTL5_FN, PTL4_FN, PTL3_FN,
+	PTM7_FN, PTM6_FN, PTM5_FN, PTM4_FN,
+	PTM3_FN, PTM2_FN, PTM1_FN, PTM0_FN,
+	PTP4_FN, PTP3_FN, PTP2_FN, PTP1_FN, PTP0_FN,
+	PTR7_FN, PTR6_FN, PTR5_FN, PTR4_FN,
+	PTR3_FN, PTR2_FN, PTR1_FN, PTR0_FN,
+	PTS4_FN, PTS3_FN, PTS2_FN, PTS1_FN, PTS0_FN,
+	PTT4_FN, PTT3_FN, PTT2_FN, PTT1_FN, PTT0_FN,
+	PTU4_FN, PTU3_FN, PTU2_FN, PTU1_FN, PTU0_FN,
+	PTV4_FN, PTV3_FN, PTV2_FN, PTV1_FN, PTV0_FN,
+
+	PSELA_1_0_00, PSELA_1_0_01, PSELA_1_0_10,
+	PSELA_3_2_00, PSELA_3_2_01, PSELA_3_2_10, PSELA_3_2_11,
+	PSELA_5_4_00, PSELA_5_4_01, PSELA_5_4_10, PSELA_5_4_11,
+	PSELA_7_6_00, PSELA_7_6_01, PSELA_7_6_10,
+	PSELA_9_8_00, PSELA_9_8_01, PSELA_9_8_10,
+	PSELA_11_10_00, PSELA_11_10_01, PSELA_11_10_10,
+	PSELA_13_12_00, PSELA_13_12_10,
+	PSELA_15_14_00, PSELA_15_14_10,
+	PSELB_9_8_00, PSELB_9_8_11,
+	PSELB_11_10_00, PSELB_11_10_01, PSELB_11_10_10, PSELB_11_10_11,
+	PSELB_13_12_00, PSELB_13_12_01, PSELB_13_12_10, PSELB_13_12_11,
+	PSELB_15_14_00, PSELB_15_14_11,
+	PSELC_9_8_00, PSELC_9_8_10,
+	PSELC_11_10_00, PSELC_11_10_10,
+	PSELC_13_12_00,	PSELC_13_12_01,	PSELC_13_12_10,
+	PSELC_15_14_00,	PSELC_15_14_01,	PSELC_15_14_10,
+	PSELD_1_0_00, PSELD_1_0_10,
+	PSELD_11_10_00,	PSELD_11_10_01,
+	PSELD_15_14_00,	PSELD_15_14_01,	PSELD_15_14_10,
+	PINMUX_FUNCTION_END,
+
+	PINMUX_MARK_BEGIN,
+	D31_MARK, D30_MARK, D29_MARK, D28_MARK,
+	D27_MARK, D26_MARK, D25_MARK, D24_MARK,
+	D23_MARK, D22_MARK, D21_MARK, D20_MARK,
+	D19_MARK, D18_MARK, D17_MARK, D16_MARK,
+	IOIS16_MARK, RAS_MARK, CAS_MARK, CKE_MARK,
+	CS5B_CE1A_MARK, CS6B_CE1B_MARK,
+	A25_MARK, A24_MARK, A23_MARK, A22_MARK,
+	A21_MARK, A20_MARK, A19_MARK, A0_MARK,
+	REFOUT_MARK, IRQOUT_MARK,
+	LCD_DATA15_MARK, LCD_DATA14_MARK,
+	LCD_DATA13_MARK, LCD_DATA12_MARK,
+	LCD_DATA11_MARK, LCD_DATA10_MARK,
+	LCD_DATA9_MARK, LCD_DATA8_MARK,
+	LCD_DATA7_MARK, LCD_DATA6_MARK,
+	LCD_DATA5_MARK, LCD_DATA4_MARK,
+	LCD_DATA3_MARK, LCD_DATA2_MARK,
+	LCD_DATA1_MARK, LCD_DATA0_MARK,
+	LCD_M_DISP_MARK,
+	LCD_CL1_MARK, LCD_CL2_MARK,
+	LCD_DON_MARK, LCD_FLM_MARK,
+	LCD_VEPWC_MARK, LCD_VCPWC_MARK,
+	AFE_RXIN_MARK, AFE_RDET_MARK,
+	AFE_FS_MARK, AFE_TXOUT_MARK,
+	AFE_SCLK_MARK, AFE_RLYCNT_MARK,
+	AFE_HC1_MARK,
+	IIC_SCL_MARK, IIC_SDA_MARK,
+	DA1_MARK, DA0_MARK,
+	AN3_MARK, AN2_MARK, AN1_MARK, AN0_MARK, ADTRG_MARK,
+	USB1D_RCV_MARK, USB1D_TXSE0_MARK,
+	USB1D_TXDPLS_MARK, USB1D_DMNS_MARK,
+	USB1D_DPLS_MARK, USB1D_SPEED_MARK,
+	USB1D_TXENL_MARK,
+	USB2_PWR_EN_MARK, USB1_PWR_EN_USBF_UPLUP_MARK, USB1D_SUSPEND_MARK,
+	IRQ5_MARK, IRQ4_MARK,
+	IRQ3_IRL3_MARK, IRQ2_IRL2_MARK,
+	IRQ1_IRL1_MARK, IRQ0_IRL0_MARK,
+	PCC_REG_MARK, PCC_DRV_MARK,
+	PCC_BVD2_MARK, PCC_BVD1_MARK,
+	PCC_CD2_MARK, PCC_CD1_MARK,
+	PCC_RESET_MARK, PCC_RDY_MARK,
+	PCC_VS2_MARK, PCC_VS1_MARK,
+	AUDATA3_MARK, AUDATA2_MARK, AUDATA1_MARK, AUDATA0_MARK,
+	AUDCK_MARK, AUDSYNC_MARK, ASEBRKAK_MARK, TRST_MARK,
+	TMS_MARK, TDO_MARK, TDI_MARK, TCK_MARK,
+	DACK1_MARK, DREQ1_MARK, DACK0_MARK, DREQ0_MARK,
+	TEND1_MARK, TEND0_MARK,
+	SIOF0_SYNC_MARK, SIOF0_MCLK_MARK,
+	SIOF0_TXD_MARK, SIOF0_RXD_MARK,
+	SIOF0_SCK_MARK,
+	SIOF1_SYNC_MARK, SIOF1_MCLK_MARK,
+	SIOF1_TXD_MARK, SIOF1_RXD_MARK,
+	SIOF1_SCK_MARK,
+	SCIF0_TXD_MARK, SCIF0_RXD_MARK,
+	SCIF0_RTS_MARK, SCIF0_CTS_MARK, SCIF0_SCK_MARK,
+	SCIF1_TXD_MARK, SCIF1_RXD_MARK,
+	SCIF1_RTS_MARK, SCIF1_CTS_MARK, SCIF1_SCK_MARK,
+	TPU_TO1_MARK, TPU_TO0_MARK,
+	TPU_TI3B_MARK, TPU_TI3A_MARK,
+	TPU_TI2B_MARK, TPU_TI2A_MARK,
+	TPU_TO3_MARK, TPU_TO2_MARK,
+	SIM_D_MARK, SIM_CLK_MARK, SIM_RST_MARK,
+	MMC_DAT_MARK, MMC_CMD_MARK,
+	MMC_CLK_MARK, MMC_VDDON_MARK,
+	MMC_ODMOD_MARK,
+	STATUS0_MARK, STATUS1_MARK,
+	PINMUX_MARK_END,
+};
+
+static pinmux_enum_t pinmux_data[] = {
+	/* PTA GPIO */
+	PINMUX_DATA(PTA7_DATA, PTA7_IN, PTA7_OUT, PTA7_IN_PU),
+	PINMUX_DATA(PTA6_DATA, PTA6_IN, PTA6_OUT, PTA6_IN_PU),
+	PINMUX_DATA(PTA5_DATA, PTA5_IN, PTA5_OUT, PTA5_IN_PU),
+	PINMUX_DATA(PTA4_DATA, PTA4_IN, PTA4_OUT, PTA4_IN_PU),
+	PINMUX_DATA(PTA3_DATA, PTA3_IN, PTA3_OUT, PTA3_IN_PU),
+	PINMUX_DATA(PTA2_DATA, PTA2_IN, PTA2_OUT, PTA2_IN_PU),
+	PINMUX_DATA(PTA1_DATA, PTA1_IN, PTA1_OUT, PTA1_IN_PU),
+	PINMUX_DATA(PTA0_DATA, PTA0_IN, PTA0_OUT, PTA0_IN_PU),
+
+	/* PTB GPIO */
+	PINMUX_DATA(PTB7_DATA, PTB7_IN, PTB7_OUT, PTB7_IN_PU),
+	PINMUX_DATA(PTB6_DATA, PTB6_IN, PTB6_OUT, PTB6_IN_PU),
+	PINMUX_DATA(PTB5_DATA, PTB5_IN, PTB5_OUT, PTB5_IN_PU),
+	PINMUX_DATA(PTB4_DATA, PTB4_IN, PTB4_OUT, PTB4_IN_PU),
+	PINMUX_DATA(PTB3_DATA, PTB3_IN, PTB3_OUT, PTB3_IN_PU),
+	PINMUX_DATA(PTB2_DATA, PTB2_IN, PTB2_OUT, PTB2_IN_PU),
+	PINMUX_DATA(PTB1_DATA, PTB1_IN, PTB1_OUT, PTB1_IN_PU),
+	PINMUX_DATA(PTB0_DATA, PTB0_IN, PTB0_OUT, PTB0_IN_PU),
+
+	/* PTC GPIO */
+	PINMUX_DATA(PTC7_DATA, PTC7_IN, PTC7_OUT, PTC7_IN_PU),
+	PINMUX_DATA(PTC6_DATA, PTC6_IN, PTC6_OUT, PTC6_IN_PU),
+	PINMUX_DATA(PTC5_DATA, PTC5_IN, PTC5_OUT, PTC5_IN_PU),
+	PINMUX_DATA(PTC4_DATA, PTC4_IN, PTC4_OUT, PTC4_IN_PU),
+	PINMUX_DATA(PTC3_DATA, PTC3_IN, PTC3_OUT, PTC3_IN_PU),
+	PINMUX_DATA(PTC2_DATA, PTC2_IN, PTC2_OUT, PTC2_IN_PU),
+	PINMUX_DATA(PTC1_DATA, PTC1_IN, PTC1_OUT, PTC1_IN_PU),
+	PINMUX_DATA(PTC0_DATA, PTC0_IN, PTC0_OUT, PTC0_IN_PU),
+
+	/* PTD GPIO */
+	PINMUX_DATA(PTD7_DATA, PTD7_IN, PTD7_OUT, PTD7_IN_PU),
+	PINMUX_DATA(PTD6_DATA, PTD6_IN, PTD6_OUT, PTD6_IN_PU),
+	PINMUX_DATA(PTD5_DATA, PTD5_IN, PTD5_OUT, PTD5_IN_PU),
+	PINMUX_DATA(PTD4_DATA, PTD4_IN, PTD4_OUT, PTD4_IN_PU),
+	PINMUX_DATA(PTD3_DATA, PTD3_IN, PTD3_OUT, PTD3_IN_PU),
+	PINMUX_DATA(PTD2_DATA, PTD2_IN, PTD2_OUT, PTD2_IN_PU),
+	PINMUX_DATA(PTD1_DATA, PTD1_IN, PTD1_OUT, PTD1_IN_PU),
+	PINMUX_DATA(PTD0_DATA, PTD0_IN, PTD0_OUT, PTD0_IN_PU),
+
+	/* PTE GPIO */
+	PINMUX_DATA(PTE6_DATA, PTE6_IN),
+	PINMUX_DATA(PTE5_DATA, PTE5_IN),
+	PINMUX_DATA(PTE4_DATA, PTE4_IN, PTE4_OUT, PTE4_IN_PU),
+	PINMUX_DATA(PTE3_DATA, PTE3_IN, PTE3_OUT, PTE3_IN_PU),
+	PINMUX_DATA(PTE2_DATA, PTE2_IN, PTE2_OUT, PTE2_IN_PU),
+	PINMUX_DATA(PTE1_DATA, PTE1_IN, PTE1_OUT, PTE1_IN_PU),
+	PINMUX_DATA(PTE0_DATA, PTE0_IN, PTE0_OUT, PTE0_IN_PU),
+
+	/* PTF GPIO */
+	PINMUX_DATA(PTF6_DATA, PTF6_IN),
+	PINMUX_DATA(PTF5_DATA, PTF5_IN),
+	PINMUX_DATA(PTF4_DATA, PTF4_IN),
+	PINMUX_DATA(PTF3_DATA, PTF3_IN),
+	PINMUX_DATA(PTF2_DATA, PTF2_IN),
+	PINMUX_DATA(PTF1_DATA, PTF1_IN),
+	PINMUX_DATA(PTF0_DATA, PTF0_IN, PTF0_OUT, PTF0_IN_PU),
+
+	/* PTG GPIO */
+	PINMUX_DATA(PTG6_DATA, PTG6_IN, PTG6_OUT, PTG6_IN_PU),
+	PINMUX_DATA(PTG5_DATA, PTG5_IN, PTG5_OUT, PTG5_IN_PU),
+	PINMUX_DATA(PTG4_DATA, PTG4_IN, PTG4_OUT, PTG4_IN_PU),
+	PINMUX_DATA(PTG3_DATA, PTG3_IN, PTG3_OUT, PTG3_IN_PU),
+	PINMUX_DATA(PTG2_DATA, PTG2_IN, PTG2_OUT, PTG2_IN_PU),
+	PINMUX_DATA(PTG1_DATA, PTG1_IN, PTG1_OUT, PTG1_IN_PU),
+	PINMUX_DATA(PTG0_DATA, PTG0_IN, PTG0_OUT, PTG0_IN_PU),
+
+	/* PTH GPIO */
+	PINMUX_DATA(PTH6_DATA, PTH6_IN, PTH6_OUT, PTH6_IN_PU),
+	PINMUX_DATA(PTH5_DATA, PTH5_IN, PTH5_OUT, PTH5_IN_PU),
+	PINMUX_DATA(PTH4_DATA, PTH4_IN, PTH4_OUT, PTH4_IN_PU),
+	PINMUX_DATA(PTH3_DATA, PTH3_IN, PTH3_OUT, PTH3_IN_PU),
+	PINMUX_DATA(PTH2_DATA, PTH2_IN, PTH2_OUT, PTH2_IN_PU),
+	PINMUX_DATA(PTH1_DATA, PTH1_IN, PTH1_OUT, PTH1_IN_PU),
+	PINMUX_DATA(PTH0_DATA, PTH0_IN, PTH0_OUT, PTH0_IN_PU),
+
+	/* PTJ GPIO */
+	PINMUX_DATA(PTJ6_DATA, PTJ6_IN, PTJ6_OUT, PTJ6_IN_PU),
+	PINMUX_DATA(PTJ5_DATA, PTJ5_IN, PTJ5_OUT, PTJ5_IN_PU),
+	PINMUX_DATA(PTJ4_DATA, PTJ4_IN, PTJ4_OUT, PTJ4_IN_PU),
+	PINMUX_DATA(PTJ3_DATA, PTJ3_IN, PTJ3_OUT, PTJ3_IN_PU),
+	PINMUX_DATA(PTJ2_DATA, PTJ2_IN, PTJ2_OUT, PTJ2_IN_PU),
+	PINMUX_DATA(PTJ1_DATA, PTJ1_IN, PTJ1_OUT, PTJ1_IN_PU),
+	PINMUX_DATA(PTJ0_DATA, PTJ0_IN, PTJ0_OUT, PTJ0_IN_PU),
+
+	/* PTK GPIO */
+	PINMUX_DATA(PTK3_DATA, PTK3_IN, PTK3_OUT, PTK3_IN_PU),
+	PINMUX_DATA(PTK2_DATA, PTK2_IN, PTK2_OUT, PTK2_IN_PU),
+	PINMUX_DATA(PTK1_DATA, PTK1_IN, PTK1_OUT, PTK1_IN_PU),
+	PINMUX_DATA(PTK0_DATA, PTK0_IN, PTK0_OUT, PTK0_IN_PU),
+
+	/* PTL GPIO */
+	PINMUX_DATA(PTL7_DATA, PTL7_IN, PTL7_OUT, PTL7_IN_PU),
+	PINMUX_DATA(PTL6_DATA, PTL6_IN, PTL6_OUT, PTL6_IN_PU),
+	PINMUX_DATA(PTL5_DATA, PTL5_IN, PTL5_OUT, PTL5_IN_PU),
+	PINMUX_DATA(PTL4_DATA, PTL4_IN, PTL4_OUT, PTL4_IN_PU),
+	PINMUX_DATA(PTL3_DATA, PTL3_IN, PTL3_OUT, PTL3_IN_PU),
+
+	/* PTM GPIO */
+	PINMUX_DATA(PTM7_DATA, PTM7_IN, PTM7_OUT, PTM7_IN_PU),
+	PINMUX_DATA(PTM6_DATA, PTM6_IN, PTM6_OUT, PTM6_IN_PU),
+	PINMUX_DATA(PTM5_DATA, PTM5_IN, PTM5_OUT, PTM5_IN_PU),
+	PINMUX_DATA(PTM4_DATA, PTM4_IN, PTM4_OUT, PTM4_IN_PU),
+	PINMUX_DATA(PTM3_DATA, PTM3_IN, PTM3_OUT, PTM3_IN_PU),
+	PINMUX_DATA(PTM2_DATA, PTM2_IN, PTM2_OUT, PTM2_IN_PU),
+	PINMUX_DATA(PTM1_DATA, PTM1_IN, PTM1_OUT, PTM1_IN_PU),
+	PINMUX_DATA(PTM0_DATA, PTM0_IN, PTM0_OUT, PTM0_IN_PU),
+
+	/* PTP GPIO */
+	PINMUX_DATA(PTP4_DATA, PTP4_IN, PTP4_OUT, PTP4_IN_PU),
+	PINMUX_DATA(PTP3_DATA, PTP3_IN, PTP3_OUT, PTP3_IN_PU),
+	PINMUX_DATA(PTP2_DATA, PTP2_IN, PTP2_OUT, PTP2_IN_PU),
+	PINMUX_DATA(PTP1_DATA, PTP1_IN, PTP1_OUT, PTP1_IN_PU),
+	PINMUX_DATA(PTP0_DATA, PTP0_IN, PTP0_OUT, PTP0_IN_PU),
+
+	/* PTR GPIO */
+	PINMUX_DATA(PTR7_DATA, PTR7_IN, PTR7_OUT, PTR7_IN_PU),
+	PINMUX_DATA(PTR6_DATA, PTR6_IN, PTR6_OUT, PTR6_IN_PU),
+	PINMUX_DATA(PTR5_DATA, PTR5_IN, PTR5_OUT, PTR5_IN_PU),
+	PINMUX_DATA(PTR4_DATA, PTR4_IN, PTR4_OUT, PTR4_IN_PU),
+	PINMUX_DATA(PTR3_DATA, PTR3_IN, PTR3_OUT, PTR3_IN_PU),
+	PINMUX_DATA(PTR2_DATA, PTR2_IN, PTR2_OUT, PTR2_IN_PU),
+	PINMUX_DATA(PTR1_DATA, PTR1_IN, PTR1_OUT, PTR1_IN_PU),
+	PINMUX_DATA(PTR0_DATA, PTR0_IN, PTR0_OUT, PTR0_IN_PU),
+
+	/* PTS GPIO */
+	PINMUX_DATA(PTS4_DATA, PTS4_IN, PTS4_OUT, PTS4_IN_PU),
+	PINMUX_DATA(PTS3_DATA, PTS3_IN, PTS3_OUT, PTS3_IN_PU),
+	PINMUX_DATA(PTS2_DATA, PTS2_IN, PTS2_OUT, PTS2_IN_PU),
+	PINMUX_DATA(PTS1_DATA, PTS1_IN, PTS1_OUT, PTS1_IN_PU),
+	PINMUX_DATA(PTS0_DATA, PTS0_IN, PTS0_OUT, PTS0_IN_PU),
+
+	/* PTT GPIO */
+	PINMUX_DATA(PTT4_DATA, PTT4_IN, PTT4_OUT, PTT4_IN_PU),
+	PINMUX_DATA(PTT3_DATA, PTT3_IN, PTT3_OUT, PTT3_IN_PU),
+	PINMUX_DATA(PTT2_DATA, PTT2_IN, PTT2_OUT, PTT2_IN_PU),
+	PINMUX_DATA(PTT1_DATA, PTT1_IN, PTT1_OUT, PTT1_IN_PU),
+	PINMUX_DATA(PTT0_DATA, PTT0_IN, PTT0_OUT, PTT0_IN_PU),
+
+	/* PTU GPIO */
+	PINMUX_DATA(PTU4_DATA, PTU4_IN, PTU4_OUT, PTU4_IN_PU),
+	PINMUX_DATA(PTU3_DATA, PTU3_IN, PTU3_OUT, PTU3_IN_PU),
+	PINMUX_DATA(PTU2_DATA, PTU2_IN, PTU2_OUT, PTU2_IN_PU),
+	PINMUX_DATA(PTU1_DATA, PTU1_IN, PTU1_OUT, PTU1_IN_PU),
+	PINMUX_DATA(PTU0_DATA, PTU0_IN, PTU0_OUT, PTU0_IN_PU),
+
+	/* PTV GPIO */
+	PINMUX_DATA(PTV4_DATA, PTV4_IN, PTV4_OUT, PTV4_IN_PU),
+	PINMUX_DATA(PTV3_DATA, PTV3_IN, PTV3_OUT, PTV3_IN_PU),
+	PINMUX_DATA(PTV2_DATA, PTV2_IN, PTV2_OUT, PTV2_IN_PU),
+	PINMUX_DATA(PTV1_DATA, PTV1_IN, PTV1_OUT, PTV1_IN_PU),
+	PINMUX_DATA(PTV0_DATA, PTV0_IN, PTV0_OUT, PTV0_IN_PU),
+
+	/* PTA FN */
+	PINMUX_DATA(D23_MARK, PTA7_FN),
+	PINMUX_DATA(D22_MARK, PTA6_FN),
+	PINMUX_DATA(D21_MARK, PTA5_FN),
+	PINMUX_DATA(D20_MARK, PTA4_FN),
+	PINMUX_DATA(D19_MARK, PTA3_FN),
+	PINMUX_DATA(D18_MARK, PTA2_FN),
+	PINMUX_DATA(D17_MARK, PTA1_FN),
+	PINMUX_DATA(D16_MARK, PTA0_FN),
+
+	/* PTB FN */
+	PINMUX_DATA(D31_MARK, PTB7_FN),
+	PINMUX_DATA(D30_MARK, PTB6_FN),
+	PINMUX_DATA(D29_MARK, PTB5_FN),
+	PINMUX_DATA(D28_MARK, PTB4_FN),
+	PINMUX_DATA(D27_MARK, PTB3_FN),
+	PINMUX_DATA(D26_MARK, PTB2_FN),
+	PINMUX_DATA(D25_MARK, PTB1_FN),
+	PINMUX_DATA(D24_MARK, PTB0_FN),
+
+	/* PTC FN */
+	PINMUX_DATA(LCD_DATA7_MARK, PTC7_FN),
+	PINMUX_DATA(LCD_DATA6_MARK, PTC6_FN),
+	PINMUX_DATA(LCD_DATA5_MARK, PTC5_FN),
+	PINMUX_DATA(LCD_DATA4_MARK, PTC4_FN),
+	PINMUX_DATA(LCD_DATA3_MARK, PTC3_FN),
+	PINMUX_DATA(LCD_DATA2_MARK, PTC2_FN),
+	PINMUX_DATA(LCD_DATA1_MARK, PTC1_FN),
+	PINMUX_DATA(LCD_DATA0_MARK, PTC0_FN),
+
+	/* PTD FN */
+	PINMUX_DATA(LCD_DATA15_MARK, PTD7_FN),
+	PINMUX_DATA(LCD_DATA14_MARK, PTD6_FN),
+	PINMUX_DATA(LCD_DATA13_MARK, PTD5_FN),
+	PINMUX_DATA(LCD_DATA12_MARK, PTD4_FN),
+	PINMUX_DATA(LCD_DATA11_MARK, PTD3_FN),
+	PINMUX_DATA(LCD_DATA10_MARK, PTD2_FN),
+	PINMUX_DATA(LCD_DATA9_MARK, PTD1_FN),
+	PINMUX_DATA(LCD_DATA8_MARK, PTD0_FN),
+
+	/* PTE FN */
+	PINMUX_DATA(IIC_SCL_MARK, PSELB_9_8_00, PTE6_FN),
+	PINMUX_DATA(AFE_RXIN_MARK, PSELB_9_8_11, PTE6_FN),
+	PINMUX_DATA(IIC_SDA_MARK, PSELB_9_8_00, PTE5_FN),
+	PINMUX_DATA(AFE_RDET_MARK, PSELB_9_8_11, PTE5_FN),
+	PINMUX_DATA(LCD_M_DISP_MARK, PTE4_FN),
+	PINMUX_DATA(LCD_CL1_MARK, PTE3_FN),
+	PINMUX_DATA(LCD_CL2_MARK, PTE2_FN),
+	PINMUX_DATA(LCD_DON_MARK, PTE1_FN),
+	PINMUX_DATA(LCD_FLM_MARK, PTE0_FN),
+
+	/* PTF FN */
+	PINMUX_DATA(DA1_MARK, PTF6_FN),
+	PINMUX_DATA(DA0_MARK, PTF5_FN),
+	PINMUX_DATA(AN3_MARK, PTF4_FN),
+	PINMUX_DATA(AN2_MARK, PTF3_FN),
+	PINMUX_DATA(AN1_MARK, PTF2_FN),
+	PINMUX_DATA(AN0_MARK, PTF1_FN),
+	PINMUX_DATA(ADTRG_MARK, PTF0_FN),
+
+	/* PTG FN */
+	PINMUX_DATA(USB1D_RCV_MARK, PSELA_3_2_00, PTG6_FN),
+	PINMUX_DATA(AFE_FS_MARK, PSELA_3_2_01, PTG6_FN),
+	PINMUX_DATA(PCC_REG_MARK, PSELA_3_2_10, PTG6_FN),
+	PINMUX_DATA(IRQ5_MARK, PSELA_3_2_11, PTG6_FN),
+	PINMUX_DATA(USB1D_TXSE0_MARK, PSELA_5_4_00, PTG5_FN),
+	PINMUX_DATA(AFE_TXOUT_MARK, PSELA_5_4_01, PTG5_FN),
+	PINMUX_DATA(PCC_DRV_MARK, PSELA_5_4_10, PTG5_FN),
+	PINMUX_DATA(IRQ4_MARK, PSELA_5_4_11, PTG5_FN),
+	PINMUX_DATA(USB1D_TXDPLS_MARK, PSELA_7_6_00, PTG4_FN),
+	PINMUX_DATA(AFE_SCLK_MARK, PSELA_7_6_01, PTG4_FN),
+	PINMUX_DATA(IOIS16_MARK, PSELA_7_6_10, PTG4_FN),
+	PINMUX_DATA(USB1D_DMNS_MARK, PSELA_9_8_00, PTG3_FN),
+	PINMUX_DATA(AFE_RLYCNT_MARK, PSELA_9_8_01, PTG3_FN),
+	PINMUX_DATA(PCC_BVD2_MARK, PSELA_9_8_10, PTG3_FN),
+	PINMUX_DATA(USB1D_DPLS_MARK, PSELA_11_10_00, PTG2_FN),
+	PINMUX_DATA(AFE_HC1_MARK, PSELA_11_10_01, PTG2_FN),
+	PINMUX_DATA(PCC_BVD1_MARK, PSELA_11_10_10, PTG2_FN),
+	PINMUX_DATA(USB1D_SPEED_MARK, PSELA_13_12_00, PTG1_FN),
+	PINMUX_DATA(PCC_CD2_MARK, PSELA_13_12_10, PTG1_FN),
+	PINMUX_DATA(USB1D_TXENL_MARK, PSELA_15_14_00, PTG0_FN),
+	PINMUX_DATA(PCC_CD1_MARK, PSELA_15_14_10, PTG0_FN),
+
+	/* PTH FN */
+	PINMUX_DATA(RAS_MARK, PTH6_FN),
+	PINMUX_DATA(CAS_MARK, PTH5_FN),
+	PINMUX_DATA(CKE_MARK, PTH4_FN),
+	PINMUX_DATA(STATUS1_MARK, PTH3_FN),
+	PINMUX_DATA(STATUS0_MARK, PTH2_FN),
+	PINMUX_DATA(USB2_PWR_EN_MARK, PTH1_FN),
+	PINMUX_DATA(USB1_PWR_EN_USBF_UPLUP_MARK, PTH0_FN),
+
+	/* PTJ FN */
+	PINMUX_DATA(AUDCK_MARK, PTJ6_FN),
+	PINMUX_DATA(ASEBRKAK_MARK, PTJ5_FN),
+	PINMUX_DATA(AUDATA3_MARK, PTJ4_FN),
+	PINMUX_DATA(AUDATA2_MARK, PTJ3_FN),
+	PINMUX_DATA(AUDATA1_MARK, PTJ2_FN),
+	PINMUX_DATA(AUDATA0_MARK, PTJ1_FN),
+	PINMUX_DATA(AUDSYNC_MARK, PTJ0_FN),
+
+	/* PTK FN */
+	PINMUX_DATA(PCC_RESET_MARK, PTK3_FN),
+	PINMUX_DATA(PCC_RDY_MARK, PTK2_FN),
+	PINMUX_DATA(PCC_VS2_MARK, PTK1_FN),
+	PINMUX_DATA(PCC_VS1_MARK, PTK0_FN),
+
+	/* PTL FN */
+	PINMUX_DATA(TRST_MARK, PTL7_FN),
+	PINMUX_DATA(TMS_MARK, PTL6_FN),
+	PINMUX_DATA(TDO_MARK, PTL5_FN),
+	PINMUX_DATA(TDI_MARK, PTL4_FN),
+	PINMUX_DATA(TCK_MARK, PTL3_FN),
+
+	/* PTM FN */
+	PINMUX_DATA(DREQ1_MARK, PTM7_FN),
+	PINMUX_DATA(DREQ0_MARK, PTM6_FN),
+	PINMUX_DATA(DACK1_MARK, PTM5_FN),
+	PINMUX_DATA(DACK0_MARK, PTM4_FN),
+	PINMUX_DATA(TEND1_MARK, PTM3_FN),
+	PINMUX_DATA(TEND0_MARK, PTM2_FN),
+	PINMUX_DATA(CS5B_CE1A_MARK, PTM1_FN),
+	PINMUX_DATA(CS6B_CE1B_MARK, PTM0_FN),
+
+	/* PTP FN */
+	PINMUX_DATA(USB1D_SUSPEND_MARK, PSELA_1_0_00, PTP4_FN),
+	PINMUX_DATA(REFOUT_MARK, PSELA_1_0_01, PTP4_FN),
+	PINMUX_DATA(IRQOUT_MARK, PSELA_1_0_10, PTP4_FN),
+	PINMUX_DATA(IRQ3_IRL3_MARK, PTP3_FN),
+	PINMUX_DATA(IRQ2_IRL2_MARK, PTP2_FN),
+	PINMUX_DATA(IRQ1_IRL1_MARK, PTP1_FN),
+	PINMUX_DATA(IRQ0_IRL0_MARK, PTP0_FN),
+
+	/* PTR FN */
+	PINMUX_DATA(A25_MARK, PTR7_FN),
+	PINMUX_DATA(A24_MARK, PTR6_FN),
+	PINMUX_DATA(A23_MARK, PTR5_FN),
+	PINMUX_DATA(A22_MARK, PTR4_FN),
+	PINMUX_DATA(A21_MARK, PTR3_FN),
+	PINMUX_DATA(A20_MARK, PTR2_FN),
+	PINMUX_DATA(A19_MARK, PTR1_FN),
+	PINMUX_DATA(A0_MARK, PTR0_FN),
+
+	/* PTS FN */
+	PINMUX_DATA(SIOF0_SYNC_MARK, PTS4_FN),
+	PINMUX_DATA(SIOF0_MCLK_MARK, PTS3_FN),
+	PINMUX_DATA(SIOF0_TXD_MARK, PTS2_FN),
+	PINMUX_DATA(SIOF0_RXD_MARK, PTS1_FN),
+	PINMUX_DATA(SIOF0_SCK_MARK, PTS0_FN),
+
+	/* PTT FN */
+	PINMUX_DATA(SCIF0_CTS_MARK, PSELB_15_14_00, PTT4_FN),
+	PINMUX_DATA(TPU_TO1_MARK, PSELB_15_14_11, PTT4_FN),
+	PINMUX_DATA(SCIF0_RTS_MARK, PSELB_15_14_00, PTT3_FN),
+	PINMUX_DATA(TPU_TO0_MARK, PSELB_15_14_11, PTT3_FN),
+	PINMUX_DATA(SCIF0_TXD_MARK, PTT2_FN),
+	PINMUX_DATA(SCIF0_RXD_MARK, PTT1_FN),
+	PINMUX_DATA(SCIF0_SCK_MARK, PTT0_FN),
+
+	/* PTU FN */
+	PINMUX_DATA(SIOF1_SYNC_MARK, PTU4_FN),
+	PINMUX_DATA(SIOF1_MCLK_MARK, PSELD_11_10_00, PTU3_FN),
+	PINMUX_DATA(TPU_TI3B_MARK, PSELD_11_10_01, PTU3_FN),
+	PINMUX_DATA(SIOF1_TXD_MARK, PSELD_15_14_00, PTU2_FN),
+	PINMUX_DATA(TPU_TI3A_MARK, PSELD_15_14_01, PTU2_FN),
+	PINMUX_DATA(MMC_DAT_MARK, PSELD_15_14_10, PTU2_FN),
+	PINMUX_DATA(SIOF1_RXD_MARK, PSELC_13_12_00, PTU1_FN),
+	PINMUX_DATA(TPU_TI2B_MARK, PSELC_13_12_01, PTU1_FN),
+	PINMUX_DATA(MMC_CMD_MARK, PSELC_13_12_10, PTU1_FN),
+	PINMUX_DATA(SIOF1_SCK_MARK, PSELC_15_14_00, PTU0_FN),
+	PINMUX_DATA(TPU_TI2A_MARK, PSELC_15_14_01, PTU0_FN),
+	PINMUX_DATA(MMC_CLK_MARK, PSELC_15_14_10, PTU0_FN),
+
+	/* PTV FN */
+	PINMUX_DATA(SCIF1_CTS_MARK, PSELB_11_10_00, PTV4_FN),
+	PINMUX_DATA(TPU_TO3_MARK, PSELB_11_10_01, PTV4_FN),
+	PINMUX_DATA(MMC_VDDON_MARK, PSELB_11_10_10, PTV4_FN),
+	PINMUX_DATA(LCD_VEPWC_MARK, PSELB_11_10_11, PTV4_FN),
+	PINMUX_DATA(SCIF1_RTS_MARK, PSELB_13_12_00, PTV3_FN),
+	PINMUX_DATA(TPU_TO2_MARK, PSELB_13_12_01, PTV3_FN),
+	PINMUX_DATA(MMC_ODMOD_MARK, PSELB_13_12_10, PTV3_FN),
+	PINMUX_DATA(LCD_VCPWC_MARK, PSELB_13_12_11, PTV3_FN),
+	PINMUX_DATA(SCIF1_TXD_MARK, PSELC_9_8_00, PTV2_FN),
+	PINMUX_DATA(SIM_D_MARK, PSELC_9_8_10, PTV2_FN),
+	PINMUX_DATA(SCIF1_RXD_MARK, PSELC_11_10_00, PTV1_FN),
+	PINMUX_DATA(SIM_RST_MARK, PSELC_11_10_10, PTV1_FN),
+	PINMUX_DATA(SCIF1_SCK_MARK, PSELD_1_0_00, PTV0_FN),
+	PINMUX_DATA(SIM_CLK_MARK, PSELD_1_0_10, PTV0_FN),
+};
+
+static struct pinmux_gpio pinmux_gpios[] = {
+	/* PTA */
+	PINMUX_GPIO(GPIO_PTA7, PTA7_DATA),
+	PINMUX_GPIO(GPIO_PTA6, PTA6_DATA),
+	PINMUX_GPIO(GPIO_PTA5, PTA5_DATA),
+	PINMUX_GPIO(GPIO_PTA4, PTA4_DATA),
+	PINMUX_GPIO(GPIO_PTA3, PTA3_DATA),
+	PINMUX_GPIO(GPIO_PTA2, PTA2_DATA),
+	PINMUX_GPIO(GPIO_PTA1, PTA1_DATA),
+	PINMUX_GPIO(GPIO_PTA0, PTA0_DATA),
+
+	/* PTB */
+	PINMUX_GPIO(GPIO_PTB7, PTB7_DATA),
+	PINMUX_GPIO(GPIO_PTB6, PTB6_DATA),
+	PINMUX_GPIO(GPIO_PTB5, PTB5_DATA),
+	PINMUX_GPIO(GPIO_PTB4, PTB4_DATA),
+	PINMUX_GPIO(GPIO_PTB3, PTB3_DATA),
+	PINMUX_GPIO(GPIO_PTB2, PTB2_DATA),
+	PINMUX_GPIO(GPIO_PTB1, PTB1_DATA),
+	PINMUX_GPIO(GPIO_PTB0, PTB0_DATA),
+
+	/* PTC */
+	PINMUX_GPIO(GPIO_PTC7, PTC7_DATA),
+	PINMUX_GPIO(GPIO_PTC6, PTC6_DATA),
+	PINMUX_GPIO(GPIO_PTC5, PTC5_DATA),
+	PINMUX_GPIO(GPIO_PTC4, PTC4_DATA),
+	PINMUX_GPIO(GPIO_PTC3, PTC3_DATA),
+	PINMUX_GPIO(GPIO_PTC2, PTC2_DATA),
+	PINMUX_GPIO(GPIO_PTC1, PTC1_DATA),
+	PINMUX_GPIO(GPIO_PTC0, PTC0_DATA),
+
+	/* PTD */
+	PINMUX_GPIO(GPIO_PTD7, PTD7_DATA),
+	PINMUX_GPIO(GPIO_PTD6, PTD6_DATA),
+	PINMUX_GPIO(GPIO_PTD5, PTD5_DATA),
+	PINMUX_GPIO(GPIO_PTD4, PTD4_DATA),
+	PINMUX_GPIO(GPIO_PTD3, PTD3_DATA),
+	PINMUX_GPIO(GPIO_PTD2, PTD2_DATA),
+	PINMUX_GPIO(GPIO_PTD1, PTD1_DATA),
+	PINMUX_GPIO(GPIO_PTD0, PTD0_DATA),
+
+	/* PTE */
+	PINMUX_GPIO(GPIO_PTE6, PTE6_DATA),
+	PINMUX_GPIO(GPIO_PTE5, PTE5_DATA),
+	PINMUX_GPIO(GPIO_PTE4, PTE4_DATA),
+	PINMUX_GPIO(GPIO_PTE3, PTE3_DATA),
+	PINMUX_GPIO(GPIO_PTE2, PTE2_DATA),
+	PINMUX_GPIO(GPIO_PTE1, PTE1_DATA),
+	PINMUX_GPIO(GPIO_PTE0, PTE0_DATA),
+
+	/* PTF */
+	PINMUX_GPIO(GPIO_PTF6, PTF6_DATA),
+	PINMUX_GPIO(GPIO_PTF5, PTF5_DATA),
+	PINMUX_GPIO(GPIO_PTF4, PTF4_DATA),
+	PINMUX_GPIO(GPIO_PTF3, PTF3_DATA),
+	PINMUX_GPIO(GPIO_PTF2, PTF2_DATA),
+	PINMUX_GPIO(GPIO_PTF1, PTF1_DATA),
+	PINMUX_GPIO(GPIO_PTF0, PTF0_DATA),
+
+	/* PTG */
+	PINMUX_GPIO(GPIO_PTG6, PTG6_DATA),
+	PINMUX_GPIO(GPIO_PTG5, PTG5_DATA),
+	PINMUX_GPIO(GPIO_PTG4, PTG4_DATA),
+	PINMUX_GPIO(GPIO_PTG3, PTG3_DATA),
+	PINMUX_GPIO(GPIO_PTG2, PTG2_DATA),
+	PINMUX_GPIO(GPIO_PTG1, PTG1_DATA),
+	PINMUX_GPIO(GPIO_PTG0, PTG0_DATA),
+
+	/* PTH */
+	PINMUX_GPIO(GPIO_PTH6, PTH6_DATA),
+	PINMUX_GPIO(GPIO_PTH5, PTH5_DATA),
+	PINMUX_GPIO(GPIO_PTH4, PTH4_DATA),
+	PINMUX_GPIO(GPIO_PTH3, PTH3_DATA),
+	PINMUX_GPIO(GPIO_PTH2, PTH2_DATA),
+	PINMUX_GPIO(GPIO_PTH1, PTH1_DATA),
+	PINMUX_GPIO(GPIO_PTH0, PTH0_DATA),
+
+	/* PTJ */
+	PINMUX_GPIO(GPIO_PTJ6, PTJ6_DATA),
+	PINMUX_GPIO(GPIO_PTJ5, PTJ5_DATA),
+	PINMUX_GPIO(GPIO_PTJ4, PTJ4_DATA),
+	PINMUX_GPIO(GPIO_PTJ3, PTJ3_DATA),
+	PINMUX_GPIO(GPIO_PTJ2, PTJ2_DATA),
+	PINMUX_GPIO(GPIO_PTJ1, PTJ1_DATA),
+	PINMUX_GPIO(GPIO_PTJ0, PTJ0_DATA),
+
+	/* PTK */
+	PINMUX_GPIO(GPIO_PTK3, PTK3_DATA),
+	PINMUX_GPIO(GPIO_PTK2, PTK2_DATA),
+	PINMUX_GPIO(GPIO_PTK1, PTK1_DATA),
+	PINMUX_GPIO(GPIO_PTK0, PTK0_DATA),
+
+	/* PTL */
+	PINMUX_GPIO(GPIO_PTL7, PTL7_DATA),
+	PINMUX_GPIO(GPIO_PTL6, PTL6_DATA),
+	PINMUX_GPIO(GPIO_PTL5, PTL5_DATA),
+	PINMUX_GPIO(GPIO_PTL4, PTL4_DATA),
+	PINMUX_GPIO(GPIO_PTL3, PTL3_DATA),
+
+	/* PTM */
+	PINMUX_GPIO(GPIO_PTM7, PTM7_DATA),
+	PINMUX_GPIO(GPIO_PTM6, PTM6_DATA),
+	PINMUX_GPIO(GPIO_PTM5, PTM5_DATA),
+	PINMUX_GPIO(GPIO_PTM4, PTM4_DATA),
+	PINMUX_GPIO(GPIO_PTM3, PTM3_DATA),
+	PINMUX_GPIO(GPIO_PTM2, PTM2_DATA),
+	PINMUX_GPIO(GPIO_PTM1, PTM1_DATA),
+	PINMUX_GPIO(GPIO_PTM0, PTM0_DATA),
+
+	/* PTP */
+	PINMUX_GPIO(GPIO_PTP4, PTP4_DATA),
+	PINMUX_GPIO(GPIO_PTP3, PTP3_DATA),
+	PINMUX_GPIO(GPIO_PTP2, PTP2_DATA),
+	PINMUX_GPIO(GPIO_PTP1, PTP1_DATA),
+	PINMUX_GPIO(GPIO_PTP0, PTP0_DATA),
+
+	/* PTR */
+	PINMUX_GPIO(GPIO_PTR7, PTR7_DATA),
+	PINMUX_GPIO(GPIO_PTR6, PTR6_DATA),
+	PINMUX_GPIO(GPIO_PTR5, PTR5_DATA),
+	PINMUX_GPIO(GPIO_PTR4, PTR4_DATA),
+	PINMUX_GPIO(GPIO_PTR3, PTR3_DATA),
+	PINMUX_GPIO(GPIO_PTR2, PTR2_DATA),
+	PINMUX_GPIO(GPIO_PTR1, PTR1_DATA),
+	PINMUX_GPIO(GPIO_PTR0, PTR0_DATA),
+
+	/* PTS */
+	PINMUX_GPIO(GPIO_PTS4, PTS4_DATA),
+	PINMUX_GPIO(GPIO_PTS3, PTS3_DATA),
+	PINMUX_GPIO(GPIO_PTS2, PTS2_DATA),
+	PINMUX_GPIO(GPIO_PTS1, PTS1_DATA),
+	PINMUX_GPIO(GPIO_PTS0, PTS0_DATA),
+
+	/* PTT */
+	PINMUX_GPIO(GPIO_PTT4, PTT4_DATA),
+	PINMUX_GPIO(GPIO_PTT3, PTT3_DATA),
+	PINMUX_GPIO(GPIO_PTT2, PTT2_DATA),
+	PINMUX_GPIO(GPIO_PTT1, PTT1_DATA),
+	PINMUX_GPIO(GPIO_PTT0, PTT0_DATA),
+
+	/* PTU */
+	PINMUX_GPIO(GPIO_PTU4, PTU4_DATA),
+	PINMUX_GPIO(GPIO_PTU3, PTU3_DATA),
+	PINMUX_GPIO(GPIO_PTU2, PTU2_DATA),
+	PINMUX_GPIO(GPIO_PTU1, PTU1_DATA),
+	PINMUX_GPIO(GPIO_PTU0, PTU0_DATA),
+
+	/* PTV */
+	PINMUX_GPIO(GPIO_PTV4, PTV4_DATA),
+	PINMUX_GPIO(GPIO_PTV3, PTV3_DATA),
+	PINMUX_GPIO(GPIO_PTV2, PTV2_DATA),
+	PINMUX_GPIO(GPIO_PTV1, PTV1_DATA),
+	PINMUX_GPIO(GPIO_PTV0, PTV0_DATA),
+
+	/* BSC */
+	PINMUX_GPIO(GPIO_FN_D31, D31_MARK),
+	PINMUX_GPIO(GPIO_FN_D30, D30_MARK),
+	PINMUX_GPIO(GPIO_FN_D29, D29_MARK),
+	PINMUX_GPIO(GPIO_FN_D28, D28_MARK),
+	PINMUX_GPIO(GPIO_FN_D27, D27_MARK),
+	PINMUX_GPIO(GPIO_FN_D26, D26_MARK),
+	PINMUX_GPIO(GPIO_FN_D25, D25_MARK),
+	PINMUX_GPIO(GPIO_FN_D24, D24_MARK),
+	PINMUX_GPIO(GPIO_FN_D23, D23_MARK),
+	PINMUX_GPIO(GPIO_FN_D22, D22_MARK),
+	PINMUX_GPIO(GPIO_FN_D21, D21_MARK),
+	PINMUX_GPIO(GPIO_FN_D20, D20_MARK),
+	PINMUX_GPIO(GPIO_FN_D19, D19_MARK),
+	PINMUX_GPIO(GPIO_FN_D18, D18_MARK),
+	PINMUX_GPIO(GPIO_FN_D17, D17_MARK),
+	PINMUX_GPIO(GPIO_FN_D16, D16_MARK),
+	PINMUX_GPIO(GPIO_FN_IOIS16, IOIS16_MARK),
+	PINMUX_GPIO(GPIO_FN_RAS, RAS_MARK),
+	PINMUX_GPIO(GPIO_FN_CAS, CAS_MARK),
+	PINMUX_GPIO(GPIO_FN_CKE, CKE_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5B_CE1A, CS5B_CE1A_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6B_CE1B, CS6B_CE1B_MARK),
+	PINMUX_GPIO(GPIO_FN_A25, A25_MARK),
+	PINMUX_GPIO(GPIO_FN_A24, A24_MARK),
+	PINMUX_GPIO(GPIO_FN_A23, A23_MARK),
+	PINMUX_GPIO(GPIO_FN_A22, A22_MARK),
+	PINMUX_GPIO(GPIO_FN_A21, A21_MARK),
+	PINMUX_GPIO(GPIO_FN_A20, A20_MARK),
+	PINMUX_GPIO(GPIO_FN_A19, A19_MARK),
+	PINMUX_GPIO(GPIO_FN_A0, A0_MARK),
+	PINMUX_GPIO(GPIO_FN_REFOUT, REFOUT_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQOUT, IRQOUT_MARK),
+
+	/* LCDC */
+	PINMUX_GPIO(GPIO_FN_LCD_DATA15, LCD_DATA15_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA14, LCD_DATA14_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA13, LCD_DATA13_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA12, LCD_DATA12_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA11, LCD_DATA11_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA10, LCD_DATA10_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA9, LCD_DATA9_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA8, LCD_DATA8_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA7, LCD_DATA7_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA6, LCD_DATA6_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA5, LCD_DATA5_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA4, LCD_DATA4_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA3, LCD_DATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA2, LCD_DATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA1, LCD_DATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA0, LCD_DATA0_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_M_DISP, LCD_M_DISP_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_CL1, LCD_CL1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_CL2, LCD_CL2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DON, LCD_DON_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_FLM, LCD_FLM_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_VEPWC, LCD_VEPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_VCPWC, LCD_VCPWC_MARK),
+
+	/* AFEIF */
+	PINMUX_GPIO(GPIO_FN_AFE_RXIN, AFE_RXIN_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_RDET, AFE_RDET_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_FS, AFE_FS_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_TXOUT, AFE_TXOUT_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_SCLK, AFE_SCLK_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_RLYCNT, AFE_RLYCNT_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_HC1, AFE_HC1_MARK),
+
+	/* IIC */
+	PINMUX_GPIO(GPIO_FN_IIC_SCL, IIC_SCL_MARK),
+	PINMUX_GPIO(GPIO_FN_IIC_SDA, IIC_SDA_MARK),
+
+	/* DAC */
+	PINMUX_GPIO(GPIO_FN_DA1, DA1_MARK),
+	PINMUX_GPIO(GPIO_FN_DA0, DA0_MARK),
+
+	/* ADC */
+	PINMUX_GPIO(GPIO_FN_AN3, AN3_MARK),
+	PINMUX_GPIO(GPIO_FN_AN2, AN2_MARK),
+	PINMUX_GPIO(GPIO_FN_AN1, AN1_MARK),
+	PINMUX_GPIO(GPIO_FN_AN0, AN0_MARK),
+	PINMUX_GPIO(GPIO_FN_ADTRG, ADTRG_MARK),
+
+	/* USB */
+	PINMUX_GPIO(GPIO_FN_USB1D_RCV, USB1D_RCV_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_TXSE0, USB1D_TXSE0_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_TXDPLS, USB1D_TXDPLS_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_DMNS, USB1D_DMNS_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_DPLS, USB1D_DPLS_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_SPEED, USB1D_SPEED_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_TXENL, USB1D_TXENL_MARK),
+
+	PINMUX_GPIO(GPIO_FN_USB2_PWR_EN, USB2_PWR_EN_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1_PWR_EN_USBF_UPLUP,
+		    USB1_PWR_EN_USBF_UPLUP_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_SUSPEND, USB1D_SUSPEND_MARK),
+
+	/* INTC */
+	PINMUX_GPIO(GPIO_FN_IRQ5, IRQ5_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4, IRQ4_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3_IRL3, IRQ3_IRL3_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2_IRL2, IRQ2_IRL2_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1_IRL1, IRQ1_IRL1_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ0_IRL0, IRQ0_IRL0_MARK),
+
+	/* PCC */
+	PINMUX_GPIO(GPIO_FN_PCC_REG, PCC_REG_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_DRV, PCC_DRV_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_BVD2, PCC_BVD2_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_BVD1, PCC_BVD1_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_CD2, PCC_CD2_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_CD1, PCC_CD1_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_RESET, PCC_RESET_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_RDY, PCC_RDY_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_VS2, PCC_VS2_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_VS1, PCC_VS1_MARK),
+
+	/* HUDI */
+	PINMUX_GPIO(GPIO_FN_AUDATA3, AUDATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA2, AUDATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA1, AUDATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA0, AUDATA0_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDCK, AUDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDSYNC, AUDSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_ASEBRKAK, ASEBRKAK_MARK),
+	PINMUX_GPIO(GPIO_FN_TRST, TRST_MARK),
+	PINMUX_GPIO(GPIO_FN_TMS, TMS_MARK),
+	PINMUX_GPIO(GPIO_FN_TDO, TDO_MARK),
+	PINMUX_GPIO(GPIO_FN_TDI, TDI_MARK),
+	PINMUX_GPIO(GPIO_FN_TCK, TCK_MARK),
+
+	/* DMAC */
+	PINMUX_GPIO(GPIO_FN_DACK1, DACK1_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ1, DREQ1_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK0, DACK0_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ0, DREQ0_MARK),
+	PINMUX_GPIO(GPIO_FN_TEND1, TEND1_MARK),
+	PINMUX_GPIO(GPIO_FN_TEND0, TEND0_MARK),
+
+	/* SIOF0 */
+	PINMUX_GPIO(GPIO_FN_SIOF0_SYNC, SIOF0_SYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_MCLK, SIOF0_MCLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_TXD, SIOF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_RXD, SIOF0_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_SCK, SIOF0_SCK_MARK),
+
+	/* SIOF1 */
+	PINMUX_GPIO(GPIO_FN_SIOF1_SYNC, SIOF1_SYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_MCLK, SIOF1_MCLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_TXD, SIOF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_RXD, SIOF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_SCK, SIOF1_SCK_MARK),
+
+	/* SCIF0 */
+	PINMUX_GPIO(GPIO_FN_SCIF0_TXD, SCIF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_RXD, SCIF0_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_RTS, SCIF0_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_CTS, SCIF0_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_SCK, SCIF0_SCK_MARK),
+
+	/* SCIF1 */
+	PINMUX_GPIO(GPIO_FN_SCIF1_TXD, SCIF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_RXD, SCIF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_RTS, SCIF1_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_CTS, SCIF1_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_SCK, SCIF1_SCK_MARK),
+
+	/* TPU */
+	PINMUX_GPIO(GPIO_FN_TPU_TO1, TPU_TO1_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TO0, TPU_TO0_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TI3B, TPU_TI3B_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TI3A, TPU_TI3A_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TI2B, TPU_TI2B_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TI2A, TPU_TI2A_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TO3, TPU_TO3_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TO2, TPU_TO2_MARK),
+
+	/* SIM */
+	PINMUX_GPIO(GPIO_FN_SIM_D, SIM_D_MARK),
+	PINMUX_GPIO(GPIO_FN_SIM_CLK, SIM_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIM_RST, SIM_RST_MARK),
+
+	/* MMC */
+	PINMUX_GPIO(GPIO_FN_MMC_DAT, MMC_DAT_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_CMD, MMC_CMD_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_CLK, MMC_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_VDDON, MMC_VDDON_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_ODMOD, MMC_ODMOD_MARK),
+
+	/* SYSC */
+	PINMUX_GPIO(GPIO_FN_STATUS0, STATUS0_MARK),
+	PINMUX_GPIO(GPIO_FN_STATUS1, STATUS1_MARK),
+};
+
+static struct pinmux_cfg_reg pinmux_config_regs[] = {
+	{ PINMUX_CFG_REG("PACR", 0xa4050100, 16, 2) {
+		PTA7_FN, PTA7_OUT, PTA7_IN_PU, PTA7_IN,
+		PTA6_FN, PTA6_OUT, PTA6_IN_PU, PTA6_IN,
+		PTA5_FN, PTA5_OUT, PTA5_IN_PU, PTA5_IN,
+		PTA4_FN, PTA4_OUT, PTA4_IN_PU, PTA4_IN,
+		PTA3_FN, PTA3_OUT, PTA3_IN_PU, PTA3_IN,
+		PTA2_FN, PTA2_OUT, PTA2_IN_PU, PTA2_IN,
+		PTA1_FN, PTA1_OUT, PTA1_IN_PU, PTA1_IN,
+		PTA0_FN, PTA0_OUT, PTA0_IN_PU, PTA0_IN }
+	},
+	{ PINMUX_CFG_REG("PBCR", 0xa4050102, 16, 2) {
+		PTB7_FN, PTB7_OUT, PTB7_IN_PU, PTB7_IN,
+		PTB6_FN, PTB6_OUT, PTB6_IN_PU, PTB6_IN,
+		PTB5_FN, PTB5_OUT, PTB5_IN_PU, PTB5_IN,
+		PTB4_FN, PTB4_OUT, PTB4_IN_PU, PTB4_IN,
+		PTB3_FN, PTB3_OUT, PTB3_IN_PU, PTB3_IN,
+		PTB2_FN, PTB2_OUT, PTB2_IN_PU, PTB2_IN,
+		PTB1_FN, PTB1_OUT, PTB1_IN_PU, PTB1_IN,
+		PTB0_FN, PTB0_OUT, PTB0_IN_PU, PTB0_IN }
+	},
+	{ PINMUX_CFG_REG("PCCR", 0xa4050104, 16, 2) {
+		PTC7_FN, PTC7_OUT, PTC7_IN_PU, PTC7_IN,
+		PTC6_FN, PTC6_OUT, PTC6_IN_PU, PTC6_IN,
+		PTC5_FN, PTC5_OUT, PTC5_IN_PU, PTC5_IN,
+		PTC4_FN, PTC4_OUT, PTC4_IN_PU, PTC4_IN,
+		PTC3_FN, PTC3_OUT, PTC3_IN_PU, PTC3_IN,
+		PTC2_FN, PTC2_OUT, PTC2_IN_PU, PTC2_IN,
+		PTC1_FN, PTC1_OUT, PTC1_IN_PU, PTC1_IN,
+		PTC0_FN, PTC0_OUT, PTC0_IN_PU, PTC0_IN }
+	},
+	{ PINMUX_CFG_REG("PDCR", 0xa4050106, 16, 2) {
+		PTD7_FN, PTD7_OUT, PTD7_IN_PU, PTD7_IN,
+		PTD6_FN, PTD6_OUT, PTD6_IN_PU, PTD6_IN,
+		PTD5_FN, PTD5_OUT, PTD5_IN_PU, PTD5_IN,
+		PTD4_FN, PTD4_OUT, PTD4_IN_PU, PTD4_IN,
+		PTD3_FN, PTD3_OUT, PTD3_IN_PU, PTD3_IN,
+		PTD2_FN, PTD2_OUT, PTD2_IN_PU, PTD2_IN,
+		PTD1_FN, PTD1_OUT, PTD1_IN_PU, PTD1_IN,
+		PTD0_FN, PTD0_OUT, PTD0_IN_PU, PTD0_IN }
+	},
+	{ PINMUX_CFG_REG("PECR", 0xa4050108, 16, 2) {
+		0, 0, 0, 0,
+		PTE6_FN, 0, 0, PTE6_IN,
+		PTE5_FN, 0, 0, PTE5_IN,
+		PTE4_FN, PTE4_OUT, PTE4_IN_PU, PTE4_IN,
+		PTE3_FN, PTE3_OUT, PTE3_IN_PU, PTE3_IN,
+		PTE2_FN, PTE2_OUT, PTE2_IN_PU, PTE2_IN,
+		PTE1_FN, PTE1_OUT, PTE1_IN_PU, PTE1_IN,
+		PTE0_FN, PTE0_OUT, PTE0_IN_PU, PTE0_IN }
+	},
+	{ PINMUX_CFG_REG("PFCR", 0xa405010a, 16, 2) {
+		0, 0, 0, 0,
+		PTF6_FN, 0, 0, PTF6_IN,
+		PTF5_FN, 0, 0, PTF5_IN,
+		PTF4_FN, 0, 0, PTF4_IN,
+		PTF3_FN, 0, 0, PTF3_IN,
+		PTF2_FN, 0, 0, PTF2_IN,
+		PTF1_FN, 0, 0, PTF1_IN,
+		PTF0_FN, 0, 0, PTF0_IN }
+	},
+	{ PINMUX_CFG_REG("PGCR", 0xa405010c, 16, 2) {
+		0, 0, 0, 0,
+		PTG6_FN, PTG6_OUT, PTG6_IN_PU, PTG6_IN,
+		PTG5_FN, PTG5_OUT, PTG5_IN_PU, PTG5_IN,
+		PTG4_FN, PTG4_OUT, PTG4_IN_PU, PTG4_IN,
+		PTG3_FN, PTG3_OUT, PTG3_IN_PU, PTG3_IN,
+		PTG2_FN, PTG2_OUT, PTG2_IN_PU, PTG2_IN,
+		PTG1_FN, PTG1_OUT, PTG1_IN_PU, PTG1_IN,
+		PTG0_FN, PTG0_OUT, PTG0_IN_PU, PTG0_IN }
+	},
+	{ PINMUX_CFG_REG("PHCR", 0xa405010e, 16, 2) {
+		0, 0, 0, 0,
+		PTH6_FN, PTH6_OUT, PTH6_IN_PU, PTH6_IN,
+		PTH5_FN, PTH5_OUT, PTH5_IN_PU, PTH5_IN,
+		PTH4_FN, PTH4_OUT, PTH4_IN_PU, PTH4_IN,
+		PTH3_FN, PTH3_OUT, PTH3_IN_PU, PTH3_IN,
+		PTH2_FN, PTH2_OUT, PTH2_IN_PU, PTH2_IN,
+		PTH1_FN, PTH1_OUT, PTH1_IN_PU, PTH1_IN,
+		PTH0_FN, PTH0_OUT, PTH0_IN_PU, PTH0_IN }
+	},
+	{ PINMUX_CFG_REG("PJCR", 0xa4050110, 16, 2) {
+		0, 0, 0, 0,
+		PTJ6_FN, PTJ6_OUT, PTJ6_IN_PU, PTJ6_IN,
+		PTJ5_FN, PTJ5_OUT, PTJ5_IN_PU, PTJ5_IN,
+		PTJ4_FN, PTJ4_OUT, PTJ4_IN_PU, PTJ4_IN,
+		PTJ3_FN, PTJ3_OUT, PTJ3_IN_PU, PTJ3_IN,
+		PTJ2_FN, PTJ2_OUT, PTJ2_IN_PU, PTJ2_IN,
+		PTJ1_FN, PTJ1_OUT, PTJ1_IN_PU, PTJ1_IN,
+		PTJ0_FN, PTJ0_OUT, PTJ0_IN_PU, PTJ0_IN }
+	},
+	{ PINMUX_CFG_REG("PKCR", 0xa4050112, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTK3_FN, PTK3_OUT, PTK3_IN_PU, PTK3_IN,
+		PTK2_FN, PTK2_OUT, PTK2_IN_PU, PTK2_IN,
+		PTK1_FN, PTK1_OUT, PTK1_IN_PU, PTK1_IN,
+		PTK0_FN, PTK0_OUT, PTK0_IN_PU, PTK0_IN }
+	},
+	{ PINMUX_CFG_REG("PLCR", 0xa4050114, 16, 2) {
+		PTL7_FN, PTL7_OUT, PTL7_IN_PU, PTL7_IN,
+		PTL6_FN, PTL6_OUT, PTL6_IN_PU, PTL6_IN,
+		PTL5_FN, PTL5_OUT, PTL5_IN_PU, PTL5_IN,
+		PTL4_FN, PTL4_OUT, PTL4_IN_PU, PTL4_IN,
+		PTL3_FN, PTL3_OUT, PTL3_IN_PU, PTL3_IN,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PMCR", 0xa4050116, 16, 2) {
+		PTM7_FN, PTM7_OUT, PTM7_IN_PU, PTM7_IN,
+		PTM6_FN, PTM6_OUT, PTM6_IN_PU, PTM6_IN,
+		PTM5_FN, PTM5_OUT, PTM5_IN_PU, PTM5_IN,
+		PTM4_FN, PTM4_OUT, PTM4_IN_PU, PTM4_IN,
+		PTM3_FN, PTM3_OUT, PTM3_IN_PU, PTM3_IN,
+		PTM2_FN, PTM2_OUT, PTM2_IN_PU, PTM2_IN,
+		PTM1_FN, PTM1_OUT, PTM1_IN_PU, PTM1_IN,
+		PTM0_FN, PTM0_OUT, PTM0_IN_PU, PTM0_IN }
+	},
+	{ PINMUX_CFG_REG("PPCR", 0xa4050118, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTP4_FN, PTP4_OUT, PTP4_IN_PU, PTP4_IN,
+		PTP3_FN, PTP3_OUT, PTP3_IN_PU, PTP3_IN,
+		PTP2_FN, PTP2_OUT, PTP2_IN_PU, PTP2_IN,
+		PTP1_FN, PTP1_OUT, PTP1_IN_PU, PTP1_IN,
+		PTP0_FN, PTP0_OUT, PTP0_IN_PU, PTP0_IN }
+	},
+	{ PINMUX_CFG_REG("PRCR", 0xa405011a, 16, 2) {
+		PTR7_FN, PTR7_OUT, PTR7_IN_PU, PTR7_IN,
+		PTR6_FN, PTR6_OUT, PTR6_IN_PU, PTR6_IN,
+		PTR5_FN, PTR5_OUT, PTR5_IN_PU, PTR5_IN,
+		PTR4_FN, PTR4_OUT, PTR4_IN_PU, PTR4_IN,
+		PTR3_FN, PTR3_OUT, PTR3_IN_PU, PTR3_IN,
+		PTR2_FN, PTR2_OUT, PTR2_IN_PU, PTR2_IN,
+		PTR1_FN, PTR1_OUT, PTR1_IN_PU, PTR1_IN,
+		PTR0_FN, PTR0_OUT, PTR0_IN_PU, PTR0_IN }
+	},
+	{ PINMUX_CFG_REG("PSCR", 0xa405011c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTS4_FN, PTS4_OUT, PTS4_IN_PU, PTS4_IN,
+		PTS3_FN, PTS3_OUT, PTS3_IN_PU, PTS3_IN,
+		PTS2_FN, PTS2_OUT, PTS2_IN_PU, PTS2_IN,
+		PTS1_FN, PTS1_OUT, PTS1_IN_PU, PTS1_IN,
+		PTS0_FN, PTS0_OUT, PTS0_IN_PU, PTS0_IN }
+	},
+	{ PINMUX_CFG_REG("PTCR", 0xa405011e, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTT4_FN, PTT4_OUT, PTT4_IN_PU, PTT4_IN,
+		PTT3_FN, PTT3_OUT, PTT3_IN_PU, PTT3_IN,
+		PTT2_FN, PTT2_OUT, PTT2_IN_PU, PTT2_IN,
+		PTT1_FN, PTT1_OUT, PTT1_IN_PU, PTT1_IN,
+		PTT0_FN, PTT0_OUT, PTT0_IN_PU, PTT0_IN }
+	},
+	{ PINMUX_CFG_REG("PUCR", 0xa4050120, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTU4_FN, PTU4_OUT, PTU4_IN_PU, PTU4_IN,
+		PTU3_FN, PTU3_OUT, PTU3_IN_PU, PTU3_IN,
+		PTU2_FN, PTU2_OUT, PTU2_IN_PU, PTU2_IN,
+		PTU1_FN, PTU1_OUT, PTU1_IN_PU, PTU1_IN,
+		PTU0_FN, PTU0_OUT, PTU0_IN_PU, PTU0_IN }
+	},
+	{ PINMUX_CFG_REG("PVCR", 0xa4050122, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTV4_FN, PTV4_OUT, PTV4_IN_PU, PTV4_IN,
+		PTV3_FN, PTV3_OUT, PTV3_IN_PU, PTV3_IN,
+		PTV2_FN, PTV2_OUT, PTV2_IN_PU, PTV2_IN,
+		PTV1_FN, PTV1_OUT, PTV1_IN_PU, PTV1_IN,
+		PTV0_FN, PTV0_OUT, PTV0_IN_PU, PTV0_IN }
+	},
+	{}
+};
+
+static struct pinmux_data_reg pinmux_data_regs[] = {
+	{ PINMUX_DATA_REG("PADR", 0xa4050140, 8) {
+		PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+		PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA }
+	},
+	{ PINMUX_DATA_REG("PBDR", 0xa4050142, 8) {
+		PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+		PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA }
+	},
+	{ PINMUX_DATA_REG("PCDR", 0xa4050144, 8) {
+		PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+		PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA }
+	},
+	{ PINMUX_DATA_REG("PDDR", 0xa4050126, 8) {
+		PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+		PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA }
+	},
+	{ PINMUX_DATA_REG("PEDR", 0xa4050148, 8) {
+		0, PTE6_DATA, PTE5_DATA, PTE4_DATA,
+		PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDR", 0xa405014a, 8) {
+		0, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+		PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA }
+	},
+	{ PINMUX_DATA_REG("PGDR", 0xa405014c, 8) {
+		0, PTG6_DATA, PTG5_DATA, PTG4_DATA,
+		PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA }
+	},
+	{ PINMUX_DATA_REG("PHDR", 0xa405014e, 8) {
+		0, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+		PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA }
+	},
+	{ PINMUX_DATA_REG("PJDR", 0xa4050150, 8) {
+		0, PTJ6_DATA, PTJ5_DATA, PTJ4_DATA,
+		PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PKDR", 0xa4050152, 8) {
+		0, 0, 0, 0,
+		PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA }
+	},
+	{ PINMUX_DATA_REG("PLDR", 0xa4050154, 8) {
+		PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+		PTL3_DATA, 0, 0, 0 }
+	},
+	{ PINMUX_DATA_REG("PMDR", 0xa4050156, 8) {
+		PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+		PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA }
+	},
+	{ PINMUX_DATA_REG("PPDR", 0xa4050158, 8) {
+		0, 0, 0, PTP4_DATA,
+		PTP3_DATA, PTP2_DATA, PTP1_DATA, PTP0_DATA }
+	},
+	{ PINMUX_DATA_REG("PRDR", 0xa405015a, 8) {
+		PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+		PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA }
+	},
+	{ PINMUX_DATA_REG("PSDR", 0xa405015c, 8) {
+		0, 0, 0, PTS4_DATA,
+		PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA }
+	},
+	{ PINMUX_DATA_REG("PTDR", 0xa405015e, 8) {
+		0, 0, 0, PTT4_DATA,
+		PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA }
+	},
+	{ PINMUX_DATA_REG("PUDR", 0xa4050160, 8) {
+		0, 0, 0, PTU4_DATA,
+		PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA }
+	},
+	{ PINMUX_DATA_REG("PVDR", 0xa4050162, 8) {
+		0, 0, 0, PTV4_DATA,
+		PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA }
+	},
+	{ },
+};
+
+static struct pinmux_info sh7720_pinmux_info = {
+	.name = "sh7720_pfc",
+	.reserved_id = PINMUX_RESERVED,
+	.data = { PINMUX_DATA_BEGIN, PINMUX_DATA_END },
+	.input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END },
+	.input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END },
+	.output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END },
+	.mark = { PINMUX_MARK_BEGIN, PINMUX_MARK_END },
+	.function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END },
+
+	.first_gpio = GPIO_PTA7,
+	.last_gpio = GPIO_FN_STATUS1,
+
+	.gpios = pinmux_gpios,
+	.cfg_regs = pinmux_config_regs,
+	.data_regs = pinmux_data_regs,
+
+	.gpio_data = pinmux_data,
+	.gpio_data_size = ARRAY_SIZE(pinmux_data),
+};
+
+static int __init plat_pinmux_setup(void)
+{
+	return register_pinmux(&sh7720_pinmux_info);
+}
+
+arch_initcall(plat_pinmux_setup);
-- 
GitLab


From 843284d0e41b2a7f88504d051d91b5e9dc9c78c7 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 8 Oct 2008 20:42:56 +0900
Subject: [PATCH 650/895] sh: Use sh7720 GPIO on magicpanelr2 board

This patch hooks up the magicpanelr2 board with the sh7720 pinmux code.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/Kconfig              |  1 +
 arch/sh/boards/board-magicpanelr2.c | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index 6c4cbf15ce88..50467f9d0d0b 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -238,6 +238,7 @@ config SH_X3PROTO
 config SH_MAGIC_PANEL_R2
 	bool "Magic Panel R2"
 	depends on CPU_SUBTYPE_SH7720
+	select GENERIC_GPIO
 	help
 	  Select Magic Panel R2 if configuring for Magic Panel R2.
 
diff --git a/arch/sh/boards/board-magicpanelr2.c b/arch/sh/boards/board-magicpanelr2.c
index f3b8b07ea5d6..eb0e8e992c29 100644
--- a/arch/sh/boards/board-magicpanelr2.c
+++ b/arch/sh/boards/board-magicpanelr2.c
@@ -13,12 +13,14 @@
 #include <linux/irq.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
+#include <linux/gpio.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/map.h>
 #include <asm/magicpanelr2.h>
 #include <asm/heartbeat.h>
+#include <asm/sh7720.h>
 
 #define LAN9115_READY	(ctrl_inl(0xA8000084UL) & 0x00000001UL)
 
@@ -170,7 +172,14 @@ static void __init setup_port_multiplexing(void)
 	/* R7 A25;	     R6 A24;	     R5 A23;		  R4 A22;
 	 * R3 A21;	     R2 A20;	     R1 A19;		  R0 A0;
 	 */
-	ctrl_outw(0x0000, PORT_PRCR);	/* 00 00 00 00 00 00 00 00 */
+	gpio_request(GPIO_FN_A25, NULL);
+	gpio_request(GPIO_FN_A24, NULL);
+	gpio_request(GPIO_FN_A23, NULL);
+	gpio_request(GPIO_FN_A22, NULL);
+	gpio_request(GPIO_FN_A21, NULL);
+	gpio_request(GPIO_FN_A20, NULL);
+	gpio_request(GPIO_FN_A19, NULL);
+	gpio_request(GPIO_FN_A0, NULL);
 
 	/* S7 (x);		S6 (x);        S5 (x);	     S4 GPO(EEPROM_CS2);
 	 * S3 GPO(EEPROM_CS1);  S2 SIOF0_TXD;  S1 SIOF0_RXD; S0 SIOF0_SCK;
-- 
GitLab


From a3e0270663a520a2cb8c7418c098676f25478d2f Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
Date: Fri, 17 Oct 2008 16:53:57 +0900
Subject: [PATCH 651/895] sh: ap325rxa: Add support RTC RX-8564LC in AP325RXA
 board

Renesas AP325RXA board has Epson RX-8564LC of RTC.
This patch supports RTC of this board.

Signed-off-by: Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/board-ap325rxa.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index 7ae8dcddfeb4..6abaf46a9835 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -294,6 +294,9 @@ static struct platform_device *ap325rxa_devices[] __initdata = {
 };
 
 static struct i2c_board_info __initdata ap325rxa_i2c_devices[] = {
+	{
+		I2C_BOARD_INFO("pcf8563", 0x51),
+	},
 };
 
 static int __init ap325rxa_devices_setup(void)
-- 
GitLab


From 9986b311efdc9727a5a48f03c3dac3d536fbaa74 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 17 Oct 2008 19:25:03 +0800
Subject: [PATCH 652/895] Fix debugfs_create_dir's error checking method for
 arch/sh/kernel/

debugfs_create_dir() returns NULL if an error occurs, returns -ENODEV
when debugfs is not enabled in the kernel.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/setup.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 267b344099c0..836e80d5cb9f 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -554,6 +554,8 @@ struct dentry *sh_debugfs_root;
 static int __init sh_debugfs_init(void)
 {
 	sh_debugfs_root = debugfs_create_dir("sh", NULL);
+	if (!sh_debugfs_root)
+		return -ENOMEM;
 	if (IS_ERR(sh_debugfs_root))
 		return PTR_ERR(sh_debugfs_root);
 
-- 
GitLab


From 25627c7fd71269e2658b6872eef65719ee80b9aa Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 17 Oct 2008 19:25:09 +0800
Subject: [PATCH 653/895] Fix debugfs_create_file's error checking method for
 arch/sh/mm/

debugfs_create_file() returns NULL if an error occurs, returns -ENODEV
when debugfs is not enabled in the kernel.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/mm/cache-debugfs.c | 6 ++++++
 arch/sh/mm/pmb.c           | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c
index 0e189ccd4a77..5ba067b26591 100644
--- a/arch/sh/mm/cache-debugfs.c
+++ b/arch/sh/mm/cache-debugfs.c
@@ -130,12 +130,18 @@ static int __init cache_debugfs_init(void)
 	dcache_dentry = debugfs_create_file("dcache", S_IRUSR, sh_debugfs_root,
 					    (unsigned int *)CACHE_TYPE_DCACHE,
 					    &cache_debugfs_fops);
+	if (!dcache_dentry)
+		return -ENOMEM;
 	if (IS_ERR(dcache_dentry))
 		return PTR_ERR(dcache_dentry);
 
 	icache_dentry = debugfs_create_file("icache", S_IRUSR, sh_debugfs_root,
 					    (unsigned int *)CACHE_TYPE_ICACHE,
 					    &cache_debugfs_fops);
+	if (!icache_dentry) {
+		debugfs_remove(dcache_dentry);
+		return -ENOMEM;
+	}
 	if (IS_ERR(icache_dentry)) {
 		debugfs_remove(dcache_dentry);
 		return PTR_ERR(icache_dentry);
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index cef727669c87..84241676265e 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -394,6 +394,8 @@ static int __init pmb_debugfs_init(void)
 
 	dentry = debugfs_create_file("pmb", S_IFREG | S_IRUGO,
 				     sh_debugfs_root, NULL, &pmb_debugfs_fops);
+	if (!dentry)
+		return -ENOMEM;
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
-- 
GitLab


From 9a19eb2a6607f2f6329efb3c4637fe23afae2cd7 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Oct 2008 11:37:58 +0900
Subject: [PATCH 654/895] sh: Fix up some merge damage.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/elf.h | 2 ++
 arch/sh/kernel/setup.c    | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h
index f7d0b8e6aa84..9eb9036a1bdc 100644
--- a/arch/sh/include/asm/elf.h
+++ b/arch/sh/include/asm/elf.h
@@ -197,6 +197,8 @@ do {									\
 } while (0)
 #endif
 
+#define SET_PERSONALITY(ex) set_personality(PER_LINUX_32BIT)
+
 #ifdef CONFIG_VSYSCALL
 /* vDSO has arch_setup_additional_pages */
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 836e80d5cb9f..5767f0a84cd7 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -282,7 +282,7 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
 			printk("initrd extends beyond end of memory "
 			       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
 			       initrd_start_phys + INITRD_SIZE,
-			       PFN_PHYS(max_low_pfn));
+			       (unsigned long)PFN_PHYS(max_low_pfn));
 			initrd_start = 0;
 		}
 	}
-- 
GitLab


From a30c89ad41099f58922e0941e346e7c4371655b9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 9 Oct 2008 18:41:34 +0900
Subject: [PATCH 655/895] sh: reduce Migo-R smc91x overruns

Improve Migo-R ethernet performance by reducing smc91x overruns.
This is done by enabling SMC91X_NOWAIT and optimizing CS4 setup.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/mach-migor/setup.c | 6 ++++--
 arch/sh/include/asm/migor.h       | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 003ce837734d..3e3cc9da34ad 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -37,7 +37,7 @@
  */
 
 static struct smc91x_platdata smc91x_info = {
-	.flags = SMC91X_USE_16BIT,
+	.flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
 };
 
 static struct resource smc91x_eth_resources[] = {
@@ -463,8 +463,10 @@ static int __init migor_devices_setup(void)
 	gpio_direction_output(GPIO_PTJ5, 1);
 	gpio_export(GPIO_PTJ5, 0);
 
-	/* SMC91C111 */
+	/* SMC91C111 - Enable IRQ0, Setup CS4 for 16-bit fast access */
 	gpio_request(GPIO_FN_IRQ0, NULL);
+	ctrl_outl(0x00003400, BSC_CS4BCR);
+	ctrl_outl(0x00110080, BSC_CS4WCR);
 
 	/* KEYSC */
 	clk_always_enable("mstp214"); /* KEYSC */
diff --git a/arch/sh/include/asm/migor.h b/arch/sh/include/asm/migor.h
index 70596d38fd67..e451f0229e00 100644
--- a/arch/sh/include/asm/migor.h
+++ b/arch/sh/include/asm/migor.h
@@ -52,7 +52,9 @@
 #define PORT_HIZCRB 0xa405015a
 #define PORT_HIZCRC 0xa405015c
 
+#define BSC_CS4BCR 0xfec10010
 #define BSC_CS6ABCR 0xfec1001c
+#define BSC_CS4WCR 0xfec10030
 
 #include <video/sh_mobile_lcdc.h>
 
-- 
GitLab


From 4aeaa223433355a281559033f1e7600bfd73238e Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 9 Oct 2008 18:42:55 +0900
Subject: [PATCH 656/895] sh: add dynamic crash base address support

Add support for dynamic crash kernel base address.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/setup.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 5767f0a84cd7..e7152cc6930e 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -147,6 +147,7 @@ static void __init reserve_crashkernel(void)
 {
 	unsigned long long free_mem;
 	unsigned long long crash_size, crash_base;
+	void *vp;
 	int ret;
 
 	free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
@@ -155,12 +156,14 @@ static void __init reserve_crashkernel(void)
 			&crash_size, &crash_base);
 	if (ret == 0 && crash_size) {
 		if (crash_base <= 0) {
-			printk(KERN_INFO "crashkernel reservation failed - "
-					"you have to specify a base address\n");
-			return;
-		}
-
-		if (reserve_bootmem(crash_base, crash_size,
+			vp = alloc_bootmem_nopanic(crash_size); 
+			if (!vp) {
+				printk(KERN_INFO "crashkernel allocation "
+				       "failed\n");
+				return;
+			}
+			crash_base = __pa(vp);
+		} else if (reserve_bootmem(crash_base, crash_size,
 					BOOTMEM_EXCLUSIVE) < 0) {
 			printk(KERN_INFO "crashkernel reservation failed - "
 					"memory is in use\n");
-- 
GitLab


From 7713718751aa8b8df39314b7e49ea2f66a5d1f0c Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 9 Oct 2008 18:43:57 +0900
Subject: [PATCH 657/895] sh: remove consistent alloc cruft

Remove left overs from the generic declared coherent rework.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/mm/consistent.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 7619a0fae086..9f8ea3ada4db 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -16,14 +16,6 @@
 #include <asm/addrspace.h>
 #include <asm/io.h>
 
-struct dma_coherent_mem {
-	void		*virt_base;
-	u32		device_base;
-	int		size;
-	int		flags;
-	unsigned long	*bitmap;
-};
-
 void *dma_alloc_coherent(struct device *dev, size_t size,
 			   dma_addr_t *dma_handle, gfp_t gfp)
 {
@@ -58,12 +50,10 @@ EXPORT_SYMBOL(dma_alloc_coherent);
 void dma_free_coherent(struct device *dev, size_t size,
 			 void *vaddr, dma_addr_t dma_handle)
 {
-	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
 	int order = get_order(size);
 
 	if (!dma_release_from_coherent(dev, order, vaddr)) {
 		WARN_ON(irqs_disabled());	/* for portability */
-		BUG_ON(mem && mem->flags & DMA_MEMORY_EXCLUSIVE);
 		free_pages((unsigned long)phys_to_virt(dma_handle), order);
 		iounmap(vaddr);
 	}
-- 
GitLab


From 7994b1c55ec19238067a47f09b0463db3a48c33c Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 9 Oct 2008 18:47:27 +0900
Subject: [PATCH 658/895] video: remove unused sh_mobile_lcdc platform data

Remove lddckr from the platform data, these days we calculate the
register value from clock source and clock dividers anyway.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/video/sh_mobile_lcdcfb.c | 1 -
 include/video/sh_mobile_lcdc.h   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index b7468bacce80..399a8a75ab0b 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -589,7 +589,6 @@ static int __init sh_mobile_lcdc_probe(struct platform_device *pdev)
 		goto err1;
 	}
 
-	priv->lddckr = pdata->lddckr;
 	priv->base = ioremap_nocache(res->start, (res->end - res->start) + 1);
 
 	for (i = 0; i < j; i++) {
diff --git a/include/video/sh_mobile_lcdc.h b/include/video/sh_mobile_lcdc.h
index 130102f663f5..bfb42dbc5cc8 100644
--- a/include/video/sh_mobile_lcdc.h
+++ b/include/video/sh_mobile_lcdc.h
@@ -64,7 +64,6 @@ struct sh_mobile_lcdc_chan_cfg {
 };
 
 struct sh_mobile_lcdc_info {
-	unsigned long lddckr;
 	int clock_source;
 	struct sh_mobile_lcdc_chan_cfg ch[2];
 };
-- 
GitLab


From f400f510df4e29bd00ffe07981ec703070cb9e19 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 9 Oct 2008 18:48:16 +0900
Subject: [PATCH 659/895] video: add sh_mobile_lcdc platform flags

Add platform data flags for detailed lcd display configuration.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/video/sh_mobile_lcdcfb.c | 5 +++++
 include/video/sh_mobile_lcdc.h   | 7 +++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index 399a8a75ab0b..efff672fd7b8 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -262,6 +262,11 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv)
 		tmp = ch->ldmt1r_value;
 		tmp |= (lcd_cfg->sync & FB_SYNC_VERT_HIGH_ACT) ? 0 : 1 << 28;
 		tmp |= (lcd_cfg->sync & FB_SYNC_HOR_HIGH_ACT) ? 0 : 1 << 27;
+		tmp |= (ch->cfg.flags & LCDC_FLAGS_DWPOL) ? 1 << 26 : 0;
+		tmp |= (ch->cfg.flags & LCDC_FLAGS_DIPOL) ? 1 << 25 : 0;
+		tmp |= (ch->cfg.flags & LCDC_FLAGS_DAPOL) ? 1 << 24 : 0;
+		tmp |= (ch->cfg.flags & LCDC_FLAGS_HSCNT) ? 1 << 17 : 0;
+		tmp |= (ch->cfg.flags & LCDC_FLAGS_DWCNT) ? 1 << 16 : 0;
 		lcdc_write_chan(ch, LDMT1R, tmp);
 
 		/* setup SYS bus */
diff --git a/include/video/sh_mobile_lcdc.h b/include/video/sh_mobile_lcdc.h
index bfb42dbc5cc8..1a4bc6ada606 100644
--- a/include/video/sh_mobile_lcdc.h
+++ b/include/video/sh_mobile_lcdc.h
@@ -28,6 +28,12 @@ enum { LCDC_CHAN_DISABLED = 0,
 
 enum { LCDC_CLK_BUS, LCDC_CLK_PERIPHERAL, LCDC_CLK_EXTERNAL };
 
+#define LCDC_FLAGS_DWPOL (1 << 0) /* Rising edge dot clock data latch */
+#define LCDC_FLAGS_DIPOL (1 << 1) /* Active low display enable polarity */
+#define LCDC_FLAGS_DAPOL (1 << 2) /* Active low display data polarity */
+#define LCDC_FLAGS_HSCNT (1 << 3) /* Disable HSYNC during VBLANK */
+#define LCDC_FLAGS_DWCNT (1 << 4) /* Disable dotclock during blanking */
+
 struct sh_mobile_lcdc_sys_bus_cfg {
 	unsigned long ldmt2r;
 	unsigned long ldmt3r;
@@ -57,6 +63,7 @@ struct sh_mobile_lcdc_chan_cfg {
 	int bpp;
 	int interface_type; /* selects RGBn or SYSn I/F, see above */
 	int clock_divider;
+	unsigned long flags; /* LCDC_FLAGS_... */
 	struct fb_videomode lcd_cfg;
 	struct sh_mobile_lcdc_lcd_size_cfg lcd_size_cfg;
 	struct sh_mobile_lcdc_board_cfg board_cfg;
-- 
GitLab


From ba1d28181c586deec468cc6ae558c0c099f1b956 Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
Date: Fri, 3 Oct 2008 17:37:31 +0900
Subject: [PATCH 660/895] serial: sh-sci: Add support SCIF of SH7723

SH7723 has two types of SCIF (SCIF and SCIFA).
The current sh-sci driver supports only SCIFA, and calculation methods of SCBRR
are different.
This patch support this methods.

Signed-off-by: Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/serial/sh-sci.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index 511c10d42187..7cd28b226800 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -789,7 +789,14 @@ static inline int sci_rxd_in(struct uart_port *port)
       defined(CONFIG_CPU_SUBTYPE_SH7721)
 #define SCBRR_VALUE(bps, clk) (((clk*2)+16*bps)/(32*bps)-1)
 #elif defined(CONFIG_CPU_SUBTYPE_SH7723)
-#define SCBRR_VALUE(bps, clk) (((clk*2)+16*bps)/(16*bps)-1)
+static inline int scbrr_calc(struct uart_port *port, int bps, int clk)
+{
+	if (port->type == PORT_SCIF)
+		return (clk+16*bps)/(32*bps)-1;
+	else
+		return ((clk*2)+16*bps)/(16*bps)-1;
+}
+#define SCBRR_VALUE(bps, clk) scbrr_calc(port, bps, clk)
 #elif defined(__H8300H__) || defined(__H8300S__)
 #define SCBRR_VALUE(bps, clk) (((clk*1000/32)/bps)-1)
 #else /* Generic SH */
-- 
GitLab


From f7275650133ce9df83886684f3bd97373dfc21ea Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Oct 2008 12:04:53 +0900
Subject: [PATCH 661/895] sh: Move the CPU definition headers from asm/ to
 cpu/.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/board-ap325rxa.c                | 2 +-
 arch/sh/boards/board-magicpanelr2.c            | 2 +-
 arch/sh/boards/mach-migor/lcd_qvga.c           | 2 +-
 arch/sh/boards/mach-migor/setup.c              | 2 +-
 arch/sh/include/{asm => cpu-sh2a/cpu}/sh7203.h | 0
 arch/sh/include/{asm => cpu-sh3/cpu}/sh7720.h  | 0
 arch/sh/include/{asm => cpu-sh4}/sh7722.h      | 0
 arch/sh/include/{asm => cpu-sh4}/sh7723.h      | 0
 arch/sh/kernel/cpu/sh3/pinmux-sh7720.c         | 2 +-
 arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c        | 2 +-
 arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c        | 2 +-
 11 files changed, 7 insertions(+), 7 deletions(-)
 rename arch/sh/include/{asm => cpu-sh2a/cpu}/sh7203.h (100%)
 rename arch/sh/include/{asm => cpu-sh3/cpu}/sh7720.h (100%)
 rename arch/sh/include/{asm => cpu-sh4}/sh7722.h (100%)
 rename arch/sh/include/{asm => cpu-sh4}/sh7723.h (100%)

diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index 84fa8a04eac4..7c7874e6ac36 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -24,7 +24,7 @@
 #include <video/sh_mobile_lcdc.h>
 #include <asm/io.h>
 #include <asm/clock.h>
-#include <asm/sh7723.h>
+#include <cpu/sh7723.h>
 
 static struct smc911x_platdata smc911x_info = {
 	.flags = SMC911X_USE_32BIT,
diff --git a/arch/sh/boards/board-magicpanelr2.c b/arch/sh/boards/board-magicpanelr2.c
index eb0e8e992c29..da6bff83a6ea 100644
--- a/arch/sh/boards/board-magicpanelr2.c
+++ b/arch/sh/boards/board-magicpanelr2.c
@@ -20,7 +20,7 @@
 #include <linux/mtd/map.h>
 #include <asm/magicpanelr2.h>
 #include <asm/heartbeat.h>
-#include <asm/sh7720.h>
+#include <cpu/sh7720.h>
 
 #define LAN9115_READY	(ctrl_inl(0xA8000084UL) & 0x00000001UL)
 
diff --git a/arch/sh/boards/mach-migor/lcd_qvga.c b/arch/sh/boards/mach-migor/lcd_qvga.c
index c283cfc2a5c3..71a40710a5aa 100644
--- a/arch/sh/boards/mach-migor/lcd_qvga.c
+++ b/arch/sh/boards/mach-migor/lcd_qvga.c
@@ -19,7 +19,7 @@
 #include <linux/module.h>
 #include <linux/gpio.h>
 #include <video/sh_mobile_lcdc.h>
-#include <asm/sh7722.h>
+#include <cpu/sh7722.h>
 #include <asm/migor.h>
 
 /* LCD Module is a PH240320T according to board schematics. This module
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 3e3cc9da34ad..d00cce04d6e3 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -26,7 +26,7 @@
 #include <asm/io.h>
 #include <asm/sh_keysc.h>
 #include <asm/migor.h>
-#include <asm/sh7722.h>
+#include <cpu/sh7722.h>
 
 /* Address     IRQ  Size  Bus  Description
  * 0x00000000       64MB  16   NOR Flash (SP29PL256N)
diff --git a/arch/sh/include/asm/sh7203.h b/arch/sh/include/cpu-sh2a/cpu/sh7203.h
similarity index 100%
rename from arch/sh/include/asm/sh7203.h
rename to arch/sh/include/cpu-sh2a/cpu/sh7203.h
diff --git a/arch/sh/include/asm/sh7720.h b/arch/sh/include/cpu-sh3/cpu/sh7720.h
similarity index 100%
rename from arch/sh/include/asm/sh7720.h
rename to arch/sh/include/cpu-sh3/cpu/sh7720.h
diff --git a/arch/sh/include/asm/sh7722.h b/arch/sh/include/cpu-sh4/sh7722.h
similarity index 100%
rename from arch/sh/include/asm/sh7722.h
rename to arch/sh/include/cpu-sh4/sh7722.h
diff --git a/arch/sh/include/asm/sh7723.h b/arch/sh/include/cpu-sh4/sh7723.h
similarity index 100%
rename from arch/sh/include/asm/sh7723.h
rename to arch/sh/include/cpu-sh4/sh7723.h
diff --git a/arch/sh/kernel/cpu/sh3/pinmux-sh7720.c b/arch/sh/kernel/cpu/sh3/pinmux-sh7720.c
index b66c239509a2..9ca154627147 100644
--- a/arch/sh/kernel/cpu/sh3/pinmux-sh7720.c
+++ b/arch/sh/kernel/cpu/sh3/pinmux-sh7720.c
@@ -11,7 +11,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/gpio.h>
-#include <asm/sh7720.h>
+#include <cpu/sh7720.h>
 
 enum {
 	PINMUX_RESERVED = 0,
diff --git a/arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c b/arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c
index 51b12ab58c9e..cb9d07bd59f8 100644
--- a/arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c
@@ -1,7 +1,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/gpio.h>
-#include <asm/sh7722.h>
+#include <cpu/sh7722.h>
 
 enum {
 	PINMUX_RESERVED = 0,
diff --git a/arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c b/arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c
index 92a9bc91015f..88bf5ecda849 100644
--- a/arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c
@@ -11,7 +11,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/gpio.h>
-#include <asm/sh7723.h>
+#include <cpu/sh7723.h>
 
 enum {
 	PINMUX_RESERVED = 0,
-- 
GitLab


From 7639a4541f7e7abb1295ff8ab39cc2f5842239ae Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Oct 2008 13:02:48 +0900
Subject: [PATCH 662/895] sh: Migrate common board headers to mach-common/.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Makefile                                               | 3 +++
 arch/sh/boards/board-magicpanelr2.c                            | 2 +-
 arch/sh/boards/board-sh7785lcr.c                               | 2 +-
 arch/sh/boards/board-shmin.c                                   | 2 +-
 arch/sh/boards/mach-edosk7705/io.c                             | 2 +-
 arch/sh/boards/mach-edosk7705/setup.c                          | 2 +-
 arch/sh/boards/mach-highlander/irq-r7780mp.c                   | 2 +-
 arch/sh/boards/mach-highlander/irq-r7780rp.c                   | 2 +-
 arch/sh/boards/mach-highlander/irq-r7785rp.c                   | 2 +-
 arch/sh/boards/mach-highlander/psw.c                           | 2 +-
 arch/sh/boards/mach-highlander/setup.c                         | 2 +-
 arch/sh/boards/mach-hp6xx/hp6xx_apm.c                          | 2 +-
 arch/sh/boards/mach-hp6xx/pm.c                                 | 2 +-
 arch/sh/boards/mach-hp6xx/setup.c                              | 2 +-
 arch/sh/boards/mach-lboxre2/irq.c                              | 2 +-
 arch/sh/boards/mach-lboxre2/setup.c                            | 2 +-
 arch/sh/boards/mach-microdev/io.c                              | 2 +-
 arch/sh/boards/mach-microdev/irq.c                             | 2 +-
 arch/sh/boards/mach-microdev/setup.c                           | 2 +-
 arch/sh/boards/mach-migor/lcd_qvga.c                           | 2 +-
 arch/sh/boards/mach-migor/setup.c                              | 2 +-
 arch/sh/boards/mach-r2d/irq.c                                  | 2 +-
 arch/sh/boards/mach-r2d/setup.c                                | 2 +-
 arch/sh/boards/mach-sdk7780/irq.c                              | 2 +-
 arch/sh/boards/mach-sdk7780/setup.c                            | 2 +-
 arch/sh/boards/mach-sh7763rdp/irq.c                            | 2 +-
 arch/sh/boards/mach-sh7763rdp/setup.c                          | 2 +-
 arch/sh/boards/mach-snapgear/setup.c                           | 2 +-
 arch/sh/boards/mach-systemh/io.c                               | 2 +-
 arch/sh/boards/mach-systemh/irq.c                              | 2 +-
 arch/sh/boards/mach-systemh/setup.c                            | 2 +-
 arch/sh/boards/mach-titan/io.c                                 | 2 +-
 arch/sh/boards/mach-titan/setup.c                              | 2 +-
 arch/sh/drivers/pci/ops-lboxre2.c                              | 2 +-
 arch/sh/drivers/pci/ops-r7780rp.c                              | 2 +-
 arch/sh/drivers/pci/ops-rts7751r2d.c                           | 2 +-
 arch/sh/drivers/pci/ops-sdk7780.c                              | 2 +-
 arch/sh/drivers/pci/ops-titan.c                                | 2 +-
 arch/sh/include/{asm => mach-common/mach}/edosk7705.h          | 0
 .../include/{asm/r7780rp.h => mach-common/mach/highlander.h}   | 0
 arch/sh/include/{asm => mach-common/mach}/hp6xx.h              | 0
 arch/sh/include/{asm => mach-common/mach}/lboxre2.h            | 0
 arch/sh/include/{asm => mach-common/mach}/magicpanelr2.h       | 0
 arch/sh/include/{asm => mach-common/mach}/microdev.h           | 0
 arch/sh/include/{asm => mach-common/mach}/migor.h              | 0
 arch/sh/include/{asm/rts7751r2d.h => mach-common/mach/r2d.h}   | 0
 arch/sh/include/{asm => mach-common/mach}/sdk7780.h            | 0
 arch/sh/include/{asm => mach-common/mach}/sh7763rdp.h          | 0
 arch/sh/include/{asm => mach-common/mach}/sh7785lcr.h          | 0
 arch/sh/include/{asm => mach-common/mach}/shmin.h              | 0
 arch/sh/include/{asm => mach-common/mach}/snapgear.h           | 0
 arch/sh/include/{asm => mach-common/mach}/systemh7751.h        | 0
 arch/sh/include/{asm => mach-common/mach}/titan.h              | 0
 drivers/input/touchscreen/hp680_ts_input.c                     | 2 +-
 drivers/leds/leds-hp6xx.c                                      | 2 +-
 drivers/rtc/rtc-ds1302.c                                       | 2 +-
 drivers/video/backlight/hp680_bl.c                             | 2 +-
 sound/oss/sh_dac_audio.c                                       | 2 +-
 58 files changed, 45 insertions(+), 42 deletions(-)
 rename arch/sh/include/{asm => mach-common/mach}/edosk7705.h (100%)
 rename arch/sh/include/{asm/r7780rp.h => mach-common/mach/highlander.h} (100%)
 rename arch/sh/include/{asm => mach-common/mach}/hp6xx.h (100%)
 rename arch/sh/include/{asm => mach-common/mach}/lboxre2.h (100%)
 rename arch/sh/include/{asm => mach-common/mach}/magicpanelr2.h (100%)
 rename arch/sh/include/{asm => mach-common/mach}/microdev.h (100%)
 rename arch/sh/include/{asm => mach-common/mach}/migor.h (100%)
 rename arch/sh/include/{asm/rts7751r2d.h => mach-common/mach/r2d.h} (100%)
 rename arch/sh/include/{asm => mach-common/mach}/sdk7780.h (100%)
 rename arch/sh/include/{asm => mach-common/mach}/sh7763rdp.h (100%)
 rename arch/sh/include/{asm => mach-common/mach}/sh7785lcr.h (100%)
 rename arch/sh/include/{asm => mach-common/mach}/shmin.h (100%)
 rename arch/sh/include/{asm => mach-common/mach}/snapgear.h (100%)
 rename arch/sh/include/{asm => mach-common/mach}/systemh7751.h (100%)
 rename arch/sh/include/{asm => mach-common/mach}/titan.h (100%)

diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 0bc956012c38..1f409bf81809 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -125,6 +125,9 @@ core-y	+= $(addprefix arch/sh/boards/, \
 	     $(filter-out ., $(patsubst %,%/,$(machdir-y))))
 endif
 
+# Common machine type headers. Not part of the arch/sh/boards/ hierarchy.
+machdir-y	+= mach-common
+
 # Companion chips
 core-$(CONFIG_HD6446X_SERIES)	+= arch/sh/cchips/hd6446x/
 
diff --git a/arch/sh/boards/board-magicpanelr2.c b/arch/sh/boards/board-magicpanelr2.c
index da6bff83a6ea..3de22ccdeb7e 100644
--- a/arch/sh/boards/board-magicpanelr2.c
+++ b/arch/sh/boards/board-magicpanelr2.c
@@ -18,7 +18,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/map.h>
-#include <asm/magicpanelr2.h>
+#include <mach/magicpanelr2.h>
 #include <asm/heartbeat.h>
 #include <cpu/sh7720.h>
 
diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c
index b95d674ee704..408bbddaf325 100644
--- a/arch/sh/boards/board-sh7785lcr.c
+++ b/arch/sh/boards/board-sh7785lcr.c
@@ -19,7 +19,7 @@
 #include <linux/i2c-pca-platform.h>
 #include <linux/i2c-algo-pca.h>
 #include <asm/heartbeat.h>
-#include <asm/sh7785lcr.h>
+#include <mach/sh7785lcr.h>
 
 /*
  * NOTE: This board has 2 physical memory maps.
diff --git a/arch/sh/boards/board-shmin.c b/arch/sh/boards/board-shmin.c
index 16e5dae8ecfb..5cc0867de5ab 100644
--- a/arch/sh/boards/board-shmin.c
+++ b/arch/sh/boards/board-shmin.c
@@ -8,7 +8,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <asm/machvec.h>
-#include <asm/shmin.h>
+#include <mach/shmin.h>
 #include <asm/clock.h>
 #include <asm/io.h>
 
diff --git a/arch/sh/boards/mach-edosk7705/io.c b/arch/sh/boards/mach-edosk7705/io.c
index 541cea2a652f..7d153e50a01b 100644
--- a/arch/sh/boards/mach-edosk7705/io.c
+++ b/arch/sh/boards/mach-edosk7705/io.c
@@ -11,7 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <asm/io.h>
-#include <asm/edosk7705/io.h>
+#include <mach/edosk7705.h>
 #include <asm/addrspace.h>
 
 #define SMC_IOADDR	0xA2000000
diff --git a/arch/sh/boards/mach-edosk7705/setup.c b/arch/sh/boards/mach-edosk7705/setup.c
index f076c45308dd..ab3f47bffdf3 100644
--- a/arch/sh/boards/mach-edosk7705/setup.c
+++ b/arch/sh/boards/mach-edosk7705/setup.c
@@ -10,7 +10,7 @@
  */
 #include <linux/init.h>
 #include <asm/machvec.h>
-#include <asm/edosk7705/io.h>
+#include <mach/edosk7705.h>
 
 static void __init sh_edosk7705_init_irq(void)
 {
diff --git a/arch/sh/boards/mach-highlander/irq-r7780mp.c b/arch/sh/boards/mach-highlander/irq-r7780mp.c
index ae1cfcb29700..83c28bcd4d2a 100644
--- a/arch/sh/boards/mach-highlander/irq-r7780mp.c
+++ b/arch/sh/boards/mach-highlander/irq-r7780mp.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/boards/mach-highlander/irq-r7780rp.c b/arch/sh/boards/mach-highlander/irq-r7780rp.c
index 9d3921fe27c0..b721e86b5af4 100644
--- a/arch/sh/boards/mach-highlander/irq-r7780rp.c
+++ b/arch/sh/boards/mach-highlander/irq-r7780rp.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/boards/mach-highlander/irq-r7785rp.c b/arch/sh/boards/mach-highlander/irq-r7785rp.c
index 896c045aa39d..3811b060a39b 100644
--- a/arch/sh/boards/mach-highlander/irq-r7785rp.c
+++ b/arch/sh/boards/mach-highlander/irq-r7785rp.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/boards/mach-highlander/psw.c b/arch/sh/boards/mach-highlander/psw.c
index be8d5477fc65..37b1a2ee71a5 100644
--- a/arch/sh/boards/mach-highlander/psw.c
+++ b/arch/sh/boards/mach-highlander/psw.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 #include <asm/push-switch.h>
 
 static irqreturn_t psw_irq_handler(int irq, void *arg)
diff --git a/arch/sh/boards/mach-highlander/setup.c b/arch/sh/boards/mach-highlander/setup.c
index bc79afb6fc4c..c5a40f7906d7 100644
--- a/arch/sh/boards/mach-highlander/setup.c
+++ b/arch/sh/boards/mach-highlander/setup.c
@@ -20,7 +20,7 @@
 #include <linux/i2c.h>
 #include <net/ax88796.h>
 #include <asm/machvec.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 #include <asm/clock.h>
 #include <asm/heartbeat.h>
 #include <asm/io.h>
diff --git a/arch/sh/boards/mach-hp6xx/hp6xx_apm.c b/arch/sh/boards/mach-hp6xx/hp6xx_apm.c
index 177f4f028e0d..e85212faf40a 100644
--- a/arch/sh/boards/mach-hp6xx/hp6xx_apm.c
+++ b/arch/sh/boards/mach-hp6xx/hp6xx_apm.c
@@ -14,7 +14,7 @@
 #include <linux/apm-emulation.h>
 #include <linux/io.h>
 #include <asm/adc.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 
 /* percentage values */
 #define APM_CRITICAL			10
diff --git a/arch/sh/boards/mach-hp6xx/pm.c b/arch/sh/boards/mach-hp6xx/pm.c
index e96684def788..64af1f2eef05 100644
--- a/arch/sh/boards/mach-hp6xx/pm.c
+++ b/arch/sh/boards/mach-hp6xx/pm.c
@@ -12,7 +12,7 @@
 #include <linux/time.h>
 #include <asm/io.h>
 #include <asm/hd64461.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 #include <cpu/dac.h>
 #include <asm/pm.h>
 
diff --git a/arch/sh/boards/mach-hp6xx/setup.c b/arch/sh/boards/mach-hp6xx/setup.c
index 475b46caec1f..48fece78ff54 100644
--- a/arch/sh/boards/mach-hp6xx/setup.c
+++ b/arch/sh/boards/mach-hp6xx/setup.c
@@ -15,7 +15,7 @@
 #include <asm/hd64461.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 #include <cpu/dac.h>
 
 #define	SCPCR	0xa4000116
diff --git a/arch/sh/boards/mach-lboxre2/irq.c b/arch/sh/boards/mach-lboxre2/irq.c
index 5a1c3bbe7b50..8aa171ab833e 100644
--- a/arch/sh/boards/mach-lboxre2/irq.c
+++ b/arch/sh/boards/mach-lboxre2/irq.c
@@ -15,7 +15,7 @@
 #include <linux/irq.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/lboxre2.h>
+#include <mach/lboxre2.h>
 
 /*
  * Initialize IRQ setting
diff --git a/arch/sh/boards/mach-lboxre2/setup.c b/arch/sh/boards/mach-lboxre2/setup.c
index c74440d38ee9..2b0b5818e1e4 100644
--- a/arch/sh/boards/mach-lboxre2/setup.c
+++ b/arch/sh/boards/mach-lboxre2/setup.c
@@ -16,7 +16,7 @@
 #include <linux/ata_platform.h>
 #include <asm/machvec.h>
 #include <asm/addrspace.h>
-#include <asm/lboxre2.h>
+#include <mach/lboxre2.h>
 #include <asm/io.h>
 
 static struct resource cf_ide_resources[] = {
diff --git a/arch/sh/boards/mach-microdev/io.c b/arch/sh/boards/mach-microdev/io.c
index 9f8a540f7e14..52dd748211c7 100644
--- a/arch/sh/boards/mach-microdev/io.c
+++ b/arch/sh/boards/mach-microdev/io.c
@@ -15,7 +15,7 @@
 #include <linux/pci.h>
 #include <linux/wait.h>
 #include <asm/io.h>
-#include <asm/microdev.h>
+#include <mach/microdev.h>
 
 	/*
 	 *	we need to have a 'safe' address to re-direct all I/O requests
diff --git a/arch/sh/boards/mach-microdev/irq.c b/arch/sh/boards/mach-microdev/irq.c
index 4d335077a3ff..702753cbd28f 100644
--- a/arch/sh/boards/mach-microdev/irq.c
+++ b/arch/sh/boards/mach-microdev/irq.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 #include <asm/system.h>
 #include <asm/io.h>
-#include <asm/microdev.h>
+#include <mach/microdev.h>
 
 #define NUM_EXTERNAL_IRQS 16	/* IRL0 .. IRL15 */
 
diff --git a/arch/sh/boards/mach-microdev/setup.c b/arch/sh/boards/mach-microdev/setup.c
index fc8cd06d66cf..a9202fe3cb59 100644
--- a/arch/sh/boards/mach-microdev/setup.c
+++ b/arch/sh/boards/mach-microdev/setup.c
@@ -14,7 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/ioport.h>
 #include <video/s1d13xxxfb.h>
-#include <asm/microdev.h>
+#include <mach/microdev.h>
 #include <asm/io.h>
 #include <asm/machvec.h>
 
diff --git a/arch/sh/boards/mach-migor/lcd_qvga.c b/arch/sh/boards/mach-migor/lcd_qvga.c
index 71a40710a5aa..de9014a8a93e 100644
--- a/arch/sh/boards/mach-migor/lcd_qvga.c
+++ b/arch/sh/boards/mach-migor/lcd_qvga.c
@@ -20,7 +20,7 @@
 #include <linux/gpio.h>
 #include <video/sh_mobile_lcdc.h>
 #include <cpu/sh7722.h>
-#include <asm/migor.h>
+#include <mach/migor.h>
 
 /* LCD Module is a PH240320T according to board schematics. This module
  * is made up of a 240x320 LCD hooked up to a R61505U (or HX8347-A01?)
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index d00cce04d6e3..769d63043424 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -25,7 +25,7 @@
 #include <asm/machvec.h>
 #include <asm/io.h>
 #include <asm/sh_keysc.h>
-#include <asm/migor.h>
+#include <mach/migor.h>
 #include <cpu/sh7722.h>
 
 /* Address     IRQ  Size  Bus  Description
diff --git a/arch/sh/boards/mach-r2d/irq.c b/arch/sh/boards/mach-r2d/irq.c
index 8e49f6e51247..c70fecedcac4 100644
--- a/arch/sh/boards/mach-r2d/irq.c
+++ b/arch/sh/boards/mach-r2d/irq.c
@@ -13,7 +13,7 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <asm/rts7751r2d.h>
+#include <mach/r2d.h>
 
 #define R2D_NR_IRL 13
 
diff --git a/arch/sh/boards/mach-r2d/setup.c b/arch/sh/boards/mach-r2d/setup.c
index 2308e8753bcd..c585be00956e 100644
--- a/arch/sh/boards/mach-r2d/setup.c
+++ b/arch/sh/boards/mach-r2d/setup.c
@@ -18,7 +18,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <asm/machvec.h>
-#include <asm/rts7751r2d.h>
+#include <mach/r2d.h>
 #include <asm/io.h>
 #include <asm/io_trapped.h>
 #include <asm/spi.h>
diff --git a/arch/sh/boards/mach-sdk7780/irq.c b/arch/sh/boards/mach-sdk7780/irq.c
index 87cdc578f6ff..855558163c58 100644
--- a/arch/sh/boards/mach-sdk7780/irq.c
+++ b/arch/sh/boards/mach-sdk7780/irq.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <asm/sdk7780.h>
+#include <mach/sdk7780.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/boards/mach-sdk7780/setup.c b/arch/sh/boards/mach-sdk7780/setup.c
index acc5932587f1..aad94a78dc70 100644
--- a/arch/sh/boards/mach-sdk7780/setup.c
+++ b/arch/sh/boards/mach-sdk7780/setup.c
@@ -13,7 +13,7 @@
 #include <linux/platform_device.h>
 #include <linux/ata_platform.h>
 #include <asm/machvec.h>
-#include <asm/sdk7780.h>
+#include <mach/sdk7780.h>
 #include <asm/heartbeat.h>
 #include <asm/io.h>
 #include <asm/addrspace.h>
diff --git a/arch/sh/boards/mach-sh7763rdp/irq.c b/arch/sh/boards/mach-sh7763rdp/irq.c
index fd850bad2dec..d8ebfa7d8c76 100644
--- a/arch/sh/boards/mach-sh7763rdp/irq.c
+++ b/arch/sh/boards/mach-sh7763rdp/irq.c
@@ -15,7 +15,7 @@
 #include <linux/irq.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/sh7763rdp.h>
+#include <mach/sh7763rdp.h>
 
 #define INTC_BASE		(0xFFD00000)
 #define INTC_INT2PRI7   (INTC_BASE+0x4001C)
diff --git a/arch/sh/boards/mach-sh7763rdp/setup.c b/arch/sh/boards/mach-sh7763rdp/setup.c
index 23850da05e3c..6f926fd2162b 100644
--- a/arch/sh/boards/mach-sh7763rdp/setup.c
+++ b/arch/sh/boards/mach-sh7763rdp/setup.c
@@ -17,7 +17,7 @@
 #include <linux/mtd/physmap.h>
 #include <linux/fb.h>
 #include <linux/io.h>
-#include <asm/sh7763rdp.h>
+#include <mach/sh7763rdp.h>
 #include <asm/sh_eth.h>
 #include <asm/sh7760fb.h>
 
diff --git a/arch/sh/boards/mach-snapgear/setup.c b/arch/sh/boards/mach-snapgear/setup.c
index a5e349d3dda2..a3277a23cf14 100644
--- a/arch/sh/boards/mach-snapgear/setup.c
+++ b/arch/sh/boards/mach-snapgear/setup.c
@@ -19,7 +19,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <asm/machvec.h>
-#include <asm/snapgear.h>
+#include <mach/snapgear.h>
 #include <asm/irq.h>
 #include <asm/io.h>
 #include <cpu/timer.h>
diff --git a/arch/sh/boards/mach-systemh/io.c b/arch/sh/boards/mach-systemh/io.c
index 1b767e1a1428..dec3db0ee933 100644
--- a/arch/sh/boards/mach-systemh/io.c
+++ b/arch/sh/boards/mach-systemh/io.c
@@ -9,7 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/pci.h>
-#include <asm/systemh7751.h>
+#include <mach/systemh7751.h>
 #include <asm/addrspace.h>
 #include <asm/io.h>
 
diff --git a/arch/sh/boards/mach-systemh/irq.c b/arch/sh/boards/mach-systemh/irq.c
index 601c9c8cdbec..538406872a89 100644
--- a/arch/sh/boards/mach-systemh/irq.c
+++ b/arch/sh/boards/mach-systemh/irq.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 
 #include <asm/io.h>
-#include <asm/systemh7751.h>
+#include <mach/systemh7751.h>
 #include <asm/smc37c93x.h>
 
 /* address of external interrupt mask register
diff --git a/arch/sh/boards/mach-systemh/setup.c b/arch/sh/boards/mach-systemh/setup.c
index ee78af842778..219fd800a43f 100644
--- a/arch/sh/boards/mach-systemh/setup.c
+++ b/arch/sh/boards/mach-systemh/setup.c
@@ -16,7 +16,7 @@
  */
 #include <linux/init.h>
 #include <asm/machvec.h>
-#include <asm/systemh7751.h>
+#include <mach/systemh7751.h>
 
 extern void make_systemh_irq(unsigned int irq);
 
diff --git a/arch/sh/boards/mach-titan/io.c b/arch/sh/boards/mach-titan/io.c
index 4730c1dd697d..4badad4c6f30 100644
--- a/arch/sh/boards/mach-titan/io.c
+++ b/arch/sh/boards/mach-titan/io.c
@@ -4,7 +4,7 @@
 #include <linux/pci.h>
 #include <asm/machvec.h>
 #include <asm/addrspace.h>
-#include <asm/titan.h>
+#include <mach/titan.h>
 #include <asm/io.h>
 
 static inline unsigned int port2adr(unsigned int port)
diff --git a/arch/sh/boards/mach-titan/setup.c b/arch/sh/boards/mach-titan/setup.c
index 5de3b2ad71af..81e7e0f03863 100644
--- a/arch/sh/boards/mach-titan/setup.c
+++ b/arch/sh/boards/mach-titan/setup.c
@@ -9,7 +9,7 @@
  */
 #include <linux/init.h>
 #include <linux/irq.h>
-#include <asm/titan.h>
+#include <mach/titan.h>
 #include <asm/io.h>
 
 static void __init init_titan_irq(void)
diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c
index a13cb764b0b9..86c0b6fb7375 100644
--- a/arch/sh/drivers/pci/ops-lboxre2.c
+++ b/arch/sh/drivers/pci/ops-lboxre2.c
@@ -10,7 +10,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/io.h>
-#include <asm/lboxre2.h>
+#include <mach/lboxre2.h>
 #include "pci-sh4.h"
 
 static char lboxre2_irq_tab[] __initdata = {
diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c
index 5fdadaeed6fc..8555238e63eb 100644
--- a/arch/sh/drivers/pci/ops-r7780rp.c
+++ b/arch/sh/drivers/pci/ops-r7780rp.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 #include <asm/io.h>
 #include "pci-sh4.h"
 
diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c
index b3fa3e2ef184..d6ca74b25d5f 100644
--- a/arch/sh/drivers/pci/ops-rts7751r2d.c
+++ b/arch/sh/drivers/pci/ops-rts7751r2d.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/io.h>
-#include <asm/rts7751r2d.h>
+#include <mach/r2d.h>
 #include "pci-sh4.h"
 
 static u8 rts7751r2d_irq_tab[] __initdata = {
diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c
index 66a9b4047f26..4dcc64184b23 100644
--- a/arch/sh/drivers/pci/ops-sdk7780.c
+++ b/arch/sh/drivers/pci/ops-sdk7780.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
-#include <asm/sdk7780.h>
+#include <mach/sdk7780.h>
 #include <asm/io.h>
 #include "pci-sh4.h"
 
diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c
index ac8ee2312cd8..a8f7801a34af 100644
--- a/arch/sh/drivers/pci/ops-titan.c
+++ b/arch/sh/drivers/pci/ops-titan.c
@@ -16,7 +16,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/io.h>
-#include <asm/titan.h>
+#include <mach/titan.h>
 #include "pci-sh4.h"
 
 static char titan_irq_tab[] __initdata = {
diff --git a/arch/sh/include/asm/edosk7705.h b/arch/sh/include/mach-common/mach/edosk7705.h
similarity index 100%
rename from arch/sh/include/asm/edosk7705.h
rename to arch/sh/include/mach-common/mach/edosk7705.h
diff --git a/arch/sh/include/asm/r7780rp.h b/arch/sh/include/mach-common/mach/highlander.h
similarity index 100%
rename from arch/sh/include/asm/r7780rp.h
rename to arch/sh/include/mach-common/mach/highlander.h
diff --git a/arch/sh/include/asm/hp6xx.h b/arch/sh/include/mach-common/mach/hp6xx.h
similarity index 100%
rename from arch/sh/include/asm/hp6xx.h
rename to arch/sh/include/mach-common/mach/hp6xx.h
diff --git a/arch/sh/include/asm/lboxre2.h b/arch/sh/include/mach-common/mach/lboxre2.h
similarity index 100%
rename from arch/sh/include/asm/lboxre2.h
rename to arch/sh/include/mach-common/mach/lboxre2.h
diff --git a/arch/sh/include/asm/magicpanelr2.h b/arch/sh/include/mach-common/mach/magicpanelr2.h
similarity index 100%
rename from arch/sh/include/asm/magicpanelr2.h
rename to arch/sh/include/mach-common/mach/magicpanelr2.h
diff --git a/arch/sh/include/asm/microdev.h b/arch/sh/include/mach-common/mach/microdev.h
similarity index 100%
rename from arch/sh/include/asm/microdev.h
rename to arch/sh/include/mach-common/mach/microdev.h
diff --git a/arch/sh/include/asm/migor.h b/arch/sh/include/mach-common/mach/migor.h
similarity index 100%
rename from arch/sh/include/asm/migor.h
rename to arch/sh/include/mach-common/mach/migor.h
diff --git a/arch/sh/include/asm/rts7751r2d.h b/arch/sh/include/mach-common/mach/r2d.h
similarity index 100%
rename from arch/sh/include/asm/rts7751r2d.h
rename to arch/sh/include/mach-common/mach/r2d.h
diff --git a/arch/sh/include/asm/sdk7780.h b/arch/sh/include/mach-common/mach/sdk7780.h
similarity index 100%
rename from arch/sh/include/asm/sdk7780.h
rename to arch/sh/include/mach-common/mach/sdk7780.h
diff --git a/arch/sh/include/asm/sh7763rdp.h b/arch/sh/include/mach-common/mach/sh7763rdp.h
similarity index 100%
rename from arch/sh/include/asm/sh7763rdp.h
rename to arch/sh/include/mach-common/mach/sh7763rdp.h
diff --git a/arch/sh/include/asm/sh7785lcr.h b/arch/sh/include/mach-common/mach/sh7785lcr.h
similarity index 100%
rename from arch/sh/include/asm/sh7785lcr.h
rename to arch/sh/include/mach-common/mach/sh7785lcr.h
diff --git a/arch/sh/include/asm/shmin.h b/arch/sh/include/mach-common/mach/shmin.h
similarity index 100%
rename from arch/sh/include/asm/shmin.h
rename to arch/sh/include/mach-common/mach/shmin.h
diff --git a/arch/sh/include/asm/snapgear.h b/arch/sh/include/mach-common/mach/snapgear.h
similarity index 100%
rename from arch/sh/include/asm/snapgear.h
rename to arch/sh/include/mach-common/mach/snapgear.h
diff --git a/arch/sh/include/asm/systemh7751.h b/arch/sh/include/mach-common/mach/systemh7751.h
similarity index 100%
rename from arch/sh/include/asm/systemh7751.h
rename to arch/sh/include/mach-common/mach/systemh7751.h
diff --git a/arch/sh/include/asm/titan.h b/arch/sh/include/mach-common/mach/titan.h
similarity index 100%
rename from arch/sh/include/asm/titan.h
rename to arch/sh/include/mach-common/mach/titan.h
diff --git a/drivers/input/touchscreen/hp680_ts_input.c b/drivers/input/touchscreen/hp680_ts_input.c
index c38d4e0f95c6..a89700e7ace4 100644
--- a/drivers/input/touchscreen/hp680_ts_input.c
+++ b/drivers/input/touchscreen/hp680_ts_input.c
@@ -5,7 +5,7 @@
 #include <asm/io.h>
 #include <asm/delay.h>
 #include <asm/adc.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 
 #define MODNAME "hp680_ts_input"
 
diff --git a/drivers/leds/leds-hp6xx.c b/drivers/leds/leds-hp6xx.c
index 844d5979c904..e8fb1baf8a50 100644
--- a/drivers/leds/leds-hp6xx.c
+++ b/drivers/leds/leds-hp6xx.c
@@ -15,7 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/leds.h>
 #include <asm/hd64461.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 
 static void hp6xxled_green_set(struct led_classdev *led_cdev,
 			       enum led_brightness value)
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index b9397818f73a..8f4e96bb229a 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -40,7 +40,7 @@
 #define	RTC_SCLK	0x0400
 
 #ifdef CONFIG_SH_SECUREEDGE5410
-#include <asm/snapgear.h>
+#include <mach/snapgear.h>
 #define set_dp(x)	SECUREEDGE_WRITE_IOPORT(x, 0x1c00)
 #define get_dp()	SECUREEDGE_READ_IOPORT()
 #else
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index 6fa0b9d5559a..d4cfed0b26d5 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -19,7 +19,7 @@
 #include <linux/backlight.h>
 
 #include <cpu/dac.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 #include <asm/hd64461.h>
 
 #define HP680_MAX_INTENSITY 255
diff --git a/sound/oss/sh_dac_audio.c b/sound/oss/sh_dac_audio.c
index b493660deb36..e5d423994918 100644
--- a/sound/oss/sh_dac_audio.c
+++ b/sound/oss/sh_dac_audio.c
@@ -26,7 +26,7 @@
 #include <asm/cpu/dac.h>
 #include <asm/cpu/timer.h>
 #include <asm/machvec.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 #include <asm/hd64461.h>
 
 #define MODNAME "sh_dac_audio"
-- 
GitLab


From ebf9c118df678de111eda45709081806f71710cc Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Oct 2008 13:04:06 +0900
Subject: [PATCH 663/895] sh: Only build in gpio.o when CONFIG_GENERIC_GPIO is
 selected.

The pinmux management is tied in to this code, while it is presently only
used by platforms that select GENERIC_GPIO. The asm/gpio.h definitions
are not referenced when GENERIC_GPIO is disabled, resulting in a build
failure for all of the platforms that don't select it.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/Makefile_32 | 3 ++-
 arch/sh/kernel/Makefile_64 | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
index 7c41ac7c298c..48edfb145fb4 100644
--- a/arch/sh/kernel/Makefile_32
+++ b/arch/sh/kernel/Makefile_32
@@ -5,7 +5,7 @@
 extra-y	:= head_32.o init_task.o vmlinux.lds
 
 obj-y	:= debugtraps.o io.o io_generic.o irq.o machvec.o process_32.o \
-	   ptrace_32.o setup.o signal_32.o sys_sh.o sys_sh32.o gpio.o \
+	   ptrace_32.o setup.o signal_32.o sys_sh.o sys_sh32.o \
 	   syscalls_32.o time_32.o topology.o traps.o traps_32.o
 
 obj-y				+= cpu/ timers/
@@ -23,5 +23,6 @@ obj-$(CONFIG_PM)		+= pm.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_GENERIC_GPIO)	+= gpio.o
 
 EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index 2a7a5e694a1f..c97660b2b48d 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -1,7 +1,7 @@
 extra-y	:= head_64.o init_task.o vmlinux.lds
 
 obj-y	:= debugtraps.o io.o io_generic.o irq.o machvec.o process_64.o \
-	   ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o gpio.o \
+	   ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o \
 	   syscalls_64.o time_64.o topology.o traps.o traps_64.o
 
 obj-y				+= cpu/ timers/
@@ -18,5 +18,6 @@ obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_PM)		+= pm.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
+obj-$(CONFIG_GENERIC_GPIO)	+= gpio.o
 
 EXTRA_CFLAGS += -Werror
-- 
GitLab


From 40e24c403f325715f9c43b9fed2068641201ee0b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Oct 2008 14:30:17 +0900
Subject: [PATCH 664/895] sh: Move SH-4 CPU headers down one more level.

These accidentally got placed in to cpu-sh4 instead of cpu-sh4/cpu, push
them down one more level.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/cpu-sh4/{ => cpu}/sh7722.h | 0
 arch/sh/include/cpu-sh4/{ => cpu}/sh7723.h | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename arch/sh/include/cpu-sh4/{ => cpu}/sh7722.h (100%)
 rename arch/sh/include/cpu-sh4/{ => cpu}/sh7723.h (100%)

diff --git a/arch/sh/include/cpu-sh4/sh7722.h b/arch/sh/include/cpu-sh4/cpu/sh7722.h
similarity index 100%
rename from arch/sh/include/cpu-sh4/sh7722.h
rename to arch/sh/include/cpu-sh4/cpu/sh7722.h
diff --git a/arch/sh/include/cpu-sh4/sh7723.h b/arch/sh/include/cpu-sh4/cpu/sh7723.h
similarity index 100%
rename from arch/sh/include/cpu-sh4/sh7723.h
rename to arch/sh/include/cpu-sh4/cpu/sh7723.h
-- 
GitLab


From 0537ae6a3d7d6d9005446ee6419272fd4c38a58d Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Sun, 19 Oct 2008 23:29:56 -0700
Subject: [PATCH 665/895] ipvs: Update CONFIG_IP_VS_IPV6 description and help
 text

This adds a URL to further info to the CONFIG_IP_VS_IPV6 Kconfig help
text. Also, I think it should be ok to remove the "DANGEROUS" label in the
description line at this point to get people to try it out and find all
the bugs ;) It's still marked as experimental, of course.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/Kconfig | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 05048e403266..79a698052218 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -25,11 +25,13 @@ menuconfig IP_VS
 if IP_VS
 
 config	IP_VS_IPV6
-	bool "IPv6 support for IPVS (DANGEROUS)"
+	bool "IPv6 support for IPVS"
 	depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
 	---help---
 	  Add IPv6 support to IPVS. This is incomplete and might be dangerous.
 
+	  See http://www.mindbasket.com/ipvs for more information.
+
 	  Say N if unsure.
 
 config	IP_VS_DEBUG
-- 
GitLab


From 92845ffd2a221f9f90b064ac55bb010bf27a193f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Sun, 19 Oct 2008 23:33:56 -0700
Subject: [PATCH 666/895] netdev: change name dropping error codes

If changename notifier returns an error code, it gets incorrectly
cleared during rollback so the error is never returned to the user.
Found while testing similar code for MTU changes.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 868ec0ba8b77..b8a4fd0806af 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -924,10 +924,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 		strlcpy(dev->name, newname, IFNAMSIZ);
 
 rollback:
-	err = device_rename(&dev->dev, dev->name);
-	if (err) {
+	ret = device_rename(&dev->dev, dev->name);
+	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
-		return err;
+		return ret;
 	}
 
 	write_lock_bh(&dev_base_lock);
-- 
GitLab


From fd5070370c74c32b7a98090eabb46c53baf41733 Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Sun, 19 Oct 2008 23:35:58 -0700
Subject: [PATCH 667/895] tcp: Fix IPv6 fallout from 'Port redirection support
 for TCP'

'tcp: Port redirection support for TCP' (a3116ac5c) added a new member
to inet_request_sock() which inet_csk_clone() makes use of but failed
to add proper initialization to the IPv6 syncookie code and missed a
couple of places where the new member should be used instead of
inet_sk(sk)->sport.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/syncookies.c | 1 +
 net/ipv6/tcp_ipv6.c   | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index ec394cf5a19b..676c80b5b14b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -204,6 +204,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	req->mss = mss;
 	ireq->rmt_port = th->source;
+	ireq->loc_port = th->dest;
 	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
 	if (ipv6_opt_accepted(sk, skb) ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e5310c9b84dc..b6b356b7912a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -476,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
 	fl.fl6_flowlabel = 0;
 	fl.oif = treq->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_sk(sk)->sport;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_req_classify_flow(req, &fl);
 
 	opt = np->opt;
@@ -1309,7 +1309,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_sk(sk)->sport;
+		fl.fl_ip_sport = inet_rsk(req)->loc_port;
 		security_req_classify_flow(req, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
@@ -1865,7 +1865,7 @@ static void get_openreq6(struct seq_file *seq,
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3],
-		   ntohs(inet_sk(sk)->sport),
+		   ntohs(inet_rsk(req)->loc_port),
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3],
 		   ntohs(inet_rsk(req)->rmt_port),
-- 
GitLab


From 944f750227fa0beb2b440709687415621e2533a4 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 19 Oct 2008 23:36:47 -0700
Subject: [PATCH 668/895] dccp: Port redirection support for DCCP

Commit a3116ac5c216fc3c145906a46df9ce542ff7dcf2 from 1st October ("tcp: Port
redirection support for TCP") broke DCCP skb lookup by changing inet_csk_clone,
which is used by DCCP to generate the child socket after the handshake.

This patch updates DCCP to use 'loc_port' instead of 'sport', which fixes the
problem, and thus inheriting port redirection support via the new interface.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv6.c      | 4 ++--
 net/dccp/minisocks.c | 1 +
 net/dccp/output.c    | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 11062780bb02..d4ce1224e008 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -259,7 +259,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 	fl.fl6_flowlabel = 0;
 	fl.oif = ireq6->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_sk(sk)->sport;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_req_classify_flow(req, &fl);
 
 	opt = np->opt;
@@ -558,7 +558,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_sk(sk)->sport;
+		fl.fl_ip_sport = inet_rsk(req)->loc_port;
 		security_sk_classify_flow(sk, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index b2804e2d1b8c..e6bf99e3e41a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -309,6 +309,7 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
 	inet_rsk(req)->rmt_port	  = dccp_hdr(skb)->dccph_sport;
+	inet_rsk(req)->loc_port	  = dccp_hdr(skb)->dccph_dport;
 	inet_rsk(req)->acked	  = 0;
 	req->rcv_wnd		  = sysctl_dccp_feat_sequence_window;
 	dreq->dreq_timestamp_echo = 0;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d06945c7d3df..809d803d5006 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -347,7 +347,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	/* Build and checksum header */
 	dh = dccp_zeroed_hdr(skb, dccp_header_size);
 
-	dh->dccph_sport	= inet_sk(sk)->sport;
+	dh->dccph_sport	= inet_rsk(req)->loc_port;
 	dh->dccph_dport	= inet_rsk(req)->rmt_port;
 	dh->dccph_doff	= (dccp_header_size +
 			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
-- 
GitLab


From 9f3ffae0dbce491a3e9871b686342fd5aa854f05 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Sun, 19 Oct 2008 23:37:47 -0700
Subject: [PATCH 669/895] pkt_sched: sch_generic: Fix oops in sch_teql

After these commands:
# modprobe sch_teql
# tc qdisc add dev eth0 root teql0
# tc qdisc del dev eth0 root
we get an oops in teql_destroy() when spin_lock is taken from a null
qdisc_sleeping pointer. It's because at the moment teql0 dev haven't
been activated yet, and a qdisc_root_sleeping() is pointing to noop
qdisc's netdev_queue with qdisc_sleeping uninitialized. This patch
fixes this both for noop and noqueue netdev_queues to avoid similar
problems in the future.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7b5572d6beb5..93cd30ce6501 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -326,6 +326,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 
 static struct netdev_queue noop_netdev_queue = {
 	.qdisc		=	&noop_qdisc,
+	.qdisc_sleeping	=	&noop_qdisc,
 };
 
 struct Qdisc noop_qdisc = {
@@ -352,6 +353,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 static struct Qdisc noqueue_qdisc;
 static struct netdev_queue noqueue_netdev_queue = {
 	.qdisc		=	&noqueue_qdisc,
+	.qdisc_sleeping	=	&noqueue_qdisc,
 };
 
 static struct Qdisc noqueue_qdisc = {
-- 
GitLab


From 35a347991cb7da5540fcdbef57800c02bafcb0b3 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
Date: Mon, 20 Oct 2008 17:17:44 +0900
Subject: [PATCH 670/895] [MTD] [NAND] sh_flctl: fix compile error

Fix compile error because the first patch was broken -- the file got
truncated.

Signed-off-by: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/sh_flctl.c | 577 ++++++++++++++++++++++++++++++++++++
 1 file changed, 577 insertions(+)

diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 600a76f5580e..821acb08ff1c 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -299,3 +299,580 @@ static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_va
 		break;
 	case NAND_CMD_PAGEPROG:
 		addr_len_bytes = flctl->rw_ADRCNT;
+		flcmdcr_val |= DOCMD2_E | CDSRC_E | SELRW;
+		break;
+	case NAND_CMD_READID:
+		flcmncr_val &= ~SNAND_E;
+		addr_len_bytes = ADRCNT_1;
+		break;
+	case NAND_CMD_STATUS:
+	case NAND_CMD_RESET:
+		flcmncr_val &= ~SNAND_E;
+		flcmdcr_val &= ~(DOADR_E | DOSR_E);
+		break;
+	default:
+		break;
+	}
+
+	/* Set address bytes parameter */
+	flcmdcr_val |= addr_len_bytes;
+
+	/* Now actually write */
+	writel(flcmncr_val, FLCMNCR(flctl));
+	writel(flcmdcr_val, FLCMDCR(flctl));
+	writel(flcmcdr_val, FLCMCDR(flctl));
+}
+
+static int flctl_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf)
+{
+	int i, eccsize = chip->ecc.size;
+	int eccbytes = chip->ecc.bytes;
+	int eccsteps = chip->ecc.steps;
+	uint8_t *p = buf;
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+
+	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
+		chip->read_buf(mtd, p, eccsize);
+
+	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
+		if (flctl->hwecc_cant_correct[i])
+			mtd->ecc_stats.failed++;
+		else
+			mtd->ecc_stats.corrected += 0;
+	}
+
+	return 0;
+}
+
+static void flctl_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
+				   const uint8_t *buf)
+{
+	int i, eccsize = chip->ecc.size;
+	int eccbytes = chip->ecc.bytes;
+	int eccsteps = chip->ecc.steps;
+	const uint8_t *p = buf;
+
+	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
+		chip->write_buf(mtd, p, eccsize);
+}
+
+static void execmd_read_page_sector(struct mtd_info *mtd, int page_addr)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	int sector, page_sectors;
+
+	if (flctl->page_size)
+		page_sectors = 4;
+	else
+		page_sectors = 1;
+
+	writel(readl(FLCMNCR(flctl)) | ACM_SACCES_MODE | _4ECCCORRECT,
+		 FLCMNCR(flctl));
+
+	set_cmd_regs(mtd, NAND_CMD_READ0,
+		(NAND_CMD_READSTART << 8) | NAND_CMD_READ0);
+
+	for (sector = 0; sector < page_sectors; sector++) {
+		int ret;
+
+		empty_fifo(flctl);
+		writel(readl(FLCMDCR(flctl)) | 1, FLCMDCR(flctl));
+		writel(page_addr << 2 | sector, FLADR(flctl));
+
+		start_translation(flctl);
+		read_fiforeg(flctl, 512, 512 * sector);
+
+		ret = read_ecfiforeg(flctl,
+			&flctl->done_buff[mtd->writesize + 16 * sector]);
+
+		if (ret)
+			flctl->hwecc_cant_correct[sector] = 1;
+
+		writel(0x0, FL4ECCCR(flctl));
+		wait_completion(flctl);
+	}
+	writel(readl(FLCMNCR(flctl)) & ~(ACM_SACCES_MODE | _4ECCCORRECT),
+			FLCMNCR(flctl));
+}
+
+static void execmd_read_oob(struct mtd_info *mtd, int page_addr)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+
+	set_cmd_regs(mtd, NAND_CMD_READ0,
+		(NAND_CMD_READSTART << 8) | NAND_CMD_READ0);
+
+	empty_fifo(flctl);
+	if (flctl->page_size) {
+		int i;
+		/* In case that the page size is 2k */
+		for (i = 0; i < 16 * 3; i++)
+			flctl->done_buff[i] = 0xFF;
+
+		set_addr(mtd, 3 * 528 + 512, page_addr);
+		writel(16, FLDTCNTR(flctl));
+
+		start_translation(flctl);
+		read_fiforeg(flctl, 16, 16 * 3);
+		wait_completion(flctl);
+	} else {
+		/* In case that the page size is 512b */
+		set_addr(mtd, 512, page_addr);
+		writel(16, FLDTCNTR(flctl));
+
+		start_translation(flctl);
+		read_fiforeg(flctl, 16, 0);
+		wait_completion(flctl);
+	}
+}
+
+static void execmd_write_page_sector(struct mtd_info *mtd)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	int i, page_addr = flctl->seqin_page_addr;
+	int sector, page_sectors;
+
+	if (flctl->page_size)
+		page_sectors = 4;
+	else
+		page_sectors = 1;
+
+	writel(readl(FLCMNCR(flctl)) | ACM_SACCES_MODE, FLCMNCR(flctl));
+
+	set_cmd_regs(mtd, NAND_CMD_PAGEPROG,
+			(NAND_CMD_PAGEPROG << 8) | NAND_CMD_SEQIN);
+
+	for (sector = 0; sector < page_sectors; sector++) {
+		empty_fifo(flctl);
+		writel(readl(FLCMDCR(flctl)) | 1, FLCMDCR(flctl));
+		writel(page_addr << 2 | sector, FLADR(flctl));
+
+		start_translation(flctl);
+		write_fiforeg(flctl, 512, 512 * sector);
+
+		for (i = 0; i < 4; i++) {
+			wait_wecfifo_ready(flctl); /* wait for write ready */
+			writel(0xFFFFFFFF, FLECFIFO(flctl));
+		}
+		wait_completion(flctl);
+	}
+
+	writel(readl(FLCMNCR(flctl)) & ~ACM_SACCES_MODE, FLCMNCR(flctl));
+}
+
+static void execmd_write_oob(struct mtd_info *mtd)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	int page_addr = flctl->seqin_page_addr;
+	int sector, page_sectors;
+
+	if (flctl->page_size) {
+		sector = 3;
+		page_sectors = 4;
+	} else {
+		sector = 0;
+		page_sectors = 1;
+	}
+
+	set_cmd_regs(mtd, NAND_CMD_PAGEPROG,
+			(NAND_CMD_PAGEPROG << 8) | NAND_CMD_SEQIN);
+
+	for (; sector < page_sectors; sector++) {
+		empty_fifo(flctl);
+		set_addr(mtd, sector * 528 + 512, page_addr);
+		writel(16, FLDTCNTR(flctl));	/* set read size */
+
+		start_translation(flctl);
+		write_fiforeg(flctl, 16, 16 * sector);
+		wait_completion(flctl);
+	}
+}
+
+static void flctl_cmdfunc(struct mtd_info *mtd, unsigned int command,
+			int column, int page_addr)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	uint32_t read_cmd = 0;
+
+	flctl->read_bytes = 0;
+	if (command != NAND_CMD_PAGEPROG)
+		flctl->index = 0;
+
+	switch (command) {
+	case NAND_CMD_READ1:
+	case NAND_CMD_READ0:
+		if (flctl->hwecc) {
+			/* read page with hwecc */
+			execmd_read_page_sector(mtd, page_addr);
+			break;
+		}
+		empty_fifo(flctl);
+		if (flctl->page_size)
+			set_cmd_regs(mtd, command, (NAND_CMD_READSTART << 8)
+				| command);
+		else
+			set_cmd_regs(mtd, command, command);
+
+		set_addr(mtd, 0, page_addr);
+
+		flctl->read_bytes = mtd->writesize + mtd->oobsize;
+		flctl->index += column;
+		goto read_normal_exit;
+
+	case NAND_CMD_READOOB:
+		if (flctl->hwecc) {
+			/* read page with hwecc */
+			execmd_read_oob(mtd, page_addr);
+			break;
+		}
+
+		empty_fifo(flctl);
+		if (flctl->page_size) {
+			set_cmd_regs(mtd, command, (NAND_CMD_READSTART << 8)
+				| NAND_CMD_READ0);
+			set_addr(mtd, mtd->writesize, page_addr);
+		} else {
+			set_cmd_regs(mtd, command, command);
+			set_addr(mtd, 0, page_addr);
+		}
+		flctl->read_bytes = mtd->oobsize;
+		goto read_normal_exit;
+
+	case NAND_CMD_READID:
+		empty_fifo(flctl);
+		set_cmd_regs(mtd, command, command);
+		set_addr(mtd, 0, 0);
+
+		flctl->read_bytes = 4;
+		writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
+		start_translation(flctl);
+		read_datareg(flctl, 0);	/* read and end */
+		break;
+
+	case NAND_CMD_ERASE1:
+		flctl->erase1_page_addr = page_addr;
+		break;
+
+	case NAND_CMD_ERASE2:
+		set_cmd_regs(mtd, NAND_CMD_ERASE1,
+			(command << 8) | NAND_CMD_ERASE1);
+		set_addr(mtd, -1, flctl->erase1_page_addr);
+		start_translation(flctl);
+		wait_completion(flctl);
+		break;
+
+	case NAND_CMD_SEQIN:
+		if (!flctl->page_size) {
+			/* output read command */
+			if (column >= mtd->writesize) {
+				column -= mtd->writesize;
+				read_cmd = NAND_CMD_READOOB;
+			} else if (column < 256) {
+				read_cmd = NAND_CMD_READ0;
+			} else {
+				column -= 256;
+				read_cmd = NAND_CMD_READ1;
+			}
+		}
+		flctl->seqin_column = column;
+		flctl->seqin_page_addr = page_addr;
+		flctl->seqin_read_cmd = read_cmd;
+		break;
+
+	case NAND_CMD_PAGEPROG:
+		empty_fifo(flctl);
+		if (!flctl->page_size) {
+			set_cmd_regs(mtd, NAND_CMD_SEQIN,
+					flctl->seqin_read_cmd);
+			set_addr(mtd, -1, -1);
+			writel(0, FLDTCNTR(flctl));	/* set 0 size */
+			start_translation(flctl);
+			wait_completion(flctl);
+		}
+		if (flctl->hwecc) {
+			/* write page with hwecc */
+			if (flctl->seqin_column == mtd->writesize)
+				execmd_write_oob(mtd);
+			else if (!flctl->seqin_column)
+				execmd_write_page_sector(mtd);
+			else
+				printk(KERN_ERR "Invalid address !?\n");
+			break;
+		}
+		set_cmd_regs(mtd, command, (command << 8) | NAND_CMD_SEQIN);
+		set_addr(mtd, flctl->seqin_column, flctl->seqin_page_addr);
+		writel(flctl->index, FLDTCNTR(flctl));	/* set write size */
+		start_translation(flctl);
+		write_fiforeg(flctl, flctl->index, 0);
+		wait_completion(flctl);
+		break;
+
+	case NAND_CMD_STATUS:
+		set_cmd_regs(mtd, command, command);
+		set_addr(mtd, -1, -1);
+
+		flctl->read_bytes = 1;
+		writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
+		start_translation(flctl);
+		read_datareg(flctl, 0); /* read and end */
+		break;
+
+	case NAND_CMD_RESET:
+		set_cmd_regs(mtd, command, command);
+		set_addr(mtd, -1, -1);
+
+		writel(0, FLDTCNTR(flctl));	/* set 0 size */
+		start_translation(flctl);
+		wait_completion(flctl);
+		break;
+
+	default:
+		break;
+	}
+	return;
+
+read_normal_exit:
+	writel(flctl->read_bytes, FLDTCNTR(flctl));	/* set read size */
+	start_translation(flctl);
+	read_fiforeg(flctl, flctl->read_bytes, 0);
+	wait_completion(flctl);
+	return;
+}
+
+static void flctl_select_chip(struct mtd_info *mtd, int chipnr)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	uint32_t flcmncr_val = readl(FLCMNCR(flctl));
+
+	switch (chipnr) {
+	case -1:
+		flcmncr_val &= ~CE0_ENABLE;
+		writel(flcmncr_val, FLCMNCR(flctl));
+		break;
+	case 0:
+		flcmncr_val |= CE0_ENABLE;
+		writel(flcmncr_val, FLCMNCR(flctl));
+		break;
+	default:
+		BUG();
+	}
+}
+
+static void flctl_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	int i, index = flctl->index;
+
+	for (i = 0; i < len; i++)
+		flctl->done_buff[index + i] = buf[i];
+	flctl->index += len;
+}
+
+static uint8_t flctl_read_byte(struct mtd_info *mtd)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	int index = flctl->index;
+	uint8_t data;
+
+	data = flctl->done_buff[index];
+	flctl->index++;
+	return data;
+}
+
+static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		buf[i] = flctl_read_byte(mtd);
+}
+
+static int flctl_verify_buf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		if (buf[i] != flctl_read_byte(mtd))
+			return -EFAULT;
+	return 0;
+}
+
+static void flctl_register_init(struct sh_flctl *flctl, unsigned long val)
+{
+	writel(val, FLCMNCR(flctl));
+}
+
+static int flctl_chip_init_tail(struct mtd_info *mtd)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	struct nand_chip *chip = &flctl->chip;
+
+	if (mtd->writesize == 512) {
+		flctl->page_size = 0;
+		if (chip->chipsize > (32 << 20)) {
+			/* big than 32MB */
+			flctl->rw_ADRCNT = ADRCNT_4;
+			flctl->erase_ADRCNT = ADRCNT_3;
+		} else if (chip->chipsize > (2 << 16)) {
+			/* big than 128KB */
+			flctl->rw_ADRCNT = ADRCNT_3;
+			flctl->erase_ADRCNT = ADRCNT_2;
+		} else {
+			flctl->rw_ADRCNT = ADRCNT_2;
+			flctl->erase_ADRCNT = ADRCNT_1;
+		}
+	} else {
+		flctl->page_size = 1;
+		if (chip->chipsize > (128 << 20)) {
+			/* big than 128MB */
+			flctl->rw_ADRCNT = ADRCNT2_E;
+			flctl->erase_ADRCNT = ADRCNT_3;
+		} else if (chip->chipsize > (8 << 16)) {
+			/* big than 512KB */
+			flctl->rw_ADRCNT = ADRCNT_4;
+			flctl->erase_ADRCNT = ADRCNT_2;
+		} else {
+			flctl->rw_ADRCNT = ADRCNT_3;
+			flctl->erase_ADRCNT = ADRCNT_1;
+		}
+	}
+
+	if (flctl->hwecc) {
+		if (mtd->writesize == 512) {
+			chip->ecc.layout = &flctl_4secc_oob_16;
+			chip->badblock_pattern = &flctl_4secc_smallpage;
+		} else {
+			chip->ecc.layout = &flctl_4secc_oob_64;
+			chip->badblock_pattern = &flctl_4secc_largepage;
+		}
+
+		chip->ecc.size = 512;
+		chip->ecc.bytes = 10;
+		chip->ecc.read_page = flctl_read_page_hwecc;
+		chip->ecc.write_page = flctl_write_page_hwecc;
+		chip->ecc.mode = NAND_ECC_HW;
+
+		/* 4 symbols ECC enabled */
+		writel(readl(FLCMNCR(flctl)) | _4ECCEN | ECCPOS2 | ECCPOS_02,
+				FLCMNCR(flctl));
+	} else {
+		chip->ecc.mode = NAND_ECC_SOFT;
+	}
+
+	return 0;
+}
+
+static int __init flctl_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct sh_flctl *flctl;
+	struct mtd_info *flctl_mtd;
+	struct nand_chip *nand;
+	struct sh_flctl_platform_data *pdata;
+	int ret;
+
+	pdata = pdev->dev.platform_data;
+	if (pdata == NULL) {
+		printk(KERN_ERR "sh_flctl platform_data not found.\n");
+		return -ENODEV;
+	}
+
+	flctl = kzalloc(sizeof(struct sh_flctl), GFP_KERNEL);
+	if (!flctl) {
+		printk(KERN_ERR "Unable to allocate NAND MTD dev structure.\n");
+		return -ENOMEM;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		printk(KERN_ERR "%s: resource not found.\n", __func__);
+		ret = -ENODEV;
+		goto err;
+	}
+
+	flctl->reg = ioremap(res->start, res->end - res->start + 1);
+	if (flctl->reg == NULL) {
+		printk(KERN_ERR "%s: ioremap error.\n", __func__);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	platform_set_drvdata(pdev, flctl);
+	flctl_mtd = &flctl->mtd;
+	nand = &flctl->chip;
+	flctl_mtd->priv = nand;
+	flctl->hwecc = pdata->has_hwecc;
+
+	flctl_register_init(flctl, pdata->flcmncr_val);
+
+	nand->options = NAND_NO_AUTOINCR;
+
+	/* Set address of hardware control function */
+	/* 20 us command delay time */
+	nand->chip_delay = 20;
+
+	nand->read_byte = flctl_read_byte;
+	nand->write_buf = flctl_write_buf;
+	nand->read_buf = flctl_read_buf;
+	nand->verify_buf = flctl_verify_buf;
+	nand->select_chip = flctl_select_chip;
+	nand->cmdfunc = flctl_cmdfunc;
+
+	ret = nand_scan_ident(flctl_mtd, 1);
+	if (ret)
+		goto err;
+
+	ret = flctl_chip_init_tail(flctl_mtd);
+	if (ret)
+		goto err;
+
+	ret = nand_scan_tail(flctl_mtd);
+	if (ret)
+		goto err;
+
+	add_mtd_partitions(flctl_mtd, pdata->parts, pdata->nr_parts);
+
+	return 0;
+
+err:
+	kfree(flctl);
+	return ret;
+}
+
+static int __exit flctl_remove(struct platform_device *pdev)
+{
+	struct sh_flctl *flctl = platform_get_drvdata(pdev);
+
+	nand_release(&flctl->mtd);
+	kfree(flctl);
+
+	return 0;
+}
+
+static struct platform_driver flctl_driver = {
+	.probe		= flctl_probe,
+	.remove		= flctl_remove,
+	.driver = {
+		.name	= "sh_flctl",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init flctl_nand_init(void)
+{
+	return platform_driver_register(&flctl_driver);
+}
+
+static void __exit flctl_nand_cleanup(void)
+{
+	platform_driver_unregister(&flctl_driver);
+}
+
+module_init(flctl_nand_init);
+module_exit(flctl_nand_cleanup);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yoshihiro Shimoda");
+MODULE_DESCRIPTION("SuperH FLCTL driver");
+MODULE_ALIAS("platform:sh_flctl");
-- 
GitLab


From 7d28e0d1e55442d198f7c35626d2c460ac04cab2 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Mon, 20 Oct 2008 09:24:43 +0100
Subject: [PATCH 671/895] [MTD] [NAND] GPIO driver depends on ARM... for now.

Not all architectures provide readsb(). We should probably move to using
ioread8_rep() instead.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index b9eed9925462..1c2e9450d663 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -58,7 +58,7 @@ config MTD_NAND_H1900
 
 config MTD_NAND_GPIO
 	tristate "GPIO NAND Flash driver"
-	depends on GENERIC_GPIO
+	depends on GENERIC_GPIO && ARM
 	help
 	  This enables a GPIO based NAND flash driver.
 
-- 
GitLab


From 8a1a6272057e2ad90ab531a70330165888866e60 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Mon, 20 Oct 2008 09:26:16 +0100
Subject: [PATCH 672/895] Revert "[MTD] m25p80.c code cleanup"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 75d0ee2202b5740e94e913d8a52f91c6557c4c81.

Although it seems ObviouslyCorrectâ„¢, the spi_write() call uses DMA,
while spi_write_then_read() does not. Since our buffer is on the stack,
we must use the latter even though we don't actually want to read
anything back.

Pointed out by David Brownell <david-b@pacbell.net>

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/devices/m25p80.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 697a3a217837..76a76751da36 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -134,7 +134,7 @@ static inline int write_enable(struct m25p *flash)
 {
 	u8	code = OPCODE_WREN;
 
-	return spi_write(flash->spi, &code, 1);
+	return spi_write_then_read(flash->spi, &code, 1, NULL, 0);
 }
 
 
-- 
GitLab


From e67ef25a35b949561a9bd77693523ec94ab4a278 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Sep 2008 23:50:23 +0200
Subject: [PATCH 673/895] timer_list: print real timer address

The current timer_list output prints the address of the on stack copy
of the active hrtimer instead of the hrtimer itself.

Print the address of the real timer instead.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timer_list.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index a40e20fd0001..ec9ea6cadd85 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -47,13 +47,14 @@ static void print_name_offset(struct seq_file *m, void *sym)
 }
 
 static void
-print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now)
+print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
+	    int idx, u64 now)
 {
 #ifdef CONFIG_TIMER_STATS
 	char tmp[TASK_COMM_LEN + 1];
 #endif
 	SEQ_printf(m, " #%d: ", idx);
-	print_name_offset(m, timer);
+	print_name_offset(m, taddr);
 	SEQ_printf(m, ", ");
 	print_name_offset(m, timer->function);
 	SEQ_printf(m, ", S:%02lx", timer->state);
@@ -99,7 +100,7 @@ next_one:
 		tmp = *timer;
 		spin_unlock_irqrestore(&base->cpu_base->lock, flags);
 
-		print_timer(m, &tmp, i, now);
+		print_timer(m, timer, &tmp, i, now);
 		next++;
 		goto next_one;
 	}
-- 
GitLab


From c5b77a3d3a716a5c61a1999d7f2a78e9c39fd1b0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 29 Sep 2008 17:31:41 +0200
Subject: [PATCH 674/895] timer_list: print cpu number of clockevents device

The per cpu clock events device output of timer_list lacks an
association of the device to the cpu which is annoying when looking at
the output of /proc/timer_list from a 128 way system.

Add the CPU number info and mark the broadcast device in the device
list printout.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timer_list.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ec9ea6cadd85..5479c6e7a023 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -184,12 +184,16 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 static void
-print_tickdevice(struct seq_file *m, struct tick_device *td)
+print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 {
 	struct clock_event_device *dev = td->evtdev;
 
 	SEQ_printf(m, "\n");
 	SEQ_printf(m, "Tick Device: mode:     %d\n", td->mode);
+	if (cpu < 0)
+		SEQ_printf(m, "Broadcast device\n");
+	else
+		SEQ_printf(m, "Per CPU device: %d\n", cpu);
 
 	SEQ_printf(m, "Clock Event Device: ");
 	if (!dev) {
@@ -223,7 +227,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
 	int cpu;
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-	print_tickdevice(m, tick_get_broadcast_device());
+	print_tickdevice(m, tick_get_broadcast_device(), -1);
 	SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
 		   tick_get_broadcast_mask()->bits[0]);
 #ifdef CONFIG_TICK_ONESHOT
@@ -233,7 +237,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
 	SEQ_printf(m, "\n");
 #endif
 	for_each_online_cpu(cpu)
-		   print_tickdevice(m, tick_get_device(cpu));
+		print_tickdevice(m, tick_get_device(cpu), cpu);
 	SEQ_printf(m, "\n");
 }
 #else
-- 
GitLab


From 870e2a284567714335d125c390366dce882d726f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 29 Sep 2008 17:41:55 +0200
Subject: [PATCH 675/895] timer_list: add base address to clock base

The base address of a (per cpu) clock base is a useful debug info.
Add it and bump the version number of timer_lists.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timer_list.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 5479c6e7a023..f6426911e35a 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -110,6 +110,7 @@ next_one:
 static void
 print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
 {
+	SEQ_printf(m, "  .base:       %p\n", base);
 	SEQ_printf(m, "  .index:      %d\n",
 			base->index);
 	SEQ_printf(m, "  .resolution: %Lu nsecs\n",
@@ -249,7 +250,7 @@ static int timer_list_show(struct seq_file *m, void *v)
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
-	SEQ_printf(m, "Timer List Version: v0.3\n");
+	SEQ_printf(m, "Timer List Version: v0.4\n");
 	SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
 	SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
 
-- 
GitLab


From 041fb574c75a570a0796acd3ed83e4ce0ea920f4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 20 Oct 2008 03:31:17 -0700
Subject: [PATCH 676/895] netfilter: ctnetlink: remove obsolete NAT dependency
 from Kconfig

Now that ctnetlink doesn't have any NAT module depenencies anymore,
we can also remove them from Kconfig.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 78892cf2b021..25dcef9f2194 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -271,7 +271,6 @@ config NF_CONNTRACK_TFTP
 config NF_CT_NETLINK
 	tristate 'Connection tracking netlink interface'
 	select NETFILTER_NETLINK
-	depends on NF_NAT=n || NF_NAT
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables support for a netlink-based userspace interface
-- 
GitLab


From 10a03a42d140a029bcba531df2897839f3569871 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 20 Oct 2008 03:31:54 -0700
Subject: [PATCH 677/895] netfilter: netns: use NFPROTO_NUMPROTO instead of
 NUMPROTO for tables array

The netfilter families have been decoupled from regular protocol families.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/x_tables.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index 0cb63ed2c1fc..b8093971ccb4 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -2,9 +2,9 @@
 #define __NETNS_X_TABLES_H
 
 #include <linux/list.h>
-#include <linux/net.h>
+#include <linux/netfilter.h>
 
 struct netns_xt {
-	struct list_head tables[NPROTO];
+	struct list_head tables[NFPROTO_NUMPROTO];
 };
 #endif
-- 
GitLab


From 6def1eb48101600884ebed56de03041fadc7a985 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 20 Oct 2008 03:32:21 -0700
Subject: [PATCH 678/895] netfilter: xt_iprange: fix range inversion match

Inverted IPv4 v1 and IPv6 v0 matches don't match anything since 2.6.25-rc1!

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_iprange.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 6f62c36948d9..7ac54eab0b00 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -61,7 +61,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_SRC) {
 		m  = ntohl(iph->saddr) < ntohl(info->src_min.ip);
 		m |= ntohl(iph->saddr) > ntohl(info->src_max.ip);
-		m ^= info->flags & IPRANGE_SRC_INV;
+		m ^= !!(info->flags & IPRANGE_SRC_INV);
 		if (m) {
 			pr_debug("src IP " NIPQUAD_FMT " NOT in range %s"
 			         NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -75,7 +75,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_DST) {
 		m  = ntohl(iph->daddr) < ntohl(info->dst_min.ip);
 		m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip);
-		m ^= info->flags & IPRANGE_DST_INV;
+		m ^= !!(info->flags & IPRANGE_DST_INV);
 		if (m) {
 			pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s"
 			         NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -114,14 +114,14 @@ iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_SRC) {
 		m  = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
 		m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
-		m ^= info->flags & IPRANGE_SRC_INV;
+		m ^= !!(info->flags & IPRANGE_SRC_INV);
 		if (m)
 			return false;
 	}
 	if (info->flags & IPRANGE_DST) {
 		m  = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
 		m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
-		m ^= info->flags & IPRANGE_DST_INV;
+		m ^= !!(info->flags & IPRANGE_DST_INV);
 		if (m)
 			return false;
 	}
-- 
GitLab


From 311670f3ea90115f2f1840e3e9770ed71e06e6c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 20 Oct 2008 03:33:24 -0700
Subject: [PATCH 679/895] netfilter: snmp nat leaks memory in case of failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo JÃ¤rvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_snmp_basic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ffeaffc3fffe..8303e4b406c0 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -742,6 +742,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 			*obj = kmalloc(sizeof(struct snmp_object) + len,
 				       GFP_ATOMIC);
 			if (*obj == NULL) {
+				kfree(p);
 				kfree(id);
 				if (net_ratelimit())
 					printk("OOM in bsalg (%d)\n", __LINE__);
-- 
GitLab


From b09eec161b0d416cac0f4758042efdf8f912ce27 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 20 Oct 2008 03:33:49 -0700
Subject: [PATCH 680/895] netfilter: xt_recent: use proc_create_data()

Fixes a crash in recent_seq_start:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000100
IP: [<ffffffffa002119c>] recent_seq_start+0x4c/0x90 [xt_recent]
PGD 17d33c067 PUD 107afe067 PMD 0
Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
CPU 0
Modules linked in: ipt_LOG xt_recent af_packet iptable_nat nf_nat nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 xt_tcpudp iptable_filter ip_tables x_tables ext2 nls_utf8 fuse sr_mod cdrom [last unloaded: ntfs]
Pid: 32373, comm: cat Not tainted 2.6.27-04ab591808565f968d4406f6435090ad671ebdab #6
RIP: 0010:[<ffffffffa002119c>]  [<ffffffffa002119c>] recent_seq_start+0x4c/0x90 [xt_recent]
RSP: 0018:ffff88015fed7e28  EFLAGS: 00010246
...

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_recent.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 4ebd4ca9a991..280c471bcdf4 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -318,15 +318,15 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	for (i = 0; i < ip_list_hash_size; i++)
 		INIT_LIST_HEAD(&t->iphash[i]);
 #ifdef CONFIG_PROC_FS
-	t->proc = proc_create(t->name, ip_list_perms, recent_proc_dir,
-		  &recent_mt_fops);
+	t->proc = proc_create_data(t->name, ip_list_perms, recent_proc_dir,
+		  &recent_mt_fops, t);
 	if (t->proc == NULL) {
 		kfree(t);
 		goto out;
 	}
 #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	t->proc_old = proc_create(t->name, ip_list_perms, proc_old_dir,
-		      &recent_old_fops);
+	t->proc_old = proc_create_data(t->name, ip_list_perms, proc_old_dir,
+		      &recent_old_fops, t);
 	if (t->proc_old == NULL) {
 		remove_proc_entry(t->name, proc_old_dir);
 		kfree(t);
@@ -334,11 +334,9 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	}
 	t->proc_old->uid   = ip_list_uid;
 	t->proc_old->gid   = ip_list_gid;
-	t->proc_old->data  = t;
 #endif
 	t->proc->uid       = ip_list_uid;
 	t->proc->gid       = ip_list_gid;
-	t->proc->data      = t;
 #endif
 	spin_lock_bh(&recent_lock);
 	list_add_tail(&t->list, &tables);
-- 
GitLab


From 67671841dfb82df7a60c46e6fefe813cf57805ff Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 20 Oct 2008 03:34:27 -0700
Subject: [PATCH 681/895] netfilter: fix compilation error with NAT=n

This patch fixes the compilation of ctnetlink when the NAT support
is not enabled.

/home/benh/kernels/linux-powerpc/net/netfilter/nf_conntrack_netlink.c:819: warning: enum nf_nat_manip_type\u2019 declared inside parameter list
/home/benh/kernels/linux-powerpc/net/netfilter/nf_conntrack_netlink.c:819: warning: its scope is only this definition or declaration, which is probably not what you want

Reported-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reported by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2e4ad9671e19..a040d46f85d6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -813,6 +813,7 @@ out:
 	return err;
 }
 
+#ifdef CONFIG_NF_NAT_NEEDED
 static int
 ctnetlink_parse_nat_setup(struct nf_conn *ct,
 			  enum nf_nat_manip_type manip,
@@ -840,6 +841,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 
 	return parse_nat_setup(ct, manip, attr);
 }
+#endif
 
 static int
 ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
-- 
GitLab


From fdc9314cbe027281b5440780692105d49b53cf2c Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 20 Oct 2008 03:34:51 -0700
Subject: [PATCH 682/895] netfilter: replace old NF_ARP calls with NFPROTO_ARP

(Supplements: ee999d8b9573df1b547aacdc6d79f86eb79c25cd)

NFPROTO_ARP actually has a different value from NF_ARP, so ensure all
callers use the new value so that packets _do_ get delivered to the
registered hooks.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c  | 2 +-
 net/ipv4/arp.c             | 4 ++--
 net/netfilter/xt_NFQUEUE.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index a4abed5b4c44..fa5cda4e552a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -719,7 +719,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 		return NF_ACCEPT;
 	}
 	*d = (struct net_device *)in;
-	NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
+	NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
 		(struct net_device *)out, br_nf_forward_finish);
 
 	return NF_STOLEN;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b043eda60b04..1a9dd66511fc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -663,7 +663,7 @@ out:
 void arp_xmit(struct sk_buff *skb)
 {
 	/* Send it off, maybe filter it using firewalling first.  */
-	NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
+	NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
 }
 
 /*
@@ -928,7 +928,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
 
-	return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
+	return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
 
 freeskb:
 	kfree_skb(skb);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 2cc1fff49307..f9977b3311f7 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -48,7 +48,7 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "NFQUEUE",
-		.family		= NF_ARP,
+		.family		= NFPROTO_ARP,
 		.target		= nfqueue_tg,
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
-- 
GitLab


From 24bdeb4598b9560c8ffecb8ba5cefa01f3a12a54 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Sat, 18 Oct 2008 20:27:27 -0700
Subject: [PATCH 683/895] Fix documentation of sysrq-q

I fell into the trap recently that it only dumps hrtimers instead of
all timers. Fix the documentation.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: torvalds@linux-foundation.org
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/sysrq.txt | 3 ++-
 drivers/char/sysrq.c    | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 5ce0952aa065..49378a9f2b5f 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -95,7 +95,8 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'p'     - Will dump the current registers and flags to your console.
 
-'q'     - Will dump a list of all running timers.
+'q'     - Will dump a list of all running hrtimers.
+	  WARNING: Does not cover any other timers
 
 'r'     - Turns off keyboard raw mode and sets it to XLATE.
 
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index dce4cc0e6953..d0c0d64ed366 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
 static struct sysrq_key_op sysrq_show_timers_op = {
 	.handler	= sysrq_handle_show_timers,
 	.help_msg	= "show-all-timers(Q)",
-	.action_msg	= "Show Pending Timers",
+	.action_msg	= "Show pending hrtimers (no others)",
 };
 
 static void sysrq_handle_mountro(int key, struct tty_struct *tty)
-- 
GitLab


From 322acf6585f3c4e82ee32a246b0483ca0f6ad3f4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 20 Oct 2008 12:33:14 +0200
Subject: [PATCH 684/895] fix documentation of sysrq-q really

SysRq-Q also dumps information about the clockevent devices.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/sysrq.txt | 4 ++--
 drivers/char/sysrq.c    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 49378a9f2b5f..7b3b069c376e 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -95,8 +95,8 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'p'     - Will dump the current registers and flags to your console.
 
-'q'     - Will dump a list of all running hrtimers.
-	  WARNING: Does not cover any other timers
+'q'     - Will dump per CPU lists of all armed hrtimers (not timer_list timers)
+	  and detailed information about all clockevent devices.
 
 'r'     - Turns off keyboard raw mode and sets it to XLATE.
 
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index d0c0d64ed366..ce0d9da52a8a 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
 static struct sysrq_key_op sysrq_show_timers_op = {
 	.handler	= sysrq_handle_show_timers,
 	.help_msg	= "show-all-timers(Q)",
-	.action_msg	= "Show pending hrtimers (no others)",
+	.action_msg	= "Show clockevent devices & pending hrtimers (no others)",
 };
 
 static void sysrq_handle_mountro(int key, struct tty_struct *tty)
-- 
GitLab


From d014e5f7f817d8a5d65457c416ade9244785f196 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 18 Oct 2008 20:25:41 -0700
Subject: [PATCH 685/895] mailmap: add Mark Brown

A couple of commits have a broken real name - fix them up.

Signed-off-by: Mark Brown <broonie@sirena.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index dfab12f809ed..eba9bf953ef5 100644
--- a/.mailmap
+++ b/.mailmap
@@ -66,6 +66,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com>
 Koushik <raghavendra.koushik@neterion.com>
 Leonid I Ananiev <leonid.i.ananiev@intel.com>
 Linas Vepstas <linas@austin.ibm.com>
+Mark Brown <broonie@sirena.org.uk>
 Matthieu CASTET <castet.matthieu@free.fr>
 Michael Buesch <mb@bu3sch.de>
 Michael Buesch <mbuesch@freenet.de>
-- 
GitLab


From cbb2ed4ac61f3e3d1656db141cfced6ed38861d5 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Sat, 18 Oct 2008 20:25:46 -0700
Subject: [PATCH 686/895] tpm: don't export static functions

Today's linux-next build (powerpc_allyesconfig) failed like this:

drivers/char/tpm/tpm.c:1162: error: __ksymtab_tpm_dev_release causes a section type conflict

Caused by commit 253115b71fa06330bd58afbe01ccaf763a8a0cf1 ("The
tpm_dev_release function is only called for platform devices, not pnp")
which exported a static function.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Rajiv Andrade <srajiv@linux.vnet.ibm.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tpm/tpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index e70d13defde4..9c47dc48c9fd 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -1157,7 +1157,7 @@ EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
  * Once all references to platform device are down to 0,
  * release all allocated structures.
  */
-static void tpm_dev_release(struct device *dev)
+void tpm_dev_release(struct device *dev)
 {
 	struct tpm_chip *chip = dev_get_drvdata(dev);
 
-- 
GitLab


From 8433ac61acf733dc4b427de5f0891518c21538f9 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Sat, 18 Oct 2008 20:25:53 -0700
Subject: [PATCH 687/895] serial_txx9: use %lx for iobase

Fix a warning caused by commit 0c8946d97ae7d2d6691f8290a10faa63453b63f8
(serial: Make uart_port's ioport "unsigned long".)

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Josip Rodin <joy@entuzijast.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/serial_txx9.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c
index 8fcb4c5b9a26..7313c2edcb83 100644
--- a/drivers/serial/serial_txx9.c
+++ b/drivers/serial/serial_txx9.c
@@ -1039,7 +1039,7 @@ static int __devinit serial_txx9_probe(struct platform_device *dev)
 		ret = serial_txx9_register_port(&port);
 		if (ret < 0) {
 			dev_err(&dev->dev, "unable to register port at index %d "
-				"(IO%x MEM%llx IRQ%d): %d\n", i,
+				"(IO%lx MEM%llx IRQ%d): %d\n", i,
 				p->iobase, (unsigned long long)p->mapbase,
 				p->irq, ret);
 		}
-- 
GitLab


From 71088785c6bc68fddb450063d57b1bd1c78e0ea1 Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sat, 18 Oct 2008 20:25:58 -0700
Subject: [PATCH 688/895] mm: cleanup to make remove_memory() arch-neutral

There is nothing architecture specific about remove_memory().
remove_memory() function is common for all architectures which support
hotplug memory remove.  Instead of duplicating it in every architecture,
collapse them into arch neutral function.

[akpm@linux-foundation.org: fix the export]
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Gary Hade <garyhade@us.ibm.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/init.c   | 17 -----------------
 arch/powerpc/mm/mem.c | 17 -----------------
 arch/s390/mm/init.c   | 11 -----------
 mm/memory_hotplug.c   | 12 +++++++++++-
 4 files changed, 11 insertions(+), 46 deletions(-)

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index f482a9098e32..054bcd9439aa 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -700,23 +700,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return ret;
 }
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
-	unsigned long start_pfn, end_pfn;
-	unsigned long timeout = 120 * HZ;
-	int ret;
-	start_pfn = start >> PAGE_SHIFT;
-	end_pfn = start_pfn + (size >> PAGE_SHIFT);
-	ret = offline_pages(start_pfn, end_pfn, timeout);
-	if (ret)
-		goto out;
-	/* we can free mem_map at this point */
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(remove_memory);
-#endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif
 
 /*
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 98d7bf99533a..b9e1a1da6e52 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -134,23 +134,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return __add_pages(zone, start_pfn, nr_pages);
 }
-
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
-	unsigned long start_pfn, end_pfn;
-	int ret;
-
-	start_pfn = start >> PAGE_SHIFT;
-	end_pfn = start_pfn + (size >> PAGE_SHIFT);
-	ret = offline_pages(start_pfn, end_pfn, 120 * HZ);
-	if (ret)
-		goto out;
-	/* Arch-specific calls go here - next patch */
-out:
-	return ret;
-}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 /*
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 1169130a97ef..158b0d6d7046 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -189,14 +189,3 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	return rc;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
-
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
-	unsigned long start_pfn, end_pfn;
-
-	start_pfn = PFN_DOWN(start);
-	end_pfn = start_pfn + PFN_DOWN(size);
-	return offline_pages(start_pfn, end_pfn, 120 * HZ);
-}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 89fee2dcb039..c299d083d8e2 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -26,6 +26,7 @@
 #include <linux/delay.h>
 #include <linux/migrate.h>
 #include <linux/page-isolation.h>
+#include <linux/pfn.h>
 
 #include <asm/tlbflush.h>
 
@@ -849,10 +850,19 @@ failed_removal:
 
 	return ret;
 }
+
+int remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn, end_pfn;
+
+	start_pfn = PFN_DOWN(start);
+	end_pfn = start_pfn + PFN_DOWN(size);
+	return offline_pages(start_pfn, end_pfn, 120 * HZ);
+}
 #else
 int remove_memory(u64 start, u64 size)
 {
 	return -EINVAL;
 }
-EXPORT_SYMBOL_GPL(remove_memory);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
+EXPORT_SYMBOL_GPL(remove_memory);
-- 
GitLab


From 62695a84eb8f2e718bf4dfb21700afaa7a08e0ea Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 18 Oct 2008 20:26:09 -0700
Subject: [PATCH 689/895] vmscan: move isolate_lru_page() to vmscan.c

On large memory systems, the VM can spend way too much time scanning
through pages that it cannot (or should not) evict from memory.  Not only
does it use up CPU time, but it also provokes lock contention and can
leave large systems under memory presure in a catatonic state.

This patch series improves VM scalability by:

1) putting filesystem backed, swap backed and unevictable pages
   onto their own LRUs, so the system only scans the pages that it
   can/should evict from memory

2) switching to two handed clock replacement for the anonymous LRUs,
   so the number of pages that need to be scanned when the system
   starts swapping is bound to a reasonable number

3) keeping unevictable pages off the LRU completely, so the
   VM does not waste CPU time scanning them. ramfs, ramdisk,
   SHM_LOCKED shared memory segments and mlock()ed VMA pages
   are keept on the unevictable list.

This patch:

isolate_lru_page logically belongs to be in vmscan.c than migrate.c.

It is tough, because we don't need that function without memory migration
so there is a valid argument to have it in migrate.c.  However a
subsequent patch needs to make use of it in the core mm, so we can happily
move it to vmscan.c.

Also, make the function a little more generic by not requiring that it
adds an isolated page to a given list.  Callers can do that.

	Note that we now have '__isolate_lru_page()', that does
	something quite different, visible outside of vmscan.c
	for use with memory controller.  Methinks we need to
	rationalize these names/purposes.	--lts

[akpm@linux-foundation.org: fix mm/memory_hotplug.c build]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h |  3 ---
 mm/internal.h           |  2 ++
 mm/memory_hotplug.c     |  3 ++-
 mm/mempolicy.c          |  9 +++++++--
 mm/migrate.c            | 34 +++----------------------------
 mm/vmscan.c             | 45 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 59 insertions(+), 37 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 03aea612d284..3f34005068d4 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,7 +7,6 @@
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
 #ifdef CONFIG_MIGRATION
-extern int isolate_lru_page(struct page *p, struct list_head *pagelist);
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -21,8 +20,6 @@ extern int migrate_vmas(struct mm_struct *mm,
 		const nodemask_t *from, const nodemask_t *to,
 		unsigned long flags);
 #else
-static inline int isolate_lru_page(struct page *p, struct list_head *list)
-					{ return -ENOSYS; }
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private) { return -ENOSYS; }
diff --git a/mm/internal.h b/mm/internal.h
index 1f43f7416972..4e8e78b978b5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -39,6 +39,8 @@ static inline void __put_page(struct page *page)
 	atomic_dec(&page->_count);
 }
 
+extern int isolate_lru_page(struct page *page);
+
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 
 /*
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c299d083d8e2..3b4975815141 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -658,8 +658,9 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		 * We can skip free pages. And we can only deal with pages on
 		 * LRU.
 		 */
-		ret = isolate_lru_page(page, &source);
+		ret = isolate_lru_page(page);
 		if (!ret) { /* Success */
+			list_add_tail(&page->lru, &source);
 			move_pages--;
 		} else {
 			/* Becasue we don't have big zone->lock. we should
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 83369058ec13..71b47491487d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -93,6 +93,8 @@
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 
+#include "internal.h"
+
 /* Internal flags */
 #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)	/* Skip checks for continuous vmas */
 #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)		/* Invert check for nodemask */
@@ -762,8 +764,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 	/*
 	 * Avoid migrating a page that is shared with others.
 	 */
-	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1)
-		isolate_lru_page(page, pagelist);
+	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
+		if (!isolate_lru_page(page)) {
+			list_add_tail(&page->lru, pagelist);
+		}
+	}
 }
 
 static struct page *new_node_page(struct page *page, unsigned long node, int **x)
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a80136b23bb..da73742e52a5 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -36,36 +36,6 @@
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
-/*
- * Isolate one page from the LRU lists. If successful put it onto
- * the indicated list with elevated page count.
- *
- * Result:
- *  -EBUSY: page not on LRU list
- *  0: page removed from LRU list and added to the specified list.
- */
-int isolate_lru_page(struct page *page, struct list_head *pagelist)
-{
-	int ret = -EBUSY;
-
-	if (PageLRU(page)) {
-		struct zone *zone = page_zone(page);
-
-		spin_lock_irq(&zone->lru_lock);
-		if (PageLRU(page) && get_page_unless_zero(page)) {
-			ret = 0;
-			ClearPageLRU(page);
-			if (PageActive(page))
-				del_page_from_active_list(zone, page);
-			else
-				del_page_from_inactive_list(zone, page);
-			list_add_tail(&page->lru, pagelist);
-		}
-		spin_unlock_irq(&zone->lru_lock);
-	}
-	return ret;
-}
-
 /*
  * migrate_prep() needs to be called before we start compiling a list of pages
  * to be migrated using isolate_lru_page().
@@ -914,7 +884,9 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
 				!migrate_all)
 			goto put_and_set;
 
-		err = isolate_lru_page(page, &pagelist);
+		err = isolate_lru_page(page);
+		if (!err)
+			list_add_tail(&page->lru, &pagelist);
 put_and_set:
 		/*
 		 * Either remove the duplicate refcount from
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1ff1a58e7c10..1fd4912a596c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -844,6 +844,51 @@ static unsigned long clear_active_flags(struct list_head *page_list)
 	return nr_active;
 }
 
+/**
+ * isolate_lru_page - tries to isolate a page from its LRU list
+ * @page: page to isolate from its LRU list
+ *
+ * Isolates a @page from an LRU list, clears PageLRU and adjusts the
+ * vmstat statistic corresponding to whatever LRU list the page was on.
+ *
+ * Returns 0 if the page was removed from an LRU list.
+ * Returns -EBUSY if the page was not on an LRU list.
+ *
+ * The returned page will have PageLRU() cleared.  If it was found on
+ * the active list, it will have PageActive set.  That flag may need
+ * to be cleared by the caller before letting the page go.
+ *
+ * The vmstat statistic corresponding to the list on which the page was
+ * found will be decremented.
+ *
+ * Restrictions:
+ * (1) Must be called with an elevated refcount on the page. This is a
+ *     fundamentnal difference from isolate_lru_pages (which is called
+ *     without a stable reference).
+ * (2) the lru_lock must not be held.
+ * (3) interrupts must be enabled.
+ */
+int isolate_lru_page(struct page *page)
+{
+	int ret = -EBUSY;
+
+	if (PageLRU(page)) {
+		struct zone *zone = page_zone(page);
+
+		spin_lock_irq(&zone->lru_lock);
+		if (PageLRU(page) && get_page_unless_zero(page)) {
+			ret = 0;
+			ClearPageLRU(page);
+			if (PageActive(page))
+				del_page_from_active_list(zone, page);
+			else
+				del_page_from_inactive_list(zone, page);
+		}
+		spin_unlock_irq(&zone->lru_lock);
+	}
+	return ret;
+}
+
 /*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
-- 
GitLab


From b69408e88bd86b98feb7b9a38fd865e1ddb29827 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Sat, 18 Oct 2008 20:26:14 -0700
Subject: [PATCH 690/895] vmscan: Use an indexed array for LRU variables

Currently we are defining explicit variables for the inactive and active
list.  An indexed array can be more generic and avoid repeating similar
code in several places in the reclaim code.

We are saving a few bytes in terms of code size:

Before:

   text    data     bss     dec     hex filename
4097753  573120 4092484 8763357  85b7dd vmlinux

After:

   text    data     bss     dec     hex filename
4097729  573120 4092484 8763333  85b7c5 vmlinux

Having an easy way to add new lru lists may ease future work on the
reclaim code.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  17 ++----
 include/linux/mm_inline.h  |  49 +++++++++++----
 include/linux/mmzone.h     |  26 ++++++--
 mm/memcontrol.c            | 115 +++++++++++++----------------------
 mm/page_alloc.c            |   9 +--
 mm/swap.c                  |   2 +-
 mm/vmscan.c                | 120 ++++++++++++++++++-------------------
 mm/vmstat.c                |   3 +-
 8 files changed, 171 insertions(+), 170 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index fdf3967e1397..a6ac0d491fe6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -69,10 +69,8 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
 extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 							int priority);
 
-extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-				struct zone *zone, int priority);
-extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-				struct zone *zone, int priority);
+extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
+					int priority, enum lru_list lru);
 
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 static inline void page_reset_bad_cgroup(struct page *page)
@@ -159,14 +157,9 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 {
 }
 
-static inline long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-					struct zone *zone, int priority)
-{
-	return 0;
-}
-
-static inline long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-					struct zone *zone, int priority)
+static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem,
+					struct zone *zone, int priority,
+					enum lru_list lru)
 {
 	return 0;
 }
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 895bc4e93039..2704729777ef 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -1,40 +1,67 @@
+static inline void
+add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+{
+	list_add(&page->lru, &zone->lru[l].list);
+	__inc_zone_state(zone, NR_LRU_BASE + l);
+}
+
+static inline void
+del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+{
+	list_del(&page->lru);
+	__dec_zone_state(zone, NR_LRU_BASE + l);
+}
+
 static inline void
 add_page_to_active_list(struct zone *zone, struct page *page)
 {
-	list_add(&page->lru, &zone->active_list);
-	__inc_zone_state(zone, NR_ACTIVE);
+	add_page_to_lru_list(zone, page, LRU_ACTIVE);
 }
 
 static inline void
 add_page_to_inactive_list(struct zone *zone, struct page *page)
 {
-	list_add(&page->lru, &zone->inactive_list);
-	__inc_zone_state(zone, NR_INACTIVE);
+	add_page_to_lru_list(zone, page, LRU_INACTIVE);
 }
 
 static inline void
 del_page_from_active_list(struct zone *zone, struct page *page)
 {
-	list_del(&page->lru);
-	__dec_zone_state(zone, NR_ACTIVE);
+	del_page_from_lru_list(zone, page, LRU_ACTIVE);
 }
 
 static inline void
 del_page_from_inactive_list(struct zone *zone, struct page *page)
 {
-	list_del(&page->lru);
-	__dec_zone_state(zone, NR_INACTIVE);
+	del_page_from_lru_list(zone, page, LRU_INACTIVE);
 }
 
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
+	enum lru_list l = LRU_INACTIVE;
+
 	list_del(&page->lru);
 	if (PageActive(page)) {
 		__ClearPageActive(page);
-		__dec_zone_state(zone, NR_ACTIVE);
-	} else {
-		__dec_zone_state(zone, NR_INACTIVE);
+		l = LRU_ACTIVE;
 	}
+	__dec_zone_state(zone, NR_LRU_BASE + l);
 }
 
+/**
+ * page_lru - which LRU list should a page be on?
+ * @page: the page to test
+ *
+ * Returns the LRU list a page should be on, as an index
+ * into the array of LRU lists.
+ */
+static inline enum lru_list page_lru(struct page *page)
+{
+	enum lru_list lru = LRU_BASE;
+
+	if (PageActive(page))
+		lru += LRU_ACTIVE;
+
+	return lru;
+}
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 428328a05fa1..156e18f3919b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -81,8 +81,9 @@ struct zone_padding {
 enum zone_stat_item {
 	/* First 128 byte cacheline (assuming 64 bit words) */
 	NR_FREE_PAGES,
-	NR_INACTIVE,
-	NR_ACTIVE,
+	NR_LRU_BASE,
+	NR_INACTIVE = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+	NR_ACTIVE,	/*  "     "     "   "       "         */
 	NR_ANON_PAGES,	/* Mapped anonymous pages */
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
 			   only modified from process context */
@@ -107,6 +108,19 @@ enum zone_stat_item {
 #endif
 	NR_VM_ZONE_STAT_ITEMS };
 
+enum lru_list {
+	LRU_BASE,
+	LRU_INACTIVE=LRU_BASE,	/* must match order of NR_[IN]ACTIVE */
+	LRU_ACTIVE,		/*  "     "     "   "       "        */
+	NR_LRU_LISTS };
+
+#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
+
+static inline int is_active_lru(enum lru_list l)
+{
+	return (l == LRU_ACTIVE);
+}
+
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
@@ -251,10 +265,10 @@ struct zone {
 
 	/* Fields commonly accessed by the page reclaim scanner */
 	spinlock_t		lru_lock;	
-	struct list_head	active_list;
-	struct list_head	inactive_list;
-	unsigned long		nr_scan_active;
-	unsigned long		nr_scan_inactive;
+	struct {
+		struct list_head list;
+		unsigned long nr_scan;
+	} lru[NR_LRU_LISTS];
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	unsigned long		flags;		   /* zone flags, see below */
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 36896f3eb7f5..c0cbd7790c51 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -32,6 +32,7 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/vmalloc.h>
+#include <linux/mm_inline.h>
 
 #include <asm/uaccess.h>
 
@@ -85,22 +86,13 @@ static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
 /*
  * per-zone information in memory controller.
  */
-
-enum mem_cgroup_zstat_index {
-	MEM_CGROUP_ZSTAT_ACTIVE,
-	MEM_CGROUP_ZSTAT_INACTIVE,
-
-	NR_MEM_CGROUP_ZSTAT,
-};
-
 struct mem_cgroup_per_zone {
 	/*
 	 * spin_lock to protect the per cgroup LRU
 	 */
 	spinlock_t		lru_lock;
-	struct list_head	active_list;
-	struct list_head	inactive_list;
-	unsigned long count[NR_MEM_CGROUP_ZSTAT];
+	struct list_head	lists[NR_LRU_LISTS];
+	unsigned long		count[NR_LRU_LISTS];
 };
 /* Macro for accessing counter */
 #define MEM_CGROUP_ZSTAT(mz, idx)	((mz)->count[(idx)])
@@ -227,7 +219,7 @@ page_cgroup_zoneinfo(struct page_cgroup *pc)
 }
 
 static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem,
-					enum mem_cgroup_zstat_index idx)
+					enum lru_list idx)
 {
 	int nid, zid;
 	struct mem_cgroup_per_zone *mz;
@@ -297,11 +289,9 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 			struct page_cgroup *pc)
 {
 	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+	int lru = !!from;
 
-	if (from)
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
-	else
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
 	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
 	list_del(&pc->lru);
@@ -310,37 +300,35 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 				struct page_cgroup *pc)
 {
-	int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+	int lru = LRU_INACTIVE;
+
+	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
+		lru += LRU_ACTIVE;
+
+	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	list_add(&pc->lru, &mz->lists[lru]);
 
-	if (!to) {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
-		list_add(&pc->lru, &mz->inactive_list);
-	} else {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
-		list_add(&pc->lru, &mz->active_list);
-	}
 	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
 }
 
 static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
-	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
 	struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
+	int lru = LRU_INACTIVE;
 
-	if (from)
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
-	else
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
+		lru += LRU_ACTIVE;
 
-	if (active) {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+
+	if (active)
 		pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
-		list_move(&pc->lru, &mz->active_list);
-	} else {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
+	else
 		pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
-		list_move(&pc->lru, &mz->inactive_list);
-	}
+
+	lru = !!active;
+	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	list_move(&pc->lru, &mz->lists[lru]);
 }
 
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
@@ -412,8 +400,8 @@ long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
 {
 	unsigned long active, inactive;
 	/* active and inactive are the number of pages. 'long' is ok.*/
-	active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE);
-	inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE);
+	active = mem_cgroup_get_all_zonestat(mem, LRU_ACTIVE);
+	inactive = mem_cgroup_get_all_zonestat(mem, LRU_INACTIVE);
 	return (long) (active / (inactive + 1));
 }
 
@@ -444,28 +432,17 @@ void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
  * (see include/linux/mmzone.h)
  */
 
-long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-				   struct zone *zone, int priority)
+long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
+					int priority, enum lru_list lru)
 {
-	long nr_active;
+	long nr_pages;
 	int nid = zone->zone_pgdat->node_id;
 	int zid = zone_idx(zone);
 	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
 
-	nr_active = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE);
-	return (nr_active >> priority);
-}
-
-long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-					struct zone *zone, int priority)
-{
-	long nr_inactive;
-	int nid = zone->zone_pgdat->node_id;
-	int zid = zone_idx(zone);
-	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
+	nr_pages = MEM_CGROUP_ZSTAT(mz, lru);
 
-	nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE);
-	return (nr_inactive >> priority);
+	return (nr_pages >> priority);
 }
 
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -484,14 +461,11 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	int nid = z->zone_pgdat->node_id;
 	int zid = zone_idx(z);
 	struct mem_cgroup_per_zone *mz;
+	int lru = !!active;
 
 	BUG_ON(!mem_cont);
 	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
-	if (active)
-		src = &mz->active_list;
-	else
-		src = &mz->inactive_list;
-
+	src = &mz->lists[lru];
 
 	spin_lock(&mz->lru_lock);
 	scan = 0;
@@ -863,7 +837,7 @@ int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
 #define FORCE_UNCHARGE_BATCH	(128)
 static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 			    struct mem_cgroup_per_zone *mz,
-			    int active)
+			    enum lru_list lru)
 {
 	struct page_cgroup *pc;
 	struct page *page;
@@ -871,10 +845,7 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 	unsigned long flags;
 	struct list_head *list;
 
-	if (active)
-		list = &mz->active_list;
-	else
-		list = &mz->inactive_list;
+	list = &mz->lists[lru];
 
 	spin_lock_irqsave(&mz->lru_lock, flags);
 	while (!list_empty(list)) {
@@ -922,11 +893,10 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
 		for_each_node_state(node, N_POSSIBLE)
 			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
 				struct mem_cgroup_per_zone *mz;
+				enum lru_list l;
 				mz = mem_cgroup_zoneinfo(mem, node, zid);
-				/* drop all page_cgroup in active_list */
-				mem_cgroup_force_empty_list(mem, mz, 1);
-				/* drop all page_cgroup in inactive_list */
-				mem_cgroup_force_empty_list(mem, mz, 0);
+				for_each_lru(l)
+					mem_cgroup_force_empty_list(mem, mz, l);
 			}
 	}
 	ret = 0;
@@ -1015,9 +985,9 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 		unsigned long active, inactive;
 
 		inactive = mem_cgroup_get_all_zonestat(mem_cont,
-						MEM_CGROUP_ZSTAT_INACTIVE);
+						LRU_INACTIVE);
 		active = mem_cgroup_get_all_zonestat(mem_cont,
-						MEM_CGROUP_ZSTAT_ACTIVE);
+						LRU_ACTIVE);
 		cb->fill(cb, "active", (active) * PAGE_SIZE);
 		cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
 	}
@@ -1062,6 +1032,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 {
 	struct mem_cgroup_per_node *pn;
 	struct mem_cgroup_per_zone *mz;
+	enum lru_list l;
 	int zone, tmp = node;
 	/*
 	 * This routine is called against possible nodes.
@@ -1082,9 +1053,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 		mz = &pn->zoneinfo[zone];
-		INIT_LIST_HEAD(&mz->active_list);
-		INIT_LIST_HEAD(&mz->inactive_list);
 		spin_lock_init(&mz->lru_lock);
+		for_each_lru(l)
+			INIT_LIST_HEAD(&mz->lists[l]);
 	}
 	return 0;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9eb9eb928285..ee7a96ef40dc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3414,6 +3414,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, memmap_pages;
+		enum lru_list l;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
@@ -3465,10 +3466,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone->prev_priority = DEF_PRIORITY;
 
 		zone_pcp_init(zone);
-		INIT_LIST_HEAD(&zone->active_list);
-		INIT_LIST_HEAD(&zone->inactive_list);
-		zone->nr_scan_active = 0;
-		zone->nr_scan_inactive = 0;
+		for_each_lru(l) {
+			INIT_LIST_HEAD(&zone->lru[l].list);
+			zone->lru[l].nr_scan = 0;
+		}
 		zap_zone_vm_stats(zone);
 		zone->flags = 0;
 		if (!size)
diff --git a/mm/swap.c b/mm/swap.c
index 9e0cb3118079..82c2b3a76f94 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -117,7 +117,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
 			spin_lock(&zone->lru_lock);
 		}
 		if (PageLRU(page) && !PageActive(page)) {
-			list_move_tail(&page->lru, &zone->inactive_list);
+			list_move_tail(&page->lru, &zone->lru[LRU_INACTIVE].list);
 			pgmoved++;
 		}
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1fd4912a596c..46fdaa546b8d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -819,10 +819,10 @@ static unsigned long isolate_pages_global(unsigned long nr,
 					int active)
 {
 	if (active)
-		return isolate_lru_pages(nr, &z->active_list, dst,
+		return isolate_lru_pages(nr, &z->lru[LRU_ACTIVE].list, dst,
 						scanned, order, mode);
 	else
-		return isolate_lru_pages(nr, &z->inactive_list, dst,
+		return isolate_lru_pages(nr, &z->lru[LRU_INACTIVE].list, dst,
 						scanned, order, mode);
 }
 
@@ -973,10 +973,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 			VM_BUG_ON(PageLRU(page));
 			SetPageLRU(page);
 			list_del(&page->lru);
-			if (PageActive(page))
-				add_page_to_active_list(zone, page);
-			else
-				add_page_to_inactive_list(zone, page);
+			add_page_to_lru_list(zone, page, page_lru(page));
 			if (!pagevec_add(&pvec, page)) {
 				spin_unlock_irq(&zone->lru_lock);
 				__pagevec_release(&pvec);
@@ -1144,8 +1141,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	int pgdeactivate = 0;
 	unsigned long pgscanned;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
-	LIST_HEAD(l_inactive);	/* Pages to go onto the inactive_list */
-	LIST_HEAD(l_active);	/* Pages to go onto the active_list */
+	LIST_HEAD(l_active);
+	LIST_HEAD(l_inactive);
 	struct page *page;
 	struct pagevec pvec;
 	int reclaim_mapped = 0;
@@ -1194,7 +1191,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		VM_BUG_ON(!PageActive(page));
 		ClearPageActive(page);
 
-		list_move(&page->lru, &zone->inactive_list);
+		list_move(&page->lru, &zone->lru[LRU_INACTIVE].list);
 		mem_cgroup_move_lists(page, false);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
@@ -1224,7 +1221,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		SetPageLRU(page);
 		VM_BUG_ON(!PageActive(page));
 
-		list_move(&page->lru, &zone->active_list);
+		list_move(&page->lru, &zone->lru[LRU_ACTIVE].list);
 		mem_cgroup_move_lists(page, true);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
@@ -1244,65 +1241,64 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	pagevec_release(&pvec);
 }
 
+static unsigned long shrink_list(enum lru_list l, unsigned long nr_to_scan,
+	struct zone *zone, struct scan_control *sc, int priority)
+{
+	if (l == LRU_ACTIVE) {
+		shrink_active_list(nr_to_scan, zone, sc, priority);
+		return 0;
+	}
+	return shrink_inactive_list(nr_to_scan, zone, sc);
+}
+
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
 static unsigned long shrink_zone(int priority, struct zone *zone,
 				struct scan_control *sc)
 {
-	unsigned long nr_active;
-	unsigned long nr_inactive;
+	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
 	unsigned long nr_reclaimed = 0;
+	enum lru_list l;
 
 	if (scan_global_lru(sc)) {
 		/*
 		 * Add one to nr_to_scan just to make sure that the kernel
 		 * will slowly sift through the active list.
 		 */
-		zone->nr_scan_active +=
-			(zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
-		nr_active = zone->nr_scan_active;
-		zone->nr_scan_inactive +=
-			(zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
-		nr_inactive = zone->nr_scan_inactive;
-		if (nr_inactive >= sc->swap_cluster_max)
-			zone->nr_scan_inactive = 0;
-		else
-			nr_inactive = 0;
-
-		if (nr_active >= sc->swap_cluster_max)
-			zone->nr_scan_active = 0;
-		else
-			nr_active = 0;
+		for_each_lru(l) {
+			zone->lru[l].nr_scan += (zone_page_state(zone,
+					NR_LRU_BASE + l)  >> priority) + 1;
+			nr[l] = zone->lru[l].nr_scan;
+			if (nr[l] >= sc->swap_cluster_max)
+				zone->lru[l].nr_scan = 0;
+			else
+				nr[l] = 0;
+		}
 	} else {
 		/*
 		 * This reclaim occurs not because zone memory shortage but
 		 * because memory controller hits its limit.
 		 * Then, don't modify zone reclaim related data.
 		 */
-		nr_active = mem_cgroup_calc_reclaim_active(sc->mem_cgroup,
-					zone, priority);
+		nr[LRU_ACTIVE] = mem_cgroup_calc_reclaim(sc->mem_cgroup,
+					zone, priority, LRU_ACTIVE);
 
-		nr_inactive = mem_cgroup_calc_reclaim_inactive(sc->mem_cgroup,
-					zone, priority);
+		nr[LRU_INACTIVE] = mem_cgroup_calc_reclaim(sc->mem_cgroup,
+					zone, priority, LRU_INACTIVE);
 	}
 
-
-	while (nr_active || nr_inactive) {
-		if (nr_active) {
-			nr_to_scan = min(nr_active,
+	while (nr[LRU_ACTIVE] || nr[LRU_INACTIVE]) {
+		for_each_lru(l) {
+			if (nr[l]) {
+				nr_to_scan = min(nr[l],
 					(unsigned long)sc->swap_cluster_max);
-			nr_active -= nr_to_scan;
-			shrink_active_list(nr_to_scan, zone, sc, priority);
-		}
+				nr[l] -= nr_to_scan;
 
-		if (nr_inactive) {
-			nr_to_scan = min(nr_inactive,
-					(unsigned long)sc->swap_cluster_max);
-			nr_inactive -= nr_to_scan;
-			nr_reclaimed += shrink_inactive_list(nr_to_scan, zone,
-								sc);
+				nr_reclaimed += shrink_list(l, nr_to_scan,
+							zone, sc, priority);
+			}
 		}
 	}
 
@@ -1819,6 +1815,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 {
 	struct zone *zone;
 	unsigned long nr_to_scan, ret = 0;
+	enum lru_list l;
 
 	for_each_zone(zone) {
 
@@ -1828,28 +1825,25 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 		if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
 			continue;
 
-		/* For pass = 0 we don't shrink the active list */
-		if (pass > 0) {
-			zone->nr_scan_active +=
-				(zone_page_state(zone, NR_ACTIVE) >> prio) + 1;
-			if (zone->nr_scan_active >= nr_pages || pass > 3) {
-				zone->nr_scan_active = 0;
+		for_each_lru(l) {
+			/* For pass = 0 we don't shrink the active list */
+			if (pass == 0 && l == LRU_ACTIVE)
+				continue;
+
+			zone->lru[l].nr_scan +=
+				(zone_page_state(zone, NR_LRU_BASE + l)
+								>> prio) + 1;
+			if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
+				zone->lru[l].nr_scan = 0;
 				nr_to_scan = min(nr_pages,
-					zone_page_state(zone, NR_ACTIVE));
-				shrink_active_list(nr_to_scan, zone, sc, prio);
+					zone_page_state(zone,
+							NR_LRU_BASE + l));
+				ret += shrink_list(l, nr_to_scan, zone,
+								sc, prio);
+				if (ret >= nr_pages)
+					return ret;
 			}
 		}
-
-		zone->nr_scan_inactive +=
-			(zone_page_state(zone, NR_INACTIVE) >> prio) + 1;
-		if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
-			zone->nr_scan_inactive = 0;
-			nr_to_scan = min(nr_pages,
-				zone_page_state(zone, NR_INACTIVE));
-			ret += shrink_inactive_list(nr_to_scan, zone, sc);
-			if (ret >= nr_pages)
-				return ret;
-		}
 	}
 
 	return ret;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d7826af2fb07..52c0335c1b71 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -696,7 +696,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   zone->pages_low,
 		   zone->pages_high,
 		   zone->pages_scanned,
-		   zone->nr_scan_active, zone->nr_scan_inactive,
+		   zone->lru[LRU_ACTIVE].nr_scan,
+		   zone->lru[LRU_INACTIVE].nr_scan,
 		   zone->spanned_pages,
 		   zone->present_pages);
 
-- 
GitLab


From f04e9ebbe4909f9a41efd55149bc353299f4e83b Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Sat, 18 Oct 2008 20:26:19 -0700
Subject: [PATCH 691/895] swap: use an array for the LRU pagevecs

Turn the pagevecs into an array just like the LRUs.  This significantly
cleans up the source code and reduces the size of the kernel by about 13kB
after all the LRU lists have been created further down in the split VM
patch series.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagevec.h | 13 +++++--
 include/linux/swap.h    | 18 ++++++++--
 mm/migrate.c            | 11 +-----
 mm/swap.c               | 79 ++++++++++++++---------------------------
 4 files changed, 55 insertions(+), 66 deletions(-)

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 8eb7fa76c1d0..6b8f11bcc948 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -23,8 +23,7 @@ struct pagevec {
 void __pagevec_release(struct pagevec *pvec);
 void __pagevec_release_nonlru(struct pagevec *pvec);
 void __pagevec_free(struct pagevec *pvec);
-void __pagevec_lru_add(struct pagevec *pvec);
-void __pagevec_lru_add_active(struct pagevec *pvec);
+void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
 void pagevec_strip(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t start, unsigned nr_pages);
@@ -81,6 +80,16 @@ static inline void pagevec_free(struct pagevec *pvec)
 		__pagevec_free(pvec);
 }
 
+static inline void __pagevec_lru_add(struct pagevec *pvec)
+{
+	____pagevec_lru_add(pvec, LRU_INACTIVE);
+}
+
+static inline void __pagevec_lru_add_active(struct pagevec *pvec)
+{
+	____pagevec_lru_add(pvec, LRU_ACTIVE);
+}
+
 static inline void pagevec_lru_add(struct pagevec *pvec)
 {
 	if (pagevec_count(pvec))
diff --git a/include/linux/swap.h b/include/linux/swap.h
index de40f169a4e4..fcc169610d09 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -171,8 +171,8 @@ extern unsigned int nr_free_pagecache_pages(void);
 
 
 /* linux/mm/swap.c */
-extern void lru_cache_add(struct page *);
-extern void lru_cache_add_active(struct page *);
+extern void __lru_cache_add(struct page *, enum lru_list lru);
+extern void lru_cache_add_lru(struct page *, enum lru_list lru);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
@@ -180,6 +180,20 @@ extern int lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
+/**
+ * lru_cache_add: add a page to the page lists
+ * @page: the page to add
+ */
+static inline void lru_cache_add(struct page *page)
+{
+	__lru_cache_add(page, LRU_INACTIVE);
+}
+
+static inline void lru_cache_add_active(struct page *page)
+{
+	__lru_cache_add(page, LRU_ACTIVE);
+}
+
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask);
diff --git a/mm/migrate.c b/mm/migrate.c
index da73742e52a5..ad15b5ef2599 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -55,16 +55,7 @@ int migrate_prep(void)
 
 static inline void move_to_lru(struct page *page)
 {
-	if (PageActive(page)) {
-		/*
-		 * lru_cache_add_active checks that
-		 * the PG_active bit is off.
-		 */
-		ClearPageActive(page);
-		lru_cache_add_active(page);
-	} else {
-		lru_cache_add(page);
-	}
+	lru_cache_add_lru(page, page_lru(page));
 	put_page(page);
 }
 
diff --git a/mm/swap.c b/mm/swap.c
index 82c2b3a76f94..e3045040dc3e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -34,8 +34,7 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs);
+static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
 
 /*
@@ -186,28 +185,29 @@ void mark_page_accessed(struct page *page)
 
 EXPORT_SYMBOL(mark_page_accessed);
 
-/**
- * lru_cache_add: add a page to the page lists
- * @page: the page to add
- */
-void lru_cache_add(struct page *page)
+void __lru_cache_add(struct page *page, enum lru_list lru)
 {
-	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
+	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
 
 	page_cache_get(page);
 	if (!pagevec_add(pvec, page))
-		__pagevec_lru_add(pvec);
+		____pagevec_lru_add(pvec, lru);
 	put_cpu_var(lru_add_pvecs);
 }
 
-void lru_cache_add_active(struct page *page)
+/**
+ * lru_cache_add_lru - add a page to a page list
+ * @page: the page to be added to the LRU.
+ * @lru: the LRU list to which the page is added.
+ */
+void lru_cache_add_lru(struct page *page, enum lru_list lru)
 {
-	struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
+	if (PageActive(page)) {
+		ClearPageActive(page);
+	}
 
-	page_cache_get(page);
-	if (!pagevec_add(pvec, page))
-		__pagevec_lru_add_active(pvec);
-	put_cpu_var(lru_add_active_pvecs);
+	VM_BUG_ON(PageLRU(page) || PageActive(page));
+	__lru_cache_add(page, lru);
 }
 
 /*
@@ -217,15 +217,15 @@ void lru_cache_add_active(struct page *page)
  */
 static void drain_cpu_pagevecs(int cpu)
 {
+	struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
 	struct pagevec *pvec;
+	int lru;
 
-	pvec = &per_cpu(lru_add_pvecs, cpu);
-	if (pagevec_count(pvec))
-		__pagevec_lru_add(pvec);
-
-	pvec = &per_cpu(lru_add_active_pvecs, cpu);
-	if (pagevec_count(pvec))
-		__pagevec_lru_add_active(pvec);
+	for_each_lru(lru) {
+		pvec = &pvecs[lru - LRU_BASE];
+		if (pagevec_count(pvec))
+			____pagevec_lru_add(pvec, lru);
+	}
 
 	pvec = &per_cpu(lru_rotate_pvecs, cpu);
 	if (pagevec_count(pvec)) {
@@ -380,7 +380,7 @@ void __pagevec_release_nonlru(struct pagevec *pvec)
  * Add the passed pages to the LRU, then drop the caller's refcount
  * on them.  Reinitialises the caller's pagevec.
  */
-void __pagevec_lru_add(struct pagevec *pvec)
+void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
 	int i;
 	struct zone *zone = NULL;
@@ -397,7 +397,9 @@ void __pagevec_lru_add(struct pagevec *pvec)
 		}
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
-		add_page_to_inactive_list(zone, page);
+		if (is_active_lru(lru))
+			SetPageActive(page);
+		add_page_to_lru_list(zone, page, lru);
 	}
 	if (zone)
 		spin_unlock_irq(&zone->lru_lock);
@@ -405,34 +407,7 @@ void __pagevec_lru_add(struct pagevec *pvec)
 	pagevec_reinit(pvec);
 }
 
-EXPORT_SYMBOL(__pagevec_lru_add);
-
-void __pagevec_lru_add_active(struct pagevec *pvec)
-{
-	int i;
-	struct zone *zone = NULL;
-
-	for (i = 0; i < pagevec_count(pvec); i++) {
-		struct page *page = pvec->pages[i];
-		struct zone *pagezone = page_zone(page);
-
-		if (pagezone != zone) {
-			if (zone)
-				spin_unlock_irq(&zone->lru_lock);
-			zone = pagezone;
-			spin_lock_irq(&zone->lru_lock);
-		}
-		VM_BUG_ON(PageLRU(page));
-		SetPageLRU(page);
-		VM_BUG_ON(PageActive(page));
-		SetPageActive(page);
-		add_page_to_active_list(zone, page);
-	}
-	if (zone)
-		spin_unlock_irq(&zone->lru_lock);
-	release_pages(pvec->pages, pvec->nr, pvec->cold);
-	pagevec_reinit(pvec);
-}
+EXPORT_SYMBOL(____pagevec_lru_add);
 
 /*
  * Try to drop buffers from the pages in a pagevec
-- 
GitLab


From 68a22394c286a2daf06ee8d65d8835f738faefa5 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 18 Oct 2008 20:26:23 -0700
Subject: [PATCH 692/895] vmscan: free swap space on swap-in/activation

If vm_swap_full() (swap space more than 50% full), the system will free
swap space at swapin time.  With this patch, the system will also free the
swap space in the pageout code, when we decide that the page is not a
candidate for swapout (and just wasting swap space).

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: MinChan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagevec.h |  1 +
 include/linux/swap.h    |  6 ++++++
 mm/swap.c               | 24 ++++++++++++++++++++++++
 mm/swapfile.c           | 25 ++++++++++++++++++++++---
 mm/vmscan.c             |  7 +++++++
 5 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 6b8f11bcc948..fea3a982ee55 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -25,6 +25,7 @@ void __pagevec_release_nonlru(struct pagevec *pvec);
 void __pagevec_free(struct pagevec *pvec);
 void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
 void pagevec_strip(struct pagevec *pvec);
+void pagevec_swap_free(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t start, unsigned nr_pages);
 unsigned pagevec_lookup_tag(struct pagevec *pvec,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index fcc169610d09..833be56ad835 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -265,6 +265,7 @@ extern sector_t swapdev_block(int, pgoff_t);
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int can_share_swap_page(struct page *);
 extern int remove_exclusive_swap_page(struct page *);
+extern int remove_exclusive_swap_page_ref(struct page *);
 struct backing_dev_info;
 
 /* linux/mm/thrash.c */
@@ -353,6 +354,11 @@ static inline int remove_exclusive_swap_page(struct page *p)
 	return 0;
 }
 
+static inline int remove_exclusive_swap_page_ref(struct page *page)
+{
+	return 0;
+}
+
 static inline swp_entry_t get_swap_page(void)
 {
 	swp_entry_t entry;
diff --git a/mm/swap.c b/mm/swap.c
index e3045040dc3e..88a394872677 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -427,6 +427,30 @@ void pagevec_strip(struct pagevec *pvec)
 	}
 }
 
+/**
+ * pagevec_swap_free - try to free swap space from the pages in a pagevec
+ * @pvec: pagevec with swapcache pages to free the swap space of
+ *
+ * The caller needs to hold an extra reference to each page and
+ * not hold the page lock on the pages.  This function uses a
+ * trylock on the page lock so it may not always free the swap
+ * space associated with a page.
+ */
+void pagevec_swap_free(struct pagevec *pvec)
+{
+	int i;
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+
+		if (PageSwapCache(page) && trylock_page(page)) {
+			if (PageSwapCache(page))
+				remove_exclusive_swap_page_ref(page);
+			unlock_page(page);
+		}
+	}
+}
+
 /**
  * pagevec_lookup - gang pagecache lookup
  * @pvec:	Where the resulting pages are placed
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1e330f2998fa..2a97fafa3d89 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -344,7 +344,7 @@ int can_share_swap_page(struct page *page)
  * Work out if there are any other processes sharing this
  * swap cache page. Free it if you can. Return success.
  */
-int remove_exclusive_swap_page(struct page *page)
+static int remove_exclusive_swap_page_count(struct page *page, int count)
 {
 	int retval;
 	struct swap_info_struct * p;
@@ -357,7 +357,7 @@ int remove_exclusive_swap_page(struct page *page)
 		return 0;
 	if (PageWriteback(page))
 		return 0;
-	if (page_count(page) != 2) /* 2: us + cache */
+	if (page_count(page) != count) /* us + cache + ptes */
 		return 0;
 
 	entry.val = page_private(page);
@@ -370,7 +370,7 @@ int remove_exclusive_swap_page(struct page *page)
 	if (p->swap_map[swp_offset(entry)] == 1) {
 		/* Recheck the page count with the swapcache lock held.. */
 		spin_lock_irq(&swapper_space.tree_lock);
-		if ((page_count(page) == 2) && !PageWriteback(page)) {
+		if ((page_count(page) == count) && !PageWriteback(page)) {
 			__delete_from_swap_cache(page);
 			SetPageDirty(page);
 			retval = 1;
@@ -387,6 +387,25 @@ int remove_exclusive_swap_page(struct page *page)
 	return retval;
 }
 
+/*
+ * Most of the time the page should have two references: one for the
+ * process and one for the swap cache.
+ */
+int remove_exclusive_swap_page(struct page *page)
+{
+	return remove_exclusive_swap_page_count(page, 2);
+}
+
+/*
+ * The pageout code holds an extra reference to the page.  That raises
+ * the reference count to test for to 2 for a page that is only in the
+ * swap cache plus 1 for each process that maps the page.
+ */
+int remove_exclusive_swap_page_ref(struct page *page)
+{
+	return remove_exclusive_swap_page_count(page, 2 + page_mapcount(page));
+}
+
 /*
  * Free the swap entry like above, but also try to
  * free the page cache entry if it is the last user.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 46fdaa546b8d..e656035d3406 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -647,6 +647,9 @@ free_it:
 		continue;
 
 activate_locked:
+		/* Not a candidate for swapping, so reclaim swap space. */
+		if (PageSwapCache(page) && vm_swap_full())
+			remove_exclusive_swap_page_ref(page);
 		SetPageActive(page);
 		pgactivate++;
 keep_locked:
@@ -1228,6 +1231,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
 			pgmoved = 0;
 			spin_unlock_irq(&zone->lru_lock);
+			if (vm_swap_full())
+				pagevec_swap_free(&pvec);
 			__pagevec_release(&pvec);
 			spin_lock_irq(&zone->lru_lock);
 		}
@@ -1237,6 +1242,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	__count_vm_events(PGDEACTIVATE, pgdeactivate);
 	spin_unlock_irq(&zone->lru_lock);
+	if (vm_swap_full())
+		pagevec_swap_free(&pvec);
 
 	pagevec_release(&pvec);
 }
-- 
GitLab


From b2e185384f534781fd22f5ce170b2ad26f97df70 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 18 Oct 2008 20:26:30 -0700
Subject: [PATCH 693/895] define page_file_cache() function

Define page_file_cache() function to answer the question:
	is page backed by a file?

Originally part of Rik van Riel's split-lru patch.  Extracted to make
available for other, independent reclaim patches.

Moved inline function to linux/mm_inline.h where it will be needed by
subsequent "split LRU" and "noreclaim" patches.

Unfortunately this needs to use a page flag, since the PG_swapbacked state
needs to be preserved all the way to the point where the page is last
removed from the LRU.  Trying to derive the status from other info in the
page resulted in wrong VM statistics in earlier split VM patchsets.

The total number of page flags in use on a 32 bit machine after this patch
is 19.

[akpm@linux-foundation.org: fix up out-of-order merge fallout]
[hugh@veritas.com: splitlru: shmem_getpage SetPageSwapBacked sooner[
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: MinChan Kim <minchan.kim@gmail.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h  | 27 +++++++++++++++++++++++++++
 include/linux/page-flags.h |  8 ++++++--
 mm/memory.c                |  3 +++
 mm/migrate.c               |  2 ++
 mm/page_alloc.c            |  2 ++
 mm/shmem.c                 |  1 +
 mm/swap_state.c            |  3 +++
 7 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 2704729777ef..96e970485b6c 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -1,3 +1,28 @@
+#ifndef LINUX_MM_INLINE_H
+#define LINUX_MM_INLINE_H
+
+/**
+ * page_is_file_cache - should the page be on a file LRU or anon LRU?
+ * @page: the page to test
+ *
+ * Returns !0 if @page is page cache page backed by a regular filesystem,
+ * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
+ * Used by functions that manipulate the LRU lists, to sort a page
+ * onto the right LRU list.
+ *
+ * We would like to get this info without a page flag, but the state
+ * needs to survive until the page is last deleted from the LRU, which
+ * could be as far down as __page_cache_release.
+ */
+static inline int page_is_file_cache(struct page *page)
+{
+	if (PageSwapBacked(page))
+		return 0;
+
+	/* The page is page cache backed by a normal filesystem. */
+	return 1;
+}
+
 static inline void
 add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
@@ -65,3 +90,5 @@ static inline enum lru_list page_lru(struct page *page)
 
 	return lru;
 }
+
+#endif
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c74d3e875314..57b688cfb5e2 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -93,6 +93,7 @@ enum pageflags {
 	PG_mappedtodisk,	/* Has blocks allocated on-disk */
 	PG_reclaim,		/* To be reclaimed asap */
 	PG_buddy,		/* Page is free, on buddy lists */
+	PG_swapbacked,		/* Page is backed by RAM/swap */
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 	PG_uncached,		/* Page has been mapped as uncached */
 #endif
@@ -176,6 +177,7 @@ PAGEFLAG(SavePinned, savepinned);			/* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
+PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 
 __PAGEFLAG(SlobPage, slob_page)
 __PAGEFLAG(SlobFree, slob_free)
@@ -334,7 +336,8 @@ static inline void __ClearPageTail(struct page *page)
  * Flags checked in bad_page().  Pages on the free list should not have
  * these flags set.  It they are, there is a problem.
  */
-#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | 1 << PG_reclaim | 1 << PG_dirty)
+#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \
+		1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked)
 
 /*
  * Flags checked when a page is freed.  Pages being freed should not have
@@ -347,7 +350,8 @@ static inline void __ClearPageTail(struct page *page)
  * Pages being prepped should not have these flags set.  It they are, there
  * is a problem.
  */
-#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | 1 << PG_reserved | 1 << PG_dirty)
+#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \
+		1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked)
 
 #endif /* !__GENERATING_BOUNDS_H */
 #endif	/* PAGE_FLAGS_H */
diff --git a/mm/memory.c b/mm/memory.c
index 1002f473f497..7512933dcc10 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1888,6 +1888,7 @@ gotten:
 		ptep_clear_flush_notify(vma, address, page_table);
 		set_pte_at(mm, address, page_table, entry);
 		update_mmu_cache(vma, address, entry);
+		SetPageSwapBacked(new_page);
 		lru_cache_add_active(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
 
@@ -2382,6 +2383,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_none(*page_table))
 		goto release;
 	inc_mm_counter(mm, anon_rss);
+	SetPageSwapBacked(page);
 	lru_cache_add_active(page);
 	page_add_new_anon_rmap(page, vma, address);
 	set_pte_at(mm, address, page_table, entry);
@@ -2523,6 +2525,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		set_pte_at(mm, address, page_table, entry);
 		if (anon) {
                         inc_mm_counter(mm, anon_rss);
+			SetPageSwapBacked(page);
                         lru_cache_add_active(page);
                         page_add_new_anon_rmap(page, vma, address);
 		} else {
diff --git a/mm/migrate.c b/mm/migrate.c
index ad15b5ef2599..c07327487111 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -572,6 +572,8 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	/* Prepare mapping for the new page.*/
 	newpage->index = page->index;
 	newpage->mapping = page->mapping;
+	if (PageSwapBacked(page))
+		SetPageSwapBacked(newpage);
 
 	mapping = page_mapping(page);
 	if (!mapping)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ee7a96ef40dc..2099904d6cc4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -462,6 +462,8 @@ static inline int free_pages_check(struct page *page)
 		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
+	if (PageSwapBacked(page))
+		__ClearPageSwapBacked(page);
 	/*
 	 * For now, we report if PG_reserved was found set, but do not
 	 * clear it, and do not free the page.  But we shall soon need
diff --git a/mm/shmem.c b/mm/shmem.c
index d87958a5f03e..fd421ed703ed 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1367,6 +1367,7 @@ repeat:
 				error = -ENOMEM;
 				goto failed;
 			}
+			SetPageSwapBacked(filepage);
 
 			/* Precharge page while we can wait, compensate after */
 			error = mem_cgroup_cache_charge(filepage, current->mm,
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 797c3831cbec..7a3ece0b5a3b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -75,6 +75,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageSwapCache(page));
 	BUG_ON(PagePrivate(page));
+	BUG_ON(!PageSwapBacked(page));
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
 		page_cache_get(page);
@@ -303,6 +304,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * May fail (-ENOMEM) if radix-tree node allocation failed.
 		 */
 		set_page_locked(new_page);
+		SetPageSwapBacked(new_page);
 		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
 		if (likely(!err)) {
 			/*
@@ -312,6 +314,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			swap_readpage(NULL, new_page);
 			return new_page;
 		}
+		ClearPageSwapBacked(new_page);
 		clear_page_locked(new_page);
 		swap_free(entry);
 	} while (err != -ENOMEM);
-- 
GitLab


From 4f98a2fee8acdb4ac84545df98cccecfd130f8db Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 18 Oct 2008 20:26:32 -0700
Subject: [PATCH 694/895] vmscan: split LRU lists into anon & file sets

Split the LRU lists in two, one set for pages that are backed by real file
systems ("file") and one for pages that are backed by memory and swap
("anon").  The latter includes tmpfs.

The advantage of doing this is that the VM will not have to scan over lots
of anonymous pages (which we generally do not want to swap out), just to
find the page cache pages that it should evict.

This patch has the infrastructure and a basic policy to balance how much
we scan the anon lists and how much we scan the file lists.  The big
policy changes are in separate patches.

[lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset]
[kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru]
[kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page]
[hugh@veritas.com: memcg swapbacked pages active]
[hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED]
[akpm@linux-foundation.org: fix /proc/vmstat units]
[nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration]
[kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo]
[kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c         |  56 +++--
 fs/cifs/file.c              |   4 +-
 fs/nfs/dir.c                |   2 +-
 fs/ntfs/file.c              |   4 +-
 fs/proc/proc_misc.c         |  77 ++++---
 fs/ramfs/file-nommu.c       |   4 +-
 include/linux/backing-dev.h |  13 ++
 include/linux/memcontrol.h  |   2 +-
 include/linux/mm_inline.h   |  50 +++--
 include/linux/mmzone.h      |  47 +++-
 include/linux/pagevec.h     |  29 ++-
 include/linux/swap.h        |  20 +-
 include/linux/vmstat.h      |  10 +
 mm/filemap.c                |  22 +-
 mm/hugetlb.c                |  10 +-
 mm/memcontrol.c             |  88 ++++----
 mm/memory.c                 |   6 +-
 mm/page-writeback.c         |   8 +-
 mm/page_alloc.c             |  25 ++-
 mm/readahead.c              |   2 +-
 mm/shmem.c                  |   2 +-
 mm/swap.c                   |  14 +-
 mm/swap_state.c             |   4 +-
 mm/vmscan.c                 | 416 +++++++++++++++++++-----------------
 mm/vmstat.c                 |  14 +-
 25 files changed, 562 insertions(+), 367 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 5116b78c6325..fc7e9bf0cdbc 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -61,34 +61,44 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 	si_meminfo_node(&i, nid);
 
 	n = sprintf(buf, "\n"
-		       "Node %d MemTotal:     %8lu kB\n"
-		       "Node %d MemFree:      %8lu kB\n"
-		       "Node %d MemUsed:      %8lu kB\n"
-		       "Node %d Active:       %8lu kB\n"
-		       "Node %d Inactive:     %8lu kB\n"
+		       "Node %d MemTotal:       %8lu kB\n"
+		       "Node %d MemFree:        %8lu kB\n"
+		       "Node %d MemUsed:        %8lu kB\n"
+		       "Node %d Active:         %8lu kB\n"
+		       "Node %d Inactive:       %8lu kB\n"
+		       "Node %d Active(anon):   %8lu kB\n"
+		       "Node %d Inactive(anon): %8lu kB\n"
+		       "Node %d Active(file):   %8lu kB\n"
+		       "Node %d Inactive(file): %8lu kB\n"
 #ifdef CONFIG_HIGHMEM
-		       "Node %d HighTotal:    %8lu kB\n"
-		       "Node %d HighFree:     %8lu kB\n"
-		       "Node %d LowTotal:     %8lu kB\n"
-		       "Node %d LowFree:      %8lu kB\n"
+		       "Node %d HighTotal:      %8lu kB\n"
+		       "Node %d HighFree:       %8lu kB\n"
+		       "Node %d LowTotal:       %8lu kB\n"
+		       "Node %d LowFree:        %8lu kB\n"
 #endif
-		       "Node %d Dirty:        %8lu kB\n"
-		       "Node %d Writeback:    %8lu kB\n"
-		       "Node %d FilePages:    %8lu kB\n"
-		       "Node %d Mapped:       %8lu kB\n"
-		       "Node %d AnonPages:    %8lu kB\n"
-		       "Node %d PageTables:   %8lu kB\n"
-		       "Node %d NFS_Unstable: %8lu kB\n"
-		       "Node %d Bounce:       %8lu kB\n"
-		       "Node %d WritebackTmp: %8lu kB\n"
-		       "Node %d Slab:         %8lu kB\n"
-		       "Node %d SReclaimable: %8lu kB\n"
-		       "Node %d SUnreclaim:   %8lu kB\n",
+		       "Node %d Dirty:          %8lu kB\n"
+		       "Node %d Writeback:      %8lu kB\n"
+		       "Node %d FilePages:      %8lu kB\n"
+		       "Node %d Mapped:         %8lu kB\n"
+		       "Node %d AnonPages:      %8lu kB\n"
+		       "Node %d PageTables:     %8lu kB\n"
+		       "Node %d NFS_Unstable:   %8lu kB\n"
+		       "Node %d Bounce:         %8lu kB\n"
+		       "Node %d WritebackTmp:   %8lu kB\n"
+		       "Node %d Slab:           %8lu kB\n"
+		       "Node %d SReclaimable:   %8lu kB\n"
+		       "Node %d SUnreclaim:     %8lu kB\n",
 		       nid, K(i.totalram),
 		       nid, K(i.freeram),
 		       nid, K(i.totalram - i.freeram),
-		       nid, K(node_page_state(nid, NR_ACTIVE)),
-		       nid, K(node_page_state(nid, NR_INACTIVE)),
+		       nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
+				node_page_state(nid, NR_ACTIVE_FILE)),
+		       nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
+				node_page_state(nid, NR_INACTIVE_FILE)),
+		       nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
+		       nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
+		       nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
+		       nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
 #ifdef CONFIG_HIGHMEM
 		       nid, K(i.totalhigh),
 		       nid, K(i.freehigh),
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c4a8a0605125..62d8bd8f14c0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1791,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
 		SetPageUptodate(page);
 		unlock_page(page);
 		if (!pagevec_add(plru_pvec, page))
-			__pagevec_lru_add(plru_pvec);
+			__pagevec_lru_add_file(plru_pvec);
 		data += PAGE_CACHE_SIZE;
 	}
 	return;
@@ -1925,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		bytes_read = 0;
 	}
 
-	pagevec_lru_add(&lru_pvec);
+	pagevec_lru_add_file(&lru_pvec);
 
 /* need to free smb_read_data buf before exit */
 	if (smb_read_data) {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2ab70d46ecbc..efdba2e802d7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1517,7 +1517,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 	if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
 							GFP_KERNEL)) {
 		pagevec_add(&lru_pvec, page);
-		pagevec_lru_add(&lru_pvec);
+		pagevec_lru_add_file(&lru_pvec);
 		SetPageUptodate(page);
 		unlock_page(page);
 	} else
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index d020866d4232..3140a4429af1 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
 			pages[nr] = *cached_page;
 			page_cache_get(*cached_page);
 			if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
-				__pagevec_lru_add(lru_pvec);
+				__pagevec_lru_add_file(lru_pvec);
 			*cached_page = NULL;
 		}
 		index++;
@@ -2084,7 +2084,7 @@ err_out:
 						OSYNC_METADATA|OSYNC_DATA);
 		}
   	}
-	pagevec_lru_add(&lru_pvec);
+	pagevec_lru_add_file(&lru_pvec);
 	ntfs_debug("Done.  Returning %s (written 0x%lx, status %li).",
 			written ? "written" : "status", (unsigned long)written,
 			(long)status);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 59ea42e1ef03..b8edb2860557 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -136,6 +136,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 	unsigned long allowed;
 	struct vmalloc_info vmi;
 	long cached;
+	unsigned long pages[NR_LRU_LISTS];
+	int lru;
 
 /*
  * display in kilobytes.
@@ -154,51 +156,62 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 
 	get_vmalloc_info(&vmi);
 
+	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
+		pages[lru] = global_page_state(NR_LRU_BASE + lru);
+
 	/*
 	 * Tagged format, for easy grepping and expansion.
 	 */
 	len = sprintf(page,
-		"MemTotal:     %8lu kB\n"
-		"MemFree:      %8lu kB\n"
-		"Buffers:      %8lu kB\n"
-		"Cached:       %8lu kB\n"
-		"SwapCached:   %8lu kB\n"
-		"Active:       %8lu kB\n"
-		"Inactive:     %8lu kB\n"
+		"MemTotal:       %8lu kB\n"
+		"MemFree:        %8lu kB\n"
+		"Buffers:        %8lu kB\n"
+		"Cached:         %8lu kB\n"
+		"SwapCached:     %8lu kB\n"
+		"Active:         %8lu kB\n"
+		"Inactive:       %8lu kB\n"
+		"Active(anon):   %8lu kB\n"
+		"Inactive(anon): %8lu kB\n"
+		"Active(file):   %8lu kB\n"
+		"Inactive(file): %8lu kB\n"
 #ifdef CONFIG_HIGHMEM
-		"HighTotal:    %8lu kB\n"
-		"HighFree:     %8lu kB\n"
-		"LowTotal:     %8lu kB\n"
-		"LowFree:      %8lu kB\n"
+		"HighTotal:      %8lu kB\n"
+		"HighFree:       %8lu kB\n"
+		"LowTotal:       %8lu kB\n"
+		"LowFree:        %8lu kB\n"
 #endif
-		"SwapTotal:    %8lu kB\n"
-		"SwapFree:     %8lu kB\n"
-		"Dirty:        %8lu kB\n"
-		"Writeback:    %8lu kB\n"
-		"AnonPages:    %8lu kB\n"
-		"Mapped:       %8lu kB\n"
-		"Slab:         %8lu kB\n"
-		"SReclaimable: %8lu kB\n"
-		"SUnreclaim:   %8lu kB\n"
-		"PageTables:   %8lu kB\n"
+		"SwapTotal:      %8lu kB\n"
+		"SwapFree:       %8lu kB\n"
+		"Dirty:          %8lu kB\n"
+		"Writeback:      %8lu kB\n"
+		"AnonPages:      %8lu kB\n"
+		"Mapped:         %8lu kB\n"
+		"Slab:           %8lu kB\n"
+		"SReclaimable:   %8lu kB\n"
+		"SUnreclaim:     %8lu kB\n"
+		"PageTables:     %8lu kB\n"
 #ifdef CONFIG_QUICKLIST
-		"Quicklists:   %8lu kB\n"
+		"Quicklists:     %8lu kB\n"
 #endif
-		"NFS_Unstable: %8lu kB\n"
-		"Bounce:       %8lu kB\n"
-		"WritebackTmp: %8lu kB\n"
-		"CommitLimit:  %8lu kB\n"
-		"Committed_AS: %8lu kB\n"
-		"VmallocTotal: %8lu kB\n"
-		"VmallocUsed:  %8lu kB\n"
-		"VmallocChunk: %8lu kB\n",
+		"NFS_Unstable:   %8lu kB\n"
+		"Bounce:         %8lu kB\n"
+		"WritebackTmp:   %8lu kB\n"
+		"CommitLimit:    %8lu kB\n"
+		"Committed_AS:   %8lu kB\n"
+		"VmallocTotal:   %8lu kB\n"
+		"VmallocUsed:    %8lu kB\n"
+		"VmallocChunk:   %8lu kB\n",
 		K(i.totalram),
 		K(i.freeram),
 		K(i.bufferram),
 		K(cached),
 		K(total_swapcache_pages),
-		K(global_page_state(NR_ACTIVE)),
-		K(global_page_state(NR_INACTIVE)),
+		K(pages[LRU_ACTIVE_ANON]   + pages[LRU_ACTIVE_FILE]),
+		K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
+		K(pages[LRU_ACTIVE_ANON]),
+		K(pages[LRU_INACTIVE_ANON]),
+		K(pages[LRU_ACTIVE_FILE]),
+		K(pages[LRU_INACTIVE_FILE]),
 #ifdef CONFIG_HIGHMEM
 		K(i.totalhigh),
 		K(i.freehigh),
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5145cb9125af..76acdbc34611 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
 			goto add_error;
 
 		if (!pagevec_add(&lru_pvec, page))
-			__pagevec_lru_add(&lru_pvec);
+			__pagevec_lru_add_file(&lru_pvec);
 
 		unlock_page(page);
 	}
 
-	pagevec_lru_add(&lru_pvec);
+	pagevec_lru_add_file(&lru_pvec);
 	return 0;
 
  fsize_exceeded:
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0a24d5550eb3..bee52abb8a4d 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -175,6 +175,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
  * BDI_CAP_READ_MAP:       Can be mapped for reading
  * BDI_CAP_WRITE_MAP:      Can be mapped for writing
  * BDI_CAP_EXEC_MAP:       Can be mapped for execution
+ *
+ * BDI_CAP_SWAP_BACKED:    Count shmem/tmpfs objects as swap-backed.
  */
 #define BDI_CAP_NO_ACCT_DIRTY	0x00000001
 #define BDI_CAP_NO_WRITEBACK	0x00000002
@@ -184,6 +186,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 #define BDI_CAP_WRITE_MAP	0x00000020
 #define BDI_CAP_EXEC_MAP	0x00000040
 #define BDI_CAP_NO_ACCT_WB	0x00000080
+#define BDI_CAP_SWAP_BACKED	0x00000100
 
 #define BDI_CAP_VMFLAGS \
 	(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -248,6 +251,11 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
 				      BDI_CAP_NO_WRITEBACK));
 }
 
+static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
+{
+	return bdi->capabilities & BDI_CAP_SWAP_BACKED;
+}
+
 static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
 {
 	return bdi_cap_writeback_dirty(mapping->backing_dev_info);
@@ -258,4 +266,9 @@ static inline bool mapping_cap_account_dirty(struct address_space *mapping)
 	return bdi_cap_account_dirty(mapping->backing_dev_info);
 }
 
+static inline bool mapping_cap_swap_backed(struct address_space *mapping)
+{
+	return bdi_cap_swap_backed(mapping->backing_dev_info);
+}
+
 #endif		/* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a6ac0d491fe6..8d8f05c1515a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -44,7 +44,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					unsigned long *scanned, int order,
 					int mode, struct zone *z,
 					struct mem_cgroup *mem_cont,
-					int active);
+					int active, int file);
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 96e970485b6c..2eb599465d56 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -5,7 +5,7 @@
  * page_is_file_cache - should the page be on a file LRU or anon LRU?
  * @page: the page to test
  *
- * Returns !0 if @page is page cache page backed by a regular filesystem,
+ * Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
  * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
  * Used by functions that manipulate the LRU lists, to sort a page
  * onto the right LRU list.
@@ -20,7 +20,7 @@ static inline int page_is_file_cache(struct page *page)
 		return 0;
 
 	/* The page is page cache backed by a normal filesystem. */
-	return 1;
+	return LRU_FILE;
 }
 
 static inline void
@@ -38,39 +38,64 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 }
 
 static inline void
-add_page_to_active_list(struct zone *zone, struct page *page)
+add_page_to_inactive_anon_list(struct zone *zone, struct page *page)
 {
-	add_page_to_lru_list(zone, page, LRU_ACTIVE);
+	add_page_to_lru_list(zone, page, LRU_INACTIVE_ANON);
 }
 
 static inline void
-add_page_to_inactive_list(struct zone *zone, struct page *page)
+add_page_to_active_anon_list(struct zone *zone, struct page *page)
 {
-	add_page_to_lru_list(zone, page, LRU_INACTIVE);
+	add_page_to_lru_list(zone, page, LRU_ACTIVE_ANON);
 }
 
 static inline void
-del_page_from_active_list(struct zone *zone, struct page *page)
+add_page_to_inactive_file_list(struct zone *zone, struct page *page)
 {
-	del_page_from_lru_list(zone, page, LRU_ACTIVE);
+	add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE);
 }
 
 static inline void
-del_page_from_inactive_list(struct zone *zone, struct page *page)
+add_page_to_active_file_list(struct zone *zone, struct page *page)
 {
-	del_page_from_lru_list(zone, page, LRU_INACTIVE);
+	add_page_to_lru_list(zone, page, LRU_ACTIVE_FILE);
+}
+
+static inline void
+del_page_from_inactive_anon_list(struct zone *zone, struct page *page)
+{
+	del_page_from_lru_list(zone, page, LRU_INACTIVE_ANON);
+}
+
+static inline void
+del_page_from_active_anon_list(struct zone *zone, struct page *page)
+{
+	del_page_from_lru_list(zone, page, LRU_ACTIVE_ANON);
+}
+
+static inline void
+del_page_from_inactive_file_list(struct zone *zone, struct page *page)
+{
+	del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
+}
+
+static inline void
+del_page_from_active_file_list(struct zone *zone, struct page *page)
+{
+	del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
 }
 
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
-	enum lru_list l = LRU_INACTIVE;
+	enum lru_list l = LRU_BASE;
 
 	list_del(&page->lru);
 	if (PageActive(page)) {
 		__ClearPageActive(page);
-		l = LRU_ACTIVE;
+		l += LRU_ACTIVE;
 	}
+	l += page_is_file_cache(page);
 	__dec_zone_state(zone, NR_LRU_BASE + l);
 }
 
@@ -87,6 +112,7 @@ static inline enum lru_list page_lru(struct page *page)
 
 	if (PageActive(page))
 		lru += LRU_ACTIVE;
+	lru += page_is_file_cache(page);
 
 	return lru;
 }
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 156e18f3919b..59a4c8fd6ebd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -82,21 +82,23 @@ enum zone_stat_item {
 	/* First 128 byte cacheline (assuming 64 bit words) */
 	NR_FREE_PAGES,
 	NR_LRU_BASE,
-	NR_INACTIVE = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
-	NR_ACTIVE,	/*  "     "     "   "       "         */
+	NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+	NR_ACTIVE_ANON,		/*  "     "     "   "       "         */
+	NR_INACTIVE_FILE,	/*  "     "     "   "       "         */
+	NR_ACTIVE_FILE,		/*  "     "     "   "       "         */
 	NR_ANON_PAGES,	/* Mapped anonymous pages */
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
 			   only modified from process context */
 	NR_FILE_PAGES,
 	NR_FILE_DIRTY,
 	NR_WRITEBACK,
-	/* Second 128 byte cacheline */
 	NR_SLAB_RECLAIMABLE,
 	NR_SLAB_UNRECLAIMABLE,
 	NR_PAGETABLE,		/* used for pagetables */
 	NR_UNSTABLE_NFS,	/* NFS unstable pages */
 	NR_BOUNCE,
 	NR_VMSCAN_WRITE,
+	/* Second 128 byte cacheline */
 	NR_WRITEBACK_TEMP,	/* Writeback using temporary buffers */
 #ifdef CONFIG_NUMA
 	NUMA_HIT,		/* allocated in intended node */
@@ -108,17 +110,36 @@ enum zone_stat_item {
 #endif
 	NR_VM_ZONE_STAT_ITEMS };
 
+/*
+ * We do arithmetic on the LRU lists in various places in the code,
+ * so it is important to keep the active lists LRU_ACTIVE higher in
+ * the array than the corresponding inactive lists, and to keep
+ * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
+ *
+ * This has to be kept in sync with the statistics in zone_stat_item
+ * above and the descriptions in vmstat_text in mm/vmstat.c
+ */
+#define LRU_BASE 0
+#define LRU_ACTIVE 1
+#define LRU_FILE 2
+
 enum lru_list {
-	LRU_BASE,
-	LRU_INACTIVE=LRU_BASE,	/* must match order of NR_[IN]ACTIVE */
-	LRU_ACTIVE,		/*  "     "     "   "       "        */
+	LRU_INACTIVE_ANON = LRU_BASE,
+	LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
+	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
+	LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
 	NR_LRU_LISTS };
 
 #define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
 
+static inline int is_file_lru(enum lru_list l)
+{
+	return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
+}
+
 static inline int is_active_lru(enum lru_list l)
 {
-	return (l == LRU_ACTIVE);
+	return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
 }
 
 struct per_cpu_pages {
@@ -269,6 +290,18 @@ struct zone {
 		struct list_head list;
 		unsigned long nr_scan;
 	} lru[NR_LRU_LISTS];
+
+	/*
+	 * The pageout code in vmscan.c keeps track of how many of the
+	 * mem/swap backed and file backed pages are refeferenced.
+	 * The higher the rotated/scanned ratio, the more valuable
+	 * that cache is.
+	 *
+	 * The anon LRU stats live in [0], file LRU stats in [1]
+	 */
+	unsigned long		recent_rotated[2];
+	unsigned long		recent_scanned[2];
+
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	unsigned long		flags;		   /* zone flags, see below */
 
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index fea3a982ee55..5fc96a4e760f 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -81,20 +81,37 @@ static inline void pagevec_free(struct pagevec *pvec)
 		__pagevec_free(pvec);
 }
 
-static inline void __pagevec_lru_add(struct pagevec *pvec)
+static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_INACTIVE);
+	____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
 }
 
-static inline void __pagevec_lru_add_active(struct pagevec *pvec)
+static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_ACTIVE);
+	____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
 }
 
-static inline void pagevec_lru_add(struct pagevec *pvec)
+static inline void __pagevec_lru_add_file(struct pagevec *pvec)
+{
+	____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
+}
+
+static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
+{
+	____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
+}
+
+
+static inline void pagevec_lru_add_file(struct pagevec *pvec)
+{
+	if (pagevec_count(pvec))
+		__pagevec_lru_add_file(pvec);
+}
+
+static inline void pagevec_lru_add_anon(struct pagevec *pvec)
 {
 	if (pagevec_count(pvec))
-		__pagevec_lru_add(pvec);
+		__pagevec_lru_add_anon(pvec);
 }
 
 #endif /* _LINUX_PAGEVEC_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 833be56ad835..7d09d79997a4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -184,14 +184,24 @@ extern void swap_setup(void);
  * lru_cache_add: add a page to the page lists
  * @page: the page to add
  */
-static inline void lru_cache_add(struct page *page)
+static inline void lru_cache_add_anon(struct page *page)
 {
-	__lru_cache_add(page, LRU_INACTIVE);
+	__lru_cache_add(page, LRU_INACTIVE_ANON);
 }
 
-static inline void lru_cache_add_active(struct page *page)
+static inline void lru_cache_add_active_anon(struct page *page)
 {
-	__lru_cache_add(page, LRU_ACTIVE);
+	__lru_cache_add(page, LRU_ACTIVE_ANON);
+}
+
+static inline void lru_cache_add_file(struct page *page)
+{
+	__lru_cache_add(page, LRU_INACTIVE_FILE);
+}
+
+static inline void lru_cache_add_active_file(struct page *page)
+{
+	__lru_cache_add(page, LRU_ACTIVE_FILE);
 }
 
 /* linux/mm/vmscan.c */
@@ -199,7 +209,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
 							gfp_t gfp_mask);
-extern int __isolate_lru_page(struct page *page, int mode);
+extern int __isolate_lru_page(struct page *page, int mode, int file);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 58334d439516..ff5179f2b153 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -159,6 +159,16 @@ static inline unsigned long zone_page_state(struct zone *zone,
 	return x;
 }
 
+extern unsigned long global_lru_pages(void);
+
+static inline unsigned long zone_lru_pages(struct zone *zone)
+{
+	return (zone_page_state(zone, NR_ACTIVE_ANON)
+		+ zone_page_state(zone, NR_ACTIVE_FILE)
+		+ zone_page_state(zone, NR_INACTIVE_ANON)
+		+ zone_page_state(zone, NR_INACTIVE_FILE));
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Determine the per node value of a stat item. This function
diff --git a/mm/filemap.c b/mm/filemap.c
index 903bf316912a..a1ddd2557af2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include <linux/memcontrol.h>
+#include <linux/mm_inline.h> /* for page_is_file_cache() */
 #include "internal.h"
 
 /*
@@ -492,9 +493,24 @@ EXPORT_SYMBOL(add_to_page_cache_locked);
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 				pgoff_t offset, gfp_t gfp_mask)
 {
-	int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-	if (ret == 0)
-		lru_cache_add(page);
+	int ret;
+
+	/*
+	 * Splice_read and readahead add shmem/tmpfs pages into the page cache
+	 * before shmem_readpage has a chance to mark them as SwapBacked: they
+	 * need to go on the active_anon lru below, and mem_cgroup_cache_charge
+	 * (called in add_to_page_cache) needs to know where they're going too.
+	 */
+	if (mapping_cap_swap_backed(mapping))
+		SetPageSwapBacked(page);
+
+	ret = add_to_page_cache(page, mapping, offset, gfp_mask);
+	if (ret == 0) {
+		if (page_is_file_cache(page))
+			lru_cache_add_file(page);
+		else
+			lru_cache_add_active_anon(page);
+	}
 	return ret;
 }
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38633864a93e..2fc7fddd9b1f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1459,11 +1459,11 @@ int hugetlb_report_meminfo(char *buf)
 {
 	struct hstate *h = &default_hstate;
 	return sprintf(buf,
-			"HugePages_Total: %5lu\n"
-			"HugePages_Free:  %5lu\n"
-			"HugePages_Rsvd:  %5lu\n"
-			"HugePages_Surp:  %5lu\n"
-			"Hugepagesize:    %5lu kB\n",
+			"HugePages_Total:   %5lu\n"
+			"HugePages_Free:    %5lu\n"
+			"HugePages_Rsvd:    %5lu\n"
+			"HugePages_Surp:    %5lu\n"
+			"Hugepagesize:   %8lu kB\n",
 			h->nr_huge_pages,
 			h->free_huge_pages,
 			h->resv_huge_pages,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c0cbd7790c51..27e9e75f4eab 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -162,6 +162,7 @@ struct page_cgroup {
 };
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
 #define PAGE_CGROUP_FLAG_ACTIVE (0x2)	/* page is active in this cgroup */
+#define PAGE_CGROUP_FLAG_FILE	(0x4)	/* page is file system backed */
 
 static int page_cgroup_nid(struct page_cgroup *pc)
 {
@@ -177,6 +178,7 @@ enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
 	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
+	MEM_CGROUP_CHARGE_TYPE_SHMEM,	/* used by page migration of shmem */
 };
 
 /*
@@ -288,8 +290,12 @@ static void unlock_page_cgroup(struct page *page)
 static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 			struct page_cgroup *pc)
 {
-	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-	int lru = !!from;
+	int lru = LRU_BASE;
+
+	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
+		lru += LRU_ACTIVE;
+	if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+		lru += LRU_FILE;
 
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
@@ -300,10 +306,12 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 				struct page_cgroup *pc)
 {
-	int lru = LRU_INACTIVE;
+	int lru = LRU_BASE;
 
 	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
 		lru += LRU_ACTIVE;
+	if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+		lru += LRU_FILE;
 
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
 	list_add(&pc->lru, &mz->lists[lru]);
@@ -314,10 +322,9 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
 	struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
-	int lru = LRU_INACTIVE;
-
-	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
-		lru += LRU_ACTIVE;
+	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+	int file = pc->flags & PAGE_CGROUP_FLAG_FILE;
+	int lru = LRU_FILE * !!file + !!from;
 
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
@@ -326,7 +333,7 @@ static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 	else
 		pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
 
-	lru = !!active;
+	lru = LRU_FILE * !!file + !!active;
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
 	list_move(&pc->lru, &mz->lists[lru]);
 }
@@ -390,21 +397,6 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 	return (int)((rss * 100L) / total);
 }
 
-/*
- * This function is called from vmscan.c. In page reclaiming loop. balance
- * between active and inactive list is calculated. For memory controller
- * page reclaiming, we should use using mem_cgroup's imbalance rather than
- * zone's global lru imbalance.
- */
-long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
-{
-	unsigned long active, inactive;
-	/* active and inactive are the number of pages. 'long' is ok.*/
-	active = mem_cgroup_get_all_zonestat(mem, LRU_ACTIVE);
-	inactive = mem_cgroup_get_all_zonestat(mem, LRU_INACTIVE);
-	return (long) (active / (inactive + 1));
-}
-
 /*
  * prev_priority control...this will be used in memory reclaim path.
  */
@@ -450,7 +442,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					unsigned long *scanned, int order,
 					int mode, struct zone *z,
 					struct mem_cgroup *mem_cont,
-					int active)
+					int active, int file)
 {
 	unsigned long nr_taken = 0;
 	struct page *page;
@@ -461,7 +453,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	int nid = z->zone_pgdat->node_id;
 	int zid = zone_idx(z);
 	struct mem_cgroup_per_zone *mz;
-	int lru = !!active;
+	int lru = LRU_FILE * !!file + !!active;
 
 	BUG_ON(!mem_cont);
 	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
@@ -477,6 +469,9 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		if (unlikely(!PageLRU(page)))
 			continue;
 
+		/*
+		 * TODO: play better with lumpy reclaim, grabbing anything.
+		 */
 		if (PageActive(page) && !active) {
 			__mem_cgroup_move_lists(pc, true);
 			continue;
@@ -489,7 +484,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		scan++;
 		list_move(&pc->lru, &pc_list);
 
-		if (__isolate_lru_page(page, mode) == 0) {
+		if (__isolate_lru_page(page, mode, file) == 0) {
 			list_move(&page->lru, dst);
 			nr_taken++;
 		}
@@ -575,10 +570,16 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	 * If a page is accounted as a page cache, insert to inactive list.
 	 * If anon, insert to active list.
 	 */
-	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
+	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) {
 		pc->flags = PAGE_CGROUP_FLAG_CACHE;
-	else
+		if (page_is_file_cache(page))
+			pc->flags |= PAGE_CGROUP_FLAG_FILE;
+		else
+			pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
+	} else if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
 		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+	else /* MEM_CGROUP_CHARGE_TYPE_SHMEM */
+		pc->flags = PAGE_CGROUP_FLAG_CACHE | PAGE_CGROUP_FLAG_ACTIVE;
 
 	lock_page_cgroup(page);
 	if (unlikely(page_get_page_cgroup(page))) {
@@ -737,8 +738,12 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	if (pc) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
-		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
-			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+		if (pc->flags & PAGE_CGROUP_FLAG_CACHE) {
+			if (page_is_file_cache(page))
+				ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+			else
+				ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+		}
 	}
 	unlock_page_cgroup(page);
 	if (mem) {
@@ -982,14 +987,21 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	}
 	/* showing # of active pages */
 	{
-		unsigned long active, inactive;
-
-		inactive = mem_cgroup_get_all_zonestat(mem_cont,
-						LRU_INACTIVE);
-		active = mem_cgroup_get_all_zonestat(mem_cont,
-						LRU_ACTIVE);
-		cb->fill(cb, "active", (active) * PAGE_SIZE);
-		cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
+		unsigned long active_anon, inactive_anon;
+		unsigned long active_file, inactive_file;
+
+		inactive_anon = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_INACTIVE_ANON);
+		active_anon = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_ACTIVE_ANON);
+		inactive_file = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_INACTIVE_FILE);
+		active_file = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_ACTIVE_FILE);
+		cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE);
+		cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE);
+		cb->fill(cb, "active_file", (active_file) * PAGE_SIZE);
+		cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE);
 	}
 	return 0;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 7512933dcc10..71cdefd1ef14 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1889,7 +1889,7 @@ gotten:
 		set_pte_at(mm, address, page_table, entry);
 		update_mmu_cache(vma, address, entry);
 		SetPageSwapBacked(new_page);
-		lru_cache_add_active(new_page);
+		lru_cache_add_active_anon(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
 
 		if (old_page) {
@@ -2384,7 +2384,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto release;
 	inc_mm_counter(mm, anon_rss);
 	SetPageSwapBacked(page);
-	lru_cache_add_active(page);
+	lru_cache_add_active_anon(page);
 	page_add_new_anon_rmap(page, vma, address);
 	set_pte_at(mm, address, page_table, entry);
 
@@ -2526,7 +2526,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (anon) {
                         inc_mm_counter(mm, anon_rss);
 			SetPageSwapBacked(page);
-                        lru_cache_add_active(page);
+                        lru_cache_add_active_anon(page);
                         page_add_new_anon_rmap(page, vma, address);
 		} else {
 			inc_mm_counter(mm, file_rss);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b40f6d5f8fe9..2970e35fd03f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -329,9 +329,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 		struct zone *z =
 			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
-		x += zone_page_state(z, NR_FREE_PAGES)
-			+ zone_page_state(z, NR_INACTIVE)
-			+ zone_page_state(z, NR_ACTIVE);
+		x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
 	}
 	/*
 	 * Make sure that the number of highmem pages is never larger
@@ -355,9 +353,7 @@ unsigned long determine_dirtyable_memory(void)
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES)
-		+ global_page_state(NR_INACTIVE)
-		+ global_page_state(NR_ACTIVE);
+	x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2099904d6cc4..740a16a32c22 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1864,10 +1864,13 @@ void show_free_areas(void)
 		}
 	}
 
-	printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+	printk("Active_anon:%lu active_file:%lu inactive_anon%lu\n"
+		" inactive_file:%lu dirty:%lu writeback:%lu unstable:%lu\n"
 		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
-		global_page_state(NR_ACTIVE),
-		global_page_state(NR_INACTIVE),
+		global_page_state(NR_ACTIVE_ANON),
+		global_page_state(NR_ACTIVE_FILE),
+		global_page_state(NR_INACTIVE_ANON),
+		global_page_state(NR_INACTIVE_FILE),
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
@@ -1890,8 +1893,10 @@ void show_free_areas(void)
 			" min:%lukB"
 			" low:%lukB"
 			" high:%lukB"
-			" active:%lukB"
-			" inactive:%lukB"
+			" active_anon:%lukB"
+			" inactive_anon:%lukB"
+			" active_file:%lukB"
+			" inactive_file:%lukB"
 			" present:%lukB"
 			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
@@ -1901,8 +1906,10 @@ void show_free_areas(void)
 			K(zone->pages_min),
 			K(zone->pages_low),
 			K(zone->pages_high),
-			K(zone_page_state(zone, NR_ACTIVE)),
-			K(zone_page_state(zone, NR_INACTIVE)),
+			K(zone_page_state(zone, NR_ACTIVE_ANON)),
+			K(zone_page_state(zone, NR_INACTIVE_ANON)),
+			K(zone_page_state(zone, NR_ACTIVE_FILE)),
+			K(zone_page_state(zone, NR_INACTIVE_FILE)),
 			K(zone->present_pages),
 			zone->pages_scanned,
 			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
@@ -3472,6 +3479,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 			INIT_LIST_HEAD(&zone->lru[l].list);
 			zone->lru[l].nr_scan = 0;
 		}
+		zone->recent_rotated[0] = 0;
+		zone->recent_rotated[1] = 0;
+		zone->recent_scanned[0] = 0;
+		zone->recent_scanned[1] = 0;
 		zap_zone_vm_stats(zone);
 		zone->flags = 0;
 		if (!size)
diff --git a/mm/readahead.c b/mm/readahead.c
index 6cbd9a72fde2..bec83c15a78f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -229,7 +229,7 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
  */
 unsigned long max_sane_readahead(unsigned long nr)
 {
-	return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
+	return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
 		+ node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
 }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index fd421ed703ed..fc2ccf79a776 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -199,7 +199,7 @@ static struct vm_operations_struct shmem_vm_ops;
 
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
 	.ra_pages	= 0,	/* No readahead */
-	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
+	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
 	.unplug_io_fn	= default_unplug_io_fn,
 };
 
diff --git a/mm/swap.c b/mm/swap.c
index 88a394872677..0b1974a08974 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -116,7 +116,8 @@ static void pagevec_move_tail(struct pagevec *pvec)
 			spin_lock(&zone->lru_lock);
 		}
 		if (PageLRU(page) && !PageActive(page)) {
-			list_move_tail(&page->lru, &zone->lru[LRU_INACTIVE].list);
+			int lru = page_is_file_cache(page);
+			list_move_tail(&page->lru, &zone->lru[lru].list);
 			pgmoved++;
 		}
 	}
@@ -157,11 +158,18 @@ void activate_page(struct page *page)
 
 	spin_lock_irq(&zone->lru_lock);
 	if (PageLRU(page) && !PageActive(page)) {
-		del_page_from_inactive_list(zone, page);
+		int file = page_is_file_cache(page);
+		int lru = LRU_BASE + file;
+		del_page_from_lru_list(zone, page, lru);
+
 		SetPageActive(page);
-		add_page_to_active_list(zone, page);
+		lru += LRU_ACTIVE;
+		add_page_to_lru_list(zone, page, lru);
 		__count_vm_event(PGACTIVATE);
 		mem_cgroup_move_lists(page, true);
+
+		zone->recent_rotated[!!file]++;
+		zone->recent_scanned[!!file]++;
 	}
 	spin_unlock_irq(&zone->lru_lock);
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 7a3ece0b5a3b..ea62084ed402 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -33,7 +33,7 @@ static const struct address_space_operations swap_aops = {
 };
 
 static struct backing_dev_info swap_backing_dev_info = {
-	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
+	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
 	.unplug_io_fn	= swap_unplug_io_fn,
 };
 
@@ -310,7 +310,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			/*
 			 * Initiate read into locked page and return.
 			 */
-			lru_cache_add_active(new_page);
+			lru_cache_add_active_anon(new_page);
 			swap_readpage(NULL, new_page);
 			return new_page;
 		}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e656035d3406..d10d2f9a33f3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -78,7 +78,7 @@ struct scan_control {
 	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
 			unsigned long *scanned, int order, int mode,
 			struct zone *z, struct mem_cgroup *mem_cont,
-			int active);
+			int active, int file);
 };
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -680,7 +680,7 @@ keep:
  *
  * returns 0 on success, -ve errno on failure.
  */
-int __isolate_lru_page(struct page *page, int mode)
+int __isolate_lru_page(struct page *page, int mode, int file)
 {
 	int ret = -EINVAL;
 
@@ -696,6 +696,9 @@ int __isolate_lru_page(struct page *page, int mode)
 	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
 		return ret;
 
+	if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file))
+		return ret;
+
 	ret = -EBUSY;
 	if (likely(get_page_unless_zero(page))) {
 		/*
@@ -726,12 +729,13 @@ int __isolate_lru_page(struct page *page, int mode)
  * @scanned:	The number of pages that were scanned.
  * @order:	The caller's attempted allocation order
  * @mode:	One of the LRU isolation modes
+ * @file:	True [1] if isolating file [!anon] pages
  *
  * returns how many pages were moved onto *@dst.
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		struct list_head *src, struct list_head *dst,
-		unsigned long *scanned, int order, int mode)
+		unsigned long *scanned, int order, int mode, int file)
 {
 	unsigned long nr_taken = 0;
 	unsigned long scan;
@@ -748,7 +752,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		VM_BUG_ON(!PageLRU(page));
 
-		switch (__isolate_lru_page(page, mode)) {
+		switch (__isolate_lru_page(page, mode, file)) {
 		case 0:
 			list_move(&page->lru, dst);
 			nr_taken++;
@@ -791,10 +795,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 				break;
 
 			cursor_page = pfn_to_page(pfn);
+
 			/* Check that we have not crossed a zone boundary. */
 			if (unlikely(page_zone_id(cursor_page) != zone_id))
 				continue;
-			switch (__isolate_lru_page(cursor_page, mode)) {
+			switch (__isolate_lru_page(cursor_page, mode, file)) {
 			case 0:
 				list_move(&cursor_page->lru, dst);
 				nr_taken++;
@@ -819,30 +824,37 @@ static unsigned long isolate_pages_global(unsigned long nr,
 					unsigned long *scanned, int order,
 					int mode, struct zone *z,
 					struct mem_cgroup *mem_cont,
-					int active)
+					int active, int file)
 {
+	int lru = LRU_BASE;
 	if (active)
-		return isolate_lru_pages(nr, &z->lru[LRU_ACTIVE].list, dst,
-						scanned, order, mode);
-	else
-		return isolate_lru_pages(nr, &z->lru[LRU_INACTIVE].list, dst,
-						scanned, order, mode);
+		lru += LRU_ACTIVE;
+	if (file)
+		lru += LRU_FILE;
+	return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
+								mode, !!file);
 }
 
 /*
  * clear_active_flags() is a helper for shrink_active_list(), clearing
  * any active bits from the pages in the list.
  */
-static unsigned long clear_active_flags(struct list_head *page_list)
+static unsigned long clear_active_flags(struct list_head *page_list,
+					unsigned int *count)
 {
 	int nr_active = 0;
+	int lru;
 	struct page *page;
 
-	list_for_each_entry(page, page_list, lru)
+	list_for_each_entry(page, page_list, lru) {
+		lru = page_is_file_cache(page);
 		if (PageActive(page)) {
+			lru += LRU_ACTIVE;
 			ClearPageActive(page);
 			nr_active++;
 		}
+		count[lru]++;
+	}
 
 	return nr_active;
 }
@@ -880,12 +892,12 @@ int isolate_lru_page(struct page *page)
 
 		spin_lock_irq(&zone->lru_lock);
 		if (PageLRU(page) && get_page_unless_zero(page)) {
+			int lru = LRU_BASE;
 			ret = 0;
 			ClearPageLRU(page);
-			if (PageActive(page))
-				del_page_from_active_list(zone, page);
-			else
-				del_page_from_inactive_list(zone, page);
+
+			lru += page_is_file_cache(page) + !!PageActive(page);
+			del_page_from_lru_list(zone, page, lru);
 		}
 		spin_unlock_irq(&zone->lru_lock);
 	}
@@ -897,7 +909,7 @@ int isolate_lru_page(struct page *page)
  * of reclaimed pages
  */
 static unsigned long shrink_inactive_list(unsigned long max_scan,
-				struct zone *zone, struct scan_control *sc)
+			struct zone *zone, struct scan_control *sc, int file)
 {
 	LIST_HEAD(page_list);
 	struct pagevec pvec;
@@ -914,20 +926,32 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_scan;
 		unsigned long nr_freed;
 		unsigned long nr_active;
+		unsigned int count[NR_LRU_LISTS] = { 0, };
+		int mode = (sc->order > PAGE_ALLOC_COSTLY_ORDER) ?
+					ISOLATE_BOTH : ISOLATE_INACTIVE;
 
 		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
-			     &page_list, &nr_scan, sc->order,
-			     (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
-					     ISOLATE_BOTH : ISOLATE_INACTIVE,
-				zone, sc->mem_cgroup, 0);
-		nr_active = clear_active_flags(&page_list);
+			     &page_list, &nr_scan, sc->order, mode,
+				zone, sc->mem_cgroup, 0, file);
+		nr_active = clear_active_flags(&page_list, count);
 		__count_vm_events(PGDEACTIVATE, nr_active);
 
-		__mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
-		__mod_zone_page_state(zone, NR_INACTIVE,
-						-(nr_taken - nr_active));
-		if (scan_global_lru(sc))
+		__mod_zone_page_state(zone, NR_ACTIVE_FILE,
+						-count[LRU_ACTIVE_FILE]);
+		__mod_zone_page_state(zone, NR_INACTIVE_FILE,
+						-count[LRU_INACTIVE_FILE]);
+		__mod_zone_page_state(zone, NR_ACTIVE_ANON,
+						-count[LRU_ACTIVE_ANON]);
+		__mod_zone_page_state(zone, NR_INACTIVE_ANON,
+						-count[LRU_INACTIVE_ANON]);
+
+		if (scan_global_lru(sc)) {
 			zone->pages_scanned += nr_scan;
+			zone->recent_scanned[0] += count[LRU_INACTIVE_ANON];
+			zone->recent_scanned[0] += count[LRU_ACTIVE_ANON];
+			zone->recent_scanned[1] += count[LRU_INACTIVE_FILE];
+			zone->recent_scanned[1] += count[LRU_ACTIVE_FILE];
+		}
 		spin_unlock_irq(&zone->lru_lock);
 
 		nr_scanned += nr_scan;
@@ -947,7 +971,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 			 * The attempt at page out may have made some
 			 * of the pages active, mark them inactive again.
 			 */
-			nr_active = clear_active_flags(&page_list);
+			nr_active = clear_active_flags(&page_list, count);
 			count_vm_events(PGDEACTIVATE, nr_active);
 
 			nr_freed += shrink_page_list(&page_list, sc,
@@ -977,6 +1001,10 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 			SetPageLRU(page);
 			list_del(&page->lru);
 			add_page_to_lru_list(zone, page, page_lru(page));
+			if (PageActive(page) && scan_global_lru(sc)) {
+				int file = !!page_is_file_cache(page);
+				zone->recent_rotated[file]++;
+			}
 			if (!pagevec_add(&pvec, page)) {
 				spin_unlock_irq(&zone->lru_lock);
 				__pagevec_release(&pvec);
@@ -1007,115 +1035,7 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
 
 static inline int zone_is_near_oom(struct zone *zone)
 {
-	return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE)
-				+ zone_page_state(zone, NR_INACTIVE))*3;
-}
-
-/*
- * Determine we should try to reclaim mapped pages.
- * This is called only when sc->mem_cgroup is NULL.
- */
-static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
-				int priority)
-{
-	long mapped_ratio;
-	long distress;
-	long swap_tendency;
-	long imbalance;
-	int reclaim_mapped = 0;
-	int prev_priority;
-
-	if (scan_global_lru(sc) && zone_is_near_oom(zone))
-		return 1;
-	/*
-	 * `distress' is a measure of how much trouble we're having
-	 * reclaiming pages.  0 -> no problems.  100 -> great trouble.
-	 */
-	if (scan_global_lru(sc))
-		prev_priority = zone->prev_priority;
-	else
-		prev_priority = mem_cgroup_get_reclaim_priority(sc->mem_cgroup);
-
-	distress = 100 >> min(prev_priority, priority);
-
-	/*
-	 * The point of this algorithm is to decide when to start
-	 * reclaiming mapped memory instead of just pagecache.  Work out
-	 * how much memory
-	 * is mapped.
-	 */
-	if (scan_global_lru(sc))
-		mapped_ratio = ((global_page_state(NR_FILE_MAPPED) +
-				global_page_state(NR_ANON_PAGES)) * 100) /
-					vm_total_pages;
-	else
-		mapped_ratio = mem_cgroup_calc_mapped_ratio(sc->mem_cgroup);
-
-	/*
-	 * Now decide how much we really want to unmap some pages.  The
-	 * mapped ratio is downgraded - just because there's a lot of
-	 * mapped memory doesn't necessarily mean that page reclaim
-	 * isn't succeeding.
-	 *
-	 * The distress ratio is important - we don't want to start
-	 * going oom.
-	 *
-	 * A 100% value of vm_swappiness overrides this algorithm
-	 * altogether.
-	 */
-	swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
-
-	/*
-	 * If there's huge imbalance between active and inactive
-	 * (think active 100 times larger than inactive) we should
-	 * become more permissive, or the system will take too much
-	 * cpu before it start swapping during memory pressure.
-	 * Distress is about avoiding early-oom, this is about
-	 * making swappiness graceful despite setting it to low
-	 * values.
-	 *
-	 * Avoid div by zero with nr_inactive+1, and max resulting
-	 * value is vm_total_pages.
-	 */
-	if (scan_global_lru(sc)) {
-		imbalance  = zone_page_state(zone, NR_ACTIVE);
-		imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
-	} else
-		imbalance = mem_cgroup_reclaim_imbalance(sc->mem_cgroup);
-
-	/*
-	 * Reduce the effect of imbalance if swappiness is low,
-	 * this means for a swappiness very low, the imbalance
-	 * must be much higher than 100 for this logic to make
-	 * the difference.
-	 *
-	 * Max temporary value is vm_total_pages*100.
-	 */
-	imbalance *= (vm_swappiness + 1);
-	imbalance /= 100;
-
-	/*
-	 * If not much of the ram is mapped, makes the imbalance
-	 * less relevant, it's high priority we refill the inactive
-	 * list with mapped pages only in presence of high ratio of
-	 * mapped pages.
-	 *
-	 * Max temporary value is vm_total_pages*100.
-	 */
-	imbalance *= mapped_ratio;
-	imbalance /= 100;
-
-	/* apply imbalance feedback to swap_tendency */
-	swap_tendency += imbalance;
-
-	/*
-	 * Now use this metric to decide whether to start moving mapped
-	 * memory onto the inactive list.
-	 */
-	if (swap_tendency >= 100)
-		reclaim_mapped = 1;
-
-	return reclaim_mapped;
+	return zone->pages_scanned >= (zone_lru_pages(zone) * 3);
 }
 
 /*
@@ -1138,7 +1058,7 @@ static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
 
 
 static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
-				struct scan_control *sc, int priority)
+			struct scan_control *sc, int priority, int file)
 {
 	unsigned long pgmoved;
 	int pgdeactivate = 0;
@@ -1148,43 +1068,42 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	LIST_HEAD(l_inactive);
 	struct page *page;
 	struct pagevec pvec;
-	int reclaim_mapped = 0;
-
-	if (sc->may_swap)
-		reclaim_mapped = calc_reclaim_mapped(sc, zone, priority);
+	enum lru_list lru;
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
 					ISOLATE_ACTIVE, zone,
-					sc->mem_cgroup, 1);
+					sc->mem_cgroup, 1, file);
 	/*
 	 * zone->pages_scanned is used for detect zone's oom
 	 * mem_cgroup remembers nr_scan by itself.
 	 */
-	if (scan_global_lru(sc))
+	if (scan_global_lru(sc)) {
 		zone->pages_scanned += pgscanned;
+		zone->recent_scanned[!!file] += pgmoved;
+	}
 
-	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
+	if (file)
+		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
+	else
+		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
-		if (page_mapped(page)) {
-			if (!reclaim_mapped ||
-			    (total_swap_pages == 0 && PageAnon(page)) ||
-			    page_referenced(page, 0, sc->mem_cgroup)) {
-				list_add(&page->lru, &l_active);
-				continue;
-			}
-		}
 		list_add(&page->lru, &l_inactive);
 	}
 
+	/*
+	 * Now put the pages back on the appropriate [file or anon] inactive
+	 * and active lists.
+	 */
 	pagevec_init(&pvec, 1);
 	pgmoved = 0;
+	lru = LRU_BASE + file * LRU_FILE;
 	spin_lock_irq(&zone->lru_lock);
 	while (!list_empty(&l_inactive)) {
 		page = lru_to_page(&l_inactive);
@@ -1194,11 +1113,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		VM_BUG_ON(!PageActive(page));
 		ClearPageActive(page);
 
-		list_move(&page->lru, &zone->lru[LRU_INACTIVE].list);
+		list_move(&page->lru, &zone->lru[lru].list);
 		mem_cgroup_move_lists(page, false);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
-			__mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
+			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
 			spin_unlock_irq(&zone->lru_lock);
 			pgdeactivate += pgmoved;
 			pgmoved = 0;
@@ -1208,7 +1127,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			spin_lock_irq(&zone->lru_lock);
 		}
 	}
-	__mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
 	pgdeactivate += pgmoved;
 	if (buffer_heads_over_limit) {
 		spin_unlock_irq(&zone->lru_lock);
@@ -1217,6 +1136,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	}
 
 	pgmoved = 0;
+	lru = LRU_ACTIVE + file * LRU_FILE;
 	while (!list_empty(&l_active)) {
 		page = lru_to_page(&l_active);
 		prefetchw_prev_lru_page(page, &l_active, flags);
@@ -1224,11 +1144,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		SetPageLRU(page);
 		VM_BUG_ON(!PageActive(page));
 
-		list_move(&page->lru, &zone->lru[LRU_ACTIVE].list);
+		list_move(&page->lru, &zone->lru[lru].list);
 		mem_cgroup_move_lists(page, true);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
-			__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
+			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
 			pgmoved = 0;
 			spin_unlock_irq(&zone->lru_lock);
 			if (vm_swap_full())
@@ -1237,7 +1157,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			spin_lock_irq(&zone->lru_lock);
 		}
 	}
-	__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
+	zone->recent_rotated[!!file] += pgmoved;
 
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	__count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -1248,16 +1169,103 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	pagevec_release(&pvec);
 }
 
-static unsigned long shrink_list(enum lru_list l, unsigned long nr_to_scan,
+static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 	struct zone *zone, struct scan_control *sc, int priority)
 {
-	if (l == LRU_ACTIVE) {
-		shrink_active_list(nr_to_scan, zone, sc, priority);
+	int file = is_file_lru(lru);
+
+	if (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE) {
+		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
-	return shrink_inactive_list(nr_to_scan, zone, sc);
+	return shrink_inactive_list(nr_to_scan, zone, sc, file);
+}
+
+/*
+ * Determine how aggressively the anon and file LRU lists should be
+ * scanned.  The relative value of each set of LRU lists is determined
+ * by looking at the fraction of the pages scanned we did rotate back
+ * onto the active list instead of evict.
+ *
+ * percent[0] specifies how much pressure to put on ram/swap backed
+ * memory, while percent[1] determines pressure on the file LRUs.
+ */
+static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
+					unsigned long *percent)
+{
+	unsigned long anon, file, free;
+	unsigned long anon_prio, file_prio;
+	unsigned long ap, fp;
+
+	anon  = zone_page_state(zone, NR_ACTIVE_ANON) +
+		zone_page_state(zone, NR_INACTIVE_ANON);
+	file  = zone_page_state(zone, NR_ACTIVE_FILE) +
+		zone_page_state(zone, NR_INACTIVE_FILE);
+	free  = zone_page_state(zone, NR_FREE_PAGES);
+
+	/* If we have no swap space, do not bother scanning anon pages. */
+	if (nr_swap_pages <= 0) {
+		percent[0] = 0;
+		percent[1] = 100;
+		return;
+	}
+
+	/* If we have very few page cache pages, force-scan anon pages. */
+	if (unlikely(file + free <= zone->pages_high)) {
+		percent[0] = 100;
+		percent[1] = 0;
+		return;
+	}
+
+	/*
+	 * OK, so we have swap space and a fair amount of page cache
+	 * pages.  We use the recently rotated / recently scanned
+	 * ratios to determine how valuable each cache is.
+	 *
+	 * Because workloads change over time (and to avoid overflow)
+	 * we keep these statistics as a floating average, which ends
+	 * up weighing recent references more than old ones.
+	 *
+	 * anon in [0], file in [1]
+	 */
+	if (unlikely(zone->recent_scanned[0] > anon / 4)) {
+		spin_lock_irq(&zone->lru_lock);
+		zone->recent_scanned[0] /= 2;
+		zone->recent_rotated[0] /= 2;
+		spin_unlock_irq(&zone->lru_lock);
+	}
+
+	if (unlikely(zone->recent_scanned[1] > file / 4)) {
+		spin_lock_irq(&zone->lru_lock);
+		zone->recent_scanned[1] /= 2;
+		zone->recent_rotated[1] /= 2;
+		spin_unlock_irq(&zone->lru_lock);
+	}
+
+	/*
+	 * With swappiness at 100, anonymous and file have the same priority.
+	 * This scanning priority is essentially the inverse of IO cost.
+	 */
+	anon_prio = sc->swappiness;
+	file_prio = 200 - sc->swappiness;
+
+	/*
+	 *                  anon       recent_rotated[0]
+	 * %anon = 100 * ----------- / ----------------- * IO cost
+	 *               anon + file      rotate_sum
+	 */
+	ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
+	ap /= zone->recent_rotated[0] + 1;
+
+	fp = (file_prio + 1) * (zone->recent_scanned[1] + 1);
+	fp /= zone->recent_rotated[1] + 1;
+
+	/* Normalize to percentages */
+	percent[0] = 100 * ap / (ap + fp + 1);
+	percent[1] = 100 - percent[0];
 }
 
+
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
@@ -1267,36 +1275,43 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
 	unsigned long nr_reclaimed = 0;
+	unsigned long percent[2];	/* anon @ 0; file @ 1 */
 	enum lru_list l;
 
-	if (scan_global_lru(sc)) {
-		/*
-		 * Add one to nr_to_scan just to make sure that the kernel
-		 * will slowly sift through the active list.
-		 */
-		for_each_lru(l) {
-			zone->lru[l].nr_scan += (zone_page_state(zone,
-					NR_LRU_BASE + l)  >> priority) + 1;
+	get_scan_ratio(zone, sc, percent);
+
+	for_each_lru(l) {
+		if (scan_global_lru(sc)) {
+			int file = is_file_lru(l);
+			int scan;
+			/*
+			 * Add one to nr_to_scan just to make sure that the
+			 * kernel will slowly sift through each list.
+			 */
+			scan = zone_page_state(zone, NR_LRU_BASE + l);
+			if (priority) {
+				scan >>= priority;
+				scan = (scan * percent[file]) / 100;
+			}
+			zone->lru[l].nr_scan += scan + 1;
 			nr[l] = zone->lru[l].nr_scan;
 			if (nr[l] >= sc->swap_cluster_max)
 				zone->lru[l].nr_scan = 0;
 			else
 				nr[l] = 0;
+		} else {
+			/*
+			 * This reclaim occurs not because zone memory shortage
+			 * but because memory controller hits its limit.
+			 * Don't modify zone reclaim related data.
+			 */
+			nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone,
+								priority, l);
 		}
-	} else {
-		/*
-		 * This reclaim occurs not because zone memory shortage but
-		 * because memory controller hits its limit.
-		 * Then, don't modify zone reclaim related data.
-		 */
-		nr[LRU_ACTIVE] = mem_cgroup_calc_reclaim(sc->mem_cgroup,
-					zone, priority, LRU_ACTIVE);
-
-		nr[LRU_INACTIVE] = mem_cgroup_calc_reclaim(sc->mem_cgroup,
-					zone, priority, LRU_INACTIVE);
 	}
 
-	while (nr[LRU_ACTIVE] || nr[LRU_INACTIVE]) {
+	while (nr[LRU_ACTIVE_ANON] || nr[LRU_INACTIVE_ANON] ||
+			nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) {
 		for_each_lru(l) {
 			if (nr[l]) {
 				nr_to_scan = min(nr[l],
@@ -1369,7 +1384,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 
 	return nr_reclaimed;
 }
- 
+
 /*
  * This is the main entry point to direct page reclaim.
  *
@@ -1412,8 +1427,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
 
-			lru_pages += zone_page_state(zone, NR_ACTIVE)
-					+ zone_page_state(zone, NR_INACTIVE);
+			lru_pages += zone_lru_pages(zone);
 		}
 	}
 
@@ -1615,8 +1629,7 @@ loop_again:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 
-			lru_pages += zone_page_state(zone, NR_ACTIVE)
-					+ zone_page_state(zone, NR_INACTIVE);
+			lru_pages += zone_lru_pages(zone);
 		}
 
 		/*
@@ -1660,8 +1673,7 @@ loop_again:
 			if (zone_is_all_unreclaimable(zone))
 				continue;
 			if (nr_slab == 0 && zone->pages_scanned >=
-				(zone_page_state(zone, NR_ACTIVE)
-				+ zone_page_state(zone, NR_INACTIVE)) * 6)
+						(zone_lru_pages(zone) * 6))
 					zone_set_flag(zone,
 						      ZONE_ALL_UNRECLAIMABLE);
 			/*
@@ -1715,7 +1727,7 @@ out:
 
 /*
  * The background pageout daemon, started as a kernel thread
- * from the init process. 
+ * from the init process.
  *
  * This basically trickles out pages so that we have _some_
  * free memory available even if there is no other activity
@@ -1809,6 +1821,14 @@ void wakeup_kswapd(struct zone *zone, int order)
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
+unsigned long global_lru_pages(void)
+{
+	return global_page_state(NR_ACTIVE_ANON)
+		+ global_page_state(NR_ACTIVE_FILE)
+		+ global_page_state(NR_INACTIVE_ANON)
+		+ global_page_state(NR_INACTIVE_FILE);
+}
+
 #ifdef CONFIG_PM
 /*
  * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
@@ -1834,7 +1854,8 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 
 		for_each_lru(l) {
 			/* For pass = 0 we don't shrink the active list */
-			if (pass == 0 && l == LRU_ACTIVE)
+			if (pass == 0 &&
+				(l == LRU_ACTIVE || l == LRU_ACTIVE_FILE))
 				continue;
 
 			zone->lru[l].nr_scan +=
@@ -1856,11 +1877,6 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 	return ret;
 }
 
-static unsigned long count_lru_pages(void)
-{
-	return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
-}
-
 /*
  * Try to free `nr_pages' of memory, system-wide, and return the number of
  * freed pages.
@@ -1886,7 +1902,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
 	current->reclaim_state = &reclaim_state;
 
-	lru_pages = count_lru_pages();
+	lru_pages = global_lru_pages();
 	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
 	/* If slab caches are huge, it's better to hit them first */
 	while (nr_slab >= lru_pages) {
@@ -1929,7 +1945,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
 			reclaim_state.reclaimed_slab = 0;
 			shrink_slab(sc.nr_scanned, sc.gfp_mask,
-					count_lru_pages());
+					global_lru_pages());
 			ret += reclaim_state.reclaimed_slab;
 			if (ret >= nr_pages)
 				goto out;
@@ -1946,7 +1962,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 	if (!ret) {
 		do {
 			reclaim_state.reclaimed_slab = 0;
-			shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages());
+			shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
 			ret += reclaim_state.reclaimed_slab;
 		} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
 	}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 52c0335c1b71..27400b7da7c4 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -619,8 +619,10 @@ const struct seq_operations pagetypeinfo_op = {
 static const char * const vmstat_text[] = {
 	/* Zoned VM counters */
 	"nr_free_pages",
-	"nr_inactive",
-	"nr_active",
+	"nr_inactive_anon",
+	"nr_active_anon",
+	"nr_inactive_file",
+	"nr_active_file",
 	"nr_anon_pages",
 	"nr_mapped",
 	"nr_file_pages",
@@ -688,7 +690,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   "\n        min      %lu"
 		   "\n        low      %lu"
 		   "\n        high     %lu"
-		   "\n        scanned  %lu (a: %lu i: %lu)"
+		   "\n        scanned  %lu (aa: %lu ia: %lu af: %lu if: %lu)"
 		   "\n        spanned  %lu"
 		   "\n        present  %lu",
 		   zone_page_state(zone, NR_FREE_PAGES),
@@ -696,8 +698,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   zone->pages_low,
 		   zone->pages_high,
 		   zone->pages_scanned,
-		   zone->lru[LRU_ACTIVE].nr_scan,
-		   zone->lru[LRU_INACTIVE].nr_scan,
+		   zone->lru[LRU_ACTIVE_ANON].nr_scan,
+		   zone->lru[LRU_INACTIVE_ANON].nr_scan,
+		   zone->lru[LRU_ACTIVE_FILE].nr_scan,
+		   zone->lru[LRU_INACTIVE_FILE].nr_scan,
 		   zone->spanned_pages,
 		   zone->present_pages);
 
-- 
GitLab


From 556adecba110bf5f1db6c6b56416cfab5bcab698 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 18 Oct 2008 20:26:34 -0700
Subject: [PATCH 695/895] vmscan: second chance replacement for anonymous pages

We avoid evicting and scanning anonymous pages for the most part, but
under some workloads we can end up with most of memory filled with
anonymous pages.  At that point, we suddenly need to clear the referenced
bits on all of memory, which can take ages on very large memory systems.

We can reduce the maximum number of pages that need to be scanned by not
taking the referenced state into account when deactivating an anonymous
page.  After all, every anonymous page starts out referenced, so why
check?

If an anonymous page gets referenced again before it reaches the end of
the inactive list, we move it back to the active list.

To keep the maximum amount of necessary work reasonable, we scale the
active to inactive ratio with the size of memory, using the formula
active:inactive ratio = sqrt(memory in GB * 10).

Kswapd CPU use now seems to scale by the amount of pageout bandwidth,
instead of by the amount of memory present in the system.

[kamezawa.hiroyu@jp.fujitsu.com: fix OOM with memcg]
[kamezawa.hiroyu@jp.fujitsu.com: memcg: lru scan fix]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h | 19 ++++++++++++++++++
 include/linux/mmzone.h    |  6 ++++++
 mm/page_alloc.c           | 41 +++++++++++++++++++++++++++++++++++++++
 mm/vmscan.c               | 38 ++++++++++++++++++++++++++++++++----
 mm/vmstat.c               |  6 ++++--
 5 files changed, 104 insertions(+), 6 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 2eb599465d56..f451fedd1e75 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -117,4 +117,23 @@ static inline enum lru_list page_lru(struct page *page)
 	return lru;
 }
 
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ *
+ * Returns true if the zone does not have enough inactive anon pages,
+ * meaning some active anon pages need to be deactivated.
+ */
+static inline int inactive_anon_is_low(struct zone *zone)
+{
+	unsigned long active, inactive;
+
+	active = zone_page_state(zone, NR_ACTIVE_ANON);
+	inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+
+	if (inactive * zone->inactive_ratio < active)
+		return 1;
+
+	return 0;
+}
 #endif
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 59a4c8fd6ebd..9c5111f49a32 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -323,6 +323,12 @@ struct zone {
 	 */
 	int prev_priority;
 
+	/*
+	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+	 * this zone's LRU.  Maintained by the pageout code.
+	 */
+	unsigned int inactive_ratio;
+
 
 	ZONE_PADDING(_pad2_)
 	/* Rarely used or read-mostly fields */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 740a16a32c22..79c0981b1d32 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4263,6 +4263,46 @@ void setup_per_zone_pages_min(void)
 	calculate_totalreserve_pages();
 }
 
+/**
+ * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
+ *
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
+ * INACTIVE_ANON pages on this zone's LRU, maintained by the
+ * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
+ * the anonymous pages are kept on the inactive list.
+ *
+ * total     target    max
+ * memory    ratio     inactive anon
+ * -------------------------------------
+ *   10MB       1         5MB
+ *  100MB       1        50MB
+ *    1GB       3       250MB
+ *   10GB      10       0.9GB
+ *  100GB      31         3GB
+ *    1TB     101        10GB
+ *   10TB     320        32GB
+ */
+void setup_per_zone_inactive_ratio(void)
+{
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		unsigned int gb, ratio;
+
+		/* Zone size in gigabytes */
+		gb = zone->present_pages >> (30 - PAGE_SHIFT);
+		ratio = int_sqrt(10 * gb);
+		if (!ratio)
+			ratio = 1;
+
+		zone->inactive_ratio = ratio;
+	}
+}
+
 /*
  * Initialise min_free_kbytes.
  *
@@ -4300,6 +4340,7 @@ static int __init init_per_zone_pages_min(void)
 		min_free_kbytes = 65536;
 	setup_per_zone_pages_min();
 	setup_per_zone_lowmem_reserve();
+	setup_per_zone_inactive_ratio();
 	return 0;
 }
 module_init(init_per_zone_pages_min)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d10d2f9a33f3..c82ee9a33cfc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1090,6 +1090,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
+	pgmoved = 0;
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1097,6 +1098,13 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		list_add(&page->lru, &l_inactive);
 	}
 
+	/*
+	 * Count the referenced pages as rotated, even when they are moved
+	 * to the inactive list.  This helps balance scan pressure between
+	 * file and anonymous pages in get_scan_ratio.
+ 	 */
+	zone->recent_rotated[!!file] += pgmoved;
+
 	/*
 	 * Now put the pages back on the appropriate [file or anon] inactive
 	 * and active lists.
@@ -1158,7 +1166,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		}
 	}
 	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-	zone->recent_rotated[!!file] += pgmoved;
 
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	__count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -1174,7 +1181,13 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 {
 	int file = is_file_lru(lru);
 
-	if (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE) {
+	if (lru == LRU_ACTIVE_FILE) {
+		shrink_active_list(nr_to_scan, zone, sc, priority, file);
+		return 0;
+	}
+
+	if (lru == LRU_ACTIVE_ANON &&
+	    (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
@@ -1310,8 +1323,8 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 		}
 	}
 
-	while (nr[LRU_ACTIVE_ANON] || nr[LRU_INACTIVE_ANON] ||
-			nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) {
+	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+					nr[LRU_INACTIVE_FILE]) {
 		for_each_lru(l) {
 			if (nr[l]) {
 				nr_to_scan = min(nr[l],
@@ -1324,6 +1337,15 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 		}
 	}
 
+	/*
+	 * Even if we did not try to evict anon pages at all, we want to
+	 * rebalance the anon lru active/inactive ratio.
+	 */
+	if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
+		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+	else if (!scan_global_lru(sc))
+		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+
 	throttle_vm_writeout(sc->gfp_mask);
 	return nr_reclaimed;
 }
@@ -1617,6 +1639,14 @@ loop_again:
 			    priority != DEF_PRIORITY)
 				continue;
 
+			/*
+			 * Do some background aging of the anon list, to give
+			 * pages a chance to be referenced before reclaiming.
+			 */
+			if (inactive_anon_is_low(zone))
+				shrink_active_list(SWAP_CLUSTER_MAX, zone,
+							&sc, priority, 0);
+
 			if (!zone_watermark_ok(zone, order, zone->pages_high,
 					       0, 0)) {
 				end_zone = i;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 27400b7da7c4..4380b0dba6d9 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -738,10 +738,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 	seq_printf(m,
 		   "\n  all_unreclaimable: %u"
 		   "\n  prev_priority:     %i"
-		   "\n  start_pfn:         %lu",
+		   "\n  start_pfn:         %lu"
+		   "\n  inactive_ratio:    %u",
 			   zone_is_all_unreclaimable(zone),
 		   zone->prev_priority,
-		   zone->zone_start_pfn);
+		   zone->zone_start_pfn,
+		   zone->inactive_ratio);
 	seq_putc(m, '\n');
 }
 
-- 
GitLab


From 7e9cd484204f9e5b316ed35b241abf088d76e0af Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 18 Oct 2008 20:26:35 -0700
Subject: [PATCH 696/895] vmscan: fix pagecache reclaim referenced bit check

Moving referenced pages back to the head of the active list creates a huge
scalability problem, because by the time a large memory system finally
runs out of free memory, every single page in the system will have been
referenced.

Not only do we not have the time to scan every single page on the active
list, but since they have will all have the referenced bit set, that bit
conveys no useful information.

A more scalable solution is to just move every page that hits the end of
the active list to the inactive list.

We clear the referenced bit off of mapped pages, which need just one
reference to be moved back onto the active list.

Unmapped pages will be moved back to the active list after two references
(see mark_page_accessed).  We preserve the PG_referenced flag on unmapped
pages to preserve accesses that were made while the page was on the active
list.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 45 +++++++++++++--------------------------------
 1 file changed, 13 insertions(+), 32 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index c82ee9a33cfc..9588973849d0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1064,7 +1064,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	int pgdeactivate = 0;
 	unsigned long pgscanned;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
-	LIST_HEAD(l_active);
 	LIST_HEAD(l_inactive);
 	struct page *page;
 	struct pagevec pvec;
@@ -1095,21 +1094,28 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		cond_resched();
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
+
+		/* page_referenced clears PageReferenced */
+		if (page_mapping_inuse(page) &&
+		    page_referenced(page, 0, sc->mem_cgroup))
+			pgmoved++;
+
 		list_add(&page->lru, &l_inactive);
 	}
 
 	/*
-	 * Count the referenced pages as rotated, even when they are moved
-	 * to the inactive list.  This helps balance scan pressure between
-	 * file and anonymous pages in get_scan_ratio.
- 	 */
+	 * Count referenced pages from currently used mappings as
+	 * rotated, even though they are moved to the inactive list.
+	 * This helps balance scan pressure between file and anonymous
+	 * pages in get_scan_ratio.
+	 */
 	zone->recent_rotated[!!file] += pgmoved;
 
 	/*
-	 * Now put the pages back on the appropriate [file or anon] inactive
-	 * and active lists.
+	 * Move the pages to the [file or anon] inactive list.
 	 */
 	pagevec_init(&pvec, 1);
+
 	pgmoved = 0;
 	lru = LRU_BASE + file * LRU_FILE;
 	spin_lock_irq(&zone->lru_lock);
@@ -1142,31 +1148,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		pagevec_strip(&pvec);
 		spin_lock_irq(&zone->lru_lock);
 	}
-
-	pgmoved = 0;
-	lru = LRU_ACTIVE + file * LRU_FILE;
-	while (!list_empty(&l_active)) {
-		page = lru_to_page(&l_active);
-		prefetchw_prev_lru_page(page, &l_active, flags);
-		VM_BUG_ON(PageLRU(page));
-		SetPageLRU(page);
-		VM_BUG_ON(!PageActive(page));
-
-		list_move(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_move_lists(page, true);
-		pgmoved++;
-		if (!pagevec_add(&pvec, page)) {
-			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-			pgmoved = 0;
-			spin_unlock_irq(&zone->lru_lock);
-			if (vm_swap_full())
-				pagevec_swap_free(&pvec);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
-		}
-	}
-	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	__count_vm_events(PGDEACTIVATE, pgdeactivate);
 	spin_unlock_irq(&zone->lru_lock);
-- 
GitLab


From c5fdae469a6a26cd882d7fe0aa3fbfffb6b72fc5 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 18 Oct 2008 20:26:36 -0700
Subject: [PATCH 697/895] vmscan: add newly swapped in pages to the inactive
 list

Swapin_readahead can read in a lot of data that the processes in memory
never need.  Adding swap cache pages to the inactive list prevents them
from putting too much pressure on the working set.

This has the potential to help the programs that are already in memory,
but it could also be a disadvantage to processes that are trying to get
swapped in.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swap_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/swap_state.c b/mm/swap_state.c
index ea62084ed402..43cda7b4b808 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -310,7 +310,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			/*
 			 * Initiate read into locked page and return.
 			 */
-			lru_cache_add_active_anon(new_page);
+			lru_cache_add_anon(new_page);
 			swap_readpage(NULL, new_page);
 			return new_page;
 		}
-- 
GitLab


From 33c120ed2843090e2bd316de1588b8bf8b96cbde Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 18 Oct 2008 20:26:36 -0700
Subject: [PATCH 698/895] more aggressively use lumpy reclaim

During an AIM7 run on a 16GB system, fork started failing around 32000
threads, despite the system having plenty of free swap and 15GB of
pageable memory.  This was on x86-64, so 8k stacks.

If a higher order allocation fails, we can either:
- keep evicting pages off the end of the LRUs and hope that
  we eventually create a contiguous region; this is somewhat
  unlikely if the system is under enough stress by new
  allocations
- after trying normal eviction for a bit, use lumpy reclaim

This patch switches the system to lumpy reclaim if the VM is having
trouble freeing enough pages, using the same threshold for detection as
used by pageout congestion wait.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9588973849d0..a8347b677e74 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -909,7 +909,8 @@ int isolate_lru_page(struct page *page)
  * of reclaimed pages
  */
 static unsigned long shrink_inactive_list(unsigned long max_scan,
-			struct zone *zone, struct scan_control *sc, int file)
+			struct zone *zone, struct scan_control *sc,
+			int priority, int file)
 {
 	LIST_HEAD(page_list);
 	struct pagevec pvec;
@@ -927,8 +928,19 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_freed;
 		unsigned long nr_active;
 		unsigned int count[NR_LRU_LISTS] = { 0, };
-		int mode = (sc->order > PAGE_ALLOC_COSTLY_ORDER) ?
-					ISOLATE_BOTH : ISOLATE_INACTIVE;
+		int mode = ISOLATE_INACTIVE;
+
+		/*
+		 * If we need a large contiguous chunk of memory, or have
+		 * trouble getting a small set of contiguous pages, we
+		 * will reclaim both active and inactive pages.
+		 *
+		 * We use the same threshold as pageout congestion_wait below.
+		 */
+		if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+			mode = ISOLATE_BOTH;
+		else if (sc->order && priority < DEF_PRIORITY - 2)
+			mode = ISOLATE_BOTH;
 
 		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
 			     &page_list, &nr_scan, sc->order, mode,
@@ -1172,7 +1184,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
-	return shrink_inactive_list(nr_to_scan, zone, sc, file);
+	return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
 }
 
 /*
-- 
GitLab


From 8a7a8544a4f6554ec2d8048ac9f9672f442db5a2 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:37 -0700
Subject: [PATCH 699/895] pageflag helpers for configed-out flags

Define proper false/noop inline functions for noreclaim page flags when
!defined(CONFIG_UNEVICTABLE_LRU)

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 57b688cfb5e2..3d31616dcd23 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -162,6 +162,18 @@ static inline int Page##uname(struct page *page) 			\
 #define TESTSCFLAG(uname, lname)					\
 	TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
 
+#define SETPAGEFLAG_NOOP(uname)						\
+static inline void SetPage##uname(struct page *page) {  }
+
+#define CLEARPAGEFLAG_NOOP(uname)					\
+static inline void ClearPage##uname(struct page *page) {  }
+
+#define __CLEARPAGEFLAG_NOOP(uname)					\
+static inline void __ClearPage##uname(struct page *page) {  }
+
+#define TESTCLEARFLAG_FALSE(uname)					\
+static inline int TestClearPage##uname(struct page *page) { return 0; }
+
 struct page;	/* forward declaration */
 
 TESTPAGEFLAG(Locked, locked)
-- 
GitLab


From 894bc310419ac95f4fa4142dc364401a7e607f65 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:39 -0700
Subject: [PATCH 700/895] Unevictable LRU Infrastructure

When the system contains lots of mlocked or otherwise unevictable pages,
the pageout code (kswapd) can spend lots of time scanning over these
pages.  Worse still, the presence of lots of unevictable pages can confuse
kswapd into thinking that more aggressive pageout modes are required,
resulting in all kinds of bad behaviour.

Infrastructure to manage pages excluded from reclaim--i.e., hidden from
vmscan.  Based on a patch by Larry Woodman of Red Hat.  Reworked to
maintain "unevictable" pages on a separate per-zone LRU list, to "hide"
them from vmscan.

Kosaki Motohiro added the support for the memory controller unevictable
lru list.

Pages on the unevictable list have both PG_unevictable and PG_lru set.
Thus, PG_unevictable is analogous to and mutually exclusive with
PG_active--it specifies which LRU list the page is on.

The unevictable infrastructure is enabled by a new mm Kconfig option
[CONFIG_]UNEVICTABLE_LRU.

A new function 'page_evictable(page, vma)' in vmscan.c tests whether or
not a page may be evictable.  Subsequent patches will add the various
!evictable tests.  We'll want to keep these tests light-weight for use in
shrink_active_list() and, possibly, the fault path.

To avoid races between tasks putting pages [back] onto an LRU list and
tasks that might be moving the page from non-evictable to evictable state,
the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()'
-- tests the "evictability" of a page after placing it on the LRU, before
dropping the reference.  If the page has become unevictable,
putback_lru_page() will redo the 'putback', thus moving the page to the
unevictable list.  This way, we avoid "stranding" evictable pages on the
unevictable list.

[akpm@linux-foundation.org: fix fallout from out-of-order merge]
[riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build]
[nishimura@mxp.nes.nec.co.jp: remove redundant mapping check]
[kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework]
[kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c]
[kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure]
[kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch]
[kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Debugged-by: Benjamin Kidwell <benjkidwell@yahoo.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |   2 +-
 include/linux/mm_inline.h  |  23 ++++--
 include/linux/mmzone.h     |  24 +++++-
 include/linux/page-flags.h |  22 +++++-
 include/linux/pagevec.h    |   1 -
 include/linux/swap.h       |  12 +++
 mm/Kconfig                 |  11 +++
 mm/internal.h              |  26 +++++++
 mm/memcontrol.c            |  73 +++++++++++-------
 mm/mempolicy.c             |   2 +-
 mm/migrate.c               |  31 ++++----
 mm/swap.c                  |  42 +++++++++--
 mm/vmscan.c                | 149 +++++++++++++++++++++++++++++++++----
 13 files changed, 345 insertions(+), 73 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8d8f05c1515a..ee1b2fcb4410 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -34,9 +34,9 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
+extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern void mem_cgroup_move_lists(struct page *page, bool active);
 extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
 
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index f451fedd1e75..67d7697fd019 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -91,11 +91,16 @@ del_page_from_lru(struct zone *zone, struct page *page)
 	enum lru_list l = LRU_BASE;
 
 	list_del(&page->lru);
-	if (PageActive(page)) {
-		__ClearPageActive(page);
-		l += LRU_ACTIVE;
+	if (PageUnevictable(page)) {
+		__ClearPageUnevictable(page);
+		l = LRU_UNEVICTABLE;
+	} else {
+		if (PageActive(page)) {
+			__ClearPageActive(page);
+			l += LRU_ACTIVE;
+		}
+		l += page_is_file_cache(page);
 	}
-	l += page_is_file_cache(page);
 	__dec_zone_state(zone, NR_LRU_BASE + l);
 }
 
@@ -110,9 +115,13 @@ static inline enum lru_list page_lru(struct page *page)
 {
 	enum lru_list lru = LRU_BASE;
 
-	if (PageActive(page))
-		lru += LRU_ACTIVE;
-	lru += page_is_file_cache(page);
+	if (PageUnevictable(page))
+		lru = LRU_UNEVICTABLE;
+	else {
+		if (PageActive(page))
+			lru += LRU_ACTIVE;
+		lru += page_is_file_cache(page);
+	}
 
 	return lru;
 }
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9c5111f49a32..d1f60d5fe2ea 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -86,6 +86,11 @@ enum zone_stat_item {
 	NR_ACTIVE_ANON,		/*  "     "     "   "       "         */
 	NR_INACTIVE_FILE,	/*  "     "     "   "       "         */
 	NR_ACTIVE_FILE,		/*  "     "     "   "       "         */
+#ifdef CONFIG_UNEVICTABLE_LRU
+	NR_UNEVICTABLE,		/*  "     "     "   "       "         */
+#else
+	NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */
+#endif
 	NR_ANON_PAGES,	/* Mapped anonymous pages */
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
 			   only modified from process context */
@@ -128,10 +133,18 @@ enum lru_list {
 	LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
 	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
 	LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
-	NR_LRU_LISTS };
+#ifdef CONFIG_UNEVICTABLE_LRU
+	LRU_UNEVICTABLE,
+#else
+	LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid compiler errors in dead code */
+#endif
+	NR_LRU_LISTS
+};
 
 #define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
 
+#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++)
+
 static inline int is_file_lru(enum lru_list l)
 {
 	return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
@@ -142,6 +155,15 @@ static inline int is_active_lru(enum lru_list l)
 	return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
 }
 
+static inline int is_unevictable_lru(enum lru_list l)
+{
+#ifdef CONFIG_UNEVICTABLE_LRU
+	return (l == LRU_UNEVICTABLE);
+#else
+	return 0;
+#endif
+}
+
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3d31616dcd23..ec1a1baad348 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -94,6 +94,9 @@ enum pageflags {
 	PG_reclaim,		/* To be reclaimed asap */
 	PG_buddy,		/* Page is free, on buddy lists */
 	PG_swapbacked,		/* Page is backed by RAM/swap */
+#ifdef CONFIG_UNEVICTABLE_LRU
+	PG_unevictable,		/* Page is "unevictable"  */
+#endif
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 	PG_uncached,		/* Page has been mapped as uncached */
 #endif
@@ -182,6 +185,7 @@ PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
 PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
 PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
 PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
+	TESTCLEARFLAG(Active, active)
 __PAGEFLAG(Slab, slab)
 PAGEFLAG(Checked, checked)		/* Used by some filesystems */
 PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
@@ -225,6 +229,15 @@ PAGEFLAG(SwapCache, swapcache)
 PAGEFLAG_FALSE(SwapCache)
 #endif
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
+	TESTCLEARFLAG(Unevictable, unevictable)
+#else
+PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable)
+	SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable)
+	__CLEARPAGEFLAG_NOOP(Unevictable)
+#endif
+
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 PAGEFLAG(Uncached, uncached)
 #else
@@ -340,9 +353,16 @@ static inline void __ClearPageTail(struct page *page)
 
 #endif /* !PAGEFLAGS_EXTENDED */
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+#define __PG_UNEVICTABLE (1 << PG_unevictable)
+#else
+#define __PG_UNEVICTABLE 0
+#endif
+
 #define PAGE_FLAGS	(1 << PG_lru   | 1 << PG_private   | 1 << PG_locked | \
 			 1 << PG_buddy | 1 << PG_writeback | \
-			 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active)
+			 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active | \
+			 __PG_UNEVICTABLE)
 
 /*
  * Flags checked in bad_page().  Pages on the free list should not have
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 5fc96a4e760f..e90a2cb02915 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -101,7 +101,6 @@ static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
 	____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
 }
 
-
 static inline void pagevec_lru_add_file(struct pagevec *pvec)
 {
 	if (pagevec_count(pvec))
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7d09d79997a4..a2113044d20a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -180,6 +180,8 @@ extern int lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
+extern void add_page_to_unevictable_list(struct page *page);
+
 /**
  * lru_cache_add: add a page to the page lists
  * @page: the page to add
@@ -228,6 +230,16 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 }
 #endif
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+extern int page_evictable(struct page *page, struct vm_area_struct *vma);
+#else
+static inline int page_evictable(struct page *page,
+						struct vm_area_struct *vma)
+{
+	return 1;
+}
+#endif
+
 extern int kswapd_run(int nid);
 
 #ifdef CONFIG_MMU
diff --git a/mm/Kconfig b/mm/Kconfig
index 1a501a4de95c..5b5790f8a816 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -209,5 +209,16 @@ config VIRT_TO_BUS
 	def_bool y
 	depends on !ARCH_NO_VIRT_TO_BUS
 
+config UNEVICTABLE_LRU
+	bool "Add LRU list to track non-evictable pages"
+	default y
+	depends on MMU
+	help
+	  Keeps unevictable pages off of the active and inactive pageout
+	  lists, so kswapd will not waste CPU time or have its balancing
+	  algorithms thrown off by scanning these pages.  Selecting this
+	  will use one page flag and increase the code size a little,
+	  say Y unless you know what you are doing.
+
 config MMU_NOTIFIER
 	bool
diff --git a/mm/internal.h b/mm/internal.h
index 4e8e78b978b5..3db17b2a1ac6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -39,8 +39,15 @@ static inline void __put_page(struct page *page)
 	atomic_dec(&page->_count);
 }
 
+/*
+ * in mm/vmscan.c:
+ */
 extern int isolate_lru_page(struct page *page);
+extern void putback_lru_page(struct page *page);
 
+/*
+ * in mm/page_alloc.c
+ */
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 
 /*
@@ -54,6 +61,25 @@ static inline unsigned long page_order(struct page *page)
 	return page_private(page);
 }
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * unevictable_migrate_page() called only from migrate_page_copy() to
+ * migrate unevictable flag to new page.
+ * Note that the old page has been isolated from the LRU lists at this
+ * point so we don't need to worry about LRU statistics.
+ */
+static inline void unevictable_migrate_page(struct page *new, struct page *old)
+{
+	if (TestClearPageUnevictable(old))
+		SetPageUnevictable(new);
+}
+#else
+static inline void unevictable_migrate_page(struct page *new, struct page *old)
+{
+}
+#endif
+
+
 /*
  * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
  * so all functions starting at paging_init should be marked __init
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 27e9e75f4eab..82c065e7551e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -160,9 +160,10 @@ struct page_cgroup {
 	struct mem_cgroup *mem_cgroup;
 	int flags;
 };
-#define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
-#define PAGE_CGROUP_FLAG_ACTIVE (0x2)	/* page is active in this cgroup */
-#define PAGE_CGROUP_FLAG_FILE	(0x4)	/* page is file system backed */
+#define PAGE_CGROUP_FLAG_CACHE	   (0x1)	/* charged as cache */
+#define PAGE_CGROUP_FLAG_ACTIVE    (0x2)	/* page is active in this cgroup */
+#define PAGE_CGROUP_FLAG_FILE	   (0x4)	/* page is file system backed */
+#define PAGE_CGROUP_FLAG_UNEVICTABLE (0x8)	/* page is unevictableable */
 
 static int page_cgroup_nid(struct page_cgroup *pc)
 {
@@ -292,10 +293,14 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 {
 	int lru = LRU_BASE;
 
-	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
-		lru += LRU_ACTIVE;
-	if (pc->flags & PAGE_CGROUP_FLAG_FILE)
-		lru += LRU_FILE;
+	if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE)
+		lru = LRU_UNEVICTABLE;
+	else {
+		if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
+			lru += LRU_ACTIVE;
+		if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+			lru += LRU_FILE;
+	}
 
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
@@ -308,10 +313,14 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 {
 	int lru = LRU_BASE;
 
-	if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
-		lru += LRU_ACTIVE;
-	if (pc->flags & PAGE_CGROUP_FLAG_FILE)
-		lru += LRU_FILE;
+	if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE)
+		lru = LRU_UNEVICTABLE;
+	else {
+		if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
+			lru += LRU_ACTIVE;
+		if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+			lru += LRU_FILE;
+	}
 
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
 	list_add(&pc->lru, &mz->lists[lru]);
@@ -319,21 +328,31 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
 }
 
-static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
+static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
 {
 	struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
-	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-	int file = pc->flags & PAGE_CGROUP_FLAG_FILE;
-	int lru = LRU_FILE * !!file + !!from;
+	int active    = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+	int file      = pc->flags & PAGE_CGROUP_FLAG_FILE;
+	int unevictable = pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE;
+	enum lru_list from = unevictable ? LRU_UNEVICTABLE :
+				(LRU_FILE * !!file + !!active);
 
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	if (lru == from)
+		return;
 
-	if (active)
-		pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
-	else
+	MEM_CGROUP_ZSTAT(mz, from) -= 1;
+
+	if (is_unevictable_lru(lru)) {
 		pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
+		pc->flags |= PAGE_CGROUP_FLAG_UNEVICTABLE;
+	} else {
+		if (is_active_lru(lru))
+			pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
+		else
+			pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
+		pc->flags &= ~PAGE_CGROUP_FLAG_UNEVICTABLE;
+	}
 
-	lru = LRU_FILE * !!file + !!active;
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
 	list_move(&pc->lru, &mz->lists[lru]);
 }
@@ -351,7 +370,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
 /*
  * This routine assumes that the appropriate zone's lru lock is already held
  */
-void mem_cgroup_move_lists(struct page *page, bool active)
+void mem_cgroup_move_lists(struct page *page, enum lru_list lru)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup_per_zone *mz;
@@ -374,7 +393,7 @@ void mem_cgroup_move_lists(struct page *page, bool active)
 	if (pc) {
 		mz = page_cgroup_zoneinfo(pc);
 		spin_lock_irqsave(&mz->lru_lock, flags);
-		__mem_cgroup_move_lists(pc, active);
+		__mem_cgroup_move_lists(pc, lru);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
 	}
 	unlock_page_cgroup(page);
@@ -472,12 +491,10 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		/*
 		 * TODO: play better with lumpy reclaim, grabbing anything.
 		 */
-		if (PageActive(page) && !active) {
-			__mem_cgroup_move_lists(pc, true);
-			continue;
-		}
-		if (!PageActive(page) && active) {
-			__mem_cgroup_move_lists(pc, false);
+		if (PageUnevictable(page) ||
+		    (PageActive(page) && !active) ||
+		    (!PageActive(page) && active)) {
+			__mem_cgroup_move_lists(pc, page_lru(page));
 			continue;
 		}
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 71b47491487d..36f42573a335 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2202,7 +2202,7 @@ static void gather_stats(struct page *page, void *private, int pte_dirty)
 	if (PageSwapCache(page))
 		md->swapcache++;
 
-	if (PageActive(page))
+	if (PageActive(page) || PageUnevictable(page))
 		md->active++;
 
 	if (PageWriteback(page))
diff --git a/mm/migrate.c b/mm/migrate.c
index c07327487111..b10237d8b459 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -53,14 +53,9 @@ int migrate_prep(void)
 	return 0;
 }
 
-static inline void move_to_lru(struct page *page)
-{
-	lru_cache_add_lru(page, page_lru(page));
-	put_page(page);
-}
-
 /*
- * Add isolated pages on the list back to the LRU.
+ * Add isolated pages on the list back to the LRU under page lock
+ * to avoid leaking evictable pages back onto unevictable list.
  *
  * returns the number of pages put back.
  */
@@ -72,7 +67,7 @@ int putback_lru_pages(struct list_head *l)
 
 	list_for_each_entry_safe(page, page2, l, lru) {
 		list_del(&page->lru);
-		move_to_lru(page);
+		putback_lru_page(page);
 		count++;
 	}
 	return count;
@@ -354,8 +349,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 		SetPageReferenced(newpage);
 	if (PageUptodate(page))
 		SetPageUptodate(newpage);
-	if (PageActive(page))
+	if (TestClearPageActive(page)) {
+		VM_BUG_ON(PageUnevictable(page));
 		SetPageActive(newpage);
+	} else
+		unevictable_migrate_page(newpage, page);
 	if (PageChecked(page))
 		SetPageChecked(newpage);
 	if (PageMappedToDisk(page))
@@ -376,7 +374,6 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 #ifdef CONFIG_SWAP
 	ClearPageSwapCache(page);
 #endif
-	ClearPageActive(page);
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
 	page->mapping = NULL;
@@ -555,6 +552,10 @@ static int fallback_migrate_page(struct address_space *mapping,
  *
  * The new page will have replaced the old page if this function
  * is successful.
+ *
+ * Return value:
+ *   < 0 - error code
+ *  == 0 - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page)
 {
@@ -617,9 +618,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	if (!newpage)
 		return -ENOMEM;
 
-	if (page_count(page) == 1)
+	if (page_count(page) == 1) {
 		/* page was freed from under us. So we are done. */
 		goto move_newpage;
+	}
 
 	charge = mem_cgroup_prepare_migration(page, newpage);
 	if (charge == -ENOMEM) {
@@ -693,7 +695,6 @@ rcu_unlock:
 		rcu_read_unlock();
 
 unlock:
-
 	unlock_page(page);
 
 	if (rc != -EAGAIN) {
@@ -704,17 +705,19 @@ unlock:
  		 * restored.
  		 */
  		list_del(&page->lru);
- 		move_to_lru(page);
+		putback_lru_page(page);
 	}
 
 move_newpage:
 	if (!charge)
 		mem_cgroup_end_migration(newpage);
+
 	/*
 	 * Move the new page to the LRU. If migration was not successful
 	 * then this will free the page.
 	 */
-	move_to_lru(newpage);
+	putback_lru_page(newpage);
+
 	if (result) {
 		if (rc)
 			*result = rc;
diff --git a/mm/swap.c b/mm/swap.c
index 0b1974a08974..fee6b973f143 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -115,7 +115,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
 			zone = pagezone;
 			spin_lock(&zone->lru_lock);
 		}
-		if (PageLRU(page) && !PageActive(page)) {
+		if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 			int lru = page_is_file_cache(page);
 			list_move_tail(&page->lru, &zone->lru[lru].list);
 			pgmoved++;
@@ -136,7 +136,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
 void  rotate_reclaimable_page(struct page *page)
 {
 	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
-	    PageLRU(page)) {
+	    !PageUnevictable(page) && PageLRU(page)) {
 		struct pagevec *pvec;
 		unsigned long flags;
 
@@ -157,7 +157,7 @@ void activate_page(struct page *page)
 	struct zone *zone = page_zone(page);
 
 	spin_lock_irq(&zone->lru_lock);
-	if (PageLRU(page) && !PageActive(page)) {
+	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		int file = page_is_file_cache(page);
 		int lru = LRU_BASE + file;
 		del_page_from_lru_list(zone, page, lru);
@@ -166,7 +166,7 @@ void activate_page(struct page *page)
 		lru += LRU_ACTIVE;
 		add_page_to_lru_list(zone, page, lru);
 		__count_vm_event(PGACTIVATE);
-		mem_cgroup_move_lists(page, true);
+		mem_cgroup_move_lists(page, lru);
 
 		zone->recent_rotated[!!file]++;
 		zone->recent_scanned[!!file]++;
@@ -183,7 +183,8 @@ void activate_page(struct page *page)
  */
 void mark_page_accessed(struct page *page)
 {
-	if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
+	if (!PageActive(page) && !PageUnevictable(page) &&
+			PageReferenced(page) && PageLRU(page)) {
 		activate_page(page);
 		ClearPageReferenced(page);
 	} else if (!PageReferenced(page)) {
@@ -211,13 +212,38 @@ void __lru_cache_add(struct page *page, enum lru_list lru)
 void lru_cache_add_lru(struct page *page, enum lru_list lru)
 {
 	if (PageActive(page)) {
+		VM_BUG_ON(PageUnevictable(page));
 		ClearPageActive(page);
+	} else if (PageUnevictable(page)) {
+		VM_BUG_ON(PageActive(page));
+		ClearPageUnevictable(page);
 	}
 
-	VM_BUG_ON(PageLRU(page) || PageActive(page));
+	VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
 	__lru_cache_add(page, lru);
 }
 
+/**
+ * add_page_to_unevictable_list - add a page to the unevictable list
+ * @page:  the page to be added to the unevictable list
+ *
+ * Add page directly to its zone's unevictable list.  To avoid races with
+ * tasks that might be making the page evictable, through eg. munlock,
+ * munmap or exit, while it's not on the lru, we want to add the page
+ * while it's locked or otherwise "invisible" to other tasks.  This is
+ * difficult to do when using the pagevec cache, so bypass that.
+ */
+void add_page_to_unevictable_list(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+
+	spin_lock_irq(&zone->lru_lock);
+	SetPageUnevictable(page);
+	SetPageLRU(page);
+	add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
+	spin_unlock_irq(&zone->lru_lock);
+}
+
 /*
  * Drain pages out of the cpu's pagevecs.
  * Either "cpu" is the current CPU, and preemption has already been
@@ -316,6 +342,7 @@ void release_pages(struct page **pages, int nr, int cold)
 
 		if (PageLRU(page)) {
 			struct zone *pagezone = page_zone(page);
+
 			if (pagezone != zone) {
 				if (zone)
 					spin_unlock_irqrestore(&zone->lru_lock,
@@ -392,6 +419,7 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
 	int i;
 	struct zone *zone = NULL;
+	VM_BUG_ON(is_unevictable_lru(lru));
 
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
@@ -403,6 +431,8 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 			zone = pagezone;
 			spin_lock_irq(&zone->lru_lock);
 		}
+		VM_BUG_ON(PageActive(page));
+		VM_BUG_ON(PageUnevictable(page));
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 		if (is_active_lru(lru))
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a8347b677e74..154b9b608da6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,6 +470,79 @@ int remove_mapping(struct address_space *mapping, struct page *page)
 	return 0;
 }
 
+/**
+ * putback_lru_page - put previously isolated page onto appropriate LRU list
+ * @page: page to be put back to appropriate lru list
+ *
+ * Add previously isolated @page to appropriate LRU list.
+ * Page may still be unevictable for other reasons.
+ *
+ * lru_lock must not be held, interrupts must be enabled.
+ */
+#ifdef CONFIG_UNEVICTABLE_LRU
+void putback_lru_page(struct page *page)
+{
+	int lru;
+	int active = !!TestClearPageActive(page);
+
+	VM_BUG_ON(PageLRU(page));
+
+redo:
+	ClearPageUnevictable(page);
+
+	if (page_evictable(page, NULL)) {
+		/*
+		 * For evictable pages, we can use the cache.
+		 * In event of a race, worst case is we end up with an
+		 * unevictable page on [in]active list.
+		 * We know how to handle that.
+		 */
+		lru = active + page_is_file_cache(page);
+		lru_cache_add_lru(page, lru);
+	} else {
+		/*
+		 * Put unevictable pages directly on zone's unevictable
+		 * list.
+		 */
+		lru = LRU_UNEVICTABLE;
+		add_page_to_unevictable_list(page);
+	}
+	mem_cgroup_move_lists(page, lru);
+
+	/*
+	 * page's status can change while we move it among lru. If an evictable
+	 * page is on unevictable list, it never be freed. To avoid that,
+	 * check after we added it to the list, again.
+	 */
+	if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
+		if (!isolate_lru_page(page)) {
+			put_page(page);
+			goto redo;
+		}
+		/* This means someone else dropped this page from LRU
+		 * So, it will be freed or putback to LRU again. There is
+		 * nothing to do here.
+		 */
+	}
+
+	put_page(page);		/* drop ref from isolate */
+}
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+
+void putback_lru_page(struct page *page)
+{
+	int lru;
+	VM_BUG_ON(PageLRU(page));
+
+	lru = !!TestClearPageActive(page) + page_is_file_cache(page);
+	lru_cache_add_lru(page, lru);
+	mem_cgroup_move_lists(page, lru);
+	put_page(page);
+}
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
+
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -503,6 +576,12 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		sc->nr_scanned++;
 
+		if (unlikely(!page_evictable(page, NULL))) {
+			unlock_page(page);
+			putback_lru_page(page);
+			continue;
+		}
+
 		if (!sc->may_swap && page_mapped(page))
 			goto keep_locked;
 
@@ -602,7 +681,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * possible for a page to have PageDirty set, but it is actually
 		 * clean (all its buffers are clean).  This happens if the
 		 * buffers were written out directly, with submit_bh(). ext3
-		 * will do this, as well as the blockdev mapping. 
+		 * will do this, as well as the blockdev mapping.
 		 * try_to_release_page() will discover that cleanness and will
 		 * drop the buffers and mark the page clean - it can be freed.
 		 *
@@ -650,6 +729,7 @@ activate_locked:
 		/* Not a candidate for swapping, so reclaim swap space. */
 		if (PageSwapCache(page) && vm_swap_full())
 			remove_exclusive_swap_page_ref(page);
+		VM_BUG_ON(PageActive(page));
 		SetPageActive(page);
 		pgactivate++;
 keep_locked:
@@ -699,6 +779,14 @@ int __isolate_lru_page(struct page *page, int mode, int file)
 	if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file))
 		return ret;
 
+	/*
+	 * When this function is being called for lumpy reclaim, we
+	 * initially look into all LRU pages, active, inactive and
+	 * unevictable; only give shrink_page_list evictable pages.
+	 */
+	if (PageUnevictable(page))
+		return ret;
+
 	ret = -EBUSY;
 	if (likely(get_page_unless_zero(page))) {
 		/*
@@ -810,7 +898,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 				/* else it is being freed elsewhere */
 				list_move(&cursor_page->lru, src);
 			default:
-				break;
+				break;	/* ! on LRU or wrong list */
 			}
 		}
 	}
@@ -870,8 +958,9 @@ static unsigned long clear_active_flags(struct list_head *page_list,
  * Returns -EBUSY if the page was not on an LRU list.
  *
  * The returned page will have PageLRU() cleared.  If it was found on
- * the active list, it will have PageActive set.  That flag may need
- * to be cleared by the caller before letting the page go.
+ * the active list, it will have PageActive set.  If it was found on
+ * the unevictable list, it will have the PageUnevictable bit set. That flag
+ * may need to be cleared by the caller before letting the page go.
  *
  * The vmstat statistic corresponding to the list on which the page was
  * found will be decremented.
@@ -892,11 +981,10 @@ int isolate_lru_page(struct page *page)
 
 		spin_lock_irq(&zone->lru_lock);
 		if (PageLRU(page) && get_page_unless_zero(page)) {
-			int lru = LRU_BASE;
+			int lru = page_lru(page);
 			ret = 0;
 			ClearPageLRU(page);
 
-			lru += page_is_file_cache(page) + !!PageActive(page);
 			del_page_from_lru_list(zone, page, lru);
 		}
 		spin_unlock_irq(&zone->lru_lock);
@@ -1008,11 +1096,20 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		 * Put back any unfreeable pages.
 		 */
 		while (!list_empty(&page_list)) {
+			int lru;
 			page = lru_to_page(&page_list);
 			VM_BUG_ON(PageLRU(page));
-			SetPageLRU(page);
 			list_del(&page->lru);
-			add_page_to_lru_list(zone, page, page_lru(page));
+			if (unlikely(!page_evictable(page, NULL))) {
+				spin_unlock_irq(&zone->lru_lock);
+				putback_lru_page(page);
+				spin_lock_irq(&zone->lru_lock);
+				continue;
+			}
+			SetPageLRU(page);
+			lru = page_lru(page);
+			add_page_to_lru_list(zone, page, lru);
+			mem_cgroup_move_lists(page, lru);
 			if (PageActive(page) && scan_global_lru(sc)) {
 				int file = !!page_is_file_cache(page);
 				zone->recent_rotated[file]++;
@@ -1107,6 +1204,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
 
+		if (unlikely(!page_evictable(page, NULL))) {
+			putback_lru_page(page);
+			continue;
+		}
+
 		/* page_referenced clears PageReferenced */
 		if (page_mapping_inuse(page) &&
 		    page_referenced(page, 0, sc->mem_cgroup))
@@ -1140,7 +1242,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		ClearPageActive(page);
 
 		list_move(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_move_lists(page, false);
+		mem_cgroup_move_lists(page, lru);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1286,7 +1388,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 
 	get_scan_ratio(zone, sc, percent);
 
-	for_each_lru(l) {
+	for_each_evictable_lru(l) {
 		if (scan_global_lru(sc)) {
 			int file = is_file_lru(l);
 			int scan;
@@ -1318,7 +1420,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
-		for_each_lru(l) {
+		for_each_evictable_lru(l) {
 			if (nr[l]) {
 				nr_to_scan = min(nr[l],
 					(unsigned long)sc->swap_cluster_max);
@@ -1875,8 +1977,8 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 		if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
 			continue;
 
-		for_each_lru(l) {
-			/* For pass = 0 we don't shrink the active list */
+		for_each_evictable_lru(l) {
+			/* For pass = 0, we don't shrink the active list */
 			if (pass == 0 &&
 				(l == LRU_ACTIVE || l == LRU_ACTIVE_FILE))
 				continue;
@@ -2213,3 +2315,24 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	return ret;
 }
 #endif
+
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * page_evictable - test whether a page is evictable
+ * @page: the page to test
+ * @vma: the VMA in which the page is or will be mapped, may be NULL
+ *
+ * Test whether page is evictable--i.e., should be placed on active/inactive
+ * lists vs unevictable list.
+ *
+ * Reasons page might not be evictable:
+ * TODO - later patches
+ */
+int page_evictable(struct page *page, struct vm_area_struct *vma)
+{
+
+	/* TODO:  test page [!]evictable conditions */
+
+	return 1;
+}
+#endif
-- 
GitLab


From bbfd28eee9fbd73e780b19beb3dc562befbb94fa Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:40 -0700
Subject: [PATCH 701/895] unevictable lru: add event counting with statistics

Fix to unevictable-lru-page-statistics.patch

Add unevictable lru infrastructure vm events to the statistics patch.
Rename the "NORECL_" and "noreclaim_" symbols and text strings to
"UNEVICTABLE_" and "unevictable_", respectively.

Currently, both the infrastructure and the mlocked pages event are
added by a single patch later in the series.  This makes it difficult
to add or rework the incremental patches.  The events actually "belong"
with the stats, so pull them up to here.

Also, restore the event counting to putback_lru_page().  This was removed
from previous patch in series where it was "misplaced".  The actual events
weren't defined that early.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Rik van Riel <riel@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h | 5 +++++
 mm/vmscan.c            | 6 ++++++
 mm/vmstat.c            | 5 +++++
 3 files changed, 16 insertions(+)

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index ff5179f2b153..135840cd7feb 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -40,6 +40,11 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 #ifdef CONFIG_HUGETLB_PAGE
 		HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
+#endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+		UNEVICTABLE_PGCULLED,	/* culled to noreclaim list */
+		UNEVICTABLE_PGSCANNED,	/* scanned for reclaimability */
+		UNEVICTABLE_PGRESCUED,	/* rescued from noreclaim list */
 #endif
 		NR_VM_EVENT_ITEMS
 };
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 154b9b608da6..2804d23e2da7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -484,6 +484,7 @@ void putback_lru_page(struct page *page)
 {
 	int lru;
 	int active = !!TestClearPageActive(page);
+	int was_unevictable = PageUnevictable(page);
 
 	VM_BUG_ON(PageLRU(page));
 
@@ -525,6 +526,11 @@ redo:
 		 */
 	}
 
+	if (was_unevictable && lru != LRU_UNEVICTABLE)
+		count_vm_event(UNEVICTABLE_PGRESCUED);
+	else if (!was_unevictable && lru == LRU_UNEVICTABLE)
+		count_vm_event(UNEVICTABLE_PGCULLED);
+
 	put_page(page);		/* drop ref from isolate */
 }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4380b0dba6d9..6cb08cdd4f03 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -677,6 +677,11 @@ static const char * const vmstat_text[] = {
 	"htlb_buddy_alloc_success",
 	"htlb_buddy_alloc_fail",
 #endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+	"unevictable_pgs_culled",
+	"unevictable_pgs_scanned",
+	"unevictable_pgs_rescued",
+#endif
 #endif
 };
 
-- 
GitLab


From 7b854121eb3e5ba0241882ff939e2c485228c9c5 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:40 -0700
Subject: [PATCH 702/895] Unevictable LRU Page Statistics

Report unevictable pages per zone and system wide.

Kosaki Motohiro added support for memory controller unevictable
statistics.

[riel@redhat.com: fix printk in show_free_areas()]
[akpm@linux-foundation.org: fix units in /proc/vmstats]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Debugged-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c |  6 ++++++
 fs/proc/proc_misc.c |  6 ++++++
 mm/memcontrol.c     |  6 ++++++
 mm/page_alloc.c     | 18 ++++++++++++++++--
 mm/vmstat.c         |  3 +++
 5 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index fc7e9bf0cdbc..11a9a05cf554 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -70,6 +70,9 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       "Node %d Inactive(anon): %8lu kB\n"
 		       "Node %d Active(file):   %8lu kB\n"
 		       "Node %d Inactive(file): %8lu kB\n"
+#ifdef CONFIG_UNEVICTABLE_LRU
+		       "Node %d Noreclaim:      %8lu kB\n"
+#endif
 #ifdef CONFIG_HIGHMEM
 		       "Node %d HighTotal:      %8lu kB\n"
 		       "Node %d HighFree:       %8lu kB\n"
@@ -99,6 +102,9 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
 		       nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
 		       nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+		       nid, K(node_page_state(nid, NR_UNEVICTABLE)),
+#endif
 #ifdef CONFIG_HIGHMEM
 		       nid, K(i.totalhigh),
 		       nid, K(i.freehigh),
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index b8edb2860557..6dd60eaea997 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -174,6 +174,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 		"Inactive(anon): %8lu kB\n"
 		"Active(file):   %8lu kB\n"
 		"Inactive(file): %8lu kB\n"
+#ifdef CONFIG_UNEVICTABLE_LRU
+		"Unevictable:    %8lu kB\n"
+#endif
 #ifdef CONFIG_HIGHMEM
 		"HighTotal:      %8lu kB\n"
 		"HighFree:       %8lu kB\n"
@@ -212,6 +215,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 		K(pages[LRU_INACTIVE_ANON]),
 		K(pages[LRU_ACTIVE_FILE]),
 		K(pages[LRU_INACTIVE_FILE]),
+#ifdef CONFIG_UNEVICTABLE_LRU
+		K(pages[LRU_UNEVICTABLE]),
+#endif
 #ifdef CONFIG_HIGHMEM
 		K(i.totalhigh),
 		K(i.freehigh),
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 82c065e7551e..e93a4db93fbe 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1006,6 +1006,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	{
 		unsigned long active_anon, inactive_anon;
 		unsigned long active_file, inactive_file;
+		unsigned long unevictable;
 
 		inactive_anon = mem_cgroup_get_all_zonestat(mem_cont,
 						LRU_INACTIVE_ANON);
@@ -1015,10 +1016,15 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 						LRU_INACTIVE_FILE);
 		active_file = mem_cgroup_get_all_zonestat(mem_cont,
 						LRU_ACTIVE_FILE);
+		unevictable = mem_cgroup_get_all_zonestat(mem_cont,
+							LRU_UNEVICTABLE);
+
 		cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE);
 		cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE);
 		cb->fill(cb, "active_file", (active_file) * PAGE_SIZE);
 		cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE);
+		cb->fill(cb, "unevictable", unevictable * PAGE_SIZE);
+
 	}
 	return 0;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 79c0981b1d32..4125230a1b2c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1864,13 +1864,21 @@ void show_free_areas(void)
 		}
 	}
 
-	printk("Active_anon:%lu active_file:%lu inactive_anon%lu\n"
-		" inactive_file:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+	printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
+		" inactive_file:%lu"
+//TODO:  check/adjust line lengths
+#ifdef CONFIG_UNEVICTABLE_LRU
+		" unevictable:%lu"
+#endif
+		" dirty:%lu writeback:%lu unstable:%lu\n"
 		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
 		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_ANON),
 		global_page_state(NR_INACTIVE_FILE),
+#ifdef CONFIG_UNEVICTABLE_LRU
+		global_page_state(NR_UNEVICTABLE),
+#endif
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
@@ -1897,6 +1905,9 @@ void show_free_areas(void)
 			" inactive_anon:%lukB"
 			" active_file:%lukB"
 			" inactive_file:%lukB"
+#ifdef CONFIG_UNEVICTABLE_LRU
+			" unevictable:%lukB"
+#endif
 			" present:%lukB"
 			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
@@ -1910,6 +1921,9 @@ void show_free_areas(void)
 			K(zone_page_state(zone, NR_INACTIVE_ANON)),
 			K(zone_page_state(zone, NR_ACTIVE_FILE)),
 			K(zone_page_state(zone, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+			K(zone_page_state(zone, NR_UNEVICTABLE)),
+#endif
 			K(zone->present_pages),
 			zone->pages_scanned,
 			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6cb08cdd4f03..6db2f6319313 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -623,6 +623,9 @@ static const char * const vmstat_text[] = {
 	"nr_active_anon",
 	"nr_inactive_file",
 	"nr_active_file",
+#ifdef CONFIG_UNEVICTABLE_LRU
+	"nr_unevictable",
+#endif
 	"nr_anon_pages",
 	"nr_mapped",
 	"nr_file_pages",
-- 
GitLab


From ba9ddf49391645e6bb93219131a40446538a5e76 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:42 -0700
Subject: [PATCH 703/895] Ramfs and Ram Disk pages are unevictable

Christoph Lameter pointed out that ram disk pages also clutter the LRU
lists.  When vmscan finds them dirty and tries to clean them, the ram disk
writeback function just redirties the page so that it goes back onto the
active list.  Round and round she goes...

With the ram disk driver [rd.c] replaced by the newer 'brd.c', this is no
longer the case, as ram disk pages are no longer maintained on the lru.
[This makes them unmigratable for defrag or memory hot remove, but that
can be addressed by a separate patch series.] However, the ramfs pages
behave like ram disk pages used to, so:

Define new address_space flag [shares address_space flags member with
mapping's gfp mask] to indicate that the address space contains all
unevictable pages.  This will provide for efficient testing of ramfs pages
in page_evictable().

Also provide wrapper functions to set/test the unevictable state to
minimize #ifdefs in ramfs driver and any other users of this facility.

Set the unevictable state on address_space structures for new ramfs
inodes.  Test the unevictable state in page_evictable() to cull
unevictable pages.

These changes depend on [CONFIG_]UNEVICTABLE_LRU.

[riel@redhat.com: undo the brd.c part]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Debugged-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ramfs/inode.c        |  1 +
 include/linux/pagemap.h | 22 ++++++++++++++++++++++
 mm/vmscan.c             |  5 +++++
 3 files changed, 28 insertions(+)

diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b13123424e49..f031d1c925f0 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
 		inode->i_mapping->a_ops = &ramfs_aops;
 		inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
 		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+		mapping_set_unevictable(inode->i_mapping);
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
 		default:
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5da31c12101c..09164d2c5c27 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -32,6 +32,28 @@ static inline void mapping_set_error(struct address_space *mapping, int error)
 	}
 }
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+#define AS_UNEVICTABLE	(__GFP_BITS_SHIFT + 2)	/* e.g., ramdisk, SHM_LOCK */
+
+static inline void mapping_set_unevictable(struct address_space *mapping)
+{
+	set_bit(AS_UNEVICTABLE, &mapping->flags);
+}
+
+static inline int mapping_unevictable(struct address_space *mapping)
+{
+	if (mapping && (mapping->flags & AS_UNEVICTABLE))
+		return 1;
+	return 0;
+}
+#else
+static inline void mapping_set_unevictable(struct address_space *mapping) { }
+static inline int mapping_unevictable(struct address_space *mapping)
+{
+	return 0;
+}
+#endif
+
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
 	return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2804d23e2da7..9babfbc1ddc8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2332,11 +2332,16 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
  * lists vs unevictable list.
  *
  * Reasons page might not be evictable:
+ * (1) page's mapping marked unevictable
+ *
  * TODO - later patches
  */
 int page_evictable(struct page *page, struct vm_area_struct *vma)
 {
 
+	if (mapping_unevictable(page_mapping(page)))
+		return 0;
+
 	/* TODO:  test page [!]evictable conditions */
 
 	return 1;
-- 
GitLab


From 89e004ea55abe201b29e2d6e35124101f1288ef7 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:43 -0700
Subject: [PATCH 704/895] SHM_LOCKED pages are unevictable

Shmem segments locked into memory via shmctl(SHM_LOCKED) should not be
kept on the normal LRU, since scanning them is a waste of time and might
throw off kswapd's balancing algorithms.  Place them on the unevictable
LRU list instead.

Use the AS_UNEVICTABLE flag to mark address_space of SHM_LOCKed shared
memory regions as unevictable.  Then these pages will be culled off the
normal LRU lists during vmscan.

Add new wrapper function to clear the mapping's unevictable state when/if
shared memory segment is munlocked.

Add 'scan_mapping_unevictable_page()' to mm/vmscan.c to scan all pages in
the shmem segment's mapping [struct address_space] for evictability now
that they're no longer locked.  If so, move them to the appropriate zone
lru list.

Changes depend on [CONFIG_]UNEVICTABLE_LRU.

[kosaki.motohiro@jp.fujitsu.com: revert shm change]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h      |  4 +-
 include/linux/pagemap.h | 12 ++++--
 include/linux/swap.h    |  4 ++
 ipc/shm.c               |  4 ++
 mm/shmem.c              |  4 ++
 mm/vmscan.c             | 89 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 112 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c61ba10768ea..40236290e2ae 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -700,10 +700,10 @@ static inline int page_mapped(struct page *page)
 extern void show_free_areas(void);
 
 #ifdef CONFIG_SHMEM
-int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 #else
 static inline int shmem_lock(struct file *file, int lock,
-			     struct user_struct *user)
+			    struct user_struct *user)
 {
 	return 0;
 }
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 09164d2c5c27..4b6c4d8d26b8 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -40,14 +40,20 @@ static inline void mapping_set_unevictable(struct address_space *mapping)
 	set_bit(AS_UNEVICTABLE, &mapping->flags);
 }
 
+static inline void mapping_clear_unevictable(struct address_space *mapping)
+{
+	clear_bit(AS_UNEVICTABLE, &mapping->flags);
+}
+
 static inline int mapping_unevictable(struct address_space *mapping)
 {
-	if (mapping && (mapping->flags & AS_UNEVICTABLE))
-		return 1;
-	return 0;
+	if (likely(mapping))
+		return test_bit(AS_UNEVICTABLE, &mapping->flags);
+	return !!mapping;
 }
 #else
 static inline void mapping_set_unevictable(struct address_space *mapping) { }
+static inline void mapping_clear_unevictable(struct address_space *mapping) { }
 static inline int mapping_unevictable(struct address_space *mapping)
 {
 	return 0;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a2113044d20a..7edb4cbc29f9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -232,12 +232,16 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 
 #ifdef CONFIG_UNEVICTABLE_LRU
 extern int page_evictable(struct page *page, struct vm_area_struct *vma);
+extern void scan_mapping_unevictable_pages(struct address_space *);
 #else
 static inline int page_evictable(struct page *page,
 						struct vm_area_struct *vma)
 {
 	return 1;
 }
+static inline void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+}
 #endif
 
 extern int kswapd_run(int nid);
diff --git a/ipc/shm.c b/ipc/shm.c
index e77ec698cf40..0add3fa5f547 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -737,6 +737,10 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
+		struct file *uninitialized_var(shm_file);
+
+		lru_add_drain_all();  /* drain pagevecs to lru lists */
+
 		shp = shm_lock_check(ns, shmid);
 		if (IS_ERR(shp)) {
 			err = PTR_ERR(shp);
diff --git a/mm/shmem.c b/mm/shmem.c
index fc2ccf79a776..d38d7e61fcd0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1477,12 +1477,16 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 		if (!user_shm_lock(inode->i_size, user))
 			goto out_nomem;
 		info->flags |= VM_LOCKED;
+		mapping_set_unevictable(file->f_mapping);
 	}
 	if (!lock && (info->flags & VM_LOCKED) && user) {
 		user_shm_unlock(inode->i_size, user);
 		info->flags &= ~VM_LOCKED;
+		mapping_clear_unevictable(file->f_mapping);
+		scan_mapping_unevictable_pages(file->f_mapping);
 	}
 	retval = 0;
+
 out_nomem:
 	spin_unlock(&info->lock);
 	return retval;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9babfbc1ddc8..dfb342e0db9b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2346,4 +2346,93 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
 
 	return 1;
 }
+
+/**
+ * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
+ * @page: page to check evictability and move to appropriate lru list
+ * @zone: zone page is in
+ *
+ * Checks a page for evictability and moves the page to the appropriate
+ * zone lru list.
+ *
+ * Restrictions: zone->lru_lock must be held, page must be on LRU and must
+ * have PageUnevictable set.
+ */
+static void check_move_unevictable_page(struct page *page, struct zone *zone)
+{
+	VM_BUG_ON(PageActive(page));
+
+retry:
+	ClearPageUnevictable(page);
+	if (page_evictable(page, NULL)) {
+		enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
+		__dec_zone_state(zone, NR_UNEVICTABLE);
+		list_move(&page->lru, &zone->lru[l].list);
+		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
+		__count_vm_event(UNEVICTABLE_PGRESCUED);
+	} else {
+		/*
+		 * rotate unevictable list
+		 */
+		SetPageUnevictable(page);
+		list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+		if (page_evictable(page, NULL))
+			goto retry;
+	}
+}
+
+/**
+ * scan_mapping_unevictable_pages - scan an address space for evictable pages
+ * @mapping: struct address_space to scan for evictable pages
+ *
+ * Scan all pages in mapping.  Check unevictable pages for
+ * evictability and move them to the appropriate zone lru list.
+ */
+void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+	pgoff_t next = 0;
+	pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
+			 PAGE_CACHE_SHIFT;
+	struct zone *zone;
+	struct pagevec pvec;
+
+	if (mapping->nrpages == 0)
+		return;
+
+	pagevec_init(&pvec, 0);
+	while (next < end &&
+		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+		int i;
+		int pg_scanned = 0;
+
+		zone = NULL;
+
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+			pgoff_t page_index = page->index;
+			struct zone *pagezone = page_zone(page);
+
+			pg_scanned++;
+			if (page_index > next)
+				next = page_index;
+			next++;
+
+			if (pagezone != zone) {
+				if (zone)
+					spin_unlock_irq(&zone->lru_lock);
+				zone = pagezone;
+				spin_lock_irq(&zone->lru_lock);
+			}
+
+			if (PageLRU(page) && PageUnevictable(page))
+				check_move_unevictable_page(page, zone);
+		}
+		if (zone)
+			spin_unlock_irq(&zone->lru_lock);
+		pagevec_release(&pvec);
+
+		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
+	}
+
+}
 #endif
-- 
GitLab


From b291f000393f5a0b679012b39d79fbc85c018233 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 18 Oct 2008 20:26:44 -0700
Subject: [PATCH 705/895] mlock: mlocked pages are unevictable

Make sure that mlocked pages also live on the unevictable LRU, so kswapd
will not scan them over and over again.

This is achieved through various strategies:

1) add yet another page flag--PG_mlocked--to indicate that
   the page is locked for efficient testing in vmscan and,
   optionally, fault path.  This allows early culling of
   unevictable pages, preventing them from getting to
   page_referenced()/try_to_unmap().  Also allows separate
   accounting of mlock'd pages, as Nick's original patch
   did.

   Note:  Nick's original mlock patch used a PG_mlocked
   flag.  I had removed this in favor of the PG_unevictable
   flag + an mlock_count [new page struct member].  I
   restored the PG_mlocked flag to eliminate the new
   count field.

2) add the mlock/unevictable infrastructure to mm/mlock.c,
   with internal APIs in mm/internal.h.  This is a rework
   of Nick's original patch to these files, taking into
   account that mlocked pages are now kept on unevictable
   LRU list.

3) update vmscan.c:page_evictable() to check PageMlocked()
   and, if vma passed in, the vm_flags.  Note that the vma
   will only be passed in for new pages in the fault path;
   and then only if the "cull unevictable pages in fault
   path" patch is included.

4) add try_to_unlock() to rmap.c to walk a page's rmap and
   ClearPageMlocked() if no other vmas have it mlocked.
   Reuses as much of try_to_unmap() as possible.  This
   effectively replaces the use of one of the lru list links
   as an mlock count.  If this mechanism let's pages in mlocked
   vmas leak through w/o PG_mlocked set [I don't know that it
   does], we should catch them later in try_to_unmap().  One
   hopes this will be rare, as it will be relatively expensive.

Original mm/internal.h, mm/rmap.c and mm/mlock.c changes:
Signed-off-by: Nick Piggin <npiggin@suse.de>

splitlru: introduce __get_user_pages():

  New munlock processing need to GUP_FLAGS_IGNORE_VMA_PERMISSIONS.
  because current get_user_pages() can't grab PROT_NONE pages theresore it
  cause PROT_NONE pages can't munlock.

[akpm@linux-foundation.org: fix this for pagemap-pass-mm-into-pagewalkers.patch]
[akpm@linux-foundation.org: untangle patch interdependencies]
[akpm@linux-foundation.org: fix things after out-of-order merging]
[hugh@veritas.com: fix page-flags mess]
[lee.schermerhorn@hp.com: fix munlock page table walk - now requires 'mm']
[kosaki.motohiro@jp.fujitsu.com: build fix]
[kosaki.motohiro@jp.fujitsu.com: fix truncate race and sevaral comments]
[kosaki.motohiro@jp.fujitsu.com: splitlru: introduce __get_user_pages()]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h         |   5 +
 include/linux/page-flags.h |  19 +-
 include/linux/rmap.h       |  14 ++
 mm/internal.h              |  71 +++++++
 mm/memory.c                |  56 +++++-
 mm/migrate.c               |   2 +
 mm/mlock.c                 | 394 +++++++++++++++++++++++++++++++++++--
 mm/mmap.c                  |   2 -
 mm/nommu.c                 |  44 +++--
 mm/page_alloc.c            |   6 +-
 mm/rmap.c                  | 257 ++++++++++++++++++++----
 mm/swap.c                  |   2 +-
 mm/vmscan.c                |  36 +++-
 13 files changed, 817 insertions(+), 91 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 40236290e2ae..ffee2f743418 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -131,6 +131,11 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_SequentialReadHint(v)	((v)->vm_flags & VM_SEQ_READ)
 #define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
 
+/*
+ * special vmas that are non-mergable, non-mlock()able
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+
 /*
  * mapping from the currently active vm_flags protection bits (the
  * low four bits) to a page protection mask..
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index ec1a1baad348..b12f93a3c345 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -96,6 +96,7 @@ enum pageflags {
 	PG_swapbacked,		/* Page is backed by RAM/swap */
 #ifdef CONFIG_UNEVICTABLE_LRU
 	PG_unevictable,		/* Page is "unevictable"  */
+	PG_mlocked,		/* Page is vma mlocked */
 #endif
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 	PG_uncached,		/* Page has been mapped as uncached */
@@ -232,7 +233,17 @@ PAGEFLAG_FALSE(SwapCache)
 #ifdef CONFIG_UNEVICTABLE_LRU
 PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
 	TESTCLEARFLAG(Unevictable, unevictable)
+
+#define MLOCK_PAGES 1
+PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
+	TESTSCFLAG(Mlocked, mlocked)
+
 #else
+
+#define MLOCK_PAGES 0
+PAGEFLAG_FALSE(Mlocked)
+	SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked)
+
 PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable)
 	SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable)
 	__CLEARPAGEFLAG_NOOP(Unevictable)
@@ -354,15 +365,17 @@ static inline void __ClearPageTail(struct page *page)
 #endif /* !PAGEFLAGS_EXTENDED */
 
 #ifdef CONFIG_UNEVICTABLE_LRU
-#define __PG_UNEVICTABLE (1 << PG_unevictable)
+#define __PG_UNEVICTABLE	(1 << PG_unevictable)
+#define __PG_MLOCKED		(1 << PG_mlocked)
 #else
-#define __PG_UNEVICTABLE 0
+#define __PG_UNEVICTABLE	0
+#define __PG_MLOCKED		0
 #endif
 
 #define PAGE_FLAGS	(1 << PG_lru   | 1 << PG_private   | 1 << PG_locked | \
 			 1 << PG_buddy | 1 << PG_writeback | \
 			 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active | \
-			 __PG_UNEVICTABLE)
+			 __PG_UNEVICTABLE | __PG_MLOCKED)
 
 /*
  * Flags checked in bad_page().  Pages on the free list should not have
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index fed6f5e0b411..955667e6a52d 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -117,6 +117,19 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
  */
 int page_mkclean(struct page *);
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * called in munlock()/munmap() path to check for other vmas holding
+ * the page mlocked.
+ */
+int try_to_munlock(struct page *);
+#else
+static inline int try_to_munlock(struct page *page)
+{
+	return 0;	/* a.k.a. SWAP_SUCCESS */
+}
+#endif
+
 #else	/* !CONFIG_MMU */
 
 #define anon_vma_init()		do {} while (0)
@@ -140,5 +153,6 @@ static inline int page_mkclean(struct page *page)
 #define SWAP_SUCCESS	0
 #define SWAP_AGAIN	1
 #define SWAP_FAIL	2
+#define SWAP_MLOCK	3
 
 #endif	/* _LINUX_RMAP_H */
diff --git a/mm/internal.h b/mm/internal.h
index 3db17b2a1ac6..4ebf0bef9a39 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -61,6 +61,10 @@ static inline unsigned long page_order(struct page *page)
 	return page_private(page);
 }
 
+extern int mlock_vma_pages_range(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end);
+extern void munlock_vma_pages_all(struct vm_area_struct *vma);
+
 #ifdef CONFIG_UNEVICTABLE_LRU
 /*
  * unevictable_migrate_page() called only from migrate_page_copy() to
@@ -79,6 +83,65 @@ static inline void unevictable_migrate_page(struct page *new, struct page *old)
 }
 #endif
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * Called only in fault path via page_evictable() for a new page
+ * to determine if it's being mapped into a LOCKED vma.
+ * If so, mark page as mlocked.
+ */
+static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page)
+{
+	VM_BUG_ON(PageLRU(page));
+
+	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
+		return 0;
+
+	SetPageMlocked(page);
+	return 1;
+}
+
+/*
+ * must be called with vma's mmap_sem held for read, and page locked.
+ */
+extern void mlock_vma_page(struct page *page);
+
+/*
+ * Clear the page's PageMlocked().  This can be useful in a situation where
+ * we want to unconditionally remove a page from the pagecache -- e.g.,
+ * on truncation or freeing.
+ *
+ * It is legal to call this function for any page, mlocked or not.
+ * If called for a page that is still mapped by mlocked vmas, all we do
+ * is revert to lazy LRU behaviour -- semantics are not broken.
+ */
+extern void __clear_page_mlock(struct page *page);
+static inline void clear_page_mlock(struct page *page)
+{
+	if (unlikely(TestClearPageMlocked(page)))
+		__clear_page_mlock(page);
+}
+
+/*
+ * mlock_migrate_page - called only from migrate_page_copy() to
+ * migrate the Mlocked page flag
+ */
+static inline void mlock_migrate_page(struct page *newpage, struct page *page)
+{
+	if (TestClearPageMlocked(page))
+		SetPageMlocked(newpage);
+}
+
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
+{
+	return 0;
+}
+static inline void clear_page_mlock(struct page *page) { }
+static inline void mlock_vma_page(struct page *page) { }
+static inline void mlock_migrate_page(struct page *new, struct page *old) { }
+
+#endif /* CONFIG_UNEVICTABLE_LRU */
 
 /*
  * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
@@ -148,4 +211,12 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
 }
 #endif /* CONFIG_SPARSEMEM */
 
+#define GUP_FLAGS_WRITE                  0x1
+#define GUP_FLAGS_FORCE                  0x2
+#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
+
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		     unsigned long start, int len, int flags,
+		     struct page **pages, struct vm_area_struct **vmas);
+
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 71cdefd1ef14..9fef7272fb9e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -64,6 +64,8 @@
 
 #include "internal.h"
 
+#include "internal.h"
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -1129,12 +1131,17 @@ static inline int use_zero_page(struct vm_area_struct *vma)
 	return !vma->vm_ops || !vma->vm_ops->fault;
 }
 
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-		unsigned long start, int len, int write, int force,
+
+
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		     unsigned long start, int len, int flags,
 		struct page **pages, struct vm_area_struct **vmas)
 {
 	int i;
-	unsigned int vm_flags;
+	unsigned int vm_flags = 0;
+	int write = !!(flags & GUP_FLAGS_WRITE);
+	int force = !!(flags & GUP_FLAGS_FORCE);
+	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
 
 	if (len <= 0)
 		return 0;
@@ -1158,7 +1165,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			pud_t *pud;
 			pmd_t *pmd;
 			pte_t *pte;
-			if (write) /* user gate pages are read-only */
+
+			/* user gate pages are read-only */
+			if (!ignore && write)
 				return i ? : -EFAULT;
 			if (pg > TASK_SIZE)
 				pgd = pgd_offset_k(pg);
@@ -1190,8 +1199,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			continue;
 		}
 
-		if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
-				|| !(vm_flags & vma->vm_flags))
+		if (!vma ||
+		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
+		    (!ignore && !(vm_flags & vma->vm_flags)))
 			return i ? : -EFAULT;
 
 		if (is_vm_hugetlb_page(vma)) {
@@ -1266,6 +1276,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	} while (len);
 	return i;
 }
+
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		unsigned long start, int len, int write, int force,
+		struct page **pages, struct vm_area_struct **vmas)
+{
+	int flags = 0;
+
+	if (write)
+		flags |= GUP_FLAGS_WRITE;
+	if (force)
+		flags |= GUP_FLAGS_FORCE;
+
+	return __get_user_pages(tsk, mm,
+				start, len, flags,
+				pages, vmas);
+}
+
 EXPORT_SYMBOL(get_user_pages);
 
 pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@ -1858,6 +1885,15 @@ gotten:
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (!new_page)
 		goto oom;
+	/*
+	 * Don't let another task, with possibly unlocked vma,
+	 * keep the mlocked page.
+	 */
+	if (vma->vm_flags & VM_LOCKED) {
+		lock_page(old_page);	/* for LRU manipulation */
+		clear_page_mlock(old_page);
+		unlock_page(old_page);
+	}
 	cow_user_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);
 
@@ -2325,7 +2361,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page_add_anon_rmap(page, vma, address);
 
 	swap_free(entry);
-	if (vm_swap_full())
+	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
 		remove_exclusive_swap_page(page);
 	unlock_page(page);
 
@@ -2465,6 +2501,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 				ret = VM_FAULT_OOM;
 				goto out;
 			}
+			/*
+			 * Don't let another task, with possibly unlocked vma,
+			 * keep the mlocked page.
+			 */
+			if (vma->vm_flags & VM_LOCKED)
+				clear_page_mlock(vmf.page);
 			copy_user_highpage(page, vmf.page, address, vma);
 			__SetPageUptodate(page);
 		} else {
diff --git a/mm/migrate.c b/mm/migrate.c
index b10237d8b459..6802a7a3dfec 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -371,6 +371,8 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 		__set_page_dirty_nobuffers(newpage);
  	}
 
+	mlock_migrate_page(newpage, page);
+
 #ifdef CONFIG_SWAP
 	ClearPageSwapCache(page);
 #endif
diff --git a/mm/mlock.c b/mm/mlock.c
index 01fbe93eff5c..8746fe3f9730 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -8,10 +8,18 @@
 #include <linux/capability.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pagemap.h>
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/rmap.h>
+#include <linux/mmzone.h>
+#include <linux/hugetlb.h>
+
+#include "internal.h"
 
 int can_do_mlock(void)
 {
@@ -23,17 +31,360 @@ int can_do_mlock(void)
 }
 EXPORT_SYMBOL(can_do_mlock);
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * Mlocked pages are marked with PageMlocked() flag for efficient testing
+ * in vmscan and, possibly, the fault path; and to support semi-accurate
+ * statistics.
+ *
+ * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
+ * be placed on the LRU "unevictable" list, rather than the [in]active lists.
+ * The unevictable list is an LRU sibling list to the [in]active lists.
+ * PageUnevictable is set to indicate the unevictable state.
+ *
+ * When lazy mlocking via vmscan, it is important to ensure that the
+ * vma's VM_LOCKED status is not concurrently being modified, otherwise we
+ * may have mlocked a page that is being munlocked. So lazy mlock must take
+ * the mmap_sem for read, and verify that the vma really is locked
+ * (see mm/rmap.c).
+ */
+
+/*
+ *  LRU accounting for clear_page_mlock()
+ */
+void __clear_page_mlock(struct page *page)
+{
+	VM_BUG_ON(!PageLocked(page));
+
+	if (!page->mapping) {	/* truncated ? */
+		return;
+	}
+
+	if (!isolate_lru_page(page)) {
+		putback_lru_page(page);
+	} else {
+		/*
+		 * Page not on the LRU yet.  Flush all pagevecs and retry.
+		 */
+		lru_add_drain_all();
+		if (!isolate_lru_page(page))
+			putback_lru_page(page);
+	}
+}
+
+/*
+ * Mark page as mlocked if not already.
+ * If page on LRU, isolate and putback to move to unevictable list.
+ */
+void mlock_vma_page(struct page *page)
+{
+	BUG_ON(!PageLocked(page));
+
+	if (!TestSetPageMlocked(page) && !isolate_lru_page(page))
+		putback_lru_page(page);
+}
+
+/*
+ * called from munlock()/munmap() path with page supposedly on the LRU.
+ *
+ * Note:  unlike mlock_vma_page(), we can't just clear the PageMlocked
+ * [in try_to_munlock()] and then attempt to isolate the page.  We must
+ * isolate the page to keep others from messing with its unevictable
+ * and mlocked state while trying to munlock.  However, we pre-clear the
+ * mlocked state anyway as we might lose the isolation race and we might
+ * not get another chance to clear PageMlocked.  If we successfully
+ * isolate the page and try_to_munlock() detects other VM_LOCKED vmas
+ * mapping the page, it will restore the PageMlocked state, unless the page
+ * is mapped in a non-linear vma.  So, we go ahead and SetPageMlocked(),
+ * perhaps redundantly.
+ * If we lose the isolation race, and the page is mapped by other VM_LOCKED
+ * vmas, we'll detect this in vmscan--via try_to_munlock() or try_to_unmap()
+ * either of which will restore the PageMlocked state by calling
+ * mlock_vma_page() above, if it can grab the vma's mmap sem.
+ */
+static void munlock_vma_page(struct page *page)
+{
+	BUG_ON(!PageLocked(page));
+
+	if (TestClearPageMlocked(page) && !isolate_lru_page(page)) {
+		try_to_munlock(page);
+		putback_lru_page(page);
+	}
+}
+
+/*
+ * mlock a range of pages in the vma.
+ *
+ * This takes care of making the pages present too.
+ *
+ * vma->vm_mm->mmap_sem must be held for write.
+ */
+static int __mlock_vma_pages_range(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long addr = start;
+	struct page *pages[16]; /* 16 gives a reasonable batch */
+	int write = !!(vma->vm_flags & VM_WRITE);
+	int nr_pages = (end - start) / PAGE_SIZE;
+	int ret;
+
+	VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK);
+	VM_BUG_ON(start < vma->vm_start || end > vma->vm_end);
+	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
+
+	lru_add_drain_all();	/* push cached pages to LRU */
+
+	while (nr_pages > 0) {
+		int i;
+
+		cond_resched();
+
+		/*
+		 * get_user_pages makes pages present if we are
+		 * setting mlock. and this extra reference count will
+		 * disable migration of this page.  However, page may
+		 * still be truncated out from under us.
+		 */
+		ret = get_user_pages(current, mm, addr,
+				min_t(int, nr_pages, ARRAY_SIZE(pages)),
+				write, 0, pages, NULL);
+		/*
+		 * This can happen for, e.g., VM_NONLINEAR regions before
+		 * a page has been allocated and mapped at a given offset,
+		 * or for addresses that map beyond end of a file.
+		 * We'll mlock the the pages if/when they get faulted in.
+		 */
+		if (ret < 0)
+			break;
+		if (ret == 0) {
+			/*
+			 * We know the vma is there, so the only time
+			 * we cannot get a single page should be an
+			 * error (ret < 0) case.
+			 */
+			WARN_ON(1);
+			break;
+		}
+
+		lru_add_drain();	/* push cached pages to LRU */
+
+		for (i = 0; i < ret; i++) {
+			struct page *page = pages[i];
+
+			lock_page(page);
+			/*
+			 * Because we lock page here and migration is blocked
+			 * by the elevated reference, we need only check for
+			 * page truncation (file-cache only).
+			 */
+			if (page->mapping)
+				mlock_vma_page(page);
+			unlock_page(page);
+			put_page(page);		/* ref from get_user_pages() */
+
+			/*
+			 * here we assume that get_user_pages() has given us
+			 * a list of virtually contiguous pages.
+			 */
+			addr += PAGE_SIZE;	/* for next get_user_pages() */
+			nr_pages--;
+		}
+	}
+
+	lru_add_drain_all();	/* to update stats */
+
+	return 0;	/* count entire vma as locked_vm */
+}
+
+/*
+ * private structure for munlock page table walk
+ */
+struct munlock_page_walk {
+	struct vm_area_struct *vma;
+	pmd_t                 *pmd; /* for migration_entry_wait() */
+};
+
+/*
+ * munlock normal pages for present ptes
+ */
+static int __munlock_pte_handler(pte_t *ptep, unsigned long addr,
+				   unsigned long end, struct mm_walk *walk)
+{
+	struct munlock_page_walk *mpw = walk->private;
+	swp_entry_t entry;
+	struct page *page;
+	pte_t pte;
+
+retry:
+	pte = *ptep;
+	/*
+	 * If it's a swap pte, we might be racing with page migration.
+	 */
+	if (unlikely(!pte_present(pte))) {
+		if (!is_swap_pte(pte))
+			goto out;
+		entry = pte_to_swp_entry(pte);
+		if (is_migration_entry(entry)) {
+			migration_entry_wait(mpw->vma->vm_mm, mpw->pmd, addr);
+			goto retry;
+		}
+		goto out;
+	}
+
+	page = vm_normal_page(mpw->vma, addr, pte);
+	if (!page)
+		goto out;
+
+	lock_page(page);
+	if (!page->mapping) {
+		unlock_page(page);
+		goto retry;
+	}
+	munlock_vma_page(page);
+	unlock_page(page);
+
+out:
+	return 0;
+}
+
+/*
+ * Save pmd for pte handler for waiting on migration entries
+ */
+static int __munlock_pmd_handler(pmd_t *pmd, unsigned long addr,
+				 unsigned long end, struct mm_walk *walk)
+{
+	struct munlock_page_walk *mpw = walk->private;
+
+	mpw->pmd = pmd;
+	return 0;
+}
+
+
+/*
+ * munlock a range of pages in the vma using standard page table walk.
+ *
+ * vma->vm_mm->mmap_sem must be held for write.
+ */
+static void __munlock_vma_pages_range(struct vm_area_struct *vma,
+			      unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct munlock_page_walk mpw = {
+		.vma = vma,
+	};
+	struct mm_walk munlock_page_walk = {
+		.pmd_entry = __munlock_pmd_handler,
+		.pte_entry = __munlock_pte_handler,
+		.private = &mpw,
+		.mm = mm,
+	};
+
+	VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK);
+	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
+	VM_BUG_ON(start < vma->vm_start);
+	VM_BUG_ON(end > vma->vm_end);
+
+	lru_add_drain_all();	/* push cached pages to LRU */
+	walk_page_range(start, end, &munlock_page_walk);
+	lru_add_drain_all();	/* to update stats */
+}
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+
+/*
+ * Just make pages present if VM_LOCKED.  No-op if unlocking.
+ */
+static int __mlock_vma_pages_range(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end)
+{
+	if (vma->vm_flags & VM_LOCKED)
+		make_pages_present(start, end);
+	return 0;
+}
+
+/*
+ * munlock a range of pages in the vma -- no-op.
+ */
+static void __munlock_vma_pages_range(struct vm_area_struct *vma,
+			      unsigned long start, unsigned long end)
+{
+}
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
+/*
+ * mlock all pages in this vma range.  For mmap()/mremap()/...
+ */
+int mlock_vma_pages_range(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end)
+{
+	int nr_pages = (end - start) / PAGE_SIZE;
+	BUG_ON(!(vma->vm_flags & VM_LOCKED));
+
+	/*
+	 * filter unlockable vmas
+	 */
+	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+		goto no_mlock;
+
+	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
+			is_vm_hugetlb_page(vma) ||
+			vma == get_gate_vma(current)))
+		return __mlock_vma_pages_range(vma, start, end);
+
+	/*
+	 * User mapped kernel pages or huge pages:
+	 * make these pages present to populate the ptes, but
+	 * fall thru' to reset VM_LOCKED--no need to unlock, and
+	 * return nr_pages so these don't get counted against task's
+	 * locked limit.  huge pages are already counted against
+	 * locked vm limit.
+	 */
+	make_pages_present(start, end);
+
+no_mlock:
+	vma->vm_flags &= ~VM_LOCKED;	/* and don't come back! */
+	return nr_pages;		/* pages NOT mlocked */
+}
+
+
+/*
+ * munlock all pages in vma.   For munmap() and exit().
+ */
+void munlock_vma_pages_all(struct vm_area_struct *vma)
+{
+	vma->vm_flags &= ~VM_LOCKED;
+	__munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
+}
+
+/*
+ * mlock_fixup  - handle mlock[all]/munlock[all] requests.
+ *
+ * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
+ * munlock is a no-op.  However, for some special vmas, we go ahead and
+ * populate the ptes via make_pages_present().
+ *
+ * For vmas that pass the filters, merge/split as appropriate.
+ */
 static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	unsigned long start, unsigned long end, unsigned int newflags)
 {
-	struct mm_struct * mm = vma->vm_mm;
+	struct mm_struct *mm = vma->vm_mm;
 	pgoff_t pgoff;
-	int pages;
+	int nr_pages;
 	int ret = 0;
-
-	if (newflags == vma->vm_flags) {
-		*prev = vma;
-		goto out;
+	int lock = newflags & VM_LOCKED;
+
+	if (newflags == vma->vm_flags ||
+			(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		goto out;	/* don't set VM_LOCKED,  don't count */
+
+	if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
+			is_vm_hugetlb_page(vma) ||
+			vma == get_gate_vma(current)) {
+		if (lock)
+			make_pages_present(start, end);
+		goto out;	/* don't set VM_LOCKED,  don't count */
 	}
 
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
@@ -44,8 +395,6 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		goto success;
 	}
 
-	*prev = vma;
-
 	if (start != vma->vm_start) {
 		ret = split_vma(mm, vma, start, 1);
 		if (ret)
@@ -59,25 +408,32 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	}
 
 success:
+	/*
+	 * Keep track of amount of locked VM.
+	 */
+	nr_pages = (end - start) >> PAGE_SHIFT;
+	if (!lock)
+		nr_pages = -nr_pages;
+	mm->locked_vm += nr_pages;
+
 	/*
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 * It's okay if try_to_unmap_one unmaps a page just after we
-	 * set VM_LOCKED, make_pages_present below will bring it back.
+	 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
 	 */
 	vma->vm_flags = newflags;
 
-	/*
-	 * Keep track of amount of locked VM.
-	 */
-	pages = (end - start) >> PAGE_SHIFT;
-	if (newflags & VM_LOCKED) {
-		pages = -pages;
-		if (!(newflags & VM_IO))
-			ret = make_pages_present(start, end);
-	}
+	if (lock) {
+		ret = __mlock_vma_pages_range(vma, start, end);
+		if (ret > 0) {
+			mm->locked_vm -= ret;
+			ret = 0;
+		}
+	} else
+		__munlock_vma_pages_range(vma, start, end);
 
-	mm->locked_vm -= pages;
 out:
+	*prev = vma;
 	return ret;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index e7a5a68a9c2e..7bdfd2661f17 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -662,8 +662,6 @@ again:			remove_next = 1 + (end > next->vm_end);
  * If the vma has a ->close operation then the driver probably needs to release
  * per-vma resources, so we don't attempt to merge those.
  */
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
-
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
 			struct file *file, unsigned long vm_flags)
 {
diff --git a/mm/nommu.c b/mm/nommu.c
index ed75bc962fbe..2696b24f2bb3 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -34,6 +34,8 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
@@ -128,20 +130,16 @@ unsigned int kobjsize(const void *objp)
 	return PAGE_SIZE << compound_order(page);
 }
 
-/*
- * get a list of pages in an address range belonging to the specified process
- * and indicate the VMA that covers each page
- * - this is potentially dodgy as we may end incrementing the page count of a
- *   slab page or a secondary page from a compound page
- * - don't permit access to VMAs that don't support it, such as I/O mappings
- */
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-	unsigned long start, int len, int write, int force,
-	struct page **pages, struct vm_area_struct **vmas)
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		     unsigned long start, int len, int flags,
+		struct page **pages, struct vm_area_struct **vmas)
 {
 	struct vm_area_struct *vma;
 	unsigned long vm_flags;
 	int i;
+	int write = !!(flags & GUP_FLAGS_WRITE);
+	int force = !!(flags & GUP_FLAGS_FORCE);
+	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
 
 	/* calculate required read or write permissions.
 	 * - if 'force' is set, we only require the "MAY" flags.
@@ -156,7 +154,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
 		/* protect what we can, including chardevs */
 		if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
-		    !(vm_flags & vma->vm_flags))
+		    (!ignore && !(vm_flags & vma->vm_flags)))
 			goto finish_or_fault;
 
 		if (pages) {
@@ -174,6 +172,30 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 finish_or_fault:
 	return i ? : -EFAULT;
 }
+
+
+/*
+ * get a list of pages in an address range belonging to the specified process
+ * and indicate the VMA that covers each page
+ * - this is potentially dodgy as we may end incrementing the page count of a
+ *   slab page or a secondary page from a compound page
+ * - don't permit access to VMAs that don't support it, such as I/O mappings
+ */
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+	unsigned long start, int len, int write, int force,
+	struct page **pages, struct vm_area_struct **vmas)
+{
+	int flags = 0;
+
+	if (write)
+		flags |= GUP_FLAGS_WRITE;
+	if (force)
+		flags |= GUP_FLAGS_FORCE;
+
+	return __get_user_pages(tsk, mm,
+				start, len, flags,
+				pages, vmas);
+}
 EXPORT_SYMBOL(get_user_pages);
 
 DEFINE_RWLOCK(vmlist_lock);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4125230a1b2c..5886586fde6c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -616,7 +616,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
 			1 << PG_referenced | 1 << PG_arch_1 |
-			1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
+			1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
+#ifdef CONFIG_UNEVICTABLE_LRU
+			| 1 << PG_mlocked
+#endif
+			);
 	set_page_private(page, 0);
 	set_page_refcounted(page);
 
diff --git a/mm/rmap.c b/mm/rmap.c
index e8d639b16c6d..7e60df99018e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -53,6 +53,8 @@
 
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 struct kmem_cache *anon_vma_cachep;
 
 /**
@@ -290,6 +292,32 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	return NULL;
 }
 
+/**
+ * page_mapped_in_vma - check whether a page is really mapped in a VMA
+ * @page: the page to test
+ * @vma: the VMA to test
+ *
+ * Returns 1 if the page is mapped into the page tables of the VMA, 0
+ * if the page is not mapped into the page tables of this VMA.  Only
+ * valid for normal file or anonymous VMAs.
+ */
+static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+{
+	unsigned long address;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	address = vma_address(page, vma);
+	if (address == -EFAULT)		/* out of vma range */
+		return 0;
+	pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
+	if (!pte)			/* the page is not in this mm */
+		return 0;
+	pte_unmap_unlock(pte, ptl);
+
+	return 1;
+}
+
 /*
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
@@ -311,10 +339,17 @@ static int page_referenced_one(struct page *page,
 	if (!pte)
 		goto out;
 
+	/*
+	 * Don't want to elevate referenced for mlocked page that gets this far,
+	 * in order that it progresses to try_to_unmap and is moved to the
+	 * unevictable list.
+	 */
 	if (vma->vm_flags & VM_LOCKED) {
-		referenced++;
 		*mapcount = 1;	/* break early from loop */
-	} else if (ptep_clear_flush_young_notify(vma, address, pte))
+		goto out_unmap;
+	}
+
+	if (ptep_clear_flush_young_notify(vma, address, pte))
 		referenced++;
 
 	/* Pretend the page is referenced if the task has the
@@ -323,6 +358,7 @@ static int page_referenced_one(struct page *page,
 			rwsem_is_locked(&mm->mmap_sem))
 		referenced++;
 
+out_unmap:
 	(*mapcount)--;
 	pte_unmap_unlock(pte, ptl);
 out:
@@ -412,11 +448,6 @@ static int page_referenced_file(struct page *page,
 		 */
 		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
-		if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
-				  == (VM_LOCKED|VM_MAYSHARE)) {
-			referenced++;
-			break;
-		}
 		referenced += page_referenced_one(page, vma, &mapcount);
 		if (!mapcount)
 			break;
@@ -739,11 +770,16 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	 * If it's recently referenced (perhaps page_referenced
 	 * skipped over this mm) then we should reactivate it.
 	 */
-	if (!migration && ((vma->vm_flags & VM_LOCKED) ||
-			(ptep_clear_flush_young_notify(vma, address, pte)))) {
-		ret = SWAP_FAIL;
-		goto out_unmap;
-	}
+	if (!migration) {
+		if (vma->vm_flags & VM_LOCKED) {
+			ret = SWAP_MLOCK;
+			goto out_unmap;
+		}
+		if (ptep_clear_flush_young_notify(vma, address, pte)) {
+			ret = SWAP_FAIL;
+			goto out_unmap;
+		}
+  	}
 
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
@@ -824,12 +860,17 @@ out:
  * For very sparsely populated VMAs this is a little inefficient - chances are
  * there there won't be many ptes located within the scan cluster.  In this case
  * maybe we could scan further - to the end of the pte page, perhaps.
+ *
+ * Mlocked pages:  check VM_LOCKED under mmap_sem held for read, if we can
+ * acquire it without blocking.  If vma locked, mlock the pages in the cluster,
+ * rather than unmapping them.  If we encounter the "check_page" that vmscan is
+ * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
  */
 #define CLUSTER_SIZE	min(32*PAGE_SIZE, PMD_SIZE)
 #define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))
 
-static void try_to_unmap_cluster(unsigned long cursor,
-	unsigned int *mapcount, struct vm_area_struct *vma)
+static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
+		struct vm_area_struct *vma, struct page *check_page)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
@@ -841,6 +882,8 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	struct page *page;
 	unsigned long address;
 	unsigned long end;
+	int ret = SWAP_AGAIN;
+	int locked_vma = 0;
 
 	address = (vma->vm_start + cursor) & CLUSTER_MASK;
 	end = address + CLUSTER_SIZE;
@@ -851,15 +894,26 @@ static void try_to_unmap_cluster(unsigned long cursor,
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
-		return;
+		return ret;
 
 	pud = pud_offset(pgd, address);
 	if (!pud_present(*pud))
-		return;
+		return ret;
 
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
-		return;
+		return ret;
+
+	/*
+	 * MLOCK_PAGES => feature is configured.
+	 * if we can acquire the mmap_sem for read, and vma is VM_LOCKED,
+	 * keep the sem while scanning the cluster for mlocking pages.
+	 */
+	if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) {
+		locked_vma = (vma->vm_flags & VM_LOCKED);
+		if (!locked_vma)
+			up_read(&vma->vm_mm->mmap_sem); /* don't need it */
+	}
 
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 
@@ -872,6 +926,13 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		page = vm_normal_page(vma, address, *pte);
 		BUG_ON(!page || PageAnon(page));
 
+		if (locked_vma) {
+			mlock_vma_page(page);   /* no-op if already mlocked */
+			if (page == check_page)
+				ret = SWAP_MLOCK;
+			continue;	/* don't unmap */
+		}
+
 		if (ptep_clear_flush_young_notify(vma, address, pte))
 			continue;
 
@@ -893,39 +954,104 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		(*mapcount)--;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
+	if (locked_vma)
+		up_read(&vma->vm_mm->mmap_sem);
+	return ret;
 }
 
-static int try_to_unmap_anon(struct page *page, int migration)
+/*
+ * common handling for pages mapped in VM_LOCKED vmas
+ */
+static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
+{
+	int mlocked = 0;
+
+	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
+		if (vma->vm_flags & VM_LOCKED) {
+			mlock_vma_page(page);
+			mlocked++;	/* really mlocked the page */
+		}
+		up_read(&vma->vm_mm->mmap_sem);
+	}
+	return mlocked;
+}
+
+/**
+ * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
+ * rmap method
+ * @page: the page to unmap/unlock
+ * @unlock:  request for unlock rather than unmap [unlikely]
+ * @migration:  unmapping for migration - ignored if @unlock
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the anon_vma struct it points to.
+ *
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * anonymous pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * 'LOCKED.
+ */
+static int try_to_unmap_anon(struct page *page, int unlock, int migration)
 {
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
+	unsigned int mlocked = 0;
 	int ret = SWAP_AGAIN;
 
+	if (MLOCK_PAGES && unlikely(unlock))
+		ret = SWAP_SUCCESS;	/* default for try_to_munlock() */
+
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		return ret;
 
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-		ret = try_to_unmap_one(page, vma, migration);
-		if (ret == SWAP_FAIL || !page_mapped(page))
-			break;
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!((vma->vm_flags & VM_LOCKED) &&
+			      page_mapped_in_vma(page, vma)))
+				continue;  /* must visit all unlocked vmas */
+			ret = SWAP_MLOCK;  /* saw at least one mlocked vma */
+		} else {
+			ret = try_to_unmap_one(page, vma, migration);
+			if (ret == SWAP_FAIL || !page_mapped(page))
+				break;
+		}
+		if (ret == SWAP_MLOCK) {
+			mlocked = try_to_mlock_page(page, vma);
+			if (mlocked)
+				break;	/* stop if actually mlocked page */
+		}
 	}
 
 	page_unlock_anon_vma(anon_vma);
+
+	if (mlocked)
+		ret = SWAP_MLOCK;	/* actually mlocked the page */
+	else if (ret == SWAP_MLOCK)
+		ret = SWAP_AGAIN;	/* saw VM_LOCKED vma */
+
 	return ret;
 }
 
 /**
- * try_to_unmap_file - unmap file page using the object-based rmap method
- * @page: the page to unmap
- * @migration: migration flag
+ * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
+ * @page: the page to unmap/unlock
+ * @unlock:  request for unlock rather than unmap [unlikely]
+ * @migration:  unmapping for migration - ignored if @unlock
  *
  * Find all the mappings of a page using the mapping pointer and the vma chains
  * contained in the address_space struct it points to.
  *
- * This function is only called from try_to_unmap for object-based pages.
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * object-based pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * 'LOCKED.
  */
-static int try_to_unmap_file(struct page *page, int migration)
+static int try_to_unmap_file(struct page *page, int unlock, int migration)
 {
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -936,20 +1062,44 @@ static int try_to_unmap_file(struct page *page, int migration)
 	unsigned long max_nl_cursor = 0;
 	unsigned long max_nl_size = 0;
 	unsigned int mapcount;
+	unsigned int mlocked = 0;
+
+	if (MLOCK_PAGES && unlikely(unlock))
+		ret = SWAP_SUCCESS;	/* default for try_to_munlock() */
 
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-		ret = try_to_unmap_one(page, vma, migration);
-		if (ret == SWAP_FAIL || !page_mapped(page))
-			goto out;
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!(vma->vm_flags & VM_LOCKED))
+				continue;	/* must visit all vmas */
+			ret = SWAP_MLOCK;
+		} else {
+			ret = try_to_unmap_one(page, vma, migration);
+			if (ret == SWAP_FAIL || !page_mapped(page))
+				goto out;
+		}
+		if (ret == SWAP_MLOCK) {
+			mlocked = try_to_mlock_page(page, vma);
+			if (mlocked)
+				break;  /* stop if actually mlocked page */
+		}
 	}
 
+	if (mlocked)
+		goto out;
+
 	if (list_empty(&mapping->i_mmap_nonlinear))
 		goto out;
 
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-		if ((vma->vm_flags & VM_LOCKED) && !migration)
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!(vma->vm_flags & VM_LOCKED))
+				continue;	/* must visit all vmas */
+			ret = SWAP_MLOCK;	/* leave mlocked == 0 */
+			goto out;		/* no need to look further */
+		}
+		if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED))
 			continue;
 		cursor = (unsigned long) vma->vm_private_data;
 		if (cursor > max_nl_cursor)
@@ -959,7 +1109,7 @@ static int try_to_unmap_file(struct page *page, int migration)
 			max_nl_size = cursor;
 	}
 
-	if (max_nl_size == 0) {	/* any nonlinears locked or reserved */
+	if (max_nl_size == 0) {	/* all nonlinears locked or reserved ? */
 		ret = SWAP_FAIL;
 		goto out;
 	}
@@ -983,12 +1133,16 @@ static int try_to_unmap_file(struct page *page, int migration)
 	do {
 		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-			if ((vma->vm_flags & VM_LOCKED) && !migration)
+			if (!MLOCK_PAGES && !migration &&
+			    (vma->vm_flags & VM_LOCKED))
 				continue;
 			cursor = (unsigned long) vma->vm_private_data;
 			while ( cursor < max_nl_cursor &&
 				cursor < vma->vm_end - vma->vm_start) {
-				try_to_unmap_cluster(cursor, &mapcount, vma);
+				ret = try_to_unmap_cluster(cursor, &mapcount,
+								vma, page);
+				if (ret == SWAP_MLOCK)
+					mlocked = 2;	/* to return below */
 				cursor += CLUSTER_SIZE;
 				vma->vm_private_data = (void *) cursor;
 				if ((int)mapcount <= 0)
@@ -1009,6 +1163,10 @@ static int try_to_unmap_file(struct page *page, int migration)
 		vma->vm_private_data = NULL;
 out:
 	spin_unlock(&mapping->i_mmap_lock);
+	if (mlocked)
+		ret = SWAP_MLOCK;	/* actually mlocked the page */
+	else if (ret == SWAP_MLOCK)
+		ret = SWAP_AGAIN;	/* saw VM_LOCKED vma */
 	return ret;
 }
 
@@ -1024,6 +1182,7 @@ out:
  * SWAP_SUCCESS	- we succeeded in removing all mappings
  * SWAP_AGAIN	- we missed a mapping, try again later
  * SWAP_FAIL	- the page is unswappable
+ * SWAP_MLOCK	- page is mlocked.
  */
 int try_to_unmap(struct page *page, int migration)
 {
@@ -1032,12 +1191,36 @@ int try_to_unmap(struct page *page, int migration)
 	BUG_ON(!PageLocked(page));
 
 	if (PageAnon(page))
-		ret = try_to_unmap_anon(page, migration);
+		ret = try_to_unmap_anon(page, 0, migration);
 	else
-		ret = try_to_unmap_file(page, migration);
-
-	if (!page_mapped(page))
+		ret = try_to_unmap_file(page, 0, migration);
+	if (ret != SWAP_MLOCK && !page_mapped(page))
 		ret = SWAP_SUCCESS;
 	return ret;
 }
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/**
+ * try_to_munlock - try to munlock a page
+ * @page: the page to be munlocked
+ *
+ * Called from munlock code.  Checks all of the VMAs mapping the page
+ * to make sure nobody else has this page mlocked. The page will be
+ * returned with PG_mlocked cleared if no other vmas have it mlocked.
+ *
+ * Return values are:
+ *
+ * SWAP_SUCCESS	- no vma's holding page mlocked.
+ * SWAP_AGAIN	- page mapped in mlocked vma -- couldn't acquire mmap sem
+ * SWAP_MLOCK	- page is now mlocked.
+ */
+int try_to_munlock(struct page *page)
+{
+	VM_BUG_ON(!PageLocked(page) || PageLRU(page));
+
+	if (PageAnon(page))
+		return try_to_unmap_anon(page, 1, 0);
+	else
+		return try_to_unmap_file(page, 1, 0);
+}
+#endif
diff --git a/mm/swap.c b/mm/swap.c
index fee6b973f143..bc58c1369dd6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -278,7 +278,7 @@ void lru_add_drain(void)
 	put_cpu();
 }
 
-#ifdef CONFIG_NUMA
+#if defined(CONFIG_NUMA) || defined(CONFIG_UNEVICTABLE_LRU)
 static void lru_add_drain_per_cpu(struct work_struct *dummy)
 {
 	lru_add_drain();
diff --git a/mm/vmscan.c b/mm/vmscan.c
index dfb342e0db9b..e5aaaad159ef 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -582,11 +582,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		sc->nr_scanned++;
 
-		if (unlikely(!page_evictable(page, NULL))) {
-			unlock_page(page);
-			putback_lru_page(page);
-			continue;
-		}
+		if (unlikely(!page_evictable(page, NULL)))
+			goto cull_mlocked;
 
 		if (!sc->may_swap && page_mapped(page))
 			goto keep_locked;
@@ -624,9 +621,19 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * Anonymous process memory has backing store?
 		 * Try to allocate it some swap space here.
 		 */
-		if (PageAnon(page) && !PageSwapCache(page))
+		if (PageAnon(page) && !PageSwapCache(page)) {
+			switch (try_to_munlock(page)) {
+			case SWAP_FAIL:		/* shouldn't happen */
+			case SWAP_AGAIN:
+				goto keep_locked;
+			case SWAP_MLOCK:
+				goto cull_mlocked;
+			case SWAP_SUCCESS:
+				; /* fall thru'; add to swap cache */
+			}
 			if (!add_to_swap(page, GFP_ATOMIC))
 				goto activate_locked;
+		}
 #endif /* CONFIG_SWAP */
 
 		mapping = page_mapping(page);
@@ -641,6 +648,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto activate_locked;
 			case SWAP_AGAIN:
 				goto keep_locked;
+			case SWAP_MLOCK:
+				goto cull_mlocked;
 			case SWAP_SUCCESS:
 				; /* try to free the page below */
 			}
@@ -731,6 +740,11 @@ free_it:
 		}
 		continue;
 
+cull_mlocked:
+		unlock_page(page);
+		putback_lru_page(page);
+		continue;
+
 activate_locked:
 		/* Not a candidate for swapping, so reclaim swap space. */
 		if (PageSwapCache(page) && vm_swap_full())
@@ -742,7 +756,7 @@ keep_locked:
 		unlock_page(page);
 keep:
 		list_add(&page->lru, &ret_pages);
-		VM_BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
 	list_splice(&ret_pages, page_list);
 	if (pagevec_count(&freed_pvec))
@@ -2329,12 +2343,13 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
  * @vma: the VMA in which the page is or will be mapped, may be NULL
  *
  * Test whether page is evictable--i.e., should be placed on active/inactive
- * lists vs unevictable list.
+ * lists vs unevictable list.  The vma argument is !NULL when called from the
+ * fault path to determine how to instantate a new page.
  *
  * Reasons page might not be evictable:
  * (1) page's mapping marked unevictable
+ * (2) page is part of an mlocked VMA
  *
- * TODO - later patches
  */
 int page_evictable(struct page *page, struct vm_area_struct *vma)
 {
@@ -2342,7 +2357,8 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
 	if (mapping_unevictable(page_mapping(page)))
 		return 0;
 
-	/* TODO:  test page [!]evictable conditions */
+	if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
+		return 0;
 
 	return 1;
 }
-- 
GitLab


From fa07e787733416c42938a310a8e717295934e33c Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:47 -0700
Subject: [PATCH 706/895] doc: unevictable LRU and mlocked pages documentation

Documentation for unevictable lru list and its usage.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/unevictable-lru.txt | 615 +++++++++++++++++++++++++++
 1 file changed, 615 insertions(+)
 create mode 100644 Documentation/vm/unevictable-lru.txt

diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
new file mode 100644
index 000000000000..125eed560e5a
--- /dev/null
+++ b/Documentation/vm/unevictable-lru.txt
@@ -0,0 +1,615 @@
+
+This document describes the Linux memory management "Unevictable LRU"
+infrastructure and the use of this infrastructure to manage several types
+of "unevictable" pages.  The document attempts to provide the overall
+rationale behind this mechanism and the rationale for some of the design
+decisions that drove the implementation.  The latter design rationale is
+discussed in the context of an implementation description.  Admittedly, one
+can obtain the implementation details--the "what does it do?"--by reading the
+code.  One hopes that the descriptions below add value by provide the answer
+to "why does it do that?".
+
+Unevictable LRU Infrastructure:
+
+The Unevictable LRU adds an additional LRU list to track unevictable pages
+and to hide these pages from vmscan.  This mechanism is based on a patch by
+Larry Woodman of Red Hat to address several scalability problems with page
+reclaim in Linux.  The problems have been observed at customer sites on large
+memory x86_64 systems.  For example, a non-numal x86_64 platform with 128GB
+of main memory will have over 32 million 4k pages in a single zone.  When a
+large fraction of these pages are not evictable for any reason [see below],
+vmscan will spend a lot of time scanning the LRU lists looking for the small
+fraction of pages that are evictable.  This can result in a situation where
+all cpus are spending 100% of their time in vmscan for hours or days on end,
+with the system completely unresponsive.
+
+The Unevictable LRU infrastructure addresses the following classes of
+unevictable pages:
+
++ page owned by ramfs
++ page mapped into SHM_LOCKed shared memory regions
++ page mapped into VM_LOCKED [mlock()ed] vmas
+
+The infrastructure might be able to handle other conditions that make pages
+unevictable, either by definition or by circumstance, in the future.
+
+
+The Unevictable LRU List
+
+The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
+called the "unevictable" list and an associated page flag, PG_unevictable, to
+indicate that the page is being managed on the unevictable list.  The
+PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active
+flag in that it indicates on which LRU list a page resides when PG_lru is set.
+The unevictable LRU list is source configurable based on the UNEVICTABLE_LRU
+Kconfig option.
+
+The Unevictable LRU infrastructure maintains unevictable pages on an additional
+LRU list for a few reasons:
+
+1) We get to "treat unevictable pages just like we treat other pages in the
+   system, which means we get to use the same code to manipulate them, the
+   same code to isolate them (for migrate, etc.), the same code to keep track
+   of the statistics, etc..." [Rik van Riel]
+
+2) We want to be able to migrate unevictable pages between nodes--for memory
+   defragmentation, workload management and memory hotplug.  The linux kernel
+   can only migrate pages that it can successfully isolate from the lru lists.
+   If we were to maintain pages elsewise than on an lru-like list, where they
+   can be found by isolate_lru_page(), we would prevent their migration, unless
+   we reworked migration code to find the unevictable pages.
+
+
+The unevictable LRU list does not differentiate between file backed and swap
+backed [anon] pages.  This differentiation is only important while the pages
+are, in fact, evictable.
+
+The unevictable LRU list benefits from the "arrayification" of the per-zone
+LRU lists and statistics originally proposed and posted by Christoph Lameter.
+
+The unevictable list does not use the lru pagevec mechanism. Rather,
+unevictable pages are placed directly on the page's zone's unevictable
+list under the zone lru_lock.  The reason for this is to prevent stranding
+of pages on the unevictable list when one task has the page isolated from the
+lru and other tasks are changing the "evictability" state of the page.
+
+
+Unevictable LRU and Memory Controller Interaction
+
+The memory controller data structure automatically gets a per zone unevictable
+lru list as a result of the "arrayification" of the per-zone LRU lists.  The
+memory controller tracks the movement of pages to and from the unevictable list.
+When a memory control group comes under memory pressure, the controller will
+not attempt to reclaim pages on the unevictable list.  This has a couple of
+effects.  Because the pages are "hidden" from reclaim on the unevictable list,
+the reclaim process can be more efficient, dealing only with pages that have
+a chance of being reclaimed.  On the other hand, if too many of the pages
+charged to the control group are unevictable, the evictable portion of the
+working set of the tasks in the control group may not fit into the available
+memory.  This can cause the control group to thrash or to oom-kill tasks.
+
+
+Unevictable LRU:  Detecting Unevictable Pages
+
+The function page_evictable(page, vma) in vmscan.c determines whether a
+page is evictable or not.  For ramfs pages and pages in SHM_LOCKed regions,
+page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's
+address space using a wrapper function.  Wrapper functions are used to set,
+clear and test the flag to reduce the requirement for #ifdef's throughout the
+source code.  AS_UNEVICTABLE is set on ramfs inode/mapping when it is created.
+This flag remains for the life of the inode.
+
+For shared memory regions, AS_UNEVICTABLE is set when an application
+successfully SHM_LOCKs the region and is removed when the region is
+SHM_UNLOCKed.  Note that shmctl(SHM_LOCK, ...) does not populate the page
+tables for the region as does, for example, mlock().   So, we make no special
+effort to push any pages in the SHM_LOCKed region to the unevictable list.
+Vmscan will do this when/if it encounters the pages during reclaim.  On
+SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the
+unevictable list if no other condition keeps them unevictable.  If a SHM_LOCKed
+region is destroyed, the pages are also "rescued" from the unevictable list in
+the process of freeing them.
+
+page_evictable() detects mlock()ed pages by testing an additional page flag,
+PG_mlocked via the PageMlocked() wrapper.  If the page is NOT mlocked, and a
+non-NULL vma is supplied, page_evictable() will check whether the vma is
+VM_LOCKED via is_mlocked_vma().  is_mlocked_vma() will SetPageMlocked() and
+update the appropriate statistics if the vma is VM_LOCKED.  This method allows
+efficient "culling" of pages in the fault path that are being faulted in to
+VM_LOCKED vmas.
+
+
+Unevictable Pages and Vmscan [shrink_*_list()]
+
+If unevictable pages are culled in the fault path, or moved to the unevictable
+list at mlock() or mmap() time, vmscan will never encounter the pages until
+they have become evictable again, for example, via munlock() and have been
+"rescued" from the unevictable list.  However, there may be situations where we
+decide, for the sake of expediency, to leave a unevictable page on one of the
+regular active/inactive LRU lists for vmscan to deal with.  Vmscan checks for
+such pages in all of the shrink_{active|inactive|page}_list() functions and
+will "cull" such pages that it encounters--that is, it diverts those pages to
+the unevictable list for the zone being scanned.
+
+There may be situations where a page is mapped into a VM_LOCKED vma, but the
+page is not marked as PageMlocked.  Such pages will make it all the way to
+shrink_page_list() where they will be detected when vmscan walks the reverse
+map in try_to_unmap().  If try_to_unmap() returns SWAP_MLOCK, shrink_page_list()
+will cull the page at that point.
+
+Note that for anonymous pages, shrink_page_list() attempts to add the page to
+the swap cache before it tries to unmap the page.  To avoid this unnecessary
+consumption of swap space, shrink_page_list() calls try_to_munlock() to check
+whether any VM_LOCKED vmas map the page without attempting to unmap the page.
+If try_to_munlock() returns SWAP_MLOCK, shrink_page_list() will cull the page
+without consuming swap space.  try_to_munlock() will be described below.
+
+To "cull" an unevictable page, vmscan simply puts the page back on the lru
+list using putback_lru_page()--the inverse operation to isolate_lru_page()--
+after dropping the page lock.  Because the condition which makes the page
+unevictable may change once the page is unlocked, putback_lru_page() will
+recheck the unevictable state of a page that it places on the unevictable lru
+list.  If the page has become unevictable, putback_lru_page() removes it from
+the list and retries, including the page_unevictable() test.  Because such a
+race is a rare event and movement of pages onto the unevictable list should be
+rare, these extra evictabilty checks should not occur in the majority of calls
+to putback_lru_page().
+
+
+Mlocked Page:  Prior Work
+
+The "Unevictable Mlocked Pages" infrastructure is based on work originally
+posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
+Nick posted his patch as an alternative to a patch posted by Christoph
+Lameter to achieve the same objective--hiding mlocked pages from vmscan.
+In Nick's patch, he used one of the struct page lru list link fields as a count
+of VM_LOCKED vmas that map the page.  This use of the link field for a count
+prevented the management of the pages on an LRU list.  Thus, mlocked pages were
+not migratable as isolate_lru_page() could not find them and the lru list link
+field was not available to the migration subsystem.  Nick resolved this by
+putting mlocked pages back on the lru list before attempting to isolate them,
+thus abandoning the count of VM_LOCKED vmas.  When Nick's patch was integrated
+with the Unevictable LRU work, the count was replaced by walking the reverse
+map to determine whether any VM_LOCKED vmas mapped the page.  More on this
+below.
+
+
+Mlocked Pages:  Basic Management
+
+Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of
+unevictable pages.  When such a page has been "noticed" by the memory
+management subsystem, the page is marked with the PG_mlocked [PageMlocked()]
+flag.  A PageMlocked() page will be placed on the unevictable LRU list when
+it is added to the LRU.   Pages can be "noticed" by memory management in
+several places:
+
+1) in the mlock()/mlockall() system call handlers.
+2) in the mmap() system call handler when mmap()ing a region with the
+   MAP_LOCKED flag, or mmap()ing a region in a task that has called
+   mlockall() with the MCL_FUTURE flag.  Both of these conditions result
+   in the VM_LOCKED flag being set for the vma.
+3) in the fault path, if mlocked pages are "culled" in the fault path,
+   and when a VM_LOCKED stack segment is expanded.
+4) as mentioned above, in vmscan:shrink_page_list() with attempting to
+   reclaim a page in a VM_LOCKED vma--via try_to_unmap() or try_to_munlock().
+
+Mlocked pages become unlocked and rescued from the unevictable list when:
+
+1) mapped in a range unlocked via the munlock()/munlockall() system calls.
+2) munmapped() out of the last VM_LOCKED vma that maps the page, including
+   unmapping at task exit.
+3) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file.
+4) before a page is COWed in a VM_LOCKED vma.
+
+
+Mlocked Pages:  mlock()/mlockall() System Call Handling
+
+Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
+for each vma in the range specified by the call.  In the case of mlockall(),
+this is the entire active address space of the task.  Note that mlock_fixup()
+is used for both mlock()ing and munlock()ing a range of memory.  A call to
+mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED
+is treated as a no-op--mlock_fixup() simply returns.
+
+If the vma passes some filtering described in "Mlocked Pages:  Filtering Vmas"
+below, mlock_fixup() will attempt to merge the vma with its neighbors or split
+off a subset of the vma if the range does not cover the entire vma.  Once the
+vma has been merged or split or neither, mlock_fixup() will call
+__mlock_vma_pages_range() to fault in the pages via get_user_pages() and
+to mark the pages as mlocked via mlock_vma_page().
+
+Note that the vma being mlocked might be mapped with PROT_NONE.  In this case,
+get_user_pages() will be unable to fault in the pages.  That's OK.  If pages
+do end up getting faulted into this VM_LOCKED vma, we'll handle them in the
+fault path or in vmscan.
+
+Also note that a page returned by get_user_pages() could be truncated or
+migrated out from under us, while we're trying to mlock it.  To detect
+this, __mlock_vma_pages_range() tests the page_mapping after acquiring
+the page lock.  If the page is still associated with its mapping, we'll
+go ahead and call mlock_vma_page().  If the mapping is gone, we just
+unlock the page and move on.  Worse case, this results in page mapped
+in a VM_LOCKED vma remaining on a normal LRU list without being
+PageMlocked().  Again, vmscan will detect and cull such pages.
+
+mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will
+TestSetPageMlocked() for each page returned by get_user_pages().  We use
+TestSetPageMlocked() because the page might already be mlocked by another
+task/vma and we don't want to do extra work.  We especially do not want to
+count an mlocked page more than once in the statistics.  If the page was
+already mlocked, mlock_vma_page() is done.
+
+If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
+page from the LRU, as it is likely on the appropriate active or inactive list
+at that time.  If the isolate_lru_page() succeeds, mlock_vma_page() will
+putback the page--putback_lru_page()--which will notice that the page is now
+mlocked and divert the page to the zone's unevictable LRU list.  If
+mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
+it later if/when it attempts to reclaim the page.
+
+
+Mlocked Pages:  Filtering Special Vmas
+
+mlock_fixup() filters several classes of "special" vmas:
+
+1) vmas with VM_IO|VM_PFNMAP set are skipped entirely.  The pages behind
+   these mappings are inherently pinned, so we don't need to mark them as
+   mlocked.  In any case, most of the pages have no struct page in which to
+   so mark the page.  Because of this, get_user_pages() will fail for these
+   vmas, so there is no sense in attempting to visit them.
+
+2) vmas mapping hugetlbfs page are already effectively pinned into memory.
+   We don't need nor want to mlock() these pages.  However, to preserve the
+   prior behavior of mlock()--before the unevictable/mlock changes--mlock_fixup()
+   will call make_pages_present() in the hugetlbfs vma range to allocate the
+   huge pages and populate the ptes.
+
+3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of
+   kernel pages, such as the vdso page, relay channel pages, etc.  These pages
+   are inherently unevictable and are not managed on the LRU lists.
+   mlock_fixup() treats these vmas the same as hugetlbfs vmas.  It calls
+   make_pages_present() to populate the ptes.
+
+Note that for all of these special vmas, mlock_fixup() does not set the
+VM_LOCKED flag.  Therefore, we won't have to deal with them later during
+munlock() or munmap()--for example, at task exit.  Neither does mlock_fixup()
+account these vmas against the task's "locked_vm".
+
+Mlocked Pages:  Downgrading the Mmap Semaphore.
+
+mlock_fixup() must be called with the mmap semaphore held for write, because
+it may have to merge or split vmas.  However, mlocking a large region of
+memory can take a long time--especially if vmscan must reclaim pages to
+satisfy the regions requirements.  Faulting in a large region with the mmap
+semaphore held for write can hold off other faults on the address space, in
+the case of a multi-threaded task.  It can also hold off scans of the task's
+address space via /proc.  While testing under heavy load, it was observed that
+the ps(1) command could be held off for many minutes while a large segment was
+mlock()ed down.
+
+To address this issue, and to make the system more responsive during mlock()ing
+of large segments, mlock_fixup() downgrades the mmap semaphore to read mode
+during the call to __mlock_vma_pages_range().  This works fine.  However, the
+callers of mlock_fixup() expect the semaphore to be returned in write mode.
+So, mlock_fixup() "upgrades" the semphore to write mode.  Linux does not
+support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore
+and reacquire it in write mode.  In a multi-threaded task, it is possible for
+the task memory map to change while the semaphore is dropped.  Therefore,
+mlock_fixup() looks up the vma at the range start address after reacquiring
+the semaphore in write mode and verifies that it still covers the original
+range.  If not, mlock_fixup() returns an error [-EAGAIN].  All callers of
+mlock_fixup() have been changed to deal with this new error condition.
+
+Note:  when munlocking a region, all of the pages should already be resident--
+unless we have racing threads mlocking() and munlocking() regions.  So,
+unlocking should not have to wait for page allocations nor faults  of any kind.
+Therefore mlock_fixup() does not downgrade the semaphore for munlock().
+
+
+Mlocked Pages:  munlock()/munlockall() System Call Handling
+
+The munlock() and munlockall() system calls are handled by the same functions--
+do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock
+vs lock operation indicated by an argument.  So, these system calls are also
+handled by mlock_fixup().  Again, if called for an already munlock()ed vma,
+mlock_fixup() simply returns.  Because of the vma filtering discussed above,
+VM_LOCKED will not be set in any "special" vmas.  So, these vmas will be
+ignored for munlock.
+
+If the vma is VM_LOCKED, mlock_fixup() again attempts to merge or split off
+the specified range.  The range is then munlocked via the function
+__mlock_vma_pages_range()--the same function used to mlock a vma range--
+passing a flag to indicate that munlock() is being performed.
+
+Because the vma access protections could have been changed to PROT_NONE after
+faulting in and mlocking some pages, get_user_pages() was unreliable for visiting
+these pages for munlocking.  Because we don't want to leave pages mlocked(),
+get_user_pages() was enhanced to accept a flag to ignore the permissions when
+fetching the pages--all of which should be resident as a result of previous
+mlock()ing.
+
+For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling
+munlock_vma_page().  munlock_vma_page() unconditionally clears the PG_mlocked
+flag using TestClearPageMlocked().  As with mlock_vma_page(), munlock_vma_page()
+use the Test*PageMlocked() function to handle the case where the page might
+have already been unlocked by another task.  If the page was mlocked,
+munlock_vma_page() updates that zone statistics for the number of mlocked
+pages.  Note, however, that at this point we haven't checked whether the page
+is mapped by other VM_LOCKED vmas.
+
+We can't call try_to_munlock(), the function that walks the reverse map to check
+for other VM_LOCKED vmas, without first isolating the page from the LRU.
+try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
+not be on an lru list.  [More on these below.]  However, the call to
+isolate_lru_page() could fail, in which case we couldn't try_to_munlock().
+So, we go ahead and clear PG_mlocked up front, as this might be the only chance
+we have.  If we can successfully isolate the page, we go ahead and
+try_to_munlock(), which will restore the PG_mlocked flag and update the zone
+page statistics if it finds another vma holding the page mlocked.  If we fail
+to isolate the page, we'll have left a potentially mlocked page on the LRU.
+This is fine, because we'll catch it later when/if vmscan tries to reclaim the
+page.  This should be relatively rare.
+
+Mlocked Pages:  Migrating Them...
+
+A page that is being migrated has been isolated from the lru lists and is
+held locked across unmapping of the page, updating the page's mapping
+[address_space] entry and copying the contents and state, until the
+page table entry has been replaced with an entry that refers to the new
+page.  Linux supports migration of mlocked pages and other unevictable
+pages.  This involves simply moving the PageMlocked and PageUnevictable states
+from the old page to the new page.
+
+Note that page migration can race with mlocking or munlocking of the same
+page.  This has been discussed from the mlock/munlock perspective in the
+respective sections above.  Both processes [migration, m[un]locking], hold
+the page locked.  This provides the first level of synchronization.  Page
+migration zeros out the page_mapping of the old page before unlocking it,
+so m[un]lock can skip these pages by testing the page mapping under page
+lock.
+
+When completing page migration, we place the new and old pages back onto the
+lru after dropping the page lock.  The "unneeded" page--old page on success,
+new page on failure--will be freed when the reference count held by the
+migration process is released.  To ensure that we don't strand pages on the
+unevictable list because of a race between munlock and migration, page
+migration uses the putback_lru_page() function to add migrated pages back to
+the lru.
+
+
+Mlocked Pages:  mmap(MAP_LOCKED) System Call Handling
+
+In addition the the mlock()/mlockall() system calls, an application can request
+that a region of memory be mlocked using the MAP_LOCKED flag with the mmap()
+call.  Furthermore, any mmap() call or brk() call that expands the heap by a
+task that has previously called mlockall() with the MCL_FUTURE flag will result
+in the newly mapped memory being mlocked.  Before the unevictable/mlock changes,
+the kernel simply called make_pages_present() to allocate pages and populate
+the page table.
+
+To mlock a range of memory under the unevictable/mlock infrastructure, the
+mmap() handler and task address space expansion functions call
+mlock_vma_pages_range() specifying the vma and the address range to mlock.
+mlock_vma_pages_range() filters vmas like mlock_fixup(), as described above in
+"Mlocked Pages:  Filtering Vmas".  It will clear the VM_LOCKED flag, which will
+have already been set by the caller, in filtered vmas.  Thus these vma's need
+not be visited for munlock when the region is unmapped.
+
+For "normal" vmas, mlock_vma_pages_range() calls __mlock_vma_pages_range() to
+fault/allocate the pages and mlock them.  Again, like mlock_fixup(),
+mlock_vma_pages_range() downgrades the mmap semaphore to read mode before
+attempting to fault/allocate and mlock the pages; and "upgrades" the semaphore
+back to write mode before returning.
+
+The callers of mlock_vma_pages_range() will have already added the memory
+range to be mlocked to the task's "locked_vm".  To account for filtered vmas,
+mlock_vma_pages_range() returns the number of pages NOT mlocked.  All of the
+callers then subtract a non-negative return value from the task's locked_vm.
+A negative return value represent an error--for example, from get_user_pages()
+attempting to fault in a vma with PROT_NONE access.  In this case, we leave
+the memory range accounted as locked_vm, as the protections could be changed
+later and pages allocated into that region.
+
+
+Mlocked Pages:  munmap()/exit()/exec() System Call Handling
+
+When unmapping an mlocked region of memory, whether by an explicit call to
+munmap() or via an internal unmap from exit() or exec() processing, we must
+munlock the pages if we're removing the last VM_LOCKED vma that maps the pages.
+Before the unevictable/mlock changes, mlocking did not mark the pages in any way,
+so unmapping them required no processing.
+
+To munlock a range of memory under the unevictable/mlock infrastructure, the
+munmap() hander and task address space tear down function call
+munlock_vma_pages_all().  The name reflects the observation that one always
+specifies the entire vma range when munlock()ing during unmap of a region.
+Because of the vma filtering when mlocking() regions, only "normal" vmas that
+actually contain mlocked pages will be passed to munlock_vma_pages_all().
+
+munlock_vma_pages_all() clears the VM_LOCKED vma flag and, like mlock_fixup()
+for the munlock case, calls __munlock_vma_pages_range() to walk the page table
+for the vma's memory range and munlock_vma_page() each resident page mapped by
+the vma.  This effectively munlocks the page, only if this is the last
+VM_LOCKED vma that maps the page.
+
+
+Mlocked Page:  try_to_unmap()
+
+[Note:  the code changes represented by this section are really quite small
+compared to the text to describe what happening and why, and to discuss the
+implications.]
+
+Pages can, of course, be mapped into multiple vmas.  Some of these vmas may
+have VM_LOCKED flag set.  It is possible for a page mapped into one or more
+VM_LOCKED vmas not to have the PG_mlocked flag set and therefore reside on one
+of the active or inactive LRU lists.  This could happen if, for example, a
+task in the process of munlock()ing the page could not isolate the page from
+the LRU.  As a result, vmscan/shrink_page_list() might encounter such a page
+as described in "Unevictable Pages and Vmscan [shrink_*_list()]".  To
+handle this situation, try_to_unmap() has been enhanced to check for VM_LOCKED
+vmas while it is walking a page's reverse map.
+
+try_to_unmap() is always called, by either vmscan for reclaim or for page
+migration, with the argument page locked and isolated from the LRU.  BUG_ON()
+assertions enforce this requirement.  Separate functions handle anonymous and
+mapped file pages, as these types of pages have different reverse map
+mechanisms.
+
+	try_to_unmap_anon()
+
+To unmap anonymous pages, each vma in the list anchored in the anon_vma must be
+visited--at least until a VM_LOCKED vma is encountered.  If the page is being
+unmapped for migration, VM_LOCKED vmas do not stop the process because mlocked
+pages are migratable.  However, for reclaim, if the page is mapped into a
+VM_LOCKED vma, the scan stops.  try_to_unmap() attempts to acquire the mmap
+semphore of the mm_struct to which the vma belongs in read mode.  If this is
+successful, try_to_unmap() will mlock the page via mlock_vma_page()--we
+wouldn't have gotten to try_to_unmap() if the page were already mlocked--and
+will return SWAP_MLOCK, indicating that the page is unevictable.  If the
+mmap semaphore cannot be acquired, we are not sure whether the page is really
+unevictable or not.  In this case, try_to_unmap() will return SWAP_AGAIN.
+
+	try_to_unmap_file() -- linear mappings
+
+Unmapping of a mapped file page works the same, except that the scan visits
+all vmas that maps the page's index/page offset in the page's mapping's
+reverse map priority search tree.  It must also visit each vma in the page's
+mapping's non-linear list, if the list is non-empty.  As for anonymous pages,
+on encountering a VM_LOCKED vma for a mapped file page, try_to_unmap() will
+attempt to acquire the associated mm_struct's mmap semaphore to mlock the page,
+returning SWAP_MLOCK if this is successful, and SWAP_AGAIN, if not.
+
+	try_to_unmap_file() -- non-linear mappings
+
+If a page's mapping contains a non-empty non-linear mapping vma list, then
+try_to_un{map|lock}() must also visit each vma in that list to determine
+whether the page is mapped in a VM_LOCKED vma.  Again, the scan must visit
+all vmas in the non-linear list to ensure that the pages is not/should not be
+mlocked.  If a VM_LOCKED vma is found in the list, the scan could terminate.
+However, there is no easy way to determine whether the page is actually mapped
+in a given vma--either for unmapping or testing whether the VM_LOCKED vma
+actually pins the page.
+
+So, try_to_unmap_file() handles non-linear mappings by scanning a certain
+number of pages--a "cluster"--in each non-linear vma associated with the page's
+mapping, for each file mapped page that vmscan tries to unmap.  If this happens
+to unmap the page we're trying to unmap, try_to_unmap() will notice this on
+return--(page_mapcount(page) == 0)--and return SWAP_SUCCESS.  Otherwise, it
+will return SWAP_AGAIN, causing vmscan to recirculate this page.  We take
+advantage of the cluster scan in try_to_unmap_cluster() as follows:
+
+For each non-linear vma, try_to_unmap_cluster() attempts to acquire the mmap
+semaphore of the associated mm_struct for read without blocking.  If this
+attempt is successful and the vma is VM_LOCKED, try_to_unmap_cluster() will
+retain the mmap semaphore for the scan; otherwise it drops it here.  Then,
+for each page in the cluster, if we're holding the mmap semaphore for a locked
+vma, try_to_unmap_cluster() calls mlock_vma_page() to mlock the page.  This
+call is a no-op if the page is already locked, but will mlock any pages in
+the non-linear mapping that happen to be unlocked.  If one of the pages so
+mlocked is the page passed in to try_to_unmap(), try_to_unmap_cluster() will
+return SWAP_MLOCK, rather than the default SWAP_AGAIN.  This will allow vmscan
+to cull the page, rather than recirculating it on the inactive list.  Again,
+if try_to_unmap_cluster() cannot acquire the vma's mmap sem, it returns
+SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED vma, but
+couldn't be mlocked.
+
+
+Mlocked pages:  try_to_munlock() Reverse Map Scan
+
+TODO/FIXME:  a better name might be page_mlocked()--analogous to the
+page_referenced() reverse map walker--especially if we continue to call this
+from shrink_page_list().  See related TODO/FIXME below.
+
+When munlock_vma_page()--see "Mlocked Pages:  munlock()/munlockall() System
+Call Handling" above--tries to munlock a page, or when shrink_page_list()
+encounters an anonymous page that is not yet in the swap cache, they need to
+determine whether or not the page is mapped by any VM_LOCKED vma, without
+actually attempting to unmap all ptes from the page.  For this purpose, the
+unevictable/mlock infrastructure introduced a variant of try_to_unmap() called
+try_to_munlock().
+
+try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
+mapped file pages with an additional argument specifing unlock versus unmap
+processing.  Again, these functions walk the respective reverse maps looking
+for VM_LOCKED vmas.  When such a vma is found for anonymous pages and file
+pages mapped in linear VMAs, as in the try_to_unmap() case, the functions
+attempt to acquire the associated mmap semphore, mlock the page via
+mlock_vma_page() and return SWAP_MLOCK.  This effectively undoes the
+pre-clearing of the page's PG_mlocked done by munlock_vma_page() and informs
+shrink_page_list() that the anonymous page should be culled rather than added
+to the swap cache in preparation for a try_to_unmap() that will almost
+certainly fail.
+
+If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap
+semaphore, it will return SWAP_AGAIN.  This will allow shrink_page_list()
+to recycle the page on the inactive list and hope that it has better luck
+with the page next time.
+
+For file pages mapped into non-linear vmas, the try_to_munlock() logic works
+slightly differently.  On encountering a VM_LOCKED non-linear vma that might
+map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking
+the page.  munlock_vma_page() will just leave the page unlocked and let
+vmscan deal with it--the usual fallback position.
+
+Note that try_to_munlock()'s reverse map walk must visit every vma in a pages'
+reverse map to determine that a page is NOT mapped into any VM_LOCKED vma.
+However, the scan can terminate when it encounters a VM_LOCKED vma and can
+successfully acquire the vma's mmap semphore for read and mlock the page.
+Although try_to_munlock() can be called many [very many!] times when
+munlock()ing a large region or tearing down a large address space that has been
+mlocked via mlockall(), overall this is a fairly rare event.  In addition,
+although shrink_page_list() calls try_to_munlock() for every anonymous page that
+it handles that is not yet in the swap cache, on average anonymous pages will
+have very short reverse map lists.
+
+Mlocked Page:  Page Reclaim in shrink_*_list()
+
+shrink_active_list() culls any obviously unevictable pages--i.e.,
+!page_evictable(page, NULL)--diverting these to the unevictable lru
+list.  However, shrink_active_list() only sees unevictable pages that
+made it onto the active/inactive lru lists.  Note that these pages do not
+have PageUnevictable set--otherwise, they would be on the unevictable list and
+shrink_active_list would never see them.
+
+Some examples of these unevictable pages on the LRU lists are:
+
+1) ramfs pages that have been placed on the lru lists when first allocated.
+
+2) SHM_LOCKed shared memory pages.  shmctl(SHM_LOCK) does not attempt to
+   allocate or fault in the pages in the shared memory region.  This happens
+   when an application accesses the page the first time after SHM_LOCKing
+   the segment.
+
+3) Mlocked pages that could not be isolated from the lru and moved to the
+   unevictable list in mlock_vma_page().
+
+3) Pages mapped into multiple VM_LOCKED vmas, but try_to_munlock() couldn't
+   acquire the vma's mmap semaphore to test the flags and set PageMlocked.
+   munlock_vma_page() was forced to let the page back on to the normal
+   LRU list for vmscan to handle.
+
+shrink_inactive_list() also culls any unevictable pages that it finds
+on the inactive lists, again diverting them to the appropriate zone's unevictable
+lru list.  shrink_inactive_list() should only see SHM_LOCKed pages that became
+SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or
+pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from
+the lru to recheck via try_to_munlock().  shrink_inactive_list() won't notice
+the latter, but will pass on to shrink_page_list().
+
+shrink_page_list() again culls obviously unevictable pages that it could
+encounter for similar reason to shrink_inactive_list().  As already discussed,
+shrink_page_list() proactively looks for anonymous pages that should have
+PG_mlocked set but don't--these would not be detected by page_evictable()--to
+avoid adding them to the swap cache unnecessarily.  File pages mapped into
+VM_LOCKED vmas but without PG_mlocked set will make it all the way to
+try_to_unmap().  shrink_page_list() will divert them to the unevictable list when
+try_to_unmap() returns SWAP_MLOCK, as discussed above.
+
+TODO/FIXME:  If we can enhance the swap cache to reliably remove entries
+with page_count(page) > 2, as long as all ptes are mapped to the page and
+not the swap entry, we can probably remove the call to try_to_munlock() in
+shrink_page_list() and just remove the page from the swap cache when
+try_to_unmap() returns SWAP_MLOCK.   Currently, remove_exclusive_swap_page()
+doesn't seem to allow that.
+
+
-- 
GitLab


From 8edb08caf68184fb170f4f69c7445929e199eaea Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:49 -0700
Subject: [PATCH 707/895] mlock: downgrade mmap sem while populating mlocked
 regions

We need to hold the mmap_sem for write to initiatate mlock()/munlock()
because we may need to merge/split vmas.  However, this can lead to very
long lock hold times attempting to fault in a large memory region to mlock
it into memory.  This can hold off other faults against the mm
[multithreaded tasks] and other scans of the mm, such as via /proc.  To
alleviate this, downgrade the mmap_sem to read mode during the population
of the region for locking.  This is especially the case if we need to
reclaim memory to lock down the region.  We [probably?] don't need to do
this for unlocking as all of the pages should be resident--they're already
mlocked.

Now, the caller's of the mlock functions [mlock_fixup() and
mlock_vma_pages_range()] expect the mmap_sem to be returned in write mode.
 Changing all callers appears to be way too much effort at this point.
So, restore write mode before returning.  Note that this opens a window
where the mmap list could change in a multithreaded process.  So, at least
for mlock_fixup(), where we could be called in a loop over multiple vmas,
we check that a vma still exists at the start address and that vma still
covers the page range [start,end).  If not, we return an error, -EAGAIN,
and let the caller deal with it.

Return -EAGAIN from mlock_vma_pages_range() function and mlock_fixup() if
the vma at 'start' disappears or changes so that the page range
[start,end) is no longer contained in the vma.  Again, let the caller deal
with it.  Looks like only sys_remap_file_pages() [via mmap_region()]
should actually care.

With this patch, I no longer see processes like ps(1) blocked for seconds
or minutes at a time waiting for a large [multiple gigabyte] region to be
locked down.  However, I occassionally see delays while unlocking or
unmapping a large mlocked region.  Should we also downgrade the mmap_sem
for the unlock path?

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mlock.c | 46 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/mm/mlock.c b/mm/mlock.c
index 8746fe3f9730..c83896a72504 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -318,6 +318,7 @@ static void __munlock_vma_pages_range(struct vm_area_struct *vma,
 int mlock_vma_pages_range(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	int nr_pages = (end - start) / PAGE_SIZE;
 	BUG_ON(!(vma->vm_flags & VM_LOCKED));
 
@@ -329,8 +330,19 @@ int mlock_vma_pages_range(struct vm_area_struct *vma,
 
 	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
 			is_vm_hugetlb_page(vma) ||
-			vma == get_gate_vma(current)))
-		return __mlock_vma_pages_range(vma, start, end);
+			vma == get_gate_vma(current))) {
+		downgrade_write(&mm->mmap_sem);
+		nr_pages = __mlock_vma_pages_range(vma, start, end);
+
+		up_read(&mm->mmap_sem);
+		/* vma can change or disappear */
+		down_write(&mm->mmap_sem);
+		vma = find_vma(mm, start);
+		/* non-NULL vma must contain @start, but need to check @end */
+		if (!vma ||  end > vma->vm_end)
+			return -EAGAIN;
+		return nr_pages;
+	}
 
 	/*
 	 * User mapped kernel pages or huge pages:
@@ -424,13 +436,41 @@ success:
 	vma->vm_flags = newflags;
 
 	if (lock) {
+		/*
+		 * mmap_sem is currently held for write.  Downgrade the write
+		 * lock to a read lock so that other faults, mmap scans, ...
+		 * while we fault in all pages.
+		 */
+		downgrade_write(&mm->mmap_sem);
+
 		ret = __mlock_vma_pages_range(vma, start, end);
 		if (ret > 0) {
 			mm->locked_vm -= ret;
 			ret = 0;
 		}
-	} else
+		/*
+		 * Need to reacquire mmap sem in write mode, as our callers
+		 * expect this.  We have no support for atomically upgrading
+		 * a sem to write, so we need to check for ranges while sem
+		 * is unlocked.
+		 */
+		up_read(&mm->mmap_sem);
+		/* vma can change or disappear */
+		down_write(&mm->mmap_sem);
+		*prev = find_vma(mm, start);
+		/* non-NULL *prev must contain @start, but need to check @end */
+		if (!(*prev) || end > (*prev)->vm_end)
+			ret = -EAGAIN;
+	} else {
+		/*
+		 * TODO:  for unlocking, pages will already be resident, so
+		 * we don't need to wait for allocations/reclaim/pagein, ...
+		 * However, unlocking a very large region can still take a
+		 * while.  Should we downgrade the semaphore for both lock
+		 * AND unlock ?
+		 */
 		__munlock_vma_pages_range(vma, start, end);
+	}
 
 out:
 	*prev = vma;
-- 
GitLab


From ba470de43188cdbff795b5da43a1474523c6c2fb Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 18 Oct 2008 20:26:50 -0700
Subject: [PATCH 708/895] mmap: handle mlocked pages during map, remap, unmap

Originally by Nick Piggin <npiggin@suse.de>

Remove mlocked pages from the LRU using "unevictable infrastructure"
during mmap(), munmap(), mremap() and truncate().  Try to move back to
normal LRU lists on munmap() when last mlocked mapping removed.  Remove
PageMlocked() status when page truncated from file.

[akpm@linux-foundation.org: cleanup]
[kamezawa.hiroyu@jp.fujitsu.com: fix double unlock_page()]
[kosaki.motohiro@jp.fujitsu.com: split LRU: munlock rework]
[lee.schermerhorn@hp.com: mlock: fix __mlock_vma_pages_range comment block]
[akpm@linux-foundation.org: remove bogus kerneldoc token]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamewzawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/fremap.c   |  27 ++++--
 mm/internal.h |   9 +-
 mm/mlock.c    | 221 ++++++++++++++++++++------------------------------
 mm/mmap.c     |  71 +++++++++++-----
 mm/mremap.c   |   8 +-
 mm/truncate.c |   4 +
 6 files changed, 180 insertions(+), 160 deletions(-)

diff --git a/mm/fremap.c b/mm/fremap.c
index 7881638e4a12..7d12ca70ef7b 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -21,6 +21,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, pte_t *ptep)
 {
@@ -215,15 +217,31 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 		spin_unlock(&mapping->i_mmap_lock);
 	}
 
+	if (vma->vm_flags & VM_LOCKED) {
+		/*
+		 * drop PG_Mlocked flag for over-mapped range
+		 */
+		unsigned int saved_flags = vma->vm_flags;
+		munlock_vma_pages_range(vma, start, start + size);
+		vma->vm_flags = saved_flags;
+	}
+
 	mmu_notifier_invalidate_range_start(mm, start, start + size);
 	err = populate_range(mm, vma, start, size, pgoff);
 	mmu_notifier_invalidate_range_end(mm, start, start + size);
 	if (!err && !(flags & MAP_NONBLOCK)) {
-		if (unlikely(has_write_lock)) {
-			downgrade_write(&mm->mmap_sem);
-			has_write_lock = 0;
+		if (vma->vm_flags & VM_LOCKED) {
+			/*
+			 * might be mapping previously unmapped range of file
+			 */
+			mlock_vma_pages_range(vma, start, start + size);
+		} else {
+			if (unlikely(has_write_lock)) {
+				downgrade_write(&mm->mmap_sem);
+				has_write_lock = 0;
+			}
+			make_pages_present(start, start+size);
 		}
-		make_pages_present(start, start+size);
 	}
 
 	/*
@@ -240,4 +258,3 @@ out:
 
 	return err;
 }
-
diff --git a/mm/internal.h b/mm/internal.h
index 4ebf0bef9a39..48e32f790571 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -61,9 +61,14 @@ static inline unsigned long page_order(struct page *page)
 	return page_private(page);
 }
 
-extern int mlock_vma_pages_range(struct vm_area_struct *vma,
+extern long mlock_vma_pages_range(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end);
-extern void munlock_vma_pages_all(struct vm_area_struct *vma);
+extern void munlock_vma_pages_range(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end);
+static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
+{
+	munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
+}
 
 #ifdef CONFIG_UNEVICTABLE_LRU
 /*
diff --git a/mm/mlock.c b/mm/mlock.c
index c83896a72504..8b478350a2a1 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -112,26 +112,49 @@ static void munlock_vma_page(struct page *page)
 	}
 }
 
-/*
- * mlock a range of pages in the vma.
+/**
+ * __mlock_vma_pages_range() -  mlock/munlock a range of pages in the vma.
+ * @vma:   target vma
+ * @start: start address
+ * @end:   end address
+ * @mlock: 0 indicate munlock, otherwise mlock.
+ *
+ * If @mlock == 0, unlock an mlocked range;
+ * else mlock the range of pages.  This takes care of making the pages present ,
+ * too.
  *
- * This takes care of making the pages present too.
+ * return 0 on success, negative error code on error.
  *
- * vma->vm_mm->mmap_sem must be held for write.
+ * vma->vm_mm->mmap_sem must be held for at least read.
  */
-static int __mlock_vma_pages_range(struct vm_area_struct *vma,
-			unsigned long start, unsigned long end)
+static long __mlock_vma_pages_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end,
+				   int mlock)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = start;
 	struct page *pages[16]; /* 16 gives a reasonable batch */
-	int write = !!(vma->vm_flags & VM_WRITE);
 	int nr_pages = (end - start) / PAGE_SIZE;
 	int ret;
+	int gup_flags = 0;
 
-	VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK);
-	VM_BUG_ON(start < vma->vm_start || end > vma->vm_end);
-	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
+	VM_BUG_ON(start & ~PAGE_MASK);
+	VM_BUG_ON(end   & ~PAGE_MASK);
+	VM_BUG_ON(start < vma->vm_start);
+	VM_BUG_ON(end   > vma->vm_end);
+	VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) &&
+		  (atomic_read(&mm->mm_users) != 0));
+
+	/*
+	 * mlock:   don't page populate if page has PROT_NONE permission.
+	 * munlock: the pages always do munlock althrough
+	 *          its has PROT_NONE permission.
+	 */
+	if (!mlock)
+		gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS;
+
+	if (vma->vm_flags & VM_WRITE)
+		gup_flags |= GUP_FLAGS_WRITE;
 
 	lru_add_drain_all();	/* push cached pages to LRU */
 
@@ -146,9 +169,9 @@ static int __mlock_vma_pages_range(struct vm_area_struct *vma,
 		 * disable migration of this page.  However, page may
 		 * still be truncated out from under us.
 		 */
-		ret = get_user_pages(current, mm, addr,
+		ret = __get_user_pages(current, mm, addr,
 				min_t(int, nr_pages, ARRAY_SIZE(pages)),
-				write, 0, pages, NULL);
+				gup_flags, pages, NULL);
 		/*
 		 * This can happen for, e.g., VM_NONLINEAR regions before
 		 * a page has been allocated and mapped at a given offset,
@@ -178,8 +201,12 @@ static int __mlock_vma_pages_range(struct vm_area_struct *vma,
 			 * by the elevated reference, we need only check for
 			 * page truncation (file-cache only).
 			 */
-			if (page->mapping)
-				mlock_vma_page(page);
+			if (page->mapping) {
+				if (mlock)
+					mlock_vma_page(page);
+				else
+					munlock_vma_page(page);
+			}
 			unlock_page(page);
 			put_page(page);		/* ref from get_user_pages() */
 
@@ -197,125 +224,38 @@ static int __mlock_vma_pages_range(struct vm_area_struct *vma,
 	return 0;	/* count entire vma as locked_vm */
 }
 
-/*
- * private structure for munlock page table walk
- */
-struct munlock_page_walk {
-	struct vm_area_struct *vma;
-	pmd_t                 *pmd; /* for migration_entry_wait() */
-};
-
-/*
- * munlock normal pages for present ptes
- */
-static int __munlock_pte_handler(pte_t *ptep, unsigned long addr,
-				   unsigned long end, struct mm_walk *walk)
-{
-	struct munlock_page_walk *mpw = walk->private;
-	swp_entry_t entry;
-	struct page *page;
-	pte_t pte;
-
-retry:
-	pte = *ptep;
-	/*
-	 * If it's a swap pte, we might be racing with page migration.
-	 */
-	if (unlikely(!pte_present(pte))) {
-		if (!is_swap_pte(pte))
-			goto out;
-		entry = pte_to_swp_entry(pte);
-		if (is_migration_entry(entry)) {
-			migration_entry_wait(mpw->vma->vm_mm, mpw->pmd, addr);
-			goto retry;
-		}
-		goto out;
-	}
-
-	page = vm_normal_page(mpw->vma, addr, pte);
-	if (!page)
-		goto out;
-
-	lock_page(page);
-	if (!page->mapping) {
-		unlock_page(page);
-		goto retry;
-	}
-	munlock_vma_page(page);
-	unlock_page(page);
-
-out:
-	return 0;
-}
-
-/*
- * Save pmd for pte handler for waiting on migration entries
- */
-static int __munlock_pmd_handler(pmd_t *pmd, unsigned long addr,
-				 unsigned long end, struct mm_walk *walk)
-{
-	struct munlock_page_walk *mpw = walk->private;
-
-	mpw->pmd = pmd;
-	return 0;
-}
-
-
-/*
- * munlock a range of pages in the vma using standard page table walk.
- *
- * vma->vm_mm->mmap_sem must be held for write.
- */
-static void __munlock_vma_pages_range(struct vm_area_struct *vma,
-			      unsigned long start, unsigned long end)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	struct munlock_page_walk mpw = {
-		.vma = vma,
-	};
-	struct mm_walk munlock_page_walk = {
-		.pmd_entry = __munlock_pmd_handler,
-		.pte_entry = __munlock_pte_handler,
-		.private = &mpw,
-		.mm = mm,
-	};
-
-	VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK);
-	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
-	VM_BUG_ON(start < vma->vm_start);
-	VM_BUG_ON(end > vma->vm_end);
-
-	lru_add_drain_all();	/* push cached pages to LRU */
-	walk_page_range(start, end, &munlock_page_walk);
-	lru_add_drain_all();	/* to update stats */
-}
-
 #else /* CONFIG_UNEVICTABLE_LRU */
 
 /*
  * Just make pages present if VM_LOCKED.  No-op if unlocking.
  */
-static int __mlock_vma_pages_range(struct vm_area_struct *vma,
-			unsigned long start, unsigned long end)
+static long __mlock_vma_pages_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end,
+				   int mlock)
 {
-	if (vma->vm_flags & VM_LOCKED)
+	if (mlock && (vma->vm_flags & VM_LOCKED))
 		make_pages_present(start, end);
 	return 0;
 }
-
-/*
- * munlock a range of pages in the vma -- no-op.
- */
-static void __munlock_vma_pages_range(struct vm_area_struct *vma,
-			      unsigned long start, unsigned long end)
-{
-}
 #endif /* CONFIG_UNEVICTABLE_LRU */
 
-/*
- * mlock all pages in this vma range.  For mmap()/mremap()/...
+/**
+ * mlock_vma_pages_range() - mlock pages in specified vma range.
+ * @vma - the vma containing the specfied address range
+ * @start - starting address in @vma to mlock
+ * @end   - end address [+1] in @vma to mlock
+ *
+ * For mmap()/mremap()/expansion of mlocked vma.
+ *
+ * return 0 on success for "normal" vmas.
+ *
+ * return number of pages [> 0] to be removed from locked_vm on success
+ * of "special" vmas.
+ *
+ * return negative error if vma spanning @start-@range disappears while
+ * mmap semaphore is dropped.  Unlikely?
  */
-int mlock_vma_pages_range(struct vm_area_struct *vma,
+long mlock_vma_pages_range(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -331,8 +271,10 @@ int mlock_vma_pages_range(struct vm_area_struct *vma,
 	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
 			is_vm_hugetlb_page(vma) ||
 			vma == get_gate_vma(current))) {
+		long error;
 		downgrade_write(&mm->mmap_sem);
-		nr_pages = __mlock_vma_pages_range(vma, start, end);
+
+		error = __mlock_vma_pages_range(vma, start, end, 1);
 
 		up_read(&mm->mmap_sem);
 		/* vma can change or disappear */
@@ -340,8 +282,9 @@ int mlock_vma_pages_range(struct vm_area_struct *vma,
 		vma = find_vma(mm, start);
 		/* non-NULL vma must contain @start, but need to check @end */
 		if (!vma ||  end > vma->vm_end)
-			return -EAGAIN;
-		return nr_pages;
+			return -ENOMEM;
+
+		return 0;	/* hide other errors from mmap(), et al */
 	}
 
 	/*
@@ -356,17 +299,33 @@ int mlock_vma_pages_range(struct vm_area_struct *vma,
 
 no_mlock:
 	vma->vm_flags &= ~VM_LOCKED;	/* and don't come back! */
-	return nr_pages;		/* pages NOT mlocked */
+	return nr_pages;		/* error or pages NOT mlocked */
 }
 
 
 /*
- * munlock all pages in vma.   For munmap() and exit().
+ * munlock_vma_pages_range() - munlock all pages in the vma range.'
+ * @vma - vma containing range to be munlock()ed.
+ * @start - start address in @vma of the range
+ * @end - end of range in @vma.
+ *
+ *  For mremap(), munmap() and exit().
+ *
+ * Called with @vma VM_LOCKED.
+ *
+ * Returns with VM_LOCKED cleared.  Callers must be prepared to
+ * deal with this.
+ *
+ * We don't save and restore VM_LOCKED here because pages are
+ * still on lru.  In unmap path, pages might be scanned by reclaim
+ * and re-mlocked by try_to_{munlock|unmap} before we unmap and
+ * free them.  This will result in freeing mlocked pages.
  */
-void munlock_vma_pages_all(struct vm_area_struct *vma)
+void munlock_vma_pages_range(struct vm_area_struct *vma,
+			   unsigned long start, unsigned long end)
 {
 	vma->vm_flags &= ~VM_LOCKED;
-	__munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
+	__mlock_vma_pages_range(vma, start, end, 0);
 }
 
 /*
@@ -443,7 +402,7 @@ success:
 		 */
 		downgrade_write(&mm->mmap_sem);
 
-		ret = __mlock_vma_pages_range(vma, start, end);
+		ret = __mlock_vma_pages_range(vma, start, end, 1);
 		if (ret > 0) {
 			mm->locked_vm -= ret;
 			ret = 0;
@@ -460,7 +419,7 @@ success:
 		*prev = find_vma(mm, start);
 		/* non-NULL *prev must contain @start, but need to check @end */
 		if (!(*prev) || end > (*prev)->vm_end)
-			ret = -EAGAIN;
+			ret = -ENOMEM;
 	} else {
 		/*
 		 * TODO:  for unlocking, pages will already be resident, so
@@ -469,7 +428,7 @@ success:
 		 * while.  Should we downgrade the semaphore for both lock
 		 * AND unlock ?
 		 */
-		__munlock_vma_pages_range(vma, start, end);
+		__mlock_vma_pages_range(vma, start, end, 0);
 	}
 
 out:
diff --git a/mm/mmap.c b/mm/mmap.c
index 7bdfd2661f17..505a454f365e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -970,6 +970,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
 			return -EPERM;
 		vm_flags |= VM_LOCKED;
 	}
+
 	/* mlock MCL_FUTURE? */
 	if (vm_flags & VM_LOCKED) {
 		unsigned long locked, lock_limit;
@@ -1137,10 +1138,12 @@ munmap_back:
 	 * The VM_SHARED test is necessary because shmem_zero_setup
 	 * will create the file object for a shared anonymous map below.
 	 */
-	if (!file && !(vm_flags & VM_SHARED) &&
-	    vma_merge(mm, prev, addr, addr + len, vm_flags,
-					NULL, NULL, pgoff, NULL))
-		goto out;
+	if (!file && !(vm_flags & VM_SHARED)) {
+		vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
+					NULL, NULL, pgoff, NULL);
+		if (vma)
+			goto out;
+	}
 
 	/*
 	 * Determine the object being mapped and call the appropriate
@@ -1222,10 +1225,14 @@ out:
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
-		mm->locked_vm += len >> PAGE_SHIFT;
-		make_pages_present(addr, addr + len);
-	}
-	if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
+		/*
+		 * makes pages present; downgrades, drops, reacquires mmap_sem
+		 */
+		long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
+		if (nr_pages < 0)
+			return nr_pages;	/* vma gone! */
+		mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
+	} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
 		make_pages_present(addr, addr + len);
 	return addr;
 
@@ -1698,8 +1705,10 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 		return vma;
 	if (!prev || expand_stack(prev, addr))
 		return NULL;
-	if (prev->vm_flags & VM_LOCKED)
-		make_pages_present(addr, prev->vm_end);
+	if (prev->vm_flags & VM_LOCKED) {
+		if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
+			return NULL;	/* vma gone! */
+	}
 	return prev;
 }
 #else
@@ -1725,8 +1734,10 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
 	start = vma->vm_start;
 	if (expand_stack(vma, addr))
 		return NULL;
-	if (vma->vm_flags & VM_LOCKED)
-		make_pages_present(addr, start);
+	if (vma->vm_flags & VM_LOCKED) {
+		if (mlock_vma_pages_range(vma, addr, start) < 0)
+			return NULL;	/* vma gone! */
+	}
 	return vma;
 }
 #endif
@@ -1745,8 +1756,6 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
 		long nrpages = vma_pages(vma);
 
 		mm->total_vm -= nrpages;
-		if (vma->vm_flags & VM_LOCKED)
-			mm->locked_vm -= nrpages;
 		vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
 		vma = remove_vma(vma);
 	} while (vma);
@@ -1911,6 +1920,20 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	}
 	vma = prev? prev->vm_next: mm->mmap;
 
+	/*
+	 * unlock any mlock()ed ranges before detaching vmas
+	 */
+	if (mm->locked_vm) {
+		struct vm_area_struct *tmp = vma;
+		while (tmp && tmp->vm_start < end) {
+			if (tmp->vm_flags & VM_LOCKED) {
+				mm->locked_vm -= vma_pages(tmp);
+				munlock_vma_pages_all(tmp);
+			}
+			tmp = tmp->vm_next;
+		}
+	}
+
 	/*
 	 * Remove the vma's, and unmap the actual pages
 	 */
@@ -2023,8 +2046,9 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 		return -ENOMEM;
 
 	/* Can we just expand an old private anonymous mapping? */
-	if (vma_merge(mm, prev, addr, addr + len, flags,
-					NULL, NULL, pgoff, NULL))
+	vma = vma_merge(mm, prev, addr, addr + len, flags,
+					NULL, NULL, pgoff, NULL);
+	if (vma)
 		goto out;
 
 	/*
@@ -2046,8 +2070,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 out:
 	mm->total_vm += len >> PAGE_SHIFT;
 	if (flags & VM_LOCKED) {
-		mm->locked_vm += len >> PAGE_SHIFT;
-		make_pages_present(addr, addr + len);
+		if (!mlock_vma_pages_range(vma, addr, addr + len))
+			mm->locked_vm += (len >> PAGE_SHIFT);
 	}
 	return addr;
 }
@@ -2058,7 +2082,7 @@ EXPORT_SYMBOL(do_brk);
 void exit_mmap(struct mm_struct *mm)
 {
 	struct mmu_gather *tlb;
-	struct vm_area_struct *vma = mm->mmap;
+	struct vm_area_struct *vma;
 	unsigned long nr_accounted = 0;
 	unsigned long end;
 
@@ -2066,6 +2090,15 @@ void exit_mmap(struct mm_struct *mm)
 	arch_exit_mmap(mm);
 	mmu_notifier_release(mm);
 
+	if (mm->locked_vm) {
+		vma = mm->mmap;
+		while (vma) {
+			if (vma->vm_flags & VM_LOCKED)
+				munlock_vma_pages_all(vma);
+			vma = vma->vm_next;
+		}
+	}
+	vma = mm->mmap;
 	lru_add_drain();
 	flush_cache_mm(mm);
 	tlb = tlb_gather_mmu(mm, 1);
diff --git a/mm/mremap.c b/mm/mremap.c
index 1a7743923c8c..58a2908f42f5 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -24,6 +24,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
@@ -238,8 +240,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += new_len >> PAGE_SHIFT;
 		if (new_len > old_len)
-			make_pages_present(new_addr + old_len,
-					   new_addr + new_len);
+			mlock_vma_pages_range(new_vma, new_addr + old_len,
+						       new_addr + new_len);
 	}
 
 	return new_addr;
@@ -379,7 +381,7 @@ unsigned long do_mremap(unsigned long addr,
 			vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
 			if (vma->vm_flags & VM_LOCKED) {
 				mm->locked_vm += pages;
-				make_pages_present(addr + old_len,
+				mlock_vma_pages_range(vma, addr + old_len,
 						   addr + new_len);
 			}
 			ret = addr;
diff --git a/mm/truncate.c b/mm/truncate.c
index e83e4b114ef1..1229211104f8 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -18,6 +18,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/buffer_head.h>	/* grr. try_to_release_page,
 				   do_invalidatepage */
+#include "internal.h"
 
 
 /**
@@ -103,6 +104,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
 
 	cancel_dirty_page(page, PAGE_CACHE_SIZE);
 
+	clear_page_mlock(page);
 	remove_from_page_cache(page);
 	ClearPageMappedToDisk(page);
 	page_cache_release(page);	/* pagecache ref */
@@ -127,6 +129,7 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
 	if (PagePrivate(page) && !try_to_release_page(page, 0))
 		return 0;
 
+	clear_page_mlock(page);
 	ret = remove_mapping(mapping, page);
 
 	return ret;
@@ -352,6 +355,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	if (PageDirty(page))
 		goto failed;
 
+	clear_page_mlock(page);
 	BUG_ON(PagePrivate(page));
 	__remove_from_page_cache(page);
 	spin_unlock_irq(&mapping->tree_lock);
-- 
GitLab


From 5344b7e648980cc2ca613ec03a56a8222ff48820 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 18 Oct 2008 20:26:51 -0700
Subject: [PATCH 709/895] vmstat: mlocked pages statistics

Add NR_MLOCK zone page state, which provides a (conservative) count of
mlocked pages (actually, the number of mlocked pages moved off the LRU).

Reworked by lts to fit in with the modified mlock page support in the
Reclaim Scalability series.

[kosaki.motohiro@jp.fujitsu.com: fix incorrect Mlocked field of /proc/meminfo]
[lee.schermerhorn@hp.com: mlocked-pages: add event counting with statistics]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c    |  4 +++-
 fs/proc/proc_misc.c    |  2 ++
 include/linux/mmzone.h |  2 ++
 include/linux/vmstat.h |  4 ++++
 mm/internal.h          | 16 +++++++++++++---
 mm/mlock.c             | 41 ++++++++++++++++++++++++++++++++++++-----
 mm/vmstat.c            |  5 +++++
 7 files changed, 65 insertions(+), 9 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 11a9a05cf554..fb45d88a2446 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -71,7 +71,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       "Node %d Active(file):   %8lu kB\n"
 		       "Node %d Inactive(file): %8lu kB\n"
 #ifdef CONFIG_UNEVICTABLE_LRU
-		       "Node %d Noreclaim:      %8lu kB\n"
+		       "Node %d Unevictable:    %8lu kB\n"
+		       "Node %d Mlocked:        %8lu kB\n"
 #endif
 #ifdef CONFIG_HIGHMEM
 		       "Node %d HighTotal:      %8lu kB\n"
@@ -104,6 +105,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
 #ifdef CONFIG_UNEVICTABLE_LRU
 		       nid, K(node_page_state(nid, NR_UNEVICTABLE)),
+		       nid, K(node_page_state(nid, NR_MLOCK)),
 #endif
 #ifdef CONFIG_HIGHMEM
 		       nid, K(i.totalhigh),
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 6dd60eaea997..61b25f4eabe6 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -176,6 +176,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 		"Inactive(file): %8lu kB\n"
 #ifdef CONFIG_UNEVICTABLE_LRU
 		"Unevictable:    %8lu kB\n"
+		"Mlocked:        %8lu kB\n"
 #endif
 #ifdef CONFIG_HIGHMEM
 		"HighTotal:      %8lu kB\n"
@@ -217,6 +218,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 		K(pages[LRU_INACTIVE_FILE]),
 #ifdef CONFIG_UNEVICTABLE_LRU
 		K(pages[LRU_UNEVICTABLE]),
+		K(global_page_state(NR_MLOCK)),
 #endif
 #ifdef CONFIG_HIGHMEM
 		K(i.totalhigh),
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d1f60d5fe2ea..da2d053a95f1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -88,8 +88,10 @@ enum zone_stat_item {
 	NR_ACTIVE_FILE,		/*  "     "     "   "       "         */
 #ifdef CONFIG_UNEVICTABLE_LRU
 	NR_UNEVICTABLE,		/*  "     "     "   "       "         */
+	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
 #else
 	NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */
+	NR_MLOCK = NR_ACTIVE_FILE,
 #endif
 	NR_ANON_PAGES,	/* Mapped anonymous pages */
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 135840cd7feb..05b805020be2 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -45,6 +45,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		UNEVICTABLE_PGCULLED,	/* culled to noreclaim list */
 		UNEVICTABLE_PGSCANNED,	/* scanned for reclaimability */
 		UNEVICTABLE_PGRESCUED,	/* rescued from noreclaim list */
+		UNEVICTABLE_PGMLOCKED,
+		UNEVICTABLE_PGMUNLOCKED,
+		UNEVICTABLE_PGCLEARED,	/* on COW, page truncate */
+		UNEVICTABLE_PGSTRANDED,	/* unable to isolate on unlock */
 #endif
 		NR_VM_EVENT_ITEMS
 };
diff --git a/mm/internal.h b/mm/internal.h
index 48e32f790571..1cfbf2e2bc9e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -101,7 +101,10 @@ static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page)
 	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
 		return 0;
 
-	SetPageMlocked(page);
+	if (!TestSetPageMlocked(page)) {
+		inc_zone_page_state(page, NR_MLOCK);
+		count_vm_event(UNEVICTABLE_PGMLOCKED);
+	}
 	return 1;
 }
 
@@ -128,12 +131,19 @@ static inline void clear_page_mlock(struct page *page)
 
 /*
  * mlock_migrate_page - called only from migrate_page_copy() to
- * migrate the Mlocked page flag
+ * migrate the Mlocked page flag; update statistics.
  */
 static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 {
-	if (TestClearPageMlocked(page))
+	if (TestClearPageMlocked(page)) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		__dec_zone_page_state(page, NR_MLOCK);
 		SetPageMlocked(newpage);
+		__inc_zone_page_state(newpage, NR_MLOCK);
+		local_irq_restore(flags);
+	}
 }
 
 
diff --git a/mm/mlock.c b/mm/mlock.c
index 8b478350a2a1..bce1b22c36c2 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -60,6 +60,8 @@ void __clear_page_mlock(struct page *page)
 		return;
 	}
 
+	dec_zone_page_state(page, NR_MLOCK);
+	count_vm_event(UNEVICTABLE_PGCLEARED);
 	if (!isolate_lru_page(page)) {
 		putback_lru_page(page);
 	} else {
@@ -69,6 +71,9 @@ void __clear_page_mlock(struct page *page)
 		lru_add_drain_all();
 		if (!isolate_lru_page(page))
 			putback_lru_page(page);
+		else if (PageUnevictable(page))
+			count_vm_event(UNEVICTABLE_PGSTRANDED);
+
 	}
 }
 
@@ -80,8 +85,12 @@ void mlock_vma_page(struct page *page)
 {
 	BUG_ON(!PageLocked(page));
 
-	if (!TestSetPageMlocked(page) && !isolate_lru_page(page))
-		putback_lru_page(page);
+	if (!TestSetPageMlocked(page)) {
+		inc_zone_page_state(page, NR_MLOCK);
+		count_vm_event(UNEVICTABLE_PGMLOCKED);
+		if (!isolate_lru_page(page))
+			putback_lru_page(page);
+	}
 }
 
 /*
@@ -106,9 +115,31 @@ static void munlock_vma_page(struct page *page)
 {
 	BUG_ON(!PageLocked(page));
 
-	if (TestClearPageMlocked(page) && !isolate_lru_page(page)) {
-		try_to_munlock(page);
-		putback_lru_page(page);
+	if (TestClearPageMlocked(page)) {
+		dec_zone_page_state(page, NR_MLOCK);
+		if (!isolate_lru_page(page)) {
+			int ret = try_to_munlock(page);
+			/*
+			 * did try_to_unlock() succeed or punt?
+			 */
+			if (ret == SWAP_SUCCESS || ret == SWAP_AGAIN)
+				count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+
+			putback_lru_page(page);
+		} else {
+			/*
+			 * We lost the race.  let try_to_unmap() deal
+			 * with it.  At least we get the page state and
+			 * mlock stats right.  However, page is still on
+			 * the noreclaim list.  We'll fix that up when
+			 * the page is eventually freed or we scan the
+			 * noreclaim list.
+			 */
+			if (PageUnevictable(page))
+				count_vm_event(UNEVICTABLE_PGSTRANDED);
+			else
+				count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+		}
 	}
 }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6db2f6319313..9e28abc0a0b9 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -625,6 +625,7 @@ static const char * const vmstat_text[] = {
 	"nr_active_file",
 #ifdef CONFIG_UNEVICTABLE_LRU
 	"nr_unevictable",
+	"nr_mlock",
 #endif
 	"nr_anon_pages",
 	"nr_mapped",
@@ -684,6 +685,10 @@ static const char * const vmstat_text[] = {
 	"unevictable_pgs_culled",
 	"unevictable_pgs_scanned",
 	"unevictable_pgs_rescued",
+	"unevictable_pgs_mlocked",
+	"unevictable_pgs_munlocked",
+	"unevictable_pgs_cleared",
+	"unevictable_pgs_stranded",
 #endif
 #endif
 };
-- 
GitLab


From 64d6519dda3905dfb94d3f93c07c5f263f41813f Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:52 -0700
Subject: [PATCH 710/895] swap: cull unevictable pages in fault path

In the fault paths that install new anonymous pages, check whether the
page is evictable or not using lru_cache_add_active_or_unevictable().  If
the page is evictable, just add it to the active lru list [via the pagevec
cache], else add it to the unevictable list.

This "proactive" culling in the fault path mimics the handling of mlocked
pages in Nick Piggin's series to keep mlocked pages off the lru lists.

Notes:

1) This patch is optional--e.g., if one is concerned about the
   additional test in the fault path.  We can defer the moving of
   nonreclaimable pages until when vmscan [shrink_*_list()]
   encounters them.  Vmscan will only need to handle such pages
   once, but if there are a lot of them it could impact system
   performance.

2) The 'vma' argument to page_evictable() is require to notice that
   we're faulting a page into an mlock()ed vma w/o having to scan the
   page's rmap in the fault path.   Culling mlock()ed anon pages is
   currently the only reason for this patch.

3) We can't cull swap pages in read_swap_cache_async() because the
   vma argument doesn't necessarily correspond to the swap cache
   offset passed in by swapin_readahead().  This could [did!] result
   in mlocking pages in non-VM_LOCKED vmas if [when] we tried to
   cull in this path.

4) Move set_pte_at() to after where we add page to lru to keep it
   hidden from other tasks that might walk the page table.
   We already do it in this order in do_anonymous() page.  And,
   these are COW'd anon pages.  Is this safe?

[riel@redhat.com: undo an overzealous code cleanup]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  2 ++
 mm/memory.c          | 18 ++++++++++--------
 mm/swap.c            | 21 +++++++++++++++++++++
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7edb4cbc29f9..07eda69412fb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -173,6 +173,8 @@ extern unsigned int nr_free_pagecache_pages(void);
 /* linux/mm/swap.c */
 extern void __lru_cache_add(struct page *, enum lru_list lru);
 extern void lru_cache_add_lru(struct page *, enum lru_list lru);
+extern void lru_cache_add_active_or_unevictable(struct page *,
+					struct vm_area_struct *);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
diff --git a/mm/memory.c b/mm/memory.c
index 9fef7272fb9e..450127f4c582 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1922,12 +1922,13 @@ gotten:
 		 * thread doing COW.
 		 */
 		ptep_clear_flush_notify(vma, address, page_table);
-		set_pte_at(mm, address, page_table, entry);
-		update_mmu_cache(vma, address, entry);
 		SetPageSwapBacked(new_page);
-		lru_cache_add_active_anon(new_page);
+		lru_cache_add_active_or_unevictable(new_page, vma);
 		page_add_new_anon_rmap(new_page, vma, address);
 
+//TODO:  is this safe?  do_anonymous_page() does it this way.
+		set_pte_at(mm, address, page_table, entry);
+		update_mmu_cache(vma, address, entry);
 		if (old_page) {
 			/*
 			 * Only after switching the pte to the new page may
@@ -2420,7 +2421,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto release;
 	inc_mm_counter(mm, anon_rss);
 	SetPageSwapBacked(page);
-	lru_cache_add_active_anon(page);
+	lru_cache_add_active_or_unevictable(page, vma);
 	page_add_new_anon_rmap(page, vma, address);
 	set_pte_at(mm, address, page_table, entry);
 
@@ -2564,12 +2565,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		entry = mk_pte(page, vma->vm_page_prot);
 		if (flags & FAULT_FLAG_WRITE)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		set_pte_at(mm, address, page_table, entry);
 		if (anon) {
-                        inc_mm_counter(mm, anon_rss);
+			inc_mm_counter(mm, anon_rss);
 			SetPageSwapBacked(page);
-                        lru_cache_add_active_anon(page);
-                        page_add_new_anon_rmap(page, vma, address);
+			lru_cache_add_active_or_unevictable(page, vma);
+			page_add_new_anon_rmap(page, vma, address);
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(page);
@@ -2578,6 +2578,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 				get_page(dirty_page);
 			}
 		}
+//TODO:  is this safe?  do_anonymous_page() does it this way.
+		set_pte_at(mm, address, page_table, entry);
 
 		/* no need to invalidate: a not-present page won't be cached */
 		update_mmu_cache(vma, address, entry);
diff --git a/mm/swap.c b/mm/swap.c
index bc58c1369dd6..2152e48a7b8f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -31,6 +31,8 @@
 #include <linux/backing-dev.h>
 #include <linux/memcontrol.h>
 
+#include "internal.h"
+
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
@@ -244,6 +246,25 @@ void add_page_to_unevictable_list(struct page *page)
 	spin_unlock_irq(&zone->lru_lock);
 }
 
+/**
+ * lru_cache_add_active_or_unevictable
+ * @page:  the page to be added to LRU
+ * @vma:   vma in which page is mapped for determining reclaimability
+ *
+ * place @page on active or unevictable LRU list, depending on
+ * page_evictable().  Note that if the page is not evictable,
+ * it goes directly back onto it's zone's unevictable list.  It does
+ * NOT use a per cpu pagevec.
+ */
+void lru_cache_add_active_or_unevictable(struct page *page,
+					struct vm_area_struct *vma)
+{
+	if (page_evictable(page, vma))
+		lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page));
+	else
+		add_page_to_unevictable_list(page);
+}
+
 /*
  * Drain pages out of the cpu's pagevecs.
  * Either "cpu" is the current CPU, and preemption has already been
-- 
GitLab


From af936a1606246a10c145feac3770f6287f483f02 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:53 -0700
Subject: [PATCH 711/895] vmscan: unevictable LRU scan sysctl

This patch adds a function to scan individual or all zones' unevictable
lists and move any pages that have become evictable onto the respective
zone's inactive list, where shrink_inactive_list() will deal with them.

Adds sysctl to scan all nodes, and per node attributes to individual
nodes' zones.

Kosaki: If evictable page found in unevictable lru when write
/proc/sys/vm/scan_unevictable_pages, print filename and file offset of
these pages.

[akpm@linux-foundation.org: fix one CONFIG_MMU=n build error]
[kosaki.motohiro@jp.fujitsu.com: adapt vmscan-unevictable-lru-scan-sysctl.patch to new sysfs API]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c  |   5 ++
 include/linux/rmap.h |   3 +
 include/linux/swap.h |  15 ++++
 kernel/sysctl.c      |  10 +++
 mm/rmap.c            |   4 +-
 mm/vmscan.c          | 166 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 201 insertions(+), 2 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index fb45d88a2446..f5207090885a 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -13,6 +13,7 @@
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
 #include <linux/device.h>
+#include <linux/swap.h>
 
 static struct sysdev_class node_class = {
 	.name = "node",
@@ -191,6 +192,8 @@ int register_node(struct node *node, int num, struct node *parent)
 		sysdev_create_file(&node->sysdev, &attr_meminfo);
 		sysdev_create_file(&node->sysdev, &attr_numastat);
 		sysdev_create_file(&node->sysdev, &attr_distance);
+
+		scan_unevictable_register_node(node);
 	}
 	return error;
 }
@@ -210,6 +213,8 @@ void unregister_node(struct node *node)
 	sysdev_remove_file(&node->sysdev, &attr_numastat);
 	sysdev_remove_file(&node->sysdev, &attr_distance);
 
+	scan_unevictable_unregister_node(node);
+
 	sysdev_unregister(&node->sysdev);
 }
 
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 955667e6a52d..1da48db8db09 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -75,6 +75,9 @@ void anon_vma_unlink(struct vm_area_struct *);
 void anon_vma_link(struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 
+extern struct anon_vma *page_lock_anon_vma(struct page *page);
+extern void page_unlock_anon_vma(struct anon_vma *anon_vma);
+
 /*
  * rmap interfaces called when adding or removing pte of page
  */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 07eda69412fb..a3af95b2cb6d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -7,6 +7,7 @@
 #include <linux/list.h>
 #include <linux/memcontrol.h>
 #include <linux/sched.h>
+#include <linux/node.h>
 
 #include <asm/atomic.h>
 #include <asm/page.h>
@@ -235,15 +236,29 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 #ifdef CONFIG_UNEVICTABLE_LRU
 extern int page_evictable(struct page *page, struct vm_area_struct *vma);
 extern void scan_mapping_unevictable_pages(struct address_space *);
+
+extern unsigned long scan_unevictable_pages;
+extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
+					void __user *, size_t *, loff_t *);
+extern int scan_unevictable_register_node(struct node *node);
+extern void scan_unevictable_unregister_node(struct node *node);
 #else
 static inline int page_evictable(struct page *page,
 						struct vm_area_struct *vma)
 {
 	return 1;
 }
+
 static inline void scan_mapping_unevictable_pages(struct address_space *mapping)
 {
 }
+
+static inline int scan_unevictable_register_node(struct node *node)
+{
+	return 0;
+}
+
+static inline void scan_unevictable_unregister_node(struct node *node) { }
 #endif
 
 extern int kswapd_run(int nid);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 617d41e4d6a0..b3cc73931d1f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -833,6 +833,16 @@ static struct ctl_table kern_table[] = {
 		.proc_handler   = &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "scan_unevictable_pages",
+		.data		= &scan_unevictable_pages,
+		.maxlen		= sizeof(scan_unevictable_pages),
+		.mode		= 0644,
+		.proc_handler	= &scan_unevictable_handler,
+	},
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
diff --git a/mm/rmap.c b/mm/rmap.c
index 7e60df99018e..7e90bebbeb6c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -181,7 +181,7 @@ void __init anon_vma_init(void)
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma rely on RCU to guard against the races.
  */
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
 {
 	struct anon_vma *anon_vma;
 	unsigned long anon_mapping;
@@ -201,7 +201,7 @@ out:
 	return NULL;
 }
 
-static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma(struct anon_vma *anon_vma)
 {
 	spin_unlock(&anon_vma->lock);
 	rcu_read_unlock();
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e5aaaad159ef..ca64e3e0c518 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -39,6 +39,7 @@
 #include <linux/freezer.h>
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
+#include <linux/sysctl.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -2363,6 +2364,39 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
 	return 1;
 }
 
+static void show_page_path(struct page *page)
+{
+	char buf[256];
+	if (page_is_file_cache(page)) {
+		struct address_space *mapping = page->mapping;
+		struct dentry *dentry;
+		pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+		spin_lock(&mapping->i_mmap_lock);
+		dentry = d_find_alias(mapping->host);
+		printk(KERN_INFO "rescued: %s %lu\n",
+		       dentry_path(dentry, buf, 256), pgoff);
+		spin_unlock(&mapping->i_mmap_lock);
+	} else {
+#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU)
+		struct anon_vma *anon_vma;
+		struct vm_area_struct *vma;
+
+		anon_vma = page_lock_anon_vma(page);
+		if (!anon_vma)
+			return;
+
+		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+			printk(KERN_INFO "rescued: anon %s\n",
+			       vma->vm_mm->owner->comm);
+			break;
+		}
+		page_unlock_anon_vma(anon_vma);
+#endif
+	}
+}
+
+
 /**
  * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
  * @page: page to check evictability and move to appropriate lru list
@@ -2382,6 +2416,9 @@ retry:
 	ClearPageUnevictable(page);
 	if (page_evictable(page, NULL)) {
 		enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
+
+		show_page_path(page);
+
 		__dec_zone_state(zone, NR_UNEVICTABLE);
 		list_move(&page->lru, &zone->lru[l].list);
 		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
@@ -2451,4 +2488,133 @@ void scan_mapping_unevictable_pages(struct address_space *mapping)
 	}
 
 }
+
+/**
+ * scan_zone_unevictable_pages - check unevictable list for evictable pages
+ * @zone - zone of which to scan the unevictable list
+ *
+ * Scan @zone's unevictable LRU lists to check for pages that have become
+ * evictable.  Move those that have to @zone's inactive list where they
+ * become candidates for reclaim, unless shrink_inactive_zone() decides
+ * to reactivate them.  Pages that are still unevictable are rotated
+ * back onto @zone's unevictable list.
+ */
+#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */
+void scan_zone_unevictable_pages(struct zone *zone)
+{
+	struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
+	unsigned long scan;
+	unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE);
+
+	while (nr_to_scan > 0) {
+		unsigned long batch_size = min(nr_to_scan,
+						SCAN_UNEVICTABLE_BATCH_SIZE);
+
+		spin_lock_irq(&zone->lru_lock);
+		for (scan = 0;  scan < batch_size; scan++) {
+			struct page *page = lru_to_page(l_unevictable);
+
+			if (!trylock_page(page))
+				continue;
+
+			prefetchw_prev_lru_page(page, l_unevictable, flags);
+
+			if (likely(PageLRU(page) && PageUnevictable(page)))
+				check_move_unevictable_page(page, zone);
+
+			unlock_page(page);
+		}
+		spin_unlock_irq(&zone->lru_lock);
+
+		nr_to_scan -= batch_size;
+	}
+}
+
+
+/**
+ * scan_all_zones_unevictable_pages - scan all unevictable lists for evictable pages
+ *
+ * A really big hammer:  scan all zones' unevictable LRU lists to check for
+ * pages that have become evictable.  Move those back to the zones'
+ * inactive list where they become candidates for reclaim.
+ * This occurs when, e.g., we have unswappable pages on the unevictable lists,
+ * and we add swap to the system.  As such, it runs in the context of a task
+ * that has possibly/probably made some previously unevictable pages
+ * evictable.
+ */
+void scan_all_zones_unevictable_pages(void)
+{
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		scan_zone_unevictable_pages(zone);
+	}
+}
+
+/*
+ * scan_unevictable_pages [vm] sysctl handler.  On demand re-scan of
+ * all nodes' unevictable lists for evictable pages
+ */
+unsigned long scan_unevictable_pages;
+
+int scan_unevictable_handler(struct ctl_table *table, int write,
+			   struct file *file, void __user *buffer,
+			   size_t *length, loff_t *ppos)
+{
+	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+
+	if (write && *(unsigned long *)table->data)
+		scan_all_zones_unevictable_pages();
+
+	scan_unevictable_pages = 0;
+	return 0;
+}
+
+/*
+ * per node 'scan_unevictable_pages' attribute.  On demand re-scan of
+ * a specified node's per zone unevictable lists for evictable pages.
+ */
+
+static ssize_t read_scan_unevictable_node(struct sys_device *dev,
+					  struct sysdev_attribute *attr,
+					  char *buf)
+{
+	return sprintf(buf, "0\n");	/* always zero; should fit... */
+}
+
+static ssize_t write_scan_unevictable_node(struct sys_device *dev,
+					   struct sysdev_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
+	struct zone *zone;
+	unsigned long res;
+	unsigned long req = strict_strtoul(buf, 10, &res);
+
+	if (!req)
+		return 1;	/* zero is no-op */
+
+	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
+		if (!populated_zone(zone))
+			continue;
+		scan_zone_unevictable_pages(zone);
+	}
+	return 1;
+}
+
+
+static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
+			read_scan_unevictable_node,
+			write_scan_unevictable_node);
+
+int scan_unevictable_register_node(struct node *node)
+{
+	return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
+}
+
+void scan_unevictable_unregister_node(struct node *node)
+{
+	sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
+}
+
 #endif
-- 
GitLab


From 985737cf2ea096ea946aed82c7484d40defc71a8 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:53 -0700
Subject: [PATCH 712/895] mlock: count attempts to free mlocked page

Allow free of mlock()ed pages.  This shouldn't happen, but during
developement, it occasionally did.

This patch allows us to survive that condition, while keeping the
statistics and events correct for debug.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h |  1 +
 mm/internal.h          | 17 +++++++++++++++++
 mm/page_alloc.c        |  1 +
 mm/vmstat.c            |  1 +
 4 files changed, 20 insertions(+)

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 05b805020be2..9cd3ab0f554d 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -49,6 +49,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		UNEVICTABLE_PGMUNLOCKED,
 		UNEVICTABLE_PGCLEARED,	/* on COW, page truncate */
 		UNEVICTABLE_PGSTRANDED,	/* unable to isolate on unlock */
+		UNEVICTABLE_MLOCKFREED,
 #endif
 		NR_VM_EVENT_ITEMS
 };
diff --git a/mm/internal.h b/mm/internal.h
index 1cfbf2e2bc9e..e4e728bdf324 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -146,6 +146,22 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 	}
 }
 
+/*
+ * free_page_mlock() -- clean up attempts to free and mlocked() page.
+ * Page should not be on lru, so no need to fix that up.
+ * free_pages_check() will verify...
+ */
+static inline void free_page_mlock(struct page *page)
+{
+	if (unlikely(TestClearPageMlocked(page))) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		__dec_zone_page_state(page, NR_MLOCK);
+		__count_vm_event(UNEVICTABLE_MLOCKFREED);
+		local_irq_restore(flags);
+	}
+}
 
 #else /* CONFIG_UNEVICTABLE_LRU */
 static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
@@ -155,6 +171,7 @@ static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
 static inline void clear_page_mlock(struct page *page) { }
 static inline void mlock_vma_page(struct page *page) { }
 static inline void mlock_migrate_page(struct page *new, struct page *old) { }
+static inline void free_page_mlock(struct page *page) { }
 
 #endif /* CONFIG_UNEVICTABLE_LRU */
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5886586fde6c..cfbadad75d1d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -454,6 +454,7 @@ static inline void __free_one_page(struct page *page,
 
 static inline int free_pages_check(struct page *page)
 {
+	free_page_mlock(page);
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
 		(page_get_page_cgroup(page) != NULL) |
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 9e28abc0a0b9..9343227c5c60 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -689,6 +689,7 @@ static const char * const vmstat_text[] = {
 	"unevictable_pgs_munlocked",
 	"unevictable_pgs_cleared",
 	"unevictable_pgs_stranded",
+	"unevictable_pgs_mlockfreed",
 #endif
 #endif
 };
-- 
GitLab


From 902d2e8ae0de29f483840ba1134af27343b9564d Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Sat, 18 Oct 2008 20:26:54 -0700
Subject: [PATCH 713/895] vmscan: kill unused lru functions

Several LRU manupuration function are not used now.  So they can be
removed.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h | 48 ---------------------------------------
 1 file changed, 48 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 67d7697fd019..c948350c378e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -37,54 +37,6 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 	__dec_zone_state(zone, NR_LRU_BASE + l);
 }
 
-static inline void
-add_page_to_inactive_anon_list(struct zone *zone, struct page *page)
-{
-	add_page_to_lru_list(zone, page, LRU_INACTIVE_ANON);
-}
-
-static inline void
-add_page_to_active_anon_list(struct zone *zone, struct page *page)
-{
-	add_page_to_lru_list(zone, page, LRU_ACTIVE_ANON);
-}
-
-static inline void
-add_page_to_inactive_file_list(struct zone *zone, struct page *page)
-{
-	add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE);
-}
-
-static inline void
-add_page_to_active_file_list(struct zone *zone, struct page *page)
-{
-	add_page_to_lru_list(zone, page, LRU_ACTIVE_FILE);
-}
-
-static inline void
-del_page_from_inactive_anon_list(struct zone *zone, struct page *page)
-{
-	del_page_from_lru_list(zone, page, LRU_INACTIVE_ANON);
-}
-
-static inline void
-del_page_from_active_anon_list(struct zone *zone, struct page *page)
-{
-	del_page_from_lru_list(zone, page, LRU_ACTIVE_ANON);
-}
-
-static inline void
-del_page_from_inactive_file_list(struct zone *zone, struct page *page)
-{
-	del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
-}
-
-static inline void
-del_page_from_active_file_list(struct zone *zone, struct page *page)
-{
-	del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
-}
-
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
-- 
GitLab


From e0f79b8f1f3394bb344b7b83d6f121ac2af327de Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Sat, 18 Oct 2008 20:26:55 -0700
Subject: [PATCH 714/895] vmscan: don't accumulate scan pressure on unrelated
 lists

During each reclaim scan we accumulate scan pressure on unrelated lists
which will result in bogus scans and unwanted reclaims eventually.

Scanning lists with few reclaim candidates results in a lot of rotation
and therefor also disturbs the list balancing, putting even more
pressure on the wrong lists.

In a test-case with much streaming IO, and therefor a crowded inactive
file page list, swapping started because

  a) anon pages were reclaimed after swap_cluster_max reclaim
  invocations -- nr_scan of this list has just accumulated

  b) active file pages were scanned because *their* nr_scan has also
  accumulated through the same logic.  And this in return created a
  lot of rotation for file pages and resulted in a decrease of file
  list priority, again increasing the pressure on anon pages.

The result was an evicted working set of anon pages while there were
tons of inactive file pages that should have been taken instead.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index ca64e3e0c518..412d7872fc75 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1413,16 +1413,13 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 		if (scan_global_lru(sc)) {
 			int file = is_file_lru(l);
 			int scan;
-			/*
-			 * Add one to nr_to_scan just to make sure that the
-			 * kernel will slowly sift through each list.
-			 */
+
 			scan = zone_page_state(zone, NR_LRU_BASE + l);
 			if (priority) {
 				scan >>= priority;
 				scan = (scan * percent[file]) / 100;
 			}
-			zone->lru[l].nr_scan += scan + 1;
+			zone->lru[l].nr_scan += scan;
 			nr[l] = zone->lru[l].nr_scan;
 			if (nr[l] >= sc->swap_cluster_max)
 				zone->lru[l].nr_scan = 0;
-- 
GitLab


From c11d69d8c830e09a0e7b3935c952afb26c48bba8 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:56 -0700
Subject: [PATCH 715/895] mlock: revert mainline handling of mlock error return

This change is intended to make mlock() error returns correct.
make_page_present() is a lower level function used by more than mlock().
Subsequent patch[es] will add this error return fixup in an mlock specific
path.

Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 450127f4c582..b8487f8cd82e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2819,19 +2819,9 @@ int make_pages_present(unsigned long addr, unsigned long end)
 	len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
 	ret = get_user_pages(current, current->mm, addr,
 			len, write, 0, NULL, NULL);
-	if (ret < 0) {
-		/*
-		   SUS require strange return value to mlock
-		    - invalid addr generate to ENOMEM.
-		    - out of memory should generate EAGAIN.
-		*/
-		if (ret == -EFAULT)
-			ret = -ENOMEM;
-		else if (ret == -ENOMEM)
-			ret = -EAGAIN;
+	if (ret < 0)
 		return ret;
-	}
-	return ret == len ? 0 : -ENOMEM;
+	return ret == len ? 0 : -1;
 }
 
 #if !defined(__HAVE_ARCH_GATE_AREA)
-- 
GitLab


From 9978ad583e100945b74e4f33e73317983ea32df9 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Sat, 18 Oct 2008 20:26:56 -0700
Subject: [PATCH 716/895] mlock: make mlock error return Posixly Correct

Rework Posix error return for mlock().

Posix requires error code for mlock*() system calls for some conditions
that differ from what kernel low level functions, such as
get_user_pages(), return for those conditions.  For more info, see:

http://marc.info/?l=linux-kernel&m=121750892930775&w=2

This patch provides the same translation of get_user_pages()
error codes to posix specified error codes in the context
of the mlock rework for unevictable lru.

[akpm@linux-foundation.org: fix build]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c |  2 +-
 mm/mlock.c  | 33 +++++++++++++++++++++++++++------
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index b8487f8cd82e..ba86b436b85f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2821,7 +2821,7 @@ int make_pages_present(unsigned long addr, unsigned long end)
 			len, write, 0, NULL, NULL);
 	if (ret < 0)
 		return ret;
-	return ret == len ? 0 : -1;
+	return ret == len ? 0 : -EFAULT;
 }
 
 #if !defined(__HAVE_ARCH_GATE_AREA)
diff --git a/mm/mlock.c b/mm/mlock.c
index bce1b22c36c2..008ea70b7afa 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -248,11 +248,24 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 			addr += PAGE_SIZE;	/* for next get_user_pages() */
 			nr_pages--;
 		}
+		ret = 0;
 	}
 
 	lru_add_drain_all();	/* to update stats */
 
-	return 0;	/* count entire vma as locked_vm */
+	return ret;	/* count entire vma as locked_vm */
+}
+
+/*
+ * convert get_user_pages() return value to posix mlock() error
+ */
+static int __mlock_posix_error_return(long retval)
+{
+	if (retval == -EFAULT)
+		retval = -ENOMEM;
+	else if (retval == -ENOMEM)
+		retval = -EAGAIN;
+	return retval;
 }
 
 #else /* CONFIG_UNEVICTABLE_LRU */
@@ -265,9 +278,15 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 				   int mlock)
 {
 	if (mlock && (vma->vm_flags & VM_LOCKED))
-		make_pages_present(start, end);
+		return make_pages_present(start, end);
+	return 0;
+}
+
+static inline int __mlock_posix_error_return(long retval)
+{
 	return 0;
 }
+
 #endif /* CONFIG_UNEVICTABLE_LRU */
 
 /**
@@ -434,10 +453,7 @@ success:
 		downgrade_write(&mm->mmap_sem);
 
 		ret = __mlock_vma_pages_range(vma, start, end, 1);
-		if (ret > 0) {
-			mm->locked_vm -= ret;
-			ret = 0;
-		}
+
 		/*
 		 * Need to reacquire mmap sem in write mode, as our callers
 		 * expect this.  We have no support for atomically upgrading
@@ -451,6 +467,11 @@ success:
 		/* non-NULL *prev must contain @start, but need to check @end */
 		if (!(*prev) || end > (*prev)->vm_end)
 			ret = -ENOMEM;
+		else if (ret > 0) {
+			mm->locked_vm -= ret;
+			ret = 0;
+		} else
+			ret = __mlock_posix_error_return(ret); /* translate if needed */
 	} else {
 		/*
 		 * TODO:  for unlocking, pages will already be resident, so
-- 
GitLab


From f45840b5c128445da70e7ec33adc47b4a12bdaf4 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 18 Oct 2008 20:26:57 -0700
Subject: [PATCH 717/895] mm: pagecache insertion fewer atomics

Setting and clearing the page locked when inserting it into swapcache /
pagecache when it has no other references can use non-atomic page flags
operations because no other CPU may be operating on it at this time.

This saves one atomic operation when inserting a page into pagecache.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 14 +++++++-------
 mm/swap_state.c         |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 4b6c4d8d26b8..7334b2b6c4c6 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -299,14 +299,14 @@ extern int __lock_page_killable(struct page *page);
 extern void __lock_page_nosync(struct page *page);
 extern void unlock_page(struct page *page);
 
-static inline void set_page_locked(struct page *page)
+static inline void __set_page_locked(struct page *page)
 {
-	set_bit(PG_locked, &page->flags);
+	__set_bit(PG_locked, &page->flags);
 }
 
-static inline void clear_page_locked(struct page *page)
+static inline void __clear_page_locked(struct page *page)
 {
-	clear_bit(PG_locked, &page->flags);
+	__clear_bit(PG_locked, &page->flags);
 }
 
 static inline int trylock_page(struct page *page)
@@ -438,17 +438,17 @@ extern void __remove_from_page_cache(struct page *page);
 
 /*
  * Like add_to_page_cache_locked, but used to add newly allocated pages:
- * the page is new, so we can just run set_page_locked() against it.
+ * the page is new, so we can just run __set_page_locked() against it.
  */
 static inline int add_to_page_cache(struct page *page,
 		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
 {
 	int error;
 
-	set_page_locked(page);
+	__set_page_locked(page);
 	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
 	if (unlikely(error))
-		clear_page_locked(page);
+		__clear_page_locked(page);
 	return error;
 }
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 43cda7b4b808..3353c9029cef 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -303,7 +303,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * re-using the just freed swap entry for an existing page.
 		 * May fail (-ENOMEM) if radix-tree node allocation failed.
 		 */
-		set_page_locked(new_page);
+		__set_page_locked(new_page);
 		SetPageSwapBacked(new_page);
 		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
 		if (likely(!err)) {
@@ -315,7 +315,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			return new_page;
 		}
 		ClearPageSwapBacked(new_page);
-		clear_page_locked(new_page);
+		__clear_page_locked(new_page);
 		swap_free(entry);
 	} while (err != -ENOMEM);
 
-- 
GitLab


From a978d6f521063514812a7094dbe5036e056e4de3 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 18 Oct 2008 20:26:58 -0700
Subject: [PATCH 718/895] mm: unlockless reclaim

unlock_page is fairly expensive.  It can be avoided in page reclaim
success path.  By definition if we have any other references to the page
it would be a bug anyway.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 412d7872fc75..3b5860294bb6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -732,7 +732,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (!mapping || !__remove_mapping(mapping, page))
 			goto keep_locked;
 
-		unlock_page(page);
+		/*
+		 * At this point, we have no other references and there is
+		 * no way to pick any more up (removed from LRU, removed
+		 * from pagecache). Can use non-atomic bitops now (and
+		 * we obviously don't have to worry about waking up a process
+		 * waiting on the page lock, because there are no references.
+		 */
+		__clear_page_locked(page);
 free_it:
 		nr_reclaimed++;
 		if (!pagevec_add(&freed_pvec, page)) {
-- 
GitLab


From 8413ac9d8c9a1366a4f57880723126cd24e5a5c3 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 18 Oct 2008 20:26:59 -0700
Subject: [PATCH 719/895] mm: page lock use lock bitops

trylock_page, unlock_page open and close a critical section. Hence,
we can use the lock bitops to get the desired memory ordering.

Also, mark trylock as likely to succeed (and remove the annotation from
callers).

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h |  2 +-
 mm/filemap.c            | 13 +++++--------
 mm/swapfile.c           |  2 +-
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7334b2b6c4c6..709742be02f0 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -311,7 +311,7 @@ static inline void __clear_page_locked(struct page *page)
 
 static inline int trylock_page(struct page *page)
 {
-	return !test_and_set_bit(PG_locked, &page->flags);
+	return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
 }
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
index a1ddd2557af2..e1b23fda48de 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -573,17 +573,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
  * mechananism between PageLocked pages and PageWriteback pages is shared.
  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
  *
- * The first mb is necessary to safely close the critical section opened by the
- * test_and_set_bit() to lock the page; the second mb is necessary to enforce
- * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
- * races with a parallel wait_on_page_locked()).
+ * The mb is necessary to enforce ordering between the clear_bit and the read
+ * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()).
  */
 void unlock_page(struct page *page)
 {
-	smp_mb__before_clear_bit();
-	if (!test_and_clear_bit(PG_locked, &page->flags))
-		BUG();
-	smp_mb__after_clear_bit(); 
+	VM_BUG_ON(!PageLocked(page));
+	clear_bit_unlock(PG_locked, &page->flags);
+	smp_mb__after_clear_bit();
 	wake_up_page(page, PG_locked);
 }
 EXPORT_SYMBOL(unlock_page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2a97fafa3d89..90cb67a5417c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -422,7 +422,7 @@ void free_swap_and_cache(swp_entry_t entry)
 	if (p) {
 		if (swap_entry_free(p, swp_offset(entry)) == 1) {
 			page = find_get_page(&swapper_space, entry.val);
-			if (page && unlikely(!trylock_page(page))) {
+			if (page && !trylock_page(page)) {
 				page_cache_release(page);
 				page = NULL;
 			}
-- 
GitLab


From 51b07fc3c5c830bb49c80fc5eac041e1f66a72e7 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 18 Oct 2008 20:27:00 -0700
Subject: [PATCH 720/895] fs: buffer lock use lock bitops

trylock_buffer and unlock_buffer open and close a critical section.
Hence, we can use the lock bitops to get the desired memory ordering.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c                 | 3 +--
 include/linux/buffer_head.h | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index ac78d4c19b3b..6569fda5cfed 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -76,8 +76,7 @@ EXPORT_SYMBOL(__lock_buffer);
 
 void unlock_buffer(struct buffer_head *bh)
 {
-	smp_mb__before_clear_bit();
-	clear_buffer_locked(bh);
+	clear_bit_unlock(BH_Lock, &bh->b_state);
 	smp_mb__after_clear_bit();
 	wake_up_bit(&bh->b_state, BH_Lock);
 }
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index eadaab44015f..3ce64b90118c 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -322,7 +322,7 @@ static inline void wait_on_buffer(struct buffer_head *bh)
 
 static inline int trylock_buffer(struct buffer_head *bh)
 {
-	return likely(!test_and_set_bit(BH_Lock, &bh->b_state));
+	return likely(!test_and_set_bit_lock(BH_Lock, &bh->b_state));
 }
 
 static inline void lock_buffer(struct buffer_head *bh)
-- 
GitLab


From cb8f488c33539f096580e202f5438a809195008f Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 18 Oct 2008 20:27:01 -0700
Subject: [PATCH 721/895] mmap.c: deinline a few functions

__vma_link_file and expand_downwards functions are not small, yeat they
are marked inline.  They probably had one callsite sometime in the past,
but now they have more.  In order to prevent similar thing, I also
deinlined expand_upwards, despite it having only pne callsite.  Nowadays
gcc auto-inlines such static functions anyway.  In find_extend_vma, I
removed one extra level of indirection.

Patch is deliberately generated with -U $BIGNUM to make
it easier to see that functions are big.

Result:

# size */*/mmap.o */vmlinux
   text    data     bss     dec     hex filename
   9514     188      16    9718    25f6 0.org/mm/mmap.o
   9237     188      16    9441    24e1 deinline/mm/mmap.o
6124402  858996  389480 7372878  70804e 0.org/vmlinux
6124113  858996  389480 7372589  707f2d deinline/vmlinux

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mmap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 505a454f365e..74f4d158022e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -410,7 +410,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
 }
 
-static inline void __vma_link_file(struct vm_area_struct *vma)
+static void __vma_link_file(struct vm_area_struct *vma)
 {
 	struct file * file;
 
@@ -1591,7 +1591,7 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
  * vma is the last one with address > vma->vm_end.  Have to extend vma.
  */
 #ifndef CONFIG_IA64
-static inline
+static
 #endif
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
@@ -1641,7 +1641,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 /*
  * vma is the first one with address < vma->vm_start.  Have to extend vma.
  */
-static inline int expand_downwards(struct vm_area_struct *vma,
+static int expand_downwards(struct vm_area_struct *vma,
 				   unsigned long address)
 {
 	int error;
@@ -1703,7 +1703,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 	vma = find_vma_prev(mm, addr, &prev);
 	if (vma && (vma->vm_start <= addr))
 		return vma;
-	if (!prev || expand_stack(prev, addr))
+	if (expand_stack(prev, addr))
 		return NULL;
 	if (prev->vm_flags & VM_LOCKED) {
 		if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
-- 
GitLab


From db64fe02258f1507e13fe5212a989922323685ce Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 18 Oct 2008 20:27:03 -0700
Subject: [PATCH 722/895] mm: rewrite vmap layer

Rewrite the vmap allocator to use rbtrees and lazy tlb flushing, and
provide a fast, scalable percpu frontend for small vmaps (requires a
slightly different API, though).

The biggest problem with vmap is actually vunmap.  Presently this requires
a global kernel TLB flush, which on most architectures is a broadcast IPI
to all CPUs to flush the cache.  This is all done under a global lock.  As
the number of CPUs increases, so will the number of vunmaps a scaled
workload will want to perform, and so will the cost of a global TLB flush.
 This gives terrible quadratic scalability characteristics.

Another problem is that the entire vmap subsystem works under a single
lock.  It is a rwlock, but it is actually taken for write in all the fast
paths, and the read locking would likely never be run concurrently anyway,
so it's just pointless.

This is a rewrite of vmap subsystem to solve those problems.  The existing
vmalloc API is implemented on top of the rewritten subsystem.

The TLB flushing problem is solved by using lazy TLB unmapping.  vmap
addresses do not have to be flushed immediately when they are vunmapped,
because the kernel will not reuse them again (would be a use-after-free)
until they are reallocated.  So the addresses aren't allocated again until
a subsequent TLB flush.  A single TLB flush then can flush multiple
vunmaps from each CPU.

XEN and PAT and such do not like deferred TLB flushing because they can't
always handle multiple aliasing virtual addresses to a physical address.
They now call vm_unmap_aliases() in order to flush any deferred mappings.
That call is very expensive (well, actually not a lot more expensive than
a single vunmap under the old scheme), however it should be OK if not
called too often.

The virtual memory extent information is stored in an rbtree rather than a
linked list to improve the algorithmic scalability.

There is a per-CPU allocator for small vmaps, which amortizes or avoids
global locking.

To use the per-CPU interface, the vm_map_ram / vm_unmap_ram interfaces
must be used in place of vmap and vunmap.  Vmalloc does not use these
interfaces at the moment, so it will not be quite so scalable (although it
will use lazy TLB flushing).

As a quick test of performance, I ran a test that loops in the kernel,
linearly mapping then touching then unmapping 4 pages.  Different numbers
of tests were run in parallel on an 4 core, 2 socket opteron.  Results are
in nanoseconds per map+touch+unmap.

threads           vanilla         vmap rewrite
1                 14700           2900
2                 33600           3000
4                 49500           2800
8                 70631           2900

So with a 8 cores, the rewritten version is already 25x faster.

In a slightly more realistic test (although with an older and less
scalable version of the patch), I ripped the not-very-good vunmap batching
code out of XFS, and implemented the large buffer mapping with vm_map_ram
and vm_unmap_ram...  along with a couple of other tricks, I was able to
speed up a large directory workload by 20x on a 64 CPU system.  I believe
vmap/vunmap is actually sped up a lot more than 20x on such a system, but
I'm running into other locks now.  vmap is pretty well blown off the
profiles.

Before:
1352059 total                                      0.1401
798784 _write_lock                              8320.6667 <- vmlist_lock
529313 default_idle                             1181.5022
 15242 smp_call_function                         15.8771  <- vmap tlb flushing
  2472 __get_vm_area_node                         1.9312  <- vmap
  1762 remove_vm_area                             4.5885  <- vunmap
   316 map_vm_area                                0.2297  <- vmap
   312 kfree                                      0.1950
   300 _spin_lock                                 3.1250
   252 sn_send_IPI_phys                           0.4375  <- tlb flushing
   238 vmap                                       0.8264  <- vmap
   216 find_lock_page                             0.5192
   196 find_next_bit                              0.3603
   136 sn2_send_IPI                               0.2024
   130 pio_phys_write_mmr                         2.0312
   118 unmap_kernel_range                         0.1229

After:
 78406 total                                      0.0081
 40053 default_idle                              89.4040
 33576 ia64_spinlock_contention                 349.7500
  1650 _spin_lock                                17.1875
   319 __reg_op                                   0.5538
   281 _atomic_dec_and_lock                       1.0977
   153 mutex_unlock                               1.5938
   123 iget_locked                                0.1671
   117 xfs_dir_lookup                             0.1662
   117 dput                                       0.1406
   114 xfs_iget_core                              0.0268
    92 xfs_da_hashname                            0.1917
    75 d_alloc                                    0.0670
    68 vmap_page_range                            0.0462 <- vmap
    58 kmem_cache_alloc                           0.0604
    57 memset                                     0.0540
    52 rb_next                                    0.1625
    50 __copy_user                                0.0208
    49 bitmap_find_free_region                    0.2188 <- vmap
    46 ia64_sn_udelay                             0.1106
    45 find_inode_fast                            0.1406
    42 memcmp                                     0.2188
    42 finish_task_switch                         0.1094
    42 __d_lookup                                 0.0410
    40 radix_tree_lookup_slot                     0.1250
    37 _spin_unlock_irqrestore                    0.3854
    36 xfs_bmapi                                  0.0050
    36 kmem_cache_free                            0.0256
    35 xfs_vn_getattr                             0.0322
    34 radix_tree_lookup                          0.1062
    33 __link_path_walk                           0.0035
    31 xfs_da_do_buf                              0.0091
    30 _xfs_buf_find                              0.0204
    28 find_get_page                              0.0875
    27 xfs_iread                                  0.0241
    27 __strncpy_from_user                        0.2812
    26 _xfs_buf_initialize                        0.0406
    24 _xfs_buf_lookup_pages                      0.0179
    24 vunmap_page_range                          0.0250 <- vunmap
    23 find_lock_page                             0.0799
    22 vm_map_ram                                 0.0087 <- vmap
    20 kfree                                      0.0125
    19 put_page                                   0.0330
    18 __kmalloc                                  0.0176
    17 xfs_da_node_lookup_int                     0.0086
    17 _read_lock                                 0.0885
    17 page_waitqueue                             0.0664

vmap has gone from being the top 5 on the profiles and flushing the crap
out of all TLBs, to using less than 1% of kernel time.

[akpm@linux-foundation.org: cleanups, section fix]
[akpm@linux-foundation.org: fix build on alpha]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pageattr.c   |   2 +
 arch/x86/xen/enlighten.c |   1 +
 arch/x86/xen/mmu.c       |   1 +
 include/linux/vmalloc.h  |  15 +-
 init/main.c              |   2 +
 mm/vmalloc.c             | 975 +++++++++++++++++++++++++++++++++------
 6 files changed, 862 insertions(+), 134 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a9ec89c3fbca..407d8784f669 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -792,6 +792,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	/* Must avoid aliasing mappings in the highmem code */
 	kmap_flush_unused();
 
+	vm_unmap_aliases();
+
 	cpa.vaddr = addr;
 	cpa.numpages = numpages;
 	cpa.mask_set = mask_set;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0013a729b41d..b61534c7a4c4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -871,6 +871,7 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
 			/* make sure there are no stray mappings of
 			   this page */
 			kmap_flush_unused();
+			vm_unmap_aliases();
 	}
 }
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ae173f6edd8b..d4d52f5a1cf7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -846,6 +846,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
+		vm_unmap_aliases();
 		xen_mc_batch();
 	}
 
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 328eb4022727..4c28c4d564e2 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -2,6 +2,7 @@
 #define _LINUX_VMALLOC_H
 
 #include <linux/spinlock.h>
+#include <linux/init.h>
 #include <asm/page.h>		/* pgprot_t */
 
 struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
@@ -23,7 +24,6 @@ struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
 #endif
 
 struct vm_struct {
-	/* keep next,addr,size together to speedup lookups */
 	struct vm_struct	*next;
 	void			*addr;
 	unsigned long		size;
@@ -37,6 +37,19 @@ struct vm_struct {
 /*
  *	Highlevel APIs for driver use
  */
+extern void vm_unmap_ram(const void *mem, unsigned int count);
+extern void *vm_map_ram(struct page **pages, unsigned int count,
+				int node, pgprot_t prot);
+extern void vm_unmap_aliases(void);
+
+#ifdef CONFIG_MMU
+extern void __init vmalloc_init(void);
+#else
+static inline void vmalloc_init(void)
+{
+}
+#endif
+
 extern void *vmalloc(unsigned long size);
 extern void *vmalloc_user(unsigned long size);
 extern void *vmalloc_node(unsigned long size, int node);
diff --git a/init/main.c b/init/main.c
index 27f6bf6108e9..4371d11721f6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -27,6 +27,7 @@
 #include <linux/gfp.h>
 #include <linux/percpu.h>
 #include <linux/kmod.h>
+#include <linux/vmalloc.h>
 #include <linux/kernel_stat.h>
 #include <linux/start_kernel.h>
 #include <linux/security.h>
@@ -642,6 +643,7 @@ asmlinkage void __init start_kernel(void)
 		initrd_start = 0;
 	}
 #endif
+	vmalloc_init();
 	vfs_caches_init_early();
 	cpuset_init_early();
 	mem_init();
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index bba06c41fc59..712ae47af0bf 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -8,6 +8,7 @@
  *  Numa awareness, Christoph Lameter, SGI, June 2005
  */
 
+#include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/highmem.h>
@@ -18,16 +19,17 @@
 #include <linux/debugobjects.h>
 #include <linux/vmalloc.h>
 #include <linux/kallsyms.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
 
+#include <asm/atomic.h>
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 
 
-DEFINE_RWLOCK(vmlist_lock);
-struct vm_struct *vmlist;
-
-static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
-			    int node, void *caller);
+/*** Page table manipulation functions ***/
 
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
 {
@@ -40,8 +42,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
-static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
-						unsigned long end)
+static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -55,8 +56,7 @@ static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
 	} while (pmd++, addr = next, addr != end);
 }
 
-static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
-						unsigned long end)
+static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -70,12 +70,10 @@ static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
 	} while (pud++, addr = next, addr != end);
 }
 
-void unmap_kernel_range(unsigned long addr, unsigned long size)
+static void vunmap_page_range(unsigned long addr, unsigned long end)
 {
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long start = addr;
-	unsigned long end = addr + size;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset_k(addr);
@@ -86,35 +84,36 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
 			continue;
 		vunmap_pud_range(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
-	flush_tlb_kernel_range(start, end);
-}
-
-static void unmap_vm_area(struct vm_struct *area)
-{
-	unmap_kernel_range((unsigned long)area->addr, area->size);
 }
 
 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
-			unsigned long end, pgprot_t prot, struct page ***pages)
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
 	pte_t *pte;
 
+	/*
+	 * nr is a running index into the array which helps higher level
+	 * callers keep track of where we're up to.
+	 */
+
 	pte = pte_alloc_kernel(pmd, addr);
 	if (!pte)
 		return -ENOMEM;
 	do {
-		struct page *page = **pages;
-		WARN_ON(!pte_none(*pte));
-		if (!page)
+		struct page *page = pages[*nr];
+
+		if (WARN_ON(!pte_none(*pte)))
+			return -EBUSY;
+		if (WARN_ON(!page))
 			return -ENOMEM;
 		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
-		(*pages)++;
+		(*nr)++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	return 0;
 }
 
-static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
-			unsigned long end, pgprot_t prot, struct page ***pages)
+static int vmap_pmd_range(pud_t *pud, unsigned long addr,
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -124,14 +123,14 @@ static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
 		return -ENOMEM;
 	do {
 		next = pmd_addr_end(addr, end);
-		if (vmap_pte_range(pmd, addr, next, prot, pages))
+		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
 			return -ENOMEM;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
-			unsigned long end, pgprot_t prot, struct page ***pages)
+static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -141,44 +140,49 @@ static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
-		if (vmap_pmd_range(pud, addr, next, prot, pages))
+		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
 			return -ENOMEM;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+/*
+ * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
+ * will have pfns corresponding to the "pages" array.
+ *
+ * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
+ */
+static int vmap_page_range(unsigned long addr, unsigned long end,
+				pgprot_t prot, struct page **pages)
 {
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long addr = (unsigned long) area->addr;
-	unsigned long end = addr + area->size - PAGE_SIZE;
-	int err;
+	int err = 0;
+	int nr = 0;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset_k(addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = vmap_pud_range(pgd, addr, next, prot, pages);
+		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-	flush_cache_vmap((unsigned long) area->addr, end);
-	return err;
+	flush_cache_vmap(addr, end);
+
+	if (unlikely(err))
+		return err;
+	return nr;
 }
-EXPORT_SYMBOL_GPL(map_vm_area);
 
 /*
- * Map a vmalloc()-space virtual address to the physical page.
+ * Walk a vmap address to the struct page it maps.
  */
 struct page *vmalloc_to_page(const void *vmalloc_addr)
 {
 	unsigned long addr = (unsigned long) vmalloc_addr;
 	struct page *page = NULL;
 	pgd_t *pgd = pgd_offset_k(addr);
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep, pte;
 
 	/*
 	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
@@ -188,10 +192,12 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 			!is_module_address(addr));
 
 	if (!pgd_none(*pgd)) {
-		pud = pud_offset(pgd, addr);
+		pud_t *pud = pud_offset(pgd, addr);
 		if (!pud_none(*pud)) {
-			pmd = pmd_offset(pud, addr);
+			pmd_t *pmd = pmd_offset(pud, addr);
 			if (!pmd_none(*pmd)) {
+				pte_t *ptep, pte;
+
 				ptep = pte_offset_map(pmd, addr);
 				pte = *ptep;
 				if (pte_present(pte))
@@ -213,13 +219,751 @@ unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
 }
 EXPORT_SYMBOL(vmalloc_to_pfn);
 
-static struct vm_struct *
-__get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
-		unsigned long end, int node, gfp_t gfp_mask, void *caller)
+
+/*** Global kva allocator ***/
+
+#define VM_LAZY_FREE	0x01
+#define VM_LAZY_FREEING	0x02
+#define VM_VM_AREA	0x04
+
+struct vmap_area {
+	unsigned long va_start;
+	unsigned long va_end;
+	unsigned long flags;
+	struct rb_node rb_node;		/* address sorted rbtree */
+	struct list_head list;		/* address sorted list */
+	struct list_head purge_list;	/* "lazy purge" list */
+	void *private;
+	struct rcu_head rcu_head;
+};
+
+static DEFINE_SPINLOCK(vmap_area_lock);
+static struct rb_root vmap_area_root = RB_ROOT;
+static LIST_HEAD(vmap_area_list);
+
+static struct vmap_area *__find_vmap_area(unsigned long addr)
 {
-	struct vm_struct **p, *tmp, *area;
-	unsigned long align = 1;
+	struct rb_node *n = vmap_area_root.rb_node;
+
+	while (n) {
+		struct vmap_area *va;
+
+		va = rb_entry(n, struct vmap_area, rb_node);
+		if (addr < va->va_start)
+			n = n->rb_left;
+		else if (addr > va->va_start)
+			n = n->rb_right;
+		else
+			return va;
+	}
+
+	return NULL;
+}
+
+static void __insert_vmap_area(struct vmap_area *va)
+{
+	struct rb_node **p = &vmap_area_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct rb_node *tmp;
+
+	while (*p) {
+		struct vmap_area *tmp;
+
+		parent = *p;
+		tmp = rb_entry(parent, struct vmap_area, rb_node);
+		if (va->va_start < tmp->va_end)
+			p = &(*p)->rb_left;
+		else if (va->va_end > tmp->va_start)
+			p = &(*p)->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&va->rb_node, parent, p);
+	rb_insert_color(&va->rb_node, &vmap_area_root);
+
+	/* address-sort this list so it is usable like the vmlist */
+	tmp = rb_prev(&va->rb_node);
+	if (tmp) {
+		struct vmap_area *prev;
+		prev = rb_entry(tmp, struct vmap_area, rb_node);
+		list_add_rcu(&va->list, &prev->list);
+	} else
+		list_add_rcu(&va->list, &vmap_area_list);
+}
+
+static void purge_vmap_area_lazy(void);
+
+/*
+ * Allocate a region of KVA of the specified size and alignment, within the
+ * vstart and vend.
+ */
+static struct vmap_area *alloc_vmap_area(unsigned long size,
+				unsigned long align,
+				unsigned long vstart, unsigned long vend,
+				int node, gfp_t gfp_mask)
+{
+	struct vmap_area *va;
+	struct rb_node *n;
+	unsigned long addr;
+	int purged = 0;
+
+	BUG_ON(size & ~PAGE_MASK);
+
+	addr = ALIGN(vstart, align);
+
+	va = kmalloc_node(sizeof(struct vmap_area),
+			gfp_mask & GFP_RECLAIM_MASK, node);
+	if (unlikely(!va))
+		return ERR_PTR(-ENOMEM);
+
+retry:
+	spin_lock(&vmap_area_lock);
+	/* XXX: could have a last_hole cache */
+	n = vmap_area_root.rb_node;
+	if (n) {
+		struct vmap_area *first = NULL;
+
+		do {
+			struct vmap_area *tmp;
+			tmp = rb_entry(n, struct vmap_area, rb_node);
+			if (tmp->va_end >= addr) {
+				if (!first && tmp->va_start < addr + size)
+					first = tmp;
+				n = n->rb_left;
+			} else {
+				first = tmp;
+				n = n->rb_right;
+			}
+		} while (n);
+
+		if (!first)
+			goto found;
+
+		if (first->va_end < addr) {
+			n = rb_next(&first->rb_node);
+			if (n)
+				first = rb_entry(n, struct vmap_area, rb_node);
+			else
+				goto found;
+		}
+
+		while (addr + size >= first->va_start && addr + size <= vend) {
+			addr = ALIGN(first->va_end + PAGE_SIZE, align);
+
+			n = rb_next(&first->rb_node);
+			if (n)
+				first = rb_entry(n, struct vmap_area, rb_node);
+			else
+				goto found;
+		}
+	}
+found:
+	if (addr + size > vend) {
+		spin_unlock(&vmap_area_lock);
+		if (!purged) {
+			purge_vmap_area_lazy();
+			purged = 1;
+			goto retry;
+		}
+		if (printk_ratelimit())
+			printk(KERN_WARNING "vmap allocation failed: "
+				 "use vmalloc=<size> to increase size.\n");
+		return ERR_PTR(-EBUSY);
+	}
+
+	BUG_ON(addr & (align-1));
+
+	va->va_start = addr;
+	va->va_end = addr + size;
+	va->flags = 0;
+	__insert_vmap_area(va);
+	spin_unlock(&vmap_area_lock);
+
+	return va;
+}
+
+static void rcu_free_va(struct rcu_head *head)
+{
+	struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);
+
+	kfree(va);
+}
+
+static void __free_vmap_area(struct vmap_area *va)
+{
+	BUG_ON(RB_EMPTY_NODE(&va->rb_node));
+	rb_erase(&va->rb_node, &vmap_area_root);
+	RB_CLEAR_NODE(&va->rb_node);
+	list_del_rcu(&va->list);
+
+	call_rcu(&va->rcu_head, rcu_free_va);
+}
+
+/*
+ * Free a region of KVA allocated by alloc_vmap_area
+ */
+static void free_vmap_area(struct vmap_area *va)
+{
+	spin_lock(&vmap_area_lock);
+	__free_vmap_area(va);
+	spin_unlock(&vmap_area_lock);
+}
+
+/*
+ * Clear the pagetable entries of a given vmap_area
+ */
+static void unmap_vmap_area(struct vmap_area *va)
+{
+	vunmap_page_range(va->va_start, va->va_end);
+}
+
+/*
+ * lazy_max_pages is the maximum amount of virtual address space we gather up
+ * before attempting to purge with a TLB flush.
+ *
+ * There is a tradeoff here: a larger number will cover more kernel page tables
+ * and take slightly longer to purge, but it will linearly reduce the number of
+ * global TLB flushes that must be performed. It would seem natural to scale
+ * this number up linearly with the number of CPUs (because vmapping activity
+ * could also scale linearly with the number of CPUs), however it is likely
+ * that in practice, workloads might be constrained in other ways that mean
+ * vmap activity will not scale linearly with CPUs. Also, I want to be
+ * conservative and not introduce a big latency on huge systems, so go with
+ * a less aggressive log scale. It will still be an improvement over the old
+ * code, and it will be simple to change the scale factor if we find that it
+ * becomes a problem on bigger systems.
+ */
+static unsigned long lazy_max_pages(void)
+{
+	unsigned int log;
+
+	log = fls(num_online_cpus());
+
+	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
+}
+
+static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+
+/*
+ * Purges all lazily-freed vmap areas.
+ *
+ * If sync is 0 then don't purge if there is already a purge in progress.
+ * If force_flush is 1, then flush kernel TLBs between *start and *end even
+ * if we found no lazy vmap areas to unmap (callers can use this to optimise
+ * their own TLB flushing).
+ * Returns with *start = min(*start, lowest purged address)
+ *              *end = max(*end, highest purged address)
+ */
+static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
+					int sync, int force_flush)
+{
+	static DEFINE_SPINLOCK(purge_lock);
+	LIST_HEAD(valist);
+	struct vmap_area *va;
+	int nr = 0;
+
+	/*
+	 * If sync is 0 but force_flush is 1, we'll go sync anyway but callers
+	 * should not expect such behaviour. This just simplifies locking for
+	 * the case that isn't actually used at the moment anyway.
+	 */
+	if (!sync && !force_flush) {
+		if (!spin_trylock(&purge_lock))
+			return;
+	} else
+		spin_lock(&purge_lock);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(va, &vmap_area_list, list) {
+		if (va->flags & VM_LAZY_FREE) {
+			if (va->va_start < *start)
+				*start = va->va_start;
+			if (va->va_end > *end)
+				*end = va->va_end;
+			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
+			unmap_vmap_area(va);
+			list_add_tail(&va->purge_list, &valist);
+			va->flags |= VM_LAZY_FREEING;
+			va->flags &= ~VM_LAZY_FREE;
+		}
+	}
+	rcu_read_unlock();
+
+	if (nr) {
+		BUG_ON(nr > atomic_read(&vmap_lazy_nr));
+		atomic_sub(nr, &vmap_lazy_nr);
+	}
+
+	if (nr || force_flush)
+		flush_tlb_kernel_range(*start, *end);
+
+	if (nr) {
+		spin_lock(&vmap_area_lock);
+		list_for_each_entry(va, &valist, purge_list)
+			__free_vmap_area(va);
+		spin_unlock(&vmap_area_lock);
+	}
+	spin_unlock(&purge_lock);
+}
+
+/*
+ * Kick off a purge of the outstanding lazy areas.
+ */
+static void purge_vmap_area_lazy(void)
+{
+	unsigned long start = ULONG_MAX, end = 0;
+
+	__purge_vmap_area_lazy(&start, &end, 0, 0);
+}
+
+/*
+ * Free and unmap a vmap area
+ */
+static void free_unmap_vmap_area(struct vmap_area *va)
+{
+	va->flags |= VM_LAZY_FREE;
+	atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
+	if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
+		purge_vmap_area_lazy();
+}
+
+static struct vmap_area *find_vmap_area(unsigned long addr)
+{
+	struct vmap_area *va;
+
+	spin_lock(&vmap_area_lock);
+	va = __find_vmap_area(addr);
+	spin_unlock(&vmap_area_lock);
+
+	return va;
+}
+
+static void free_unmap_vmap_area_addr(unsigned long addr)
+{
+	struct vmap_area *va;
+
+	va = find_vmap_area(addr);
+	BUG_ON(!va);
+	free_unmap_vmap_area(va);
+}
+
+
+/*** Per cpu kva allocator ***/
+
+/*
+ * vmap space is limited especially on 32 bit architectures. Ensure there is
+ * room for at least 16 percpu vmap blocks per CPU.
+ */
+/*
+ * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
+ * to #define VMALLOC_SPACE		(VMALLOC_END-VMALLOC_START). Guess
+ * instead (we just need a rough idea)
+ */
+#if BITS_PER_LONG == 32
+#define VMALLOC_SPACE		(128UL*1024*1024)
+#else
+#define VMALLOC_SPACE		(128UL*1024*1024*1024)
+#endif
+
+#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
+#define VMAP_MAX_ALLOC		BITS_PER_LONG	/* 256K with 4K pages */
+#define VMAP_BBMAP_BITS_MAX	1024	/* 4MB with 4K pages */
+#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
+#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y)) /* can't use min() */
+#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y)) /* can't use max() */
+#define VMAP_BBMAP_BITS		VMAP_MIN(VMAP_BBMAP_BITS_MAX,		\
+					VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
+						VMALLOC_PAGES / NR_CPUS / 16))
+
+#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
+
+struct vmap_block_queue {
+	spinlock_t lock;
+	struct list_head free;
+	struct list_head dirty;
+	unsigned int nr_dirty;
+};
+
+struct vmap_block {
+	spinlock_t lock;
+	struct vmap_area *va;
+	struct vmap_block_queue *vbq;
+	unsigned long free, dirty;
+	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
+	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
+	union {
+		struct {
+			struct list_head free_list;
+			struct list_head dirty_list;
+		};
+		struct rcu_head rcu_head;
+	};
+};
+
+/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
+static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
+
+/*
+ * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
+ * in the free path. Could get rid of this if we change the API to return a
+ * "cookie" from alloc, to be passed to free. But no big deal yet.
+ */
+static DEFINE_SPINLOCK(vmap_block_tree_lock);
+static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
+
+/*
+ * We should probably have a fallback mechanism to allocate virtual memory
+ * out of partially filled vmap blocks. However vmap block sizing should be
+ * fairly reasonable according to the vmalloc size, so it shouldn't be a
+ * big problem.
+ */
+
+static unsigned long addr_to_vb_idx(unsigned long addr)
+{
+	addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
+	addr /= VMAP_BLOCK_SIZE;
+	return addr;
+}
+
+static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
+{
+	struct vmap_block_queue *vbq;
+	struct vmap_block *vb;
+	struct vmap_area *va;
+	unsigned long vb_idx;
+	int node, err;
+
+	node = numa_node_id();
+
+	vb = kmalloc_node(sizeof(struct vmap_block),
+			gfp_mask & GFP_RECLAIM_MASK, node);
+	if (unlikely(!vb))
+		return ERR_PTR(-ENOMEM);
+
+	va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
+					VMALLOC_START, VMALLOC_END,
+					node, gfp_mask);
+	if (unlikely(IS_ERR(va))) {
+		kfree(vb);
+		return ERR_PTR(PTR_ERR(va));
+	}
+
+	err = radix_tree_preload(gfp_mask);
+	if (unlikely(err)) {
+		kfree(vb);
+		free_vmap_area(va);
+		return ERR_PTR(err);
+	}
+
+	spin_lock_init(&vb->lock);
+	vb->va = va;
+	vb->free = VMAP_BBMAP_BITS;
+	vb->dirty = 0;
+	bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
+	bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
+	INIT_LIST_HEAD(&vb->free_list);
+	INIT_LIST_HEAD(&vb->dirty_list);
+
+	vb_idx = addr_to_vb_idx(va->va_start);
+	spin_lock(&vmap_block_tree_lock);
+	err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
+	spin_unlock(&vmap_block_tree_lock);
+	BUG_ON(err);
+	radix_tree_preload_end();
+
+	vbq = &get_cpu_var(vmap_block_queue);
+	vb->vbq = vbq;
+	spin_lock(&vbq->lock);
+	list_add(&vb->free_list, &vbq->free);
+	spin_unlock(&vbq->lock);
+	put_cpu_var(vmap_cpu_blocks);
+
+	return vb;
+}
+
+static void rcu_free_vb(struct rcu_head *head)
+{
+	struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);
+
+	kfree(vb);
+}
+
+static void free_vmap_block(struct vmap_block *vb)
+{
+	struct vmap_block *tmp;
+	unsigned long vb_idx;
+
+	spin_lock(&vb->vbq->lock);
+	if (!list_empty(&vb->free_list))
+		list_del(&vb->free_list);
+	if (!list_empty(&vb->dirty_list))
+		list_del(&vb->dirty_list);
+	spin_unlock(&vb->vbq->lock);
+
+	vb_idx = addr_to_vb_idx(vb->va->va_start);
+	spin_lock(&vmap_block_tree_lock);
+	tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
+	spin_unlock(&vmap_block_tree_lock);
+	BUG_ON(tmp != vb);
+
+	free_unmap_vmap_area(vb->va);
+	call_rcu(&vb->rcu_head, rcu_free_vb);
+}
+
+static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+{
+	struct vmap_block_queue *vbq;
+	struct vmap_block *vb;
+	unsigned long addr = 0;
+	unsigned int order;
+
+	BUG_ON(size & ~PAGE_MASK);
+	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+	order = get_order(size);
+
+again:
+	rcu_read_lock();
+	vbq = &get_cpu_var(vmap_block_queue);
+	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+		int i;
+
+		spin_lock(&vb->lock);
+		i = bitmap_find_free_region(vb->alloc_map,
+						VMAP_BBMAP_BITS, order);
+
+		if (i >= 0) {
+			addr = vb->va->va_start + (i << PAGE_SHIFT);
+			BUG_ON(addr_to_vb_idx(addr) !=
+					addr_to_vb_idx(vb->va->va_start));
+			vb->free -= 1UL << order;
+			if (vb->free == 0) {
+				spin_lock(&vbq->lock);
+				list_del_init(&vb->free_list);
+				spin_unlock(&vbq->lock);
+			}
+			spin_unlock(&vb->lock);
+			break;
+		}
+		spin_unlock(&vb->lock);
+	}
+	put_cpu_var(vmap_cpu_blocks);
+	rcu_read_unlock();
+
+	if (!addr) {
+		vb = new_vmap_block(gfp_mask);
+		if (IS_ERR(vb))
+			return vb;
+		goto again;
+	}
+
+	return (void *)addr;
+}
+
+static void vb_free(const void *addr, unsigned long size)
+{
+	unsigned long offset;
+	unsigned long vb_idx;
+	unsigned int order;
+	struct vmap_block *vb;
+
+	BUG_ON(size & ~PAGE_MASK);
+	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+	order = get_order(size);
+
+	offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
+
+	vb_idx = addr_to_vb_idx((unsigned long)addr);
+	rcu_read_lock();
+	vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
+	rcu_read_unlock();
+	BUG_ON(!vb);
+
+	spin_lock(&vb->lock);
+	bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
+	if (!vb->dirty) {
+		spin_lock(&vb->vbq->lock);
+		list_add(&vb->dirty_list, &vb->vbq->dirty);
+		spin_unlock(&vb->vbq->lock);
+	}
+	vb->dirty += 1UL << order;
+	if (vb->dirty == VMAP_BBMAP_BITS) {
+		BUG_ON(vb->free || !list_empty(&vb->free_list));
+		spin_unlock(&vb->lock);
+		free_vmap_block(vb);
+	} else
+		spin_unlock(&vb->lock);
+}
+
+/**
+ * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
+ *
+ * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
+ * to amortize TLB flushing overheads. What this means is that any page you
+ * have now, may, in a former life, have been mapped into kernel virtual
+ * address by the vmap layer and so there might be some CPUs with TLB entries
+ * still referencing that page (additional to the regular 1:1 kernel mapping).
+ *
+ * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
+ * be sure that none of the pages we have control over will have any aliases
+ * from the vmap layer.
+ */
+void vm_unmap_aliases(void)
+{
+	unsigned long start = ULONG_MAX, end = 0;
+	int cpu;
+	int flush = 0;
+
+	for_each_possible_cpu(cpu) {
+		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+		struct vmap_block *vb;
+
+		rcu_read_lock();
+		list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+			int i;
+
+			spin_lock(&vb->lock);
+			i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
+			while (i < VMAP_BBMAP_BITS) {
+				unsigned long s, e;
+				int j;
+				j = find_next_zero_bit(vb->dirty_map,
+					VMAP_BBMAP_BITS, i);
+
+				s = vb->va->va_start + (i << PAGE_SHIFT);
+				e = vb->va->va_start + (j << PAGE_SHIFT);
+				vunmap_page_range(s, e);
+				flush = 1;
+
+				if (s < start)
+					start = s;
+				if (e > end)
+					end = e;
+
+				i = j;
+				i = find_next_bit(vb->dirty_map,
+							VMAP_BBMAP_BITS, i);
+			}
+			spin_unlock(&vb->lock);
+		}
+		rcu_read_unlock();
+	}
+
+	__purge_vmap_area_lazy(&start, &end, 1, flush);
+}
+EXPORT_SYMBOL_GPL(vm_unmap_aliases);
+
+/**
+ * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
+ * @mem: the pointer returned by vm_map_ram
+ * @count: the count passed to that vm_map_ram call (cannot unmap partial)
+ */
+void vm_unmap_ram(const void *mem, unsigned int count)
+{
+	unsigned long size = count << PAGE_SHIFT;
+	unsigned long addr = (unsigned long)mem;
+
+	BUG_ON(!addr);
+	BUG_ON(addr < VMALLOC_START);
+	BUG_ON(addr > VMALLOC_END);
+	BUG_ON(addr & (PAGE_SIZE-1));
+
+	debug_check_no_locks_freed(mem, size);
+
+	if (likely(count <= VMAP_MAX_ALLOC))
+		vb_free(mem, size);
+	else
+		free_unmap_vmap_area_addr(addr);
+}
+EXPORT_SYMBOL(vm_unmap_ram);
+
+/**
+ * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
+ * @pages: an array of pointers to the pages to be mapped
+ * @count: number of pages
+ * @node: prefer to allocate data structures on this node
+ * @prot: memory protection to use. PAGE_KERNEL for regular RAM
+ * @returns: a pointer to the address that has been mapped, or NULL on failure
+ */
+void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
+{
+	unsigned long size = count << PAGE_SHIFT;
 	unsigned long addr;
+	void *mem;
+
+	if (likely(count <= VMAP_MAX_ALLOC)) {
+		mem = vb_alloc(size, GFP_KERNEL);
+		if (IS_ERR(mem))
+			return NULL;
+		addr = (unsigned long)mem;
+	} else {
+		struct vmap_area *va;
+		va = alloc_vmap_area(size, PAGE_SIZE,
+				VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
+		if (IS_ERR(va))
+			return NULL;
+
+		addr = va->va_start;
+		mem = (void *)addr;
+	}
+	if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
+		vm_unmap_ram(mem, count);
+		return NULL;
+	}
+	return mem;
+}
+EXPORT_SYMBOL(vm_map_ram);
+
+void __init vmalloc_init(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct vmap_block_queue *vbq;
+
+		vbq = &per_cpu(vmap_block_queue, i);
+		spin_lock_init(&vbq->lock);
+		INIT_LIST_HEAD(&vbq->free);
+		INIT_LIST_HEAD(&vbq->dirty);
+		vbq->nr_dirty = 0;
+	}
+}
+
+void unmap_kernel_range(unsigned long addr, unsigned long size)
+{
+	unsigned long end = addr + size;
+	vunmap_page_range(addr, end);
+	flush_tlb_kernel_range(addr, end);
+}
+
+int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+{
+	unsigned long addr = (unsigned long)area->addr;
+	unsigned long end = addr + area->size - PAGE_SIZE;
+	int err;
+
+	err = vmap_page_range(addr, end, prot, *pages);
+	if (err > 0) {
+		*pages += err;
+		err = 0;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(map_vm_area);
+
+/*** Old vmalloc interfaces ***/
+DEFINE_RWLOCK(vmlist_lock);
+struct vm_struct *vmlist;
+
+static struct vm_struct *__get_vm_area_node(unsigned long size,
+		unsigned long flags, unsigned long start, unsigned long end,
+		int node, gfp_t gfp_mask, void *caller)
+{
+	static struct vmap_area *va;
+	struct vm_struct *area;
+	struct vm_struct *tmp, **p;
+	unsigned long align = 1;
 
 	BUG_ON(in_interrupt());
 	if (flags & VM_IOREMAP) {
@@ -232,13 +976,12 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
 
 		align = 1ul << bit;
 	}
-	addr = ALIGN(start, align);
+
 	size = PAGE_ALIGN(size);
 	if (unlikely(!size))
 		return NULL;
 
 	area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
-
 	if (unlikely(!area))
 		return NULL;
 
@@ -247,48 +990,32 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
 	 */
 	size += PAGE_SIZE;
 
-	write_lock(&vmlist_lock);
-	for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
-		if ((unsigned long)tmp->addr < addr) {
-			if((unsigned long)tmp->addr + tmp->size >= addr)
-				addr = ALIGN(tmp->size + 
-					     (unsigned long)tmp->addr, align);
-			continue;
-		}
-		if ((size + addr) < addr)
-			goto out;
-		if (size + addr <= (unsigned long)tmp->addr)
-			goto found;
-		addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
-		if (addr > end - size)
-			goto out;
+	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
+	if (IS_ERR(va)) {
+		kfree(area);
+		return NULL;
 	}
-	if ((size + addr) < addr)
-		goto out;
-	if (addr > end - size)
-		goto out;
-
-found:
-	area->next = *p;
-	*p = area;
 
 	area->flags = flags;
-	area->addr = (void *)addr;
+	area->addr = (void *)va->va_start;
 	area->size = size;
 	area->pages = NULL;
 	area->nr_pages = 0;
 	area->phys_addr = 0;
 	area->caller = caller;
+	va->private = area;
+	va->flags |= VM_VM_AREA;
+
+	write_lock(&vmlist_lock);
+	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
+		if (tmp->addr >= area->addr)
+			break;
+	}
+	area->next = *p;
+	*p = area;
 	write_unlock(&vmlist_lock);
 
 	return area;
-
-out:
-	write_unlock(&vmlist_lock);
-	kfree(area);
-	if (printk_ratelimit())
-		printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
-	return NULL;
 }
 
 struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
@@ -328,39 +1055,15 @@ struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
 				  gfp_mask, __builtin_return_address(0));
 }
 
-/* Caller must hold vmlist_lock */
-static struct vm_struct *__find_vm_area(const void *addr)
+static struct vm_struct *find_vm_area(const void *addr)
 {
-	struct vm_struct *tmp;
+	struct vmap_area *va;
 
-	for (tmp = vmlist; tmp != NULL; tmp = tmp->next) {
-		 if (tmp->addr == addr)
-			break;
-	}
-
-	return tmp;
-}
-
-/* Caller must hold vmlist_lock */
-static struct vm_struct *__remove_vm_area(const void *addr)
-{
-	struct vm_struct **p, *tmp;
+	va = find_vmap_area((unsigned long)addr);
+	if (va && va->flags & VM_VM_AREA)
+		return va->private;
 
-	for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
-		 if (tmp->addr == addr)
-			 goto found;
-	}
 	return NULL;
-
-found:
-	unmap_vm_area(tmp);
-	*p = tmp->next;
-
-	/*
-	 * Remove the guard page.
-	 */
-	tmp->size -= PAGE_SIZE;
-	return tmp;
 }
 
 /**
@@ -373,11 +1076,24 @@ found:
  */
 struct vm_struct *remove_vm_area(const void *addr)
 {
-	struct vm_struct *v;
-	write_lock(&vmlist_lock);
-	v = __remove_vm_area(addr);
-	write_unlock(&vmlist_lock);
-	return v;
+	struct vmap_area *va;
+
+	va = find_vmap_area((unsigned long)addr);
+	if (va && va->flags & VM_VM_AREA) {
+		struct vm_struct *vm = va->private;
+		struct vm_struct *tmp, **p;
+		free_unmap_vmap_area(va);
+		vm->size -= PAGE_SIZE;
+
+		write_lock(&vmlist_lock);
+		for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
+			;
+		*p = tmp->next;
+		write_unlock(&vmlist_lock);
+
+		return vm;
+	}
+	return NULL;
 }
 
 static void __vunmap(const void *addr, int deallocate_pages)
@@ -487,6 +1203,8 @@ void *vmap(struct page **pages, unsigned int count,
 }
 EXPORT_SYMBOL(vmap);
 
+static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
+			    int node, void *caller);
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, int node, void *caller)
 {
@@ -613,10 +1331,8 @@ void *vmalloc_user(unsigned long size)
 
 	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
 	if (ret) {
-		write_lock(&vmlist_lock);
-		area = __find_vm_area(ret);
+		area = find_vm_area(ret);
 		area->flags |= VM_USERMAP;
-		write_unlock(&vmlist_lock);
 	}
 	return ret;
 }
@@ -696,10 +1412,8 @@ void *vmalloc_32_user(unsigned long size)
 
 	ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL);
 	if (ret) {
-		write_lock(&vmlist_lock);
-		area = __find_vm_area(ret);
+		area = find_vm_area(ret);
 		area->flags |= VM_USERMAP;
-		write_unlock(&vmlist_lock);
 	}
 	return ret;
 }
@@ -800,26 +1514,25 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 	struct vm_struct *area;
 	unsigned long uaddr = vma->vm_start;
 	unsigned long usize = vma->vm_end - vma->vm_start;
-	int ret;
 
 	if ((PAGE_SIZE-1) & (unsigned long)addr)
 		return -EINVAL;
 
-	read_lock(&vmlist_lock);
-	area = __find_vm_area(addr);
+	area = find_vm_area(addr);
 	if (!area)
-		goto out_einval_locked;
+		return -EINVAL;
 
 	if (!(area->flags & VM_USERMAP))
-		goto out_einval_locked;
+		return -EINVAL;
 
 	if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
-		goto out_einval_locked;
-	read_unlock(&vmlist_lock);
+		return -EINVAL;
 
 	addr += pgoff << PAGE_SHIFT;
 	do {
 		struct page *page = vmalloc_to_page(addr);
+		int ret;
+
 		ret = vm_insert_page(vma, uaddr, page);
 		if (ret)
 			return ret;
@@ -832,11 +1545,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 	/* Prevent "things" like memory migration? VM_flags need a cleanup... */
 	vma->vm_flags |= VM_RESERVED;
 
-	return ret;
-
-out_einval_locked:
-	read_unlock(&vmlist_lock);
-	return -EINVAL;
+	return 0;
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
-- 
GitLab


From 2a4b3ded5c76fbe373d6415b1b3ad4841f15c9bd Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sat, 18 Oct 2008 20:27:06 -0700
Subject: [PATCH 723/895] mm: hugetlb.c make functions static, use NULL rather
 than 0

mm/hugetlb.c:265:17: warning: symbol 'resv_map_alloc' was not declared. Should it be static?
mm/hugetlb.c:277:6: warning: symbol 'resv_map_release' was not declared. Should it be static?
mm/hugetlb.c:292:9: warning: Using plain integer as NULL pointer
mm/hugetlb.c:1750:5: warning: symbol 'unmap_ref_private' was not declared. Should it be static?

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2fc7fddd9b1f..ab79cd4dd23c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -262,7 +262,7 @@ struct resv_map {
 	struct list_head regions;
 };
 
-struct resv_map *resv_map_alloc(void)
+static struct resv_map *resv_map_alloc(void)
 {
 	struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
 	if (!resv_map)
@@ -274,7 +274,7 @@ struct resv_map *resv_map_alloc(void)
 	return resv_map;
 }
 
-void resv_map_release(struct kref *ref)
+static void resv_map_release(struct kref *ref)
 {
 	struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
 
@@ -289,7 +289,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 	if (!(vma->vm_flags & VM_SHARED))
 		return (struct resv_map *)(get_vma_private_data(vma) &
 							~HPAGE_RESV_MASK);
-	return 0;
+	return NULL;
 }
 
 static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
@@ -1747,10 +1747,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  * from other VMAs and let the children be SIGKILLed if they are faulting the
  * same region.
  */
-int unmap_ref_private(struct mm_struct *mm,
-					struct vm_area_struct *vma,
-					struct page *page,
-					unsigned long address)
+static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+				struct page *page, unsigned long address)
 {
 	struct vm_area_struct *iter_vma;
 	struct address_space *mapping;
-- 
GitLab


From d903ef9f38813e7eb268744a7e579e92f411c83a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 18 Oct 2008 20:27:06 -0700
Subject: [PATCH 724/895] mm: print out meminit for memmap

Improve debuggability of memory setup problems.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cfbadad75d1d..8f155e9e43db 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3457,8 +3457,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 			PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
 		if (realsize >= memmap_pages) {
 			realsize -= memmap_pages;
-			mminit_dprintk(MMINIT_TRACE, "memmap_init",
-				"%s zone: %lu pages used for memmap\n",
+			printk(KERN_DEBUG
+				"  %s zone: %lu pages used for memmap\n",
 				zone_names[j], memmap_pages);
 		} else
 			printk(KERN_WARNING
@@ -3468,8 +3468,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		/* Account for reserved pages */
 		if (j == 0 && realsize > dma_reserve) {
 			realsize -= dma_reserve;
-			mminit_dprintk(MMINIT_TRACE, "memmap_init",
-					"%s zone: %lu pages reserved\n",
+			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
 					zone_names[0], dma_reserve);
 		}
 
-- 
GitLab


From e575f111dc0f27044e170580e7de50985ab3e011 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Sat, 18 Oct 2008 20:27:08 -0700
Subject: [PATCH 725/895] coredump_filter: add hugepage dumping

Presently hugepage's vma has a VM_RESERVED flag in order not to be
swapped.  But a VM_RESERVED vma isn't core dumped because this flag is
often used for some kernel vmas (e.g.  vmalloc, sound related).

Thus hugepages are never dumped and it can't be debugged easily.  Many
developers want hugepages to be included into core-dump.

However, We can't read generic VM_RESERVED area because this area is often
IO mapping area.  then these area reading may change device state.  it is
definitly undesiable side-effect.

So adding a hugepage specific bit to the coredump filter is better.  It
will be able to hugepage core dumping and doesn't cause any side-effect to
any i/o devices.

In additional, libhugetlb use hugetlb private mapping pages as anonymous
page.  Then, hugepage private mapping pages should be core dumped by
default.

Then, /proc/[pid]/core_dump_filter has two new bits.

 - bit 5 mean hugetlb private mapping pages are dumped or not. (default: yes)
 - bit 6 mean hugetlb shared mapping pages are dumped or not.  (default: no)

I tested by following method.

% ulimit -c unlimited
% ./crash_hugepage  50
% ./crash_hugepage  50  -p
% ls -lh
% gdb ./crash_hugepage core
%
% echo 0x43 > /proc/self/coredump_filter
% ./crash_hugepage  50
% ./crash_hugepage  50  -p
% ls -lh
% gdb ./crash_hugepage core

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>

#include "hugetlbfs.h"

int main(int argc, char** argv){
	char* p;
	int ch;
	int mmap_flags = MAP_SHARED;
	int fd;
	int nr_pages;

	while((ch = getopt(argc, argv, "p")) != -1) {
		switch (ch) {
		case 'p':
			mmap_flags &= ~MAP_SHARED;
			mmap_flags |= MAP_PRIVATE;
			break;
		default:
			/* nothing*/
			break;
		}
	}
	argc -= optind;
	argv += optind;

	if (argc == 0){
		printf("need # of pages\n");
		exit(1);
	}

	nr_pages = atoi(argv[0]);
	if (nr_pages < 2) {
		printf("nr_pages must >2\n");
		exit(1);
	}

	fd = hugetlbfs_unlinked_fd();
	p = mmap(NULL, nr_pages * gethugepagesize(),
		 PROT_READ|PROT_WRITE, mmap_flags, fd, 0);

	sleep(2);

	*(p + gethugepagesize()) = 1; /* COW */
	sleep(2);

	/* crash! */
	*(int*)0 = 1;

	return 0;
}

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Kawai Hidehiro <hidehiro.kawai.ez@hitachi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: William Irwin <wli@holomorphy.com>
Cc: Adam Litke <agl@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt | 15 ++++++++++-----
 fs/binfmt_elf.c                    | 12 ++++++++++--
 include/linux/sched.h              |  7 +++++--
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index c032bf39e8b9..02cb7faeed6b 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -2412,24 +2412,29 @@ will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
 of memory types. If a bit of the bitmask is set, memory segments of the
 corresponding memory type are dumped, otherwise they are not dumped.
 
-The following 4 memory types are supported:
+The following 7 memory types are supported:
   - (bit 0) anonymous private memory
   - (bit 1) anonymous shared memory
   - (bit 2) file-backed private memory
   - (bit 3) file-backed shared memory
   - (bit 4) ELF header pages in file-backed private memory areas (it is
             effective only if the bit 2 is cleared)
+  - (bit 5) hugetlb private memory
+  - (bit 6) hugetlb shared memory
 
   Note that MMIO pages such as frame buffer are never dumped and vDSO pages
   are always dumped regardless of the bitmask status.
 
-Default value of coredump_filter is 0x3; this means all anonymous memory
-segments are dumped.
+  Note bit 0-4 doesn't effect any hugetlb memory. hugetlb memory are only
+  effected by bit 5-6.
+
+Default value of coredump_filter is 0x23; this means all anonymous memory
+segments and hugetlb private memory are dumped.
 
 If you don't want to dump all shared memory segments attached to pid 1234,
-write 1 to the process's proc file.
+write 0x21 to the process's proc file.
 
-  $ echo 0x1 > /proc/1234/coredump_filter
+  $ echo 0x21 > /proc/1234/coredump_filter
 
 When a new process is created, the process inherits the bitmask status from its
 parent. It is useful to set up coredump_filter before the program runs.
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c76afa26edf7..e2159063198a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1156,16 +1156,24 @@ static int dump_seek(struct file *file, loff_t off)
 static unsigned long vma_dump_size(struct vm_area_struct *vma,
 				   unsigned long mm_flags)
 {
+#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
+
 	/* The vma can be set up to tell us the answer directly.  */
 	if (vma->vm_flags & VM_ALWAYSDUMP)
 		goto whole;
 
+	/* Hugetlb memory check */
+	if (vma->vm_flags & VM_HUGETLB) {
+		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
+			goto whole;
+		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
+			goto whole;
+	}
+
 	/* Do not dump I/O mapped devices or special mappings */
 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
 		return 0;
 
-#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
-
 	/* By default, dump shared memory if mapped from an anonymous file. */
 	if (vma->vm_flags & VM_SHARED) {
 		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c226c7b82946..017cc914ef1f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -403,12 +403,15 @@ extern int get_dumpable(struct mm_struct *mm);
 #define MMF_DUMP_MAPPED_PRIVATE	4
 #define MMF_DUMP_MAPPED_SHARED	5
 #define MMF_DUMP_ELF_HEADERS	6
+#define MMF_DUMP_HUGETLB_PRIVATE 7
+#define MMF_DUMP_HUGETLB_SHARED  8
 #define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
-#define MMF_DUMP_FILTER_BITS	5
+#define MMF_DUMP_FILTER_BITS	7
 #define MMF_DUMP_FILTER_MASK \
 	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
 #define MMF_DUMP_FILTER_DEFAULT \
-	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED))
+	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED) |\
+	 (1 << MMF_DUMP_HUGETLB_PRIVATE))
 
 struct sighand_struct {
 	atomic_t		count;
-- 
GitLab


From 4b2e38ad703541f7845c2d766426148b8d1aa329 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Sat, 18 Oct 2008 20:27:10 -0700
Subject: [PATCH 726/895] hugepage: support ZERO_PAGE()

Presently hugepage doesn't use zero page at all because zero page is only
used for coredumping and hugepage can't core dump.

However we have now implemented hugepage coredumping.  Therefore we should
implement the zero page of hugepage.

Implementation note:

o Why do we only check VM_SHARED for zero page?
  normal page checked as ..

	static inline int use_zero_page(struct vm_area_struct *vma)
	{
	        if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
	                return 0;

	        return !vma->vm_ops || !vma->vm_ops->fault;
	}

First, hugepages are never mlock()ed.  We aren't concerned with VM_LOCKED.

Second, hugetlbfs is a pseudo filesystem, not a real filesystem and it
doesn't have any file backing.  Thus ops->fault checking is meaningless.

o Why don't we use zero page if !pte.

!pte indicate {pud, pmd} doesn't exist or some error happened.  So we
shouldn't return zero page if any error occurred.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Kawai Hidehiro <hidehiro.kawai.ez@hitachi.com>
Cc: Mel Gorman <mel@skynet.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ab79cd4dd23c..ce8cbb29860b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2071,6 +2071,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
 	return NULL;
 }
 
+static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
+{
+	if (!ptep || write || shared)
+		return 0;
+	else
+		return huge_pte_none(huge_ptep_get(ptep));
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i,
@@ -2080,6 +2088,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long vaddr = *position;
 	int remainder = *length;
 	struct hstate *h = hstate_vma(vma);
+	int zeropage_ok = 0;
+	int shared = vma->vm_flags & VM_SHARED;
 
 	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
@@ -2092,8 +2102,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * first, for the page indexing below to work.
 		 */
 		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+		if (huge_zeropage_ok(pte, write, shared))
+			zeropage_ok = 1;
 
-		if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
+		if (!pte ||
+		    (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
 		    (write && !pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
@@ -2113,8 +2126,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = pte_page(huge_ptep_get(pte));
 same_page:
 		if (pages) {
-			get_page(page);
-			pages[i] = page + pfn_offset;
+			if (zeropage_ok)
+				pages[i] = ZERO_PAGE(0);
+			else
+				pages[i] = page + pfn_offset;
+			get_page(pages[i]);
 		}
 
 		if (vmas)
-- 
GitLab


From 1125b4e3949949b44a7c80b619507c6f61d62911 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Sat, 18 Oct 2008 20:27:11 -0700
Subject: [PATCH 727/895] setup_per_zone_pages_min(): take zone->lock instead
 of zone->lru_lock

This replaces zone->lru_lock in setup_per_zone_pages_min() with zone->lock.
There seems to be no need for the lru_lock anymore, but there is a need for
zone->lock instead, because that function may call move_freepages() via
setup_zone_migrate_reserve().

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8f155e9e43db..f2fc44ec1d44 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4242,7 +4242,7 @@ void setup_per_zone_pages_min(void)
 	for_each_zone(zone) {
 		u64 tmp;
 
-		spin_lock_irqsave(&zone->lru_lock, flags);
+		spin_lock_irqsave(&zone->lock, flags);
 		tmp = (u64)pages_min * zone->present_pages;
 		do_div(tmp, lowmem_pages);
 		if (is_highmem(zone)) {
@@ -4274,7 +4274,7 @@ void setup_per_zone_pages_min(void)
 		zone->pages_low   = zone->pages_min + (tmp >> 2);
 		zone->pages_high  = zone->pages_min + (tmp >> 1);
 		setup_zone_migrate_reserve(zone);
-		spin_unlock_irqrestore(&zone->lru_lock, flags);
+		spin_unlock_irqrestore(&zone->lock, flags);
 	}
 
 	/* update totalreserve_pages */
-- 
GitLab


From 9f1b16a51ea7180eca8a0d76a6bd587385a30757 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Sat, 18 Oct 2008 20:27:12 -0700
Subject: [PATCH 728/895] memory_probe: fix wrong sysfs file attribute

This attribute just has a write operation.

[akpm@linux-foundation.org: use S_IWUSR as suggested by Randy]
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/memory.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index af0d175c025d..5260e9e0df48 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -21,6 +21,8 @@
 #include <linux/memory_hotplug.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/stat.h>
+
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
@@ -325,7 +327,7 @@ memory_probe_store(struct class *class, const char *buf, size_t count)
 
 	return count;
 }
-static CLASS_ATTR(probe, 0700, NULL, memory_probe_store);
+static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
 
 static int memory_probe_init(void)
 {
-- 
GitLab


From 7a6560e02556b6f0a798c247f3a557c523d9701b Mon Sep 17 00:00:00 2001
From: Andrea Righi <righi.andrea@gmail.com>
Date: Sat, 18 Oct 2008 20:27:13 -0700
Subject: [PATCH 729/895] documentation: clarify dirty_ratio and
 dirty_background_ratio description

The current documentation of dirty_ratio and dirty_background_ratio is a
bit misleading.

In the documentation we say that they are "a percentage of total system
memory", but the current page writeback policy, intead, is to apply the
percentages to the dirtyable memory, that means free pages + reclaimable
pages.

Better to be more explicit to clarify this concept.

Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 02cb7faeed6b..bcceb99b81dd 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1384,15 +1384,18 @@ causes the kernel to prefer to reclaim dentries and inodes.
 dirty_background_ratio
 ----------------------
 
-Contains, as a percentage of total system memory, the number of pages at which
-the pdflush background writeback daemon will start writing out dirty data.
+Contains, as a percentage of the dirtyable system memory (free pages + mapped
+pages + file cache, not including locked pages and HugePages), the number of
+pages at which the pdflush background writeback daemon will start writing out
+dirty data.
 
 dirty_ratio
 -----------------
 
-Contains, as a percentage of total system memory, the number of pages at which
-a process which is generating disk writes will itself start writing out dirty
-data.
+Contains, as a percentage of the dirtyable system memory (free pages + mapped
+pages + file cache, not including locked pages and HugePages), the number of
+pages at which a process which is generating disk writes will itself start
+writing out dirty data.
 
 dirty_writeback_centisecs
 -------------------------
-- 
GitLab


From de7f0cba96786cf9ec9da4532c1b25f733da9b6f Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@austin.ibm.com>
Date: Sat, 18 Oct 2008 20:27:14 -0700
Subject: [PATCH 730/895] memory hotplug: release memory regions in
 PAGES_PER_SECTION chunks

During hotplug memory remove, memory regions should be released on a
PAGES_PER_SECTION size chunks.  This mirrors the code in add_memory where
resources are requested on a PAGES_PER_SECTION size.

Attempting to release the entire memory region fails because there is not
a single resource for the total number of pages being removed.  Instead
the resources for the pages are split in PAGES_PER_SECTION size chunks as
requested during memory add.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Acked-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory_hotplug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3b4975815141..6837a1014372 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -324,11 +324,11 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
 	BUG_ON(nr_pages % PAGES_PER_SECTION);
 
-	release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
-
 	sections_to_remove = nr_pages / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+		release_mem_region(pfn << PAGE_SHIFT,
+				   PAGES_PER_SECTION << PAGE_SHIFT);
 		ret = __remove_section(zone, __pfn_to_section(pfn));
 		if (ret)
 			break;
-- 
GitLab


From e78bbfa8262424417a29349a8064a535053912b9 Mon Sep 17 00:00:00 2001
From: Brice Goglin <Brice.Goglin@inria.fr>
Date: Sat, 18 Oct 2008 20:27:15 -0700
Subject: [PATCH 731/895] mm: stop returning -ENOENT from sys_move_pages() if
 nothing got migrated

A patchset reworking sys_move_pages().  It removes the possibly large
vmalloc by using multiple chunks when migrating large buffers.  It also
dramatically increases the throughput for large buffers since the lookup
in new_page_node() is now limited to a single chunk, causing the quadratic
complexity to have a much slower impact.  There is no need to use any
radix-tree-like structure to improve this lookup.

sys_move_pages() duration on a 4-quadcore-opteron 2347HE (1.9Gz),
migrating between nodes #2 and #3:

	length		move_pages (us)		move_pages+patch (us)
	4kB		126			98
	40kB		198			168
	400kB		963			937
	4MB		12503			11930
	40MB		246867			11848

Patches #1 and #4 are the important ones:
1) stop returning -ENOENT from sys_move_pages() if nothing got migrated
2) don't vmalloc a huge page_to_node array for do_pages_stat()
3) extract do_pages_move() out of sys_move_pages()
4) rework do_pages_move() to work on page_sized chunks
5) move_pages: no need to set pp->page to ZERO_PAGE(0) by default

This patch:

There is no point in returning -ENOENT from sys_move_pages() if all pages
were already on the right node, while we return 0 if only 1 page was not.
Most application don't know where their pages are allocated, so it's not
an error to try to migrate them anyway.

Just return 0 and let the status array in user-space be checked if the
application needs details.

It will make the upcoming chunked-move_pages() support much easier.

Signed-off-by: Brice Goglin <Brice.Goglin@inria.fr>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 6802a7a3dfec..f233519f0453 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -896,11 +896,10 @@ set_status:
 		pp->status = err;
 	}
 
+	err = 0;
 	if (!list_empty(&pagelist))
 		err = migrate_pages(&pagelist, new_page_node,
 				(unsigned long)pm);
-	else
-		err = -ENOENT;
 
 	up_read(&mm->mmap_sem);
 	return err;
-- 
GitLab


From 2f007e74bb85b9fc4eab28524052161703300f1a Mon Sep 17 00:00:00 2001
From: Brice Goglin <Brice.Goglin@inria.fr>
Date: Sat, 18 Oct 2008 20:27:16 -0700
Subject: [PATCH 732/895] mm: don't vmalloc a huge page_to_node array for
 do_pages_stat()

do_pages_stat() does not need any page_to_node entry for real.  Just pass
the pointers to the user-space page address array and to the user-space
status array, and have do_pages_stat() traverse the former and fill the
latter directly.

Signed-off-by: Brice Goglin <Brice.Goglin@inria.fr>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index f233519f0453..a4c29081ebce 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -906,25 +906,33 @@ set_status:
 }
 
 /*
- * Determine the nodes of a list of pages. The addr in the pm array
- * must have been set to the virtual address of which we want to determine
- * the node number.
+ * Determine the nodes of an array of pages and store it in an array of status.
  */
-static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
+static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
+			 const void __user * __user *pages,
+			 int __user *status)
 {
+	unsigned long i;
+	int err;
+
 	down_read(&mm->mmap_sem);
 
-	for ( ; pm->node != MAX_NUMNODES; pm++) {
+	for (i = 0; i < nr_pages; i++) {
+		const void __user *p;
+		unsigned long addr;
 		struct vm_area_struct *vma;
 		struct page *page;
-		int err;
 
 		err = -EFAULT;
-		vma = find_vma(mm, pm->addr);
+		if (get_user(p, pages+i))
+			goto out;
+		addr = (unsigned long) p;
+
+		vma = find_vma(mm, addr);
 		if (!vma)
 			goto set_status;
 
-		page = follow_page(vma, pm->addr, 0);
+		page = follow_page(vma, addr, 0);
 
 		err = PTR_ERR(page);
 		if (IS_ERR(page))
@@ -937,11 +945,13 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
 
 		err = page_to_nid(page);
 set_status:
-		pm->status = err;
+		put_user(err, status+i);
 	}
+	err = 0;
 
+out:
 	up_read(&mm->mmap_sem);
-	return 0;
+	return err;
 }
 
 /*
@@ -997,6 +1007,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
  	if (err)
  		goto out2;
 
+	if (!nodes) {
+		err = do_pages_stat(mm, nr_pages, pages, status);
+		goto out2;
+	}
 
 	task_nodes = cpuset_mems_allowed(task);
 
@@ -1045,11 +1059,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 	/* End marker */
 	pm[nr_pages].node = MAX_NUMNODES;
 
-	if (nodes)
-		err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
-	else
-		err = do_pages_stat(mm, pm);
-
+	err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
 	if (err >= 0)
 		/* Return status information */
 		for (i = 0; i < nr_pages; i++)
-- 
GitLab


From 5e9a0f023bee02bfb94e08590d998660c01f5a49 Mon Sep 17 00:00:00 2001
From: Brice Goglin <Brice.Goglin@inria.fr>
Date: Sat, 18 Oct 2008 20:27:17 -0700
Subject: [PATCH 733/895] mm: extract do_pages_move() out of sys_move_pages()

To prepare the chunking, move the sys_move_pages() code that is used when
nodes!=NULL into do_pages_move().  And rename do_move_pages() into
do_move_page_to_node_array().

Signed-off-by: Brice Goglin <Brice.Goglin@inria.fr>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 152 +++++++++++++++++++++++++++++----------------------
 1 file changed, 86 insertions(+), 66 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index a4c29081ebce..11c6c56ec017 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -826,9 +826,11 @@ static struct page *new_page_node(struct page *p, unsigned long private,
  * Move a set of pages as indicated in the pm array. The addr
  * field must be set to the virtual address of the page to be moved
  * and the node number must contain a valid target node.
+ * The pm array ends with node = MAX_NUMNODES.
  */
-static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
-				int migrate_all)
+static int do_move_page_to_node_array(struct mm_struct *mm,
+				      struct page_to_node *pm,
+				      int migrate_all)
 {
 	int err;
 	struct page_to_node *pp;
@@ -905,6 +907,81 @@ set_status:
 	return err;
 }
 
+/*
+ * Migrate an array of page address onto an array of nodes and fill
+ * the corresponding array of status.
+ */
+static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
+			 unsigned long nr_pages,
+			 const void __user * __user *pages,
+			 const int __user *nodes,
+			 int __user *status, int flags)
+{
+	struct page_to_node *pm = NULL;
+	nodemask_t task_nodes;
+	int err = 0;
+	int i;
+
+	task_nodes = cpuset_mems_allowed(task);
+
+	/* Limit nr_pages so that the multiplication may not overflow */
+	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
+		err = -E2BIG;
+		goto out;
+	}
+
+	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
+	if (!pm) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Get parameters from user space and initialize the pm
+	 * array. Return various errors if the user did something wrong.
+	 */
+	for (i = 0; i < nr_pages; i++) {
+		const void __user *p;
+
+		err = -EFAULT;
+		if (get_user(p, pages + i))
+			goto out_pm;
+
+		pm[i].addr = (unsigned long)p;
+		if (nodes) {
+			int node;
+
+			if (get_user(node, nodes + i))
+				goto out_pm;
+
+			err = -ENODEV;
+			if (!node_state(node, N_HIGH_MEMORY))
+				goto out_pm;
+
+			err = -EACCES;
+			if (!node_isset(node, task_nodes))
+				goto out_pm;
+
+			pm[i].node = node;
+		} else
+			pm[i].node = 0;	/* anything to not match MAX_NUMNODES */
+	}
+	/* End marker */
+	pm[nr_pages].node = MAX_NUMNODES;
+
+	err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
+	if (err >= 0)
+		/* Return status information */
+		for (i = 0; i < nr_pages; i++)
+			if (put_user(pm[i].status, status + i))
+				err = -EFAULT;
+
+out_pm:
+	vfree(pm);
+out:
+	return err;
+}
+
 /*
  * Determine the nodes of an array of pages and store it in an array of status.
  */
@@ -963,12 +1040,9 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 			const int __user *nodes,
 			int __user *status, int flags)
 {
-	int err = 0;
-	int i;
 	struct task_struct *task;
-	nodemask_t task_nodes;
 	struct mm_struct *mm;
-	struct page_to_node *pm = NULL;
+	int err;
 
 	/* Check flags */
 	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1000,75 +1074,21 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 	    (current->uid != task->suid) && (current->uid != task->uid) &&
 	    !capable(CAP_SYS_NICE)) {
 		err = -EPERM;
-		goto out2;
+		goto out;
 	}
 
  	err = security_task_movememory(task);
  	if (err)
- 		goto out2;
+		goto out;
 
-	if (!nodes) {
+	if (nodes) {
+		err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
+				    flags);
+	} else {
 		err = do_pages_stat(mm, nr_pages, pages, status);
-		goto out2;
-	}
-
-	task_nodes = cpuset_mems_allowed(task);
-
-	/* Limit nr_pages so that the multiplication may not overflow */
-	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
-		err = -E2BIG;
-		goto out2;
 	}
 
-	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
-	if (!pm) {
-		err = -ENOMEM;
-		goto out2;
-	}
-
-	/*
-	 * Get parameters from user space and initialize the pm
-	 * array. Return various errors if the user did something wrong.
-	 */
-	for (i = 0; i < nr_pages; i++) {
-		const void __user *p;
-
-		err = -EFAULT;
-		if (get_user(p, pages + i))
-			goto out;
-
-		pm[i].addr = (unsigned long)p;
-		if (nodes) {
-			int node;
-
-			if (get_user(node, nodes + i))
-				goto out;
-
-			err = -ENODEV;
-			if (!node_state(node, N_HIGH_MEMORY))
-				goto out;
-
-			err = -EACCES;
-			if (!node_isset(node, task_nodes))
-				goto out;
-
-			pm[i].node = node;
-		} else
-			pm[i].node = 0;	/* anything to not match MAX_NUMNODES */
-	}
-	/* End marker */
-	pm[nr_pages].node = MAX_NUMNODES;
-
-	err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
-	if (err >= 0)
-		/* Return status information */
-		for (i = 0; i < nr_pages; i++)
-			if (put_user(pm[i].status, status + i))
-				err = -EFAULT;
-
 out:
-	vfree(pm);
-out2:
 	mmput(mm);
 	return err;
 }
-- 
GitLab


From 83224b08372be48d5fcefedc4886457da29130c8 Mon Sep 17 00:00:00 2001
From: Matt Helsley <matthltc@us.ibm.com>
Date: Sat, 18 Oct 2008 20:27:18 -0700
Subject: [PATCH 734/895] container freezer: add TIF_FREEZE flag to all
 architectures

This patch series introduces a cgroup subsystem that utilizes the swsusp
freezer to freeze a group of tasks.  It's immediately useful for batch job
management scripts.  It should also be useful in the future for
implementing container checkpoint/restart.

The freezer subsystem in the container filesystem defines a cgroup file
named freezer.state.  Reading freezer.state will return the current state
of the cgroup.  Writing "FROZEN" to the state file will freeze all tasks
in the cgroup.  Subsequently writing "RUNNING" will unfreeze the tasks in
the cgroup.

* Examples of usage :

   # mkdir /containers/freezer
   # mount -t cgroup -ofreezer freezer  /containers
   # mkdir /containers/0
   # echo $some_pid > /containers/0/tasks

to get status of the freezer subsystem :

   # cat /containers/0/freezer.state
   RUNNING

to freeze all tasks in the container :

   # echo FROZEN > /containers/0/freezer.state
   # cat /containers/0/freezer.state
   FREEZING
   # cat /containers/0/freezer.state
   FROZEN

to unfreeze all tasks in the container :

   # echo RUNNING > /containers/0/freezer.state
   # cat /containers/0/freezer.state
   RUNNING

This patch:

The first step in making the refrigerator() available to all
architectures, even for those without power management.

The purpose of such a change is to be able to use the refrigerator() in a
new control group subsystem which will implement a control group freezer.

[akpm@linux-foundation.org: fix sparc]
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Acked-by: Pavel Machek <pavel@suse.cz>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Nigel Cunningham <nigel@tuxonice.net>
Tested-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/thread_info.h     | 2 ++
 arch/avr32/include/asm/thread_info.h     | 1 +
 arch/h8300/include/asm/thread_info.h     | 2 ++
 arch/m68knommu/include/asm/thread_info.h | 2 ++
 arch/s390/include/asm/thread_info.h      | 2 ++
 arch/sparc/include/asm/thread_info_32.h  | 2 ++
 arch/sparc/include/asm/thread_info_64.h  | 2 ++
 include/asm-cris/thread_info.h           | 2 ++
 include/asm-m68k/thread_info.h           | 1 +
 include/asm-parisc/thread_info.h         | 2 ++
 include/asm-um/thread_info.h             | 2 ++
 include/asm-xtensa/thread_info.h         | 2 ++
 12 files changed, 22 insertions(+)

diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 15fda4344424..d069526bd767 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -74,12 +74,14 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define TIF_UAC_SIGBUS		7
 #define TIF_MEMDIE		8
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal */
+#define TIF_FREEZE		16	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 /* Work to do on interrupt/exception return.  */
 #define _TIF_WORK_MASK		(_TIF_SIGPENDING | _TIF_NEED_RESCHED)
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index 294b25f9323d..4442f8d2d423 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -96,6 +96,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_MEMDIE		(1 << TIF_MEMDIE)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 /* Note: The masks below must never span more than 16 bits! */
 
diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h
index aafd4d322ec3..700014d2155f 100644
--- a/arch/h8300/include/asm/thread_info.h
+++ b/arch/h8300/include/asm/thread_info.h
@@ -89,6 +89,7 @@ static inline struct thread_info *current_thread_info(void)
 					   TIF_NEED_RESCHED */
 #define TIF_MEMDIE		4
 #define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
+#define TIF_FREEZE		16	/* is freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -96,6 +97,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 
diff --git a/arch/m68knommu/include/asm/thread_info.h b/arch/m68knommu/include/asm/thread_info.h
index 0c9bc095f3f0..82529f424ea3 100644
--- a/arch/m68knommu/include/asm/thread_info.h
+++ b/arch/m68knommu/include/asm/thread_info.h
@@ -84,12 +84,14 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_POLLING_NRFLAG	3	/* true if poll_idle() is polling
 					   TIF_NEED_RESCHED */
 #define TIF_MEMDIE		4
+#define TIF_FREEZE		16	/* is freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index ea40a9d690fc..de3fad60c682 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -99,6 +99,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_31BIT		18	/* 32bit process */ 
 #define TIF_MEMDIE		19
 #define TIF_RESTORE_SIGMASK	20	/* restore signal mask in do_signal() */
+#define TIF_FREEZE		21	/* thread is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -112,6 +113,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_USEDFPU		(1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_31BIT		(1<<TIF_31BIT)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index 29899fd5b1b2..80fe547c3f45 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -135,6 +135,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
 #define TIF_POLLING_NRFLAG	9	/* true if poll_idle() is polling
 					 * TIF_NEED_RESCHED */
 #define TIF_MEMDIE		10
+#define TIF_FREEZE		11	/* is freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -148,6 +149,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
 #define _TIF_DO_NOTIFY_RESUME_MASK	(_TIF_NOTIFY_RESUME | \
 					 _TIF_SIGPENDING | \
 					 _TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index c0a737d7292c..639ac805448a 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -237,6 +237,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define TIF_ABI_PENDING		12
 #define TIF_MEMDIE		13
 #define TIF_POLLING_NRFLAG	14
+#define TIF_FREEZE		15	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -249,6 +250,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_ABI_PENDING	(1<<TIF_ABI_PENDING)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_USER_WORK_MASK	((0xff << TI_FLAG_WSAVED_SHIFT) | \
 				 _TIF_DO_NOTIFY_RESUME_MASK | \
diff --git a/include/asm-cris/thread_info.h b/include/asm-cris/thread_info.h
index 7efe1000f99d..cee97f14af3b 100644
--- a/include/asm-cris/thread_info.h
+++ b/include/asm-cris/thread_info.h
@@ -88,6 +88,7 @@ struct thread_info {
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17
+#define TIF_FREEZE		18	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -95,6 +96,7 @@ struct thread_info {
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h
index abc002798a2b..af0fda46e94b 100644
--- a/include/asm-m68k/thread_info.h
+++ b/include/asm-m68k/thread_info.h
@@ -52,5 +52,6 @@ struct thread_info {
 #define TIF_DELAYED_TRACE	14	/* single step a syscall */
 #define TIF_SYSCALL_TRACE	15	/* syscall trace active */
 #define TIF_MEMDIE		16
+#define TIF_FREEZE		17	/* thread is freezing for suspend */
 
 #endif	/* _ASM_M68K_THREAD_INFO_H */
diff --git a/include/asm-parisc/thread_info.h b/include/asm-parisc/thread_info.h
index 9f812741c355..0407959da489 100644
--- a/include/asm-parisc/thread_info.h
+++ b/include/asm-parisc/thread_info.h
@@ -58,6 +58,7 @@ struct thread_info {
 #define TIF_32BIT               4       /* 32 bit binary */
 #define TIF_MEMDIE		5
 #define TIF_RESTORE_SIGMASK	6	/* restore saved signal mask */
+#define TIF_FREEZE		7	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
@@ -65,6 +66,7 @@ struct thread_info {
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 #define _TIF_USER_WORK_MASK     (_TIF_SIGPENDING | \
                                  _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h
index e07e72846c7a..62274ab9471f 100644
--- a/include/asm-um/thread_info.h
+++ b/include/asm-um/thread_info.h
@@ -69,6 +69,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE	 	5
 #define TIF_SYSCALL_AUDIT	6
 #define TIF_RESTORE_SIGMASK	7
+#define TIF_FREEZE		16	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
@@ -77,5 +78,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_MEMDIE		(1 << TIF_MEMDIE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 #endif
diff --git a/include/asm-xtensa/thread_info.h b/include/asm-xtensa/thread_info.h
index 7e4131dd546c..0f4fe1faf9ba 100644
--- a/include/asm-xtensa/thread_info.h
+++ b/include/asm-xtensa/thread_info.h
@@ -134,6 +134,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE		5
 #define TIF_RESTORE_SIGMASK	6	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_FREEZE		17	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@@ -142,6 +143,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_IRET		(1<<TIF_IRET)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
-- 
GitLab


From 8174f1503f4bf7e9a14b3fbbfdb30c6be6e29f77 Mon Sep 17 00:00:00 2001
From: Matt Helsley <matthltc@us.ibm.com>
Date: Sat, 18 Oct 2008 20:27:19 -0700
Subject: [PATCH 735/895] container freezer: make refrigerator always available

Now that the TIF_FREEZE flag is available in all architectures, extract
the refrigerator() and freeze_task() from kernel/power/process.c and make
it available to all.

The refrigerator() can now be used in a control group subsystem
implementing a control group freezer.

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Tested-by: Matt Helsley <matthltc@us.ibm.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/freezer.h |  14 ++++-
 kernel/Makefile         |   1 +
 kernel/freezer.c        | 122 ++++++++++++++++++++++++++++++++++++++++
 kernel/power/Kconfig    |   3 +
 kernel/power/process.c  | 116 --------------------------------------
 5 files changed, 137 insertions(+), 119 deletions(-)
 create mode 100644 kernel/freezer.c

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index deddeedf3257..17e3bb42dd3c 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -6,7 +6,7 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_FREEZER
 /*
  * Check if a process has been frozen
  */
@@ -39,6 +39,11 @@ static inline void clear_freeze_flag(struct task_struct *p)
 	clear_tsk_thread_flag(p, TIF_FREEZE);
 }
 
+static inline bool should_send_signal(struct task_struct *p)
+{
+	return !(p->flags & PF_FREEZER_NOSIG);
+}
+
 /*
  * Wake up a frozen process
  *
@@ -75,6 +80,9 @@ static inline int try_to_freeze(void)
 		return 0;
 }
 
+extern bool freeze_task(struct task_struct *p, bool sig_only);
+extern void cancel_freezing(struct task_struct *p);
+
 /*
  * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it
  * calls wait_for_completion(&vfork) and reset right after it returns from this
@@ -166,7 +174,7 @@ static inline void set_freezable_with_signal(void)
 	} while (try_to_freeze());					\
 	__retval;							\
 })
-#else /* !CONFIG_PM_SLEEP */
+#else /* !CONFIG_FREEZER */
 static inline int frozen(struct task_struct *p) { return 0; }
 static inline int freezing(struct task_struct *p) { return 0; }
 static inline void set_freeze_flag(struct task_struct *p) {}
@@ -191,6 +199,6 @@ static inline void set_freezable_with_signal(void) {}
 #define wait_event_freezable_timeout(wq, condition, timeout)		\
 		wait_event_interruptible_timeout(wq, condition, timeout)
 
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* !CONFIG_FREEZER */
 
 #endif	/* FREEZER_H_INCLUDED */
diff --git a/kernel/Makefile b/kernel/Makefile
index 4e1d7df7c3e2..e8194d15d5f4 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched.o = -mno-spe -pg
 endif
 
+obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/freezer.c b/kernel/freezer.c
new file mode 100644
index 000000000000..cb0931f89306
--- /dev/null
+++ b/kernel/freezer.c
@@ -0,0 +1,122 @@
+/*
+ * kernel/freezer.c - Function to freeze a process
+ *
+ * Originally from kernel/power/process.c
+ */
+
+#include <linux/interrupt.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/freezer.h>
+
+/*
+ * freezing is complete, mark current process as frozen
+ */
+static inline void frozen_process(void)
+{
+	if (!unlikely(current->flags & PF_NOFREEZE)) {
+		current->flags |= PF_FROZEN;
+		wmb();
+	}
+	clear_freeze_flag(current);
+}
+
+/* Refrigerator is place where frozen processes are stored :-). */
+void refrigerator(void)
+{
+	/* Hmm, should we be allowed to suspend when there are realtime
+	   processes around? */
+	long save;
+
+	task_lock(current);
+	if (freezing(current)) {
+		frozen_process();
+		task_unlock(current);
+	} else {
+		task_unlock(current);
+		return;
+	}
+	save = current->state;
+	pr_debug("%s entered refrigerator\n", current->comm);
+
+	spin_lock_irq(&current->sighand->siglock);
+	recalc_sigpending(); /* We sent fake signal, clean it up */
+	spin_unlock_irq(&current->sighand->siglock);
+
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!frozen(current))
+			break;
+		schedule();
+	}
+	pr_debug("%s left refrigerator\n", current->comm);
+	__set_current_state(save);
+}
+EXPORT_SYMBOL(refrigerator);
+
+static void fake_signal_wake_up(struct task_struct *p)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->sighand->siglock, flags);
+	signal_wake_up(p, 0);
+	spin_unlock_irqrestore(&p->sighand->siglock, flags);
+}
+
+/**
+ *	freeze_task - send a freeze request to given task
+ *	@p: task to send the request to
+ *	@sig_only: if set, the request will only be sent if the task has the
+ *		PF_FREEZER_NOSIG flag unset
+ *	Return value: 'false', if @sig_only is set and the task has
+ *		PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
+ *
+ *	The freeze request is sent by setting the tasks's TIF_FREEZE flag and
+ *	either sending a fake signal to it or waking it up, depending on whether
+ *	or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
+ *	has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
+ *	TIF_FREEZE flag will not be set.
+ */
+bool freeze_task(struct task_struct *p, bool sig_only)
+{
+	/*
+	 * We first check if the task is freezing and next if it has already
+	 * been frozen to avoid the race with frozen_process() which first marks
+	 * the task as frozen and next clears its TIF_FREEZE.
+	 */
+	if (!freezing(p)) {
+		rmb();
+		if (frozen(p))
+			return false;
+
+		if (!sig_only || should_send_signal(p))
+			set_freeze_flag(p);
+		else
+			return false;
+	}
+
+	if (should_send_signal(p)) {
+		if (!signal_pending(p))
+			fake_signal_wake_up(p);
+	} else if (sig_only) {
+		return false;
+	} else {
+		wake_up_state(p, TASK_INTERRUPTIBLE);
+	}
+
+	return true;
+}
+
+void cancel_freezing(struct task_struct *p)
+{
+	unsigned long flags;
+
+	if (freezing(p)) {
+		pr_debug("  clean up: %s\n", p->comm);
+		clear_freeze_flag(p);
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		recalc_sigpending_and_wake(p);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	}
+}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index dcd165f92a88..ebdd7f55273d 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -85,6 +85,9 @@ config PM_SLEEP
 	depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
 	default y
 
+config FREEZER
+	def_bool PM_SLEEP
+
 config SUSPEND
 	bool "Suspend to RAM and standby"
 	depends on PM && ARCH_SUSPEND_POSSIBLE
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 278946aecaf0..444cea80fde8 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -28,121 +28,6 @@ static inline int freezeable(struct task_struct * p)
 	return 1;
 }
 
-/*
- * freezing is complete, mark current process as frozen
- */
-static inline void frozen_process(void)
-{
-	if (!unlikely(current->flags & PF_NOFREEZE)) {
-		current->flags |= PF_FROZEN;
-		wmb();
-	}
-	clear_freeze_flag(current);
-}
-
-/* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
-{
-	/* Hmm, should we be allowed to suspend when there are realtime
-	   processes around? */
-	long save;
-
-	task_lock(current);
-	if (freezing(current)) {
-		frozen_process();
-		task_unlock(current);
-	} else {
-		task_unlock(current);
-		return;
-	}
-	save = current->state;
-	pr_debug("%s entered refrigerator\n", current->comm);
-
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
-	spin_unlock_irq(&current->sighand->siglock);
-
-	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!frozen(current))
-			break;
-		schedule();
-	}
-	pr_debug("%s left refrigerator\n", current->comm);
-	__set_current_state(save);
-}
-
-static void fake_signal_wake_up(struct task_struct *p)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&p->sighand->siglock, flags);
-	signal_wake_up(p, 0);
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
-}
-
-static inline bool should_send_signal(struct task_struct *p)
-{
-	return !(p->flags & PF_FREEZER_NOSIG);
-}
-
-/**
- *	freeze_task - send a freeze request to given task
- *	@p: task to send the request to
- *	@sig_only: if set, the request will only be sent if the task has the
- *		PF_FREEZER_NOSIG flag unset
- *	Return value: 'false', if @sig_only is set and the task has
- *		PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
- *
- *	The freeze request is sent by setting the tasks's TIF_FREEZE flag and
- *	either sending a fake signal to it or waking it up, depending on whether
- *	or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
- *	has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
- *	TIF_FREEZE flag will not be set.
- */
-static bool freeze_task(struct task_struct *p, bool sig_only)
-{
-	/*
-	 * We first check if the task is freezing and next if it has already
-	 * been frozen to avoid the race with frozen_process() which first marks
-	 * the task as frozen and next clears its TIF_FREEZE.
-	 */
-	if (!freezing(p)) {
-		rmb();
-		if (frozen(p))
-			return false;
-
-		if (!sig_only || should_send_signal(p))
-			set_freeze_flag(p);
-		else
-			return false;
-	}
-
-	if (should_send_signal(p)) {
-		if (!signal_pending(p))
-			fake_signal_wake_up(p);
-	} else if (sig_only) {
-		return false;
-	} else {
-		wake_up_state(p, TASK_INTERRUPTIBLE);
-	}
-
-	return true;
-}
-
-static void cancel_freezing(struct task_struct *p)
-{
-	unsigned long flags;
-
-	if (freezing(p)) {
-		pr_debug("  clean up: %s\n", p->comm);
-		clear_freeze_flag(p);
-		spin_lock_irqsave(&p->sighand->siglock, flags);
-		recalc_sigpending_and_wake(p);
-		spin_unlock_irqrestore(&p->sighand->siglock, flags);
-	}
-}
-
 static int try_to_freeze_tasks(bool sig_only)
 {
 	struct task_struct *g, *p;
@@ -264,4 +149,3 @@ void thaw_processes(void)
 	printk("done.\n");
 }
 
-EXPORT_SYMBOL(refrigerator);
-- 
GitLab


From dc52ddc0e6f45b04780b26fc0813509f8e798c42 Mon Sep 17 00:00:00 2001
From: Matt Helsley <matthltc@us.ibm.com>
Date: Sat, 18 Oct 2008 20:27:21 -0700
Subject: [PATCH 736/895] container freezer: implement freezer cgroup subsystem

This patch implements a new freezer subsystem in the control groups
framework.  It provides a way to stop and resume execution of all tasks in
a cgroup by writing in the cgroup filesystem.

The freezer subsystem in the container filesystem defines a file named
freezer.state.  Writing "FROZEN" to the state file will freeze all tasks
in the cgroup.  Subsequently writing "RUNNING" will unfreeze the tasks in
the cgroup.  Reading will return the current state.

* Examples of usage :

   # mkdir /containers/freezer
   # mount -t cgroup -ofreezer freezer  /containers
   # mkdir /containers/0
   # echo $some_pid > /containers/0/tasks

to get status of the freezer subsystem :

   # cat /containers/0/freezer.state
   RUNNING

to freeze all tasks in the container :

   # echo FROZEN > /containers/0/freezer.state
   # cat /containers/0/freezer.state
   FREEZING
   # cat /containers/0/freezer.state
   FROZEN

to unfreeze all tasks in the container :

   # echo RUNNING > /containers/0/freezer.state
   # cat /containers/0/freezer.state
   RUNNING

This is the basic mechanism which should do the right thing for user space
task in a simple scenario.

It's important to note that freezing can be incomplete.  In that case we
return EBUSY.  This means that some tasks in the cgroup are busy doing
something that prevents us from completely freezing the cgroup at this
time.  After EBUSY, the cgroup will remain partially frozen -- reflected
by freezer.state reporting "FREEZING" when read.  The state will remain
"FREEZING" until one of these things happens:

	1) Userspace cancels the freezing operation by writing "RUNNING" to
		the freezer.state file
	2) Userspace retries the freezing operation by writing "FROZEN" to
		the freezer.state file (writing "FREEZING" is not legal
		and returns EIO)
	3) The tasks that blocked the cgroup from entering the "FROZEN"
		state disappear from the cgroup's set of tasks.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: export thaw_process]
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Tested-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/Kconfig            |   1 +
 arch/arm/Kconfig              |   2 +
 arch/avr32/Kconfig            |   2 +
 arch/blackfin/Kconfig         |   3 +
 arch/cris/Kconfig             |   2 +
 arch/frv/Kconfig              |   2 +
 arch/h8300/Kconfig            |   2 +
 arch/ia64/Kconfig             |   2 +
 arch/m32r/Kconfig             |   2 +
 arch/m68k/Kconfig             |   2 +
 arch/m68knommu/Kconfig        |   2 +
 arch/mips/Kconfig             |   2 +
 arch/mn10300/Kconfig          |   2 +
 arch/parisc/Kconfig           |   2 +
 arch/powerpc/Kconfig          |   2 +
 arch/s390/Kconfig             |   2 +
 arch/sh/Kconfig               |   2 +
 arch/sparc/Kconfig            |   2 +
 arch/sparc64/Kconfig          |   1 +
 arch/um/Kconfig               |   2 +
 arch/x86/Kconfig              |   1 +
 arch/xtensa/Kconfig           |   1 +
 include/linux/cgroup_subsys.h |   6 +
 include/linux/freezer.h       |  29 +--
 init/Kconfig                  |   7 +
 kernel/Kconfig.freezer        |   2 +
 kernel/Makefile               |   1 +
 kernel/cgroup_freezer.c       | 366 ++++++++++++++++++++++++++++++++++
 kernel/freezer.c              |  32 +++
 kernel/power/Kconfig          |   3 -
 30 files changed, 465 insertions(+), 22 deletions(-)
 create mode 100644 kernel/Kconfig.freezer
 create mode 100644 kernel/cgroup_freezer.c

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index a0f642b6a4b9..6110197757a3 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -70,6 +70,7 @@ config AUTO_IRQ_AFFINITY
 	default y
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 
 menu "System setup"
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4853f9df37bd..df39d20f7425 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -192,6 +192,8 @@ config VECTORS_BASE
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "System Type"
 
 choice
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index 7c239a916275..33a5b2969eb4 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -72,6 +72,8 @@ config GENERIC_BUG
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "System Type and features"
 
 source "kernel/time/Kconfig"
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 8102c79aaa94..29e71ed6b8a7 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -64,8 +64,11 @@ config HARDWARE_PM
 	depends on OPROFILE
 
 source "init/Kconfig"
+
 source "kernel/Kconfig.preempt"
 
+source "kernel/Kconfig.freezer"
+
 menu "Blackfin Processor Options"
 
 comment "Processor and Board Settings"
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 9389d38f222f..07335e719bf8 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -62,6 +62,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "General setup"
 
 source "fs/Kconfig.binfmt"
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index a5aac1b07562..9d1552a9ee2c 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -66,6 +66,8 @@ mainmenu "Fujitsu FR-V Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Fujitsu FR-V system setup"
 
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index c7966746fbfe..bd1995403c67 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -90,6 +90,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 source "arch/h8300/Kconfig.cpu"
 
 menu "Executable file formats"
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3b7aa38254a8..912c57db2d21 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -7,6 +7,8 @@ mainmenu "IA-64 Linux Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Processor type and features"
 
 config IA64
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 00289c178f89..dbaed4a63815 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -42,6 +42,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Processor type and features"
 
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 677c93a490f6..836fb66f080d 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -62,6 +62,8 @@ mainmenu "Linux/68k Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Platform dependent setup"
 
 config EISA
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index 0a8998315e5e..76b66feb74df 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -75,6 +75,8 @@ config NO_IOPORT
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Processor type and features"
 
 choice
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b905744d7915..5f149b030c0f 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1885,6 +1885,8 @@ config PROBE_INITRD_HEADER
 	  add initrd or initramfs image to the kernel image.
 	  Otherwise, say N.
 
+source "kernel/Kconfig.freezer"
+
 menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)"
 
 config HW_HAS_EISA
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index dd557c9cf001..9a9f43358879 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -68,6 +68,8 @@ mainmenu "Matsushita MN10300/AM33 Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Matsushita MN10300 system setup"
 
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 8313fccced5e..2bd1f6ef5db0 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -90,6 +90,8 @@ config ARCH_MAY_HAVE_PC_FDC
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Processor type and features"
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 380baa1780e9..9391199d9e77 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -230,6 +230,8 @@ config PPC_OF_PLATFORM_PCI
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 source "arch/powerpc/sysdev/Kconfig"
 source "arch/powerpc/platforms/Kconfig"
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index bc581d8a7cd9..70b7645ce745 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -78,6 +78,8 @@ config S390
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Base setup"
 
 comment "Processor type and features"
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 5131d50f851a..2ed5713b7540 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -106,6 +106,8 @@ config IO_TRAPPED
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "System type"
 
 #
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 97671dac12a6..e594559c8dba 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -37,6 +37,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "General machine setup"
 
 config SMP
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 5446e2a499b1..035b15af90d8 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -96,6 +96,7 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
 	def_bool y
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
 
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 6976812cfb18..393bccfe1785 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -229,6 +229,8 @@ endmenu
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 source "drivers/block/Kconfig"
 
 source "arch/um/Kconfig.char"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bd3c2c53873e..49349ba77d80 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -193,6 +193,7 @@ config X86_TRAMPOLINE
 config KTIME_SCALAR
 	def_bool X86_32
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
 
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 02e417d3d8e9..a213260b51e5 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -55,6 +55,7 @@ config HZ
 	default 100
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
 
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index e2877454ec82..9c22396e8b50 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -48,3 +48,9 @@ SUBSYS(devices)
 #endif
 
 /* */
+
+#ifdef CONFIG_CGROUP_FREEZER
+SUBSYS(freezer)
+#endif
+
+/* */
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 17e3bb42dd3c..8f225339eee9 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -46,26 +46,11 @@ static inline bool should_send_signal(struct task_struct *p)
 
 /*
  * Wake up a frozen process
- *
- * task_lock() is taken to prevent the race with refrigerator() which may
- * occur if the freezing of tasks fails.  Namely, without the lock, if the
- * freezing of tasks failed, thaw_tasks() might have run before a task in
- * refrigerator() could call frozen_process(), in which case the task would be
- * frozen and no one would thaw it.
  */
-static inline int thaw_process(struct task_struct *p)
-{
-	task_lock(p);
-	if (frozen(p)) {
-		p->flags &= ~PF_FROZEN;
-		task_unlock(p);
-		wake_up_process(p);
-		return 1;
-	}
-	clear_freeze_flag(p);
-	task_unlock(p);
-	return 0;
-}
+extern int __thaw_process(struct task_struct *p);
+
+/* Takes and releases task alloc lock using task_lock() */
+extern int thaw_process(struct task_struct *p);
 
 extern void refrigerator(void);
 extern int freeze_processes(void);
@@ -83,6 +68,12 @@ static inline int try_to_freeze(void)
 extern bool freeze_task(struct task_struct *p, bool sig_only);
 extern void cancel_freezing(struct task_struct *p);
 
+#ifdef CONFIG_CGROUP_FREEZER
+extern int cgroup_frozen(struct task_struct *task);
+#else /* !CONFIG_CGROUP_FREEZER */
+static inline int cgroup_frozen(struct task_struct *task) { return 0; }
+#endif /* !CONFIG_CGROUP_FREEZER */
+
 /*
  * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it
  * calls wait_for_completion(&vfork) and reset right after it returns from this
diff --git a/init/Kconfig b/init/Kconfig
index 5ceff3249a2d..8828ed0b2051 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -299,6 +299,13 @@ config CGROUP_NS
           for instance virtual servers and checkpoint/restart
           jobs.
 
+config CGROUP_FREEZER
+        bool "control group freezer subsystem"
+        depends on CGROUPS
+        help
+          Provides a way to freeze and unfreeze all tasks in a
+	  cgroup.
+
 config CGROUP_DEVICE
 	bool "Device controller for cgroups"
 	depends on CGROUPS && EXPERIMENTAL
diff --git a/kernel/Kconfig.freezer b/kernel/Kconfig.freezer
new file mode 100644
index 000000000000..a3bb4cb52539
--- /dev/null
+++ b/kernel/Kconfig.freezer
@@ -0,0 +1,2 @@
+config FREEZER
+	def_bool PM_SLEEP || CGROUP_FREEZER
diff --git a/kernel/Makefile b/kernel/Makefile
index e8194d15d5f4..066550aa61c5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
+obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_UTS_NS) += utsname.o
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
new file mode 100644
index 000000000000..b08722de610c
--- /dev/null
+++ b/kernel/cgroup_freezer.c
@@ -0,0 +1,366 @@
+/*
+ * cgroup_freezer.c -  control group freezer subsystem
+ *
+ * Copyright IBM Corporation, 2007
+ *
+ * Author : Cedric Le Goater <clg@fr.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/freezer.h>
+#include <linux/seq_file.h>
+
+enum freezer_state {
+	STATE_RUNNING = 0,
+	STATE_FREEZING,
+	STATE_FROZEN,
+};
+
+struct freezer {
+	struct cgroup_subsys_state css;
+	enum freezer_state state;
+	spinlock_t lock; /* protects _writes_ to state */
+};
+
+static inline struct freezer *cgroup_freezer(
+		struct cgroup *cgroup)
+{
+	return container_of(
+		cgroup_subsys_state(cgroup, freezer_subsys_id),
+		struct freezer, css);
+}
+
+static inline struct freezer *task_freezer(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, freezer_subsys_id),
+			    struct freezer, css);
+}
+
+int cgroup_frozen(struct task_struct *task)
+{
+	struct freezer *freezer;
+	enum freezer_state state;
+
+	task_lock(task);
+	freezer = task_freezer(task);
+	state = freezer->state;
+	task_unlock(task);
+
+	return state == STATE_FROZEN;
+}
+
+/*
+ * cgroups_write_string() limits the size of freezer state strings to
+ * CGROUP_LOCAL_BUFFER_SIZE
+ */
+static const char *freezer_state_strs[] = {
+	"RUNNING",
+	"FREEZING",
+	"FROZEN",
+};
+
+/*
+ * State diagram
+ * Transitions are caused by userspace writes to the freezer.state file.
+ * The values in parenthesis are state labels. The rest are edge labels.
+ *
+ * (RUNNING) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
+ *    ^ ^                     |                       |
+ *    | \_______RUNNING_______/                       |
+ *    \_____________________________RUNNING___________/
+ */
+
+struct cgroup_subsys freezer_subsys;
+
+/* Locks taken and their ordering
+ * ------------------------------
+ * css_set_lock
+ * cgroup_mutex (AKA cgroup_lock)
+ * task->alloc_lock (AKA task_lock)
+ * freezer->lock
+ * task->sighand->siglock
+ *
+ * cgroup code forces css_set_lock to be taken before task->alloc_lock
+ *
+ * freezer_create(), freezer_destroy():
+ * cgroup_mutex [ by cgroup core ]
+ *
+ * can_attach():
+ * cgroup_mutex
+ *
+ * cgroup_frozen():
+ * task->alloc_lock (to get task's cgroup)
+ *
+ * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
+ * task->alloc_lock (to get task's cgroup)
+ * freezer->lock
+ *  sighand->siglock (if the cgroup is freezing)
+ *
+ * freezer_read():
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *
+ * freezer_write() (freeze):
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *    sighand->siglock
+ *
+ * freezer_write() (unfreeze):
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *    task->alloc_lock (to prevent races with freeze_task())
+ *     sighand->siglock
+ */
+static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
+						  struct cgroup *cgroup)
+{
+	struct freezer *freezer;
+
+	freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
+	if (!freezer)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&freezer->lock);
+	freezer->state = STATE_RUNNING;
+	return &freezer->css;
+}
+
+static void freezer_destroy(struct cgroup_subsys *ss,
+			    struct cgroup *cgroup)
+{
+	kfree(cgroup_freezer(cgroup));
+}
+
+
+static int freezer_can_attach(struct cgroup_subsys *ss,
+			      struct cgroup *new_cgroup,
+			      struct task_struct *task)
+{
+	struct freezer *freezer;
+	int retval = 0;
+
+	/*
+	 * The call to cgroup_lock() in the freezer.state write method prevents
+	 * a write to that file racing against an attach, and hence the
+	 * can_attach() result will remain valid until the attach completes.
+	 */
+	freezer = cgroup_freezer(new_cgroup);
+	if (freezer->state == STATE_FROZEN)
+		retval = -EBUSY;
+	return retval;
+}
+
+static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
+{
+	struct freezer *freezer;
+
+	task_lock(task);
+	freezer = task_freezer(task);
+	task_unlock(task);
+
+	BUG_ON(freezer->state == STATE_FROZEN);
+	spin_lock_irq(&freezer->lock);
+	/* Locking avoids race with FREEZING -> RUNNING transitions. */
+	if (freezer->state == STATE_FREEZING)
+		freeze_task(task, true);
+	spin_unlock_irq(&freezer->lock);
+}
+
+/*
+ * caller must hold freezer->lock
+ */
+static void check_if_frozen(struct cgroup *cgroup,
+			     struct freezer *freezer)
+{
+	struct cgroup_iter it;
+	struct task_struct *task;
+	unsigned int nfrozen = 0, ntotal = 0;
+
+	cgroup_iter_start(cgroup, &it);
+	while ((task = cgroup_iter_next(cgroup, &it))) {
+		ntotal++;
+		/*
+		 * Task is frozen or will freeze immediately when next it gets
+		 * woken
+		 */
+		if (frozen(task) ||
+		    (task_is_stopped_or_traced(task) && freezing(task)))
+			nfrozen++;
+	}
+
+	/*
+	 * Transition to FROZEN when no new tasks can be added ensures
+	 * that we never exist in the FROZEN state while there are unfrozen
+	 * tasks.
+	 */
+	if (nfrozen == ntotal)
+		freezer->state = STATE_FROZEN;
+	cgroup_iter_end(cgroup, &it);
+}
+
+static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
+			struct seq_file *m)
+{
+	struct freezer *freezer;
+	enum freezer_state state;
+
+	if (!cgroup_lock_live_group(cgroup))
+		return -ENODEV;
+
+	freezer = cgroup_freezer(cgroup);
+	spin_lock_irq(&freezer->lock);
+	state = freezer->state;
+	if (state == STATE_FREEZING) {
+		/* We change from FREEZING to FROZEN lazily if the cgroup was
+		 * only partially frozen when we exitted write. */
+		check_if_frozen(cgroup, freezer);
+		state = freezer->state;
+	}
+	spin_unlock_irq(&freezer->lock);
+	cgroup_unlock();
+
+	seq_puts(m, freezer_state_strs[state]);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+	struct cgroup_iter it;
+	struct task_struct *task;
+	unsigned int num_cant_freeze_now = 0;
+
+	freezer->state = STATE_FREEZING;
+	cgroup_iter_start(cgroup, &it);
+	while ((task = cgroup_iter_next(cgroup, &it))) {
+		if (!freeze_task(task, true))
+			continue;
+		if (task_is_stopped_or_traced(task) && freezing(task))
+			/*
+			 * The freeze flag is set so these tasks will
+			 * immediately go into the fridge upon waking.
+			 */
+			continue;
+		if (!freezing(task) && !freezer_should_skip(task))
+			num_cant_freeze_now++;
+	}
+	cgroup_iter_end(cgroup, &it);
+
+	return num_cant_freeze_now ? -EBUSY : 0;
+}
+
+static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+	struct cgroup_iter it;
+	struct task_struct *task;
+
+	cgroup_iter_start(cgroup, &it);
+	while ((task = cgroup_iter_next(cgroup, &it))) {
+		int do_wake;
+
+		task_lock(task);
+		do_wake = __thaw_process(task);
+		task_unlock(task);
+		if (do_wake)
+			wake_up_process(task);
+	}
+	cgroup_iter_end(cgroup, &it);
+	freezer->state = STATE_RUNNING;
+
+	return 0;
+}
+
+static int freezer_change_state(struct cgroup *cgroup,
+				enum freezer_state goal_state)
+{
+	struct freezer *freezer;
+	int retval = 0;
+
+	freezer = cgroup_freezer(cgroup);
+	spin_lock_irq(&freezer->lock);
+	check_if_frozen(cgroup, freezer); /* may update freezer->state */
+	if (goal_state == freezer->state)
+		goto out;
+	switch (freezer->state) {
+	case STATE_RUNNING:
+		retval = try_to_freeze_cgroup(cgroup, freezer);
+		break;
+	case STATE_FREEZING:
+		if (goal_state == STATE_FROZEN) {
+			/* Userspace is retrying after
+			 * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
+			retval = try_to_freeze_cgroup(cgroup, freezer);
+			break;
+		}
+		/* state == FREEZING and goal_state == RUNNING, so unfreeze */
+	case STATE_FROZEN:
+		retval = unfreeze_cgroup(cgroup, freezer);
+		break;
+	default:
+		break;
+	}
+out:
+	spin_unlock_irq(&freezer->lock);
+
+	return retval;
+}
+
+static int freezer_write(struct cgroup *cgroup,
+			 struct cftype *cft,
+			 const char *buffer)
+{
+	int retval;
+	enum freezer_state goal_state;
+
+	if (strcmp(buffer, freezer_state_strs[STATE_RUNNING]) == 0)
+		goal_state = STATE_RUNNING;
+	else if (strcmp(buffer, freezer_state_strs[STATE_FROZEN]) == 0)
+		goal_state = STATE_FROZEN;
+	else
+		return -EIO;
+
+	if (!cgroup_lock_live_group(cgroup))
+		return -ENODEV;
+	retval = freezer_change_state(cgroup, goal_state);
+	cgroup_unlock();
+	return retval;
+}
+
+static struct cftype files[] = {
+	{
+		.name = "state",
+		.read_seq_string = freezer_read,
+		.write_string = freezer_write,
+	},
+};
+
+static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
+{
+	return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
+}
+
+struct cgroup_subsys freezer_subsys = {
+	.name		= "freezer",
+	.create		= freezer_create,
+	.destroy	= freezer_destroy,
+	.populate	= freezer_populate,
+	.subsys_id	= freezer_subsys_id,
+	.can_attach	= freezer_can_attach,
+	.attach		= NULL,
+	.fork		= freezer_fork,
+	.exit		= NULL,
+};
diff --git a/kernel/freezer.c b/kernel/freezer.c
index cb0931f89306..ba6248b323ef 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -120,3 +120,35 @@ void cancel_freezing(struct task_struct *p)
 		spin_unlock_irqrestore(&p->sighand->siglock, flags);
 	}
 }
+
+/*
+ * Wake up a frozen process
+ *
+ * task_lock() is needed to prevent the race with refrigerator() which may
+ * occur if the freezing of tasks fails.  Namely, without the lock, if the
+ * freezing of tasks failed, thaw_tasks() might have run before a task in
+ * refrigerator() could call frozen_process(), in which case the task would be
+ * frozen and no one would thaw it.
+ */
+int __thaw_process(struct task_struct *p)
+{
+	if (frozen(p)) {
+		p->flags &= ~PF_FROZEN;
+		return 1;
+	}
+	clear_freeze_flag(p);
+	return 0;
+}
+
+int thaw_process(struct task_struct *p)
+{
+	task_lock(p);
+	if (__thaw_process(p) == 1) {
+		task_unlock(p);
+		wake_up_process(p);
+		return 1;
+	}
+	task_unlock(p);
+	return 0;
+}
+EXPORT_SYMBOL(thaw_process);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ebdd7f55273d..dcd165f92a88 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -85,9 +85,6 @@ config PM_SLEEP
 	depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
 	default y
 
-config FREEZER
-	def_bool PM_SLEEP
-
 config SUSPEND
 	bool "Suspend to RAM and standby"
 	depends on PM && ARCH_SUSPEND_POSSIBLE
-- 
GitLab


From 5a06915c6df9b89cda5ddb3f8cce5f9a6be534d2 Mon Sep 17 00:00:00 2001
From: Matt Helsley <matthltc@us.ibm.com>
Date: Sat, 18 Oct 2008 20:27:22 -0700
Subject: [PATCH 737/895] container freezer: skip frozen cgroups during power
 management resume

When a system is resumed after a suspend, it will also unfreeze frozen
cgroups.

This patchs modifies the resume sequence to skip the tasks which are part
of a frozen control group.

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Tested-by: Matt Helsley <matthltc@us.ibm.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/power/process.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/power/process.c b/kernel/power/process.c
index 444cea80fde8..ca634019497a 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -135,6 +135,9 @@ static void thaw_tasks(bool nosig_only)
 		if (nosig_only && should_send_signal(p))
 			continue;
 
+		if (cgroup_frozen(p))
+			continue;
+
 		thaw_process(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
-- 
GitLab


From 957a4eeaf4af614ab0fc4c09a22593d6ab233f5b Mon Sep 17 00:00:00 2001
From: Matt Helsley <matthltc@us.ibm.com>
Date: Sat, 18 Oct 2008 20:27:22 -0700
Subject: [PATCH 738/895] container freezer: prevent frozen tasks or cgroups
 from changing

Don't let frozen tasks or cgroups change.  This means frozen tasks can't
leave their current cgroup for another cgroup.  It also means that tasks
cannot be added to or removed from a cgroup in the FROZEN state.  We
enforce these rules by checking for frozen tasks and cgroups in the
can_attach() function.

Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup_freezer.c | 43 +++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index b08722de610c..df884022fbd9 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -145,22 +145,40 @@ static void freezer_destroy(struct cgroup_subsys *ss,
 	kfree(cgroup_freezer(cgroup));
 }
 
+/* Task is frozen or will freeze immediately when next it gets woken */
+static bool is_task_frozen_enough(struct task_struct *task)
+{
+	return frozen(task) ||
+		(task_is_stopped_or_traced(task) && freezing(task));
+}
 
+/*
+ * The call to cgroup_lock() in the freezer.state write method prevents
+ * a write to that file racing against an attach, and hence the
+ * can_attach() result will remain valid until the attach completes.
+ */
 static int freezer_can_attach(struct cgroup_subsys *ss,
 			      struct cgroup *new_cgroup,
 			      struct task_struct *task)
 {
 	struct freezer *freezer;
-	int retval = 0;
+	int retval;
+
+	/* Anything frozen can't move or be moved to/from */
+
+	if (is_task_frozen_enough(task))
+		return -EBUSY;
 
-	/*
-	 * The call to cgroup_lock() in the freezer.state write method prevents
-	 * a write to that file racing against an attach, and hence the
-	 * can_attach() result will remain valid until the attach completes.
-	 */
 	freezer = cgroup_freezer(new_cgroup);
+	if (freezer->state == STATE_FROZEN)
+		return -EBUSY;
+
+	retval = 0;
+	task_lock(task);
+	freezer = task_freezer(task);
 	if (freezer->state == STATE_FROZEN)
 		retval = -EBUSY;
+	task_unlock(task);
 	return retval;
 }
 
@@ -193,12 +211,7 @@ static void check_if_frozen(struct cgroup *cgroup,
 	cgroup_iter_start(cgroup, &it);
 	while ((task = cgroup_iter_next(cgroup, &it))) {
 		ntotal++;
-		/*
-		 * Task is frozen or will freeze immediately when next it gets
-		 * woken
-		 */
-		if (frozen(task) ||
-		    (task_is_stopped_or_traced(task) && freezing(task)))
+		if (is_task_frozen_enough(task))
 			nfrozen++;
 	}
 
@@ -249,11 +262,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	while ((task = cgroup_iter_next(cgroup, &it))) {
 		if (!freeze_task(task, true))
 			continue;
-		if (task_is_stopped_or_traced(task) && freezing(task))
-			/*
-			 * The freeze flag is set so these tasks will
-			 * immediately go into the fridge upon waking.
-			 */
+		if (is_task_frozen_enough(task))
 			continue;
 		if (!freezing(task) && !freezer_should_skip(task))
 			num_cant_freeze_now++;
-- 
GitLab


From 81dcf33c2ae314899f754aa7aaa1cb1fe2f84da6 Mon Sep 17 00:00:00 2001
From: Matt Helsley <matthltc@us.ibm.com>
Date: Sat, 18 Oct 2008 20:27:23 -0700
Subject: [PATCH 739/895] container freezer: make freezer state names less
 generic

Rename cgroup freezer states to be less generic to avoid any name
collisions while also better describing what each state is.

Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup_freezer.c | 56 ++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index df884022fbd9..6e4280d77429 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -22,9 +22,9 @@
 #include <linux/seq_file.h>
 
 enum freezer_state {
-	STATE_RUNNING = 0,
-	STATE_FREEZING,
-	STATE_FROZEN,
+	CGROUP_THAWED = 0,
+	CGROUP_FREEZING,
+	CGROUP_FROZEN,
 };
 
 struct freezer {
@@ -57,7 +57,7 @@ int cgroup_frozen(struct task_struct *task)
 	state = freezer->state;
 	task_unlock(task);
 
-	return state == STATE_FROZEN;
+	return state == CGROUP_FROZEN;
 }
 
 /*
@@ -65,7 +65,7 @@ int cgroup_frozen(struct task_struct *task)
  * CGROUP_LOCAL_BUFFER_SIZE
  */
 static const char *freezer_state_strs[] = {
-	"RUNNING",
+	"THAWED",
 	"FREEZING",
 	"FROZEN",
 };
@@ -75,10 +75,10 @@ static const char *freezer_state_strs[] = {
  * Transitions are caused by userspace writes to the freezer.state file.
  * The values in parenthesis are state labels. The rest are edge labels.
  *
- * (RUNNING) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
- *    ^ ^                     |                       |
- *    | \_______RUNNING_______/                       |
- *    \_____________________________RUNNING___________/
+ * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
+ *    ^ ^                    |                     |
+ *    | \_______THAWED_______/                     |
+ *    \__________________________THAWED____________/
  */
 
 struct cgroup_subsys freezer_subsys;
@@ -135,7 +135,7 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
 		return ERR_PTR(-ENOMEM);
 
 	spin_lock_init(&freezer->lock);
-	freezer->state = STATE_RUNNING;
+	freezer->state = CGROUP_THAWED;
 	return &freezer->css;
 }
 
@@ -170,13 +170,13 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
 		return -EBUSY;
 
 	freezer = cgroup_freezer(new_cgroup);
-	if (freezer->state == STATE_FROZEN)
+	if (freezer->state == CGROUP_FROZEN)
 		return -EBUSY;
 
 	retval = 0;
 	task_lock(task);
 	freezer = task_freezer(task);
-	if (freezer->state == STATE_FROZEN)
+	if (freezer->state == CGROUP_FROZEN)
 		retval = -EBUSY;
 	task_unlock(task);
 	return retval;
@@ -190,10 +190,10 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
 	freezer = task_freezer(task);
 	task_unlock(task);
 
-	BUG_ON(freezer->state == STATE_FROZEN);
+	BUG_ON(freezer->state == CGROUP_FROZEN);
 	spin_lock_irq(&freezer->lock);
-	/* Locking avoids race with FREEZING -> RUNNING transitions. */
-	if (freezer->state == STATE_FREEZING)
+	/* Locking avoids race with FREEZING -> THAWED transitions. */
+	if (freezer->state == CGROUP_FREEZING)
 		freeze_task(task, true);
 	spin_unlock_irq(&freezer->lock);
 }
@@ -221,7 +221,7 @@ static void check_if_frozen(struct cgroup *cgroup,
 	 * tasks.
 	 */
 	if (nfrozen == ntotal)
-		freezer->state = STATE_FROZEN;
+		freezer->state = CGROUP_FROZEN;
 	cgroup_iter_end(cgroup, &it);
 }
 
@@ -237,7 +237,7 @@ static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
 	freezer = cgroup_freezer(cgroup);
 	spin_lock_irq(&freezer->lock);
 	state = freezer->state;
-	if (state == STATE_FREEZING) {
+	if (state == CGROUP_FREEZING) {
 		/* We change from FREEZING to FROZEN lazily if the cgroup was
 		 * only partially frozen when we exitted write. */
 		check_if_frozen(cgroup, freezer);
@@ -257,7 +257,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	struct task_struct *task;
 	unsigned int num_cant_freeze_now = 0;
 
-	freezer->state = STATE_FREEZING;
+	freezer->state = CGROUP_FREEZING;
 	cgroup_iter_start(cgroup, &it);
 	while ((task = cgroup_iter_next(cgroup, &it))) {
 		if (!freeze_task(task, true))
@@ -288,7 +288,7 @@ static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 			wake_up_process(task);
 	}
 	cgroup_iter_end(cgroup, &it);
-	freezer->state = STATE_RUNNING;
+	freezer->state = CGROUP_THAWED;
 
 	return 0;
 }
@@ -305,18 +305,18 @@ static int freezer_change_state(struct cgroup *cgroup,
 	if (goal_state == freezer->state)
 		goto out;
 	switch (freezer->state) {
-	case STATE_RUNNING:
+	case CGROUP_THAWED:
 		retval = try_to_freeze_cgroup(cgroup, freezer);
 		break;
-	case STATE_FREEZING:
-		if (goal_state == STATE_FROZEN) {
+	case CGROUP_FREEZING:
+		if (goal_state == CGROUP_FROZEN) {
 			/* Userspace is retrying after
 			 * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
 			retval = try_to_freeze_cgroup(cgroup, freezer);
 			break;
 		}
-		/* state == FREEZING and goal_state == RUNNING, so unfreeze */
-	case STATE_FROZEN:
+		/* state == FREEZING and goal_state == THAWED, so unfreeze */
+	case CGROUP_FROZEN:
 		retval = unfreeze_cgroup(cgroup, freezer);
 		break;
 	default:
@@ -335,10 +335,10 @@ static int freezer_write(struct cgroup *cgroup,
 	int retval;
 	enum freezer_state goal_state;
 
-	if (strcmp(buffer, freezer_state_strs[STATE_RUNNING]) == 0)
-		goal_state = STATE_RUNNING;
-	else if (strcmp(buffer, freezer_state_strs[STATE_FROZEN]) == 0)
-		goal_state = STATE_FROZEN;
+	if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0)
+		goal_state = CGROUP_THAWED;
+	else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
+		goal_state = CGROUP_FROZEN;
 	else
 		return -EIO;
 
-- 
GitLab


From 1aece34833721d64eb33fc15cd923c727296d3d3 Mon Sep 17 00:00:00 2001
From: Matt Helsley <matthltc@us.ibm.com>
Date: Sat, 18 Oct 2008 20:27:24 -0700
Subject: [PATCH 740/895] container freezer: rename check_if_frozen()

check_if_frozen() sounds like it should return something when in fact it's
just updating the freezer state.

Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup_freezer.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 6e4280d77429..e95056954498 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -201,8 +201,8 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
 /*
  * caller must hold freezer->lock
  */
-static void check_if_frozen(struct cgroup *cgroup,
-			     struct freezer *freezer)
+static void update_freezer_state(struct cgroup *cgroup,
+				 struct freezer *freezer)
 {
 	struct cgroup_iter it;
 	struct task_struct *task;
@@ -222,6 +222,10 @@ static void check_if_frozen(struct cgroup *cgroup,
 	 */
 	if (nfrozen == ntotal)
 		freezer->state = CGROUP_FROZEN;
+	else if (nfrozen > 0)
+		freezer->state = CGROUP_FREEZING;
+	else
+		freezer->state = CGROUP_THAWED;
 	cgroup_iter_end(cgroup, &it);
 }
 
@@ -240,7 +244,7 @@ static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
 	if (state == CGROUP_FREEZING) {
 		/* We change from FREEZING to FROZEN lazily if the cgroup was
 		 * only partially frozen when we exitted write. */
-		check_if_frozen(cgroup, freezer);
+		update_freezer_state(cgroup, freezer);
 		state = freezer->state;
 	}
 	spin_unlock_irq(&freezer->lock);
@@ -301,7 +305,7 @@ static int freezer_change_state(struct cgroup *cgroup,
 
 	freezer = cgroup_freezer(cgroup);
 	spin_lock_irq(&freezer->lock);
-	check_if_frozen(cgroup, freezer); /* may update freezer->state */
+	update_freezer_state(cgroup, freezer);
 	if (goal_state == freezer->state)
 		goto out;
 	switch (freezer->state) {
-- 
GitLab


From bde5ab65581a63e9f4f4bacfae8f201d04d25bed Mon Sep 17 00:00:00 2001
From: Matt Helsley <matthltc@us.ibm.com>
Date: Sat, 18 Oct 2008 20:27:24 -0700
Subject: [PATCH 741/895] container freezer: document the cgroup freezer
 subsystem.

Describe why we need the freezer subsystem and how to use it in a
documentation file.  Since the cgroups.txt file is focused on the
subsystem-agnostic portions of cgroups make a directory and move the old
cgroups.txt file at the same time.

Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: containers@lists.linux-foundation.org
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/{ => cgroups}/cgroups.txt     |  0
 Documentation/cgroups/freezer-subsystem.txt | 99 +++++++++++++++++++++
 Documentation/cpusets.txt                   |  2 +-
 3 files changed, 100 insertions(+), 1 deletion(-)
 rename Documentation/{ => cgroups}/cgroups.txt (100%)
 create mode 100644 Documentation/cgroups/freezer-subsystem.txt

diff --git a/Documentation/cgroups.txt b/Documentation/cgroups/cgroups.txt
similarity index 100%
rename from Documentation/cgroups.txt
rename to Documentation/cgroups/cgroups.txt
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt
new file mode 100644
index 000000000000..c50ab58b72eb
--- /dev/null
+++ b/Documentation/cgroups/freezer-subsystem.txt
@@ -0,0 +1,99 @@
+	The cgroup freezer is useful to batch job management system which start
+and stop sets of tasks in order to schedule the resources of a machine
+according to the desires of a system administrator. This sort of program
+is often used on HPC clusters to schedule access to the cluster as a
+whole. The cgroup freezer uses cgroups to describe the set of tasks to
+be started/stopped by the batch job management system. It also provides
+a means to start and stop the tasks composing the job.
+
+	The cgroup freezer will also be useful for checkpointing running groups
+of tasks. The freezer allows the checkpoint code to obtain a consistent
+image of the tasks by attempting to force the tasks in a cgroup into a
+quiescent state. Once the tasks are quiescent another task can
+walk /proc or invoke a kernel interface to gather information about the
+quiesced tasks. Checkpointed tasks can be restarted later should a
+recoverable error occur. This also allows the checkpointed tasks to be
+migrated between nodes in a cluster by copying the gathered information
+to another node and restarting the tasks there.
+
+	Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
+and resuming tasks in userspace. Both of these signals are observable
+from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
+blocked, or ignored it can be seen by waiting or ptracing parent tasks.
+SIGCONT is especially unsuitable since it can be caught by the task. Any
+programs designed to watch for SIGSTOP and SIGCONT could be broken by
+attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
+demonstrate this problem using nested bash shells:
+
+	$ echo $$
+	16644
+	$ bash
+	$ echo $$
+	16690
+
+	From a second, unrelated bash shell:
+	$ kill -SIGSTOP 16690
+	$ kill -SIGCONT 16990
+
+	<at this point 16990 exits and causes 16644 to exit too>
+
+	This happens because bash can observe both signals and choose how it
+responds to them.
+
+	Another example of a program which catches and responds to these
+signals is gdb. In fact any program designed to use ptrace is likely to
+have a problem with this method of stopping and resuming tasks.
+
+	 In contrast, the cgroup freezer uses the kernel freezer code to
+prevent the freeze/unfreeze cycle from becoming visible to the tasks
+being frozen. This allows the bash example above and gdb to run as
+expected.
+
+	The freezer subsystem in the container filesystem defines a file named
+freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the
+cgroup. Subsequently writing "THAWED" will unfreeze the tasks in the cgroup.
+Reading will return the current state.
+
+* Examples of usage :
+
+   # mkdir /containers/freezer
+   # mount -t cgroup -ofreezer freezer  /containers
+   # mkdir /containers/0
+   # echo $some_pid > /containers/0/tasks
+
+to get status of the freezer subsystem :
+
+   # cat /containers/0/freezer.state
+   THAWED
+
+to freeze all tasks in the container :
+
+   # echo FROZEN > /containers/0/freezer.state
+   # cat /containers/0/freezer.state
+   FREEZING
+   # cat /containers/0/freezer.state
+   FROZEN
+
+to unfreeze all tasks in the container :
+
+   # echo THAWED > /containers/0/freezer.state
+   # cat /containers/0/freezer.state
+   THAWED
+
+This is the basic mechanism which should do the right thing for user space task
+in a simple scenario.
+
+It's important to note that freezing can be incomplete. In that case we return
+EBUSY. This means that some tasks in the cgroup are busy doing something that
+prevents us from completely freezing the cgroup at this time. After EBUSY,
+the cgroup will remain partially frozen -- reflected by freezer.state reporting
+"FREEZING" when read. The state will remain "FREEZING" until one of these
+things happens:
+
+	1) Userspace cancels the freezing operation by writing "THAWED" to
+		the freezer.state file
+	2) Userspace retries the freezing operation by writing "FROZEN" to
+		the freezer.state file (writing "FREEZING" is not legal
+		and returns EIO)
+	3) The tasks that blocked the cgroup from entering the "FROZEN"
+		state disappear from the cgroup's set of tasks.
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 47e568a9370a..5c86c258c791 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -48,7 +48,7 @@ hooks, beyond what is already present, required to manage dynamic
 job placement on large systems.
 
 Cpusets use the generic cgroup subsystem described in
-Documentation/cgroup.txt.
+Documentation/cgroups/cgroups.txt.
 
 Requests by a task, using the sched_setaffinity(2) system call to
 include CPUs in its CPU affinity mask, and using the mbind(2) and
-- 
GitLab


From d12a6f7f913afb2c6f5de44cbac1bb7232a03175 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Sat, 18 Oct 2008 20:27:25 -0700
Subject: [PATCH 742/895] arch/m68k/bvme6000/rtc.c: remove duplicated include

Removed duplicated include file <linux/smp_lock.h> in
arch/m68k/bvme6000/rtc.c.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/bvme6000/rtc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/m68k/bvme6000/rtc.c b/arch/m68k/bvme6000/rtc.c
index 808c9018b115..c50bec8aabb1 100644
--- a/arch/m68k/bvme6000/rtc.c
+++ b/arch/m68k/bvme6000/rtc.c
@@ -18,7 +18,6 @@
 #include <linux/poll.h>
 #include <linux/module.h>
 #include <linux/mc146818rtc.h>	/* For struct rtc_time and ioctls, etc */
-#include <linux/smp_lock.h>
 #include <linux/bcd.h>
 #include <asm/bvme6000hw.h>
 
-- 
GitLab


From 966c8079c0eae66242692184982dc70671dbe90f Mon Sep 17 00:00:00 2001
From: WANG Cong <wangcong@zeuux.org>
Date: Sat, 18 Oct 2008 20:27:26 -0700
Subject: [PATCH 743/895] uml: fix a compile error

Fix

arch/um/sys-i386/signal.c: In function 'copy_sc_from_user':
arch/um/sys-i386/signal.c:182: warning: dereferencing 'void *' pointer
arch/um/sys-i386/signal.c:182: error: request for member '_fxsr_env' in something not a structure or union

Signed-off-by: WANG Cong <wangcong@zeuux.org>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/sys-i386/signal.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
index fd0c25ad6af3..129647375a6c 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/um/sys-i386/signal.c
@@ -179,7 +179,8 @@ static int copy_sc_from_user(struct pt_regs *regs,
 	if (have_fpx_regs) {
 		struct user_fxsr_struct fpx;
 
-		err = copy_from_user(&fpx, &sc.fpstate->_fxsr_env[0],
+		err = copy_from_user(&fpx,
+			&((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0],
 				     sizeof(struct user_fxsr_struct));
 		if (err)
 			return 1;
-- 
GitLab


From 2a80a3783d975dadea9740b0ac84c2e8796ee5bb Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Sat, 18 Oct 2008 20:27:27 -0700
Subject: [PATCH 744/895] Fix documentation of sysrq-q

I fell into the trap recently that it only dumps hrtimers instead of
all timers. Fix the documentation.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysrq.txt | 3 ++-
 drivers/char/sysrq.c    | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 5ce0952aa065..49378a9f2b5f 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -95,7 +95,8 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'p'     - Will dump the current registers and flags to your console.
 
-'q'     - Will dump a list of all running timers.
+'q'     - Will dump a list of all running hrtimers.
+	  WARNING: Does not cover any other timers
 
 'r'     - Turns off keyboard raw mode and sets it to XLATE.
 
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index dce4cc0e6953..d0c0d64ed366 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
 static struct sysrq_key_op sysrq_show_timers_op = {
 	.handler	= sysrq_handle_show_timers,
 	.help_msg	= "show-all-timers(Q)",
-	.action_msg	= "Show Pending Timers",
+	.action_msg	= "Show pending hrtimers (no others)",
 };
 
 static void sysrq_handle_mountro(int key, struct tty_struct *tty)
-- 
GitLab


From 5c6248411639e2b4e7f167632f30f5f3a7aa105f Mon Sep 17 00:00:00 2001
From: Eric Piel <eric.piel@tremplin-utc.net>
Date: Sat, 18 Oct 2008 20:27:28 -0700
Subject: [PATCH 745/895] HP-WMI: additional keycode (or typo)

On my HP 2510, pressing the (i) button generates an unknown keycode:
0x213b. So here is a patch adding support for it. However, as it seems
there is already support for a similar button connected to 0x231b as
keycode, I wonder if it could be a typo in the driver?

Signed-off-by: Eric Piel <eric.piel@tremplin-utc.net>
Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/hp-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/hp-wmi.c b/drivers/misc/hp-wmi.c
index 5dabfb69ee53..4b7c24c519c3 100644
--- a/drivers/misc/hp-wmi.c
+++ b/drivers/misc/hp-wmi.c
@@ -82,6 +82,7 @@ static struct key_entry hp_wmi_keymap[] = {
 	{KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
 	{KE_KEY, 0x20e6, KEY_PROG1},
 	{KE_KEY, 0x2142, KEY_MEDIA},
+	{KE_KEY, 0x213b, KEY_INFO},
 	{KE_KEY, 0x231b, KEY_HELP},
 	{KE_END, 0}
 };
-- 
GitLab


From c3b9f5afc7f4c8abc0452f132edb13ac8c1fafbf Mon Sep 17 00:00:00 2001
From: WANG Cong <wangcong@zeuux.org>
Date: Sat, 18 Oct 2008 20:27:29 -0700
Subject: [PATCH 746/895] kernel/configs.c: remove useless comments

These comments are useless, remove them.

Signed-off-by: WANG Cong <wangcong@zeuux.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/configs.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/kernel/configs.c b/kernel/configs.c
index 4c345210ed8c..abaee684ecbf 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -54,9 +54,6 @@
 
 #ifdef CONFIG_IKCONFIG_PROC
 
-/**************************************************/
-/* globals and useful constants                   */
-
 static ssize_t
 ikconfig_read_current(struct file *file, char __user *buf,
 		      size_t len, loff_t * offset)
@@ -71,9 +68,6 @@ static const struct file_operations ikconfig_file_ops = {
 	.read = ikconfig_read_current,
 };
 
-/***************************************************/
-/* ikconfig_init: start up everything we need to */
-
 static int __init ikconfig_init(void)
 {
 	struct proc_dir_entry *entry;
@@ -89,9 +83,6 @@ static int __init ikconfig_init(void)
 	return 0;
 }
 
-/***************************************************/
-/* ikconfig_cleanup: clean up our mess           */
-
 static void __exit ikconfig_cleanup(void)
 {
 	remove_proc_entry("config.gz", NULL);
-- 
GitLab


From 4d235ba6c2e9c717da8facafd1b990dcb687d1e0 Mon Sep 17 00:00:00 2001
From: Ameya Palande <2ameya@gmail.com>
Date: Sat, 18 Oct 2008 20:27:30 -0700
Subject: [PATCH 747/895] intel-iommu: typo fix and correct word in the comment

Fix for a typo and and replacing incorrect word in the comment.

Signed-off-by: Ameya Palande <2ameya@gmail.com>
Cc: "Ashok Raj" <ashok.raj@intel.com>
Cc: "Shaohua Li" <shaohua.li@intel.com>
Cc: "Anil S Keshavamurthy" <anil.s.keshavamurthy@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pci/intel-iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index fc5f2dbf5323..8b51e10b7783 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -563,7 +563,7 @@ static int __iommu_flush_context(struct intel_iommu *iommu,
 
 	spin_unlock_irqrestore(&iommu->register_lock, flag);
 
-	/* flush context entry will implictly flush write buffer */
+	/* flush context entry will implicitly flush write buffer */
 	return 0;
 }
 
@@ -656,7 +656,7 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
 		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
 			DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
-	/* flush context entry will implictly flush write buffer */
+	/* flush iotlb entry will implicitly flush write buffer */
 	return 0;
 }
 
-- 
GitLab


From 28f74e71775b1ae0ebf7fe87f7a7f39ecb77a2b8 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Sat, 18 Oct 2008 20:27:30 -0700
Subject: [PATCH 748/895] hwmon/pc87360 separate alarm files: define some
 constants

Bring hwmon/pc87360 into agreement with
Documentation/hwmon/sysfs-interface.

Patchset adds separate limit alarms for voltages and temps, it also adds
temp[123]_fault files.  On my Soekris, temps 1,2 are unused/unconnected,
so temp[123]_fault = 1,1,0 respectively.  This agrees with
/usr/bin/sensors, which has always shown them as OPEN.  Temps 4,5,6 are
thermistor based, and dont have a fault bit in their status register.

This patch:

2 different kinds of constants added:
- CHAN_ALM_* constants for (later) vin, temp alarm callbacks.
- CHAN_* conversion constants, used in _init_device, partly for RW1C bits

Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: "Mark M. Hoffman" <mhoffman@lightlink.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/pc87360.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 9b462bb13fa3..1043b116bded 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -489,6 +489,11 @@ static struct sensor_device_attribute in_max[] = {
 	SENSOR_ATTR(in10_max, S_IWUSR | S_IRUGO, show_in_max, set_in_max, 10),
 };
 
+/* (temp & vin) channel status register alarm bits (pdf sec.11.5.12) */
+#define CHAN_ALM_MIN	0x02	/* min limit crossed */
+#define CHAN_ALM_MAX	0x04	/* max limit exceeded */
+#define TEMP_ALM_CRIT	0x08	/* temp crit exceeded (temp only) */
+
 #define VIN_UNIT_ATTRS(X) \
 	&in_input[X].dev_attr.attr,	\
 	&in_status[X].dev_attr.attr,	\
@@ -1131,6 +1136,12 @@ static void pc87360_write_value(struct pc87360_data *data, u8 ldi, u8 bank,
 	mutex_unlock(&(data->lock));
 }
 
+/* (temp & vin) channel conversion status register flags (pdf sec.11.5.12) */
+#define CHAN_CNVRTD	0x80	/* new data ready */
+#define CHAN_ENA	0x01	/* enabled channel (temp or vin) */
+#define CHAN_ALM_ENA	0x10	/* propagate to alarms-reg ?? (chk val!) */
+#define CHAN_READY	(CHAN_ENA|CHAN_CNVRTD) /* sample ready mask */
+
 static void pc87360_init_device(struct platform_device *pdev,
 				int use_thermistors)
 {
@@ -1156,7 +1167,7 @@ static void pc87360_init_device(struct platform_device *pdev,
 			/* Forcibly enable voltage channel */
 			reg = pc87360_read_value(data, LD_IN, i,
 						 PC87365_REG_IN_STATUS);
-			if (!(reg & 0x01)) {
+			if (!(reg & CHAN_ENA)) {
 				dev_dbg(&pdev->dev, "Forcibly "
 					"enabling in%d\n", i);
 				pc87360_write_value(data, LD_IN, i,
@@ -1171,7 +1182,7 @@ static void pc87360_init_device(struct platform_device *pdev,
 	for (i = 11; i < data->innr; i++) {
 		reg = pc87360_read_value(data, LD_IN, i,
 					 PC87365_REG_TEMP_STATUS);
-		use_thermistors = use_thermistors || (reg & 0x01);
+		use_thermistors = use_thermistors || (reg & CHAN_ENA);
 	}
 
 	i = use_thermistors ? 2 : 0;
@@ -1180,7 +1191,7 @@ static void pc87360_init_device(struct platform_device *pdev,
 			/* Forcibly enable temperature channel */
 			reg = pc87360_read_value(data, LD_TEMP, i,
 						 PC87365_REG_TEMP_STATUS);
-			if (!(reg & 0x01)) {
+			if (!(reg & CHAN_ENA)) {
 				dev_dbg(&pdev->dev, "Forcibly "
 					"enabling temp%d\n", i+1);
 				pc87360_write_value(data, LD_TEMP, i,
@@ -1197,7 +1208,7 @@ static void pc87360_init_device(struct platform_device *pdev,
 				   diodes */
 				reg = pc87360_read_value(data, LD_TEMP,
 				      (i-11)/2, PC87365_REG_TEMP_STATUS);
-				if (reg & 0x01) {
+				if (reg & CHAN_ENA) {
 					dev_dbg(&pdev->dev, "Skipping "
 						"temp%d, pin already in use "
 						"by temp%d\n", i-7, (i-11)/2);
@@ -1207,7 +1218,7 @@ static void pc87360_init_device(struct platform_device *pdev,
 				/* Forcibly enable thermistor channel */
 				reg = pc87360_read_value(data, LD_IN, i,
 							 PC87365_REG_IN_STATUS);
-				if (!(reg & 0x01)) {
+				if (!(reg & CHAN_ENA)) {
 					dev_dbg(&pdev->dev, "Forcibly "
 						"enabling temp%d\n", i-7);
 					pc87360_write_value(data, LD_IN, i,
@@ -1221,7 +1232,7 @@ static void pc87360_init_device(struct platform_device *pdev,
 	if (data->innr) {
 		reg = pc87360_read_value(data, LD_IN, NO_BANK,
 					 PC87365_REG_IN_CONFIG);
-		if (reg & 0x01) {
+		if (reg & CHAN_ENA) {
 			dev_dbg(&pdev->dev, "Forcibly "
 				"enabling monitoring (VLM)\n");
 			pc87360_write_value(data, LD_IN, NO_BANK,
@@ -1233,7 +1244,7 @@ static void pc87360_init_device(struct platform_device *pdev,
 	if (data->tempnr) {
 		reg = pc87360_read_value(data, LD_TEMP, NO_BANK,
 					 PC87365_REG_TEMP_CONFIG);
-		if (reg & 0x01) {
+		if (reg & CHAN_ENA) {
 			dev_dbg(&pdev->dev, "Forcibly enabling "
 				"monitoring (TMS)\n");
 			pc87360_write_value(data, LD_TEMP, NO_BANK,
@@ -1336,11 +1347,11 @@ static struct pc87360_data *pc87360_update_device(struct device *dev)
 			pc87360_write_value(data, LD_IN, i,
 					    PC87365_REG_IN_STATUS,
 					    data->in_status[i]);
-			if ((data->in_status[i] & 0x81) == 0x81) {
+			if ((data->in_status[i] & CHAN_READY) == CHAN_READY) {
 				data->in[i] = pc87360_read_value(data, LD_IN,
 					      i, PC87365_REG_IN);
 			}
-			if (data->in_status[i] & 0x01) {
+			if (data->in_status[i] & CHAN_ENA) {
 				data->in_min[i] = pc87360_read_value(data,
 						  LD_IN, i,
 						  PC87365_REG_IN_MIN);
@@ -1373,12 +1384,12 @@ static struct pc87360_data *pc87360_update_device(struct device *dev)
 			pc87360_write_value(data, LD_TEMP, i,
 					    PC87365_REG_TEMP_STATUS,
 					    data->temp_status[i]);
-			if ((data->temp_status[i] & 0x81) == 0x81) {
+			if ((data->temp_status[i] & CHAN_READY) == CHAN_READY) {
 				data->temp[i] = pc87360_read_value(data,
 						LD_TEMP, i,
 						PC87365_REG_TEMP);
 			}
-			if (data->temp_status[i] & 0x01) {
+			if (data->temp_status[i] & CHAN_ENA) {
 				data->temp_min[i] = pc87360_read_value(data,
 						    LD_TEMP, i,
 						    PC87365_REG_TEMP_MIN);
-- 
GitLab


From 492e9657d1a790f44ab9b0e03fc72c1d8145f590 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Sat, 18 Oct 2008 20:27:32 -0700
Subject: [PATCH 749/895] hwmon/pc87360 separate alarm files: add
 in-min/max-alarms

Adds vin-min/max-alarm callbacks, sensor-device-attribute declarations,
and refs to those new decls in the macro used to initialize the vin_group
(of sysfs files)

[akpm@linux-foundation.org: cleanups]
Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: "Mark M. Hoffman" <mhoffman@lightlink.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/pc87360.c | 52 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 1043b116bded..3a7d7fa64ebc 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -494,11 +494,61 @@ static struct sensor_device_attribute in_max[] = {
 #define CHAN_ALM_MAX	0x04	/* max limit exceeded */
 #define TEMP_ALM_CRIT	0x08	/* temp crit exceeded (temp only) */
 
+/* show_in_min/max_alarm() reads data from the per-channel status
+   register (sec 11.5.12), not the vin event status registers (sec
+   11.5.2) that (legacy) show_in_alarm() resds (via data->in_alarms) */
+
+static ssize_t show_in_min_alarm(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_in_max_alarm(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MAX));
+}
+
+static struct sensor_device_attribute in_min_alarm[] = {
+	SENSOR_ATTR(in0_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 0),
+	SENSOR_ATTR(in1_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 1),
+	SENSOR_ATTR(in2_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 2),
+	SENSOR_ATTR(in3_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 3),
+	SENSOR_ATTR(in4_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 4),
+	SENSOR_ATTR(in5_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 5),
+	SENSOR_ATTR(in6_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 6),
+	SENSOR_ATTR(in7_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 7),
+	SENSOR_ATTR(in8_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 8),
+	SENSOR_ATTR(in9_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 9),
+	SENSOR_ATTR(in10_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 10),
+};
+static struct sensor_device_attribute in_max_alarm[] = {
+	SENSOR_ATTR(in0_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 0),
+	SENSOR_ATTR(in1_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 1),
+	SENSOR_ATTR(in2_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 2),
+	SENSOR_ATTR(in3_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 3),
+	SENSOR_ATTR(in4_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 4),
+	SENSOR_ATTR(in5_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 5),
+	SENSOR_ATTR(in6_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 6),
+	SENSOR_ATTR(in7_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 7),
+	SENSOR_ATTR(in8_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 8),
+	SENSOR_ATTR(in9_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 9),
+	SENSOR_ATTR(in10_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 10),
+};
+
 #define VIN_UNIT_ATTRS(X) \
 	&in_input[X].dev_attr.attr,	\
 	&in_status[X].dev_attr.attr,	\
 	&in_min[X].dev_attr.attr,	\
-	&in_max[X].dev_attr.attr
+	&in_max[X].dev_attr.attr,	\
+	&in_min_alarm[X].dev_attr.attr,	\
+	&in_max_alarm[X].dev_attr.attr
 
 static ssize_t show_vid(struct device *dev, struct device_attribute *attr, char *buf)
 {
-- 
GitLab


From b267e8cdc638dfa53360d532aa56afa0ec12e6a5 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Sat, 18 Oct 2008 20:27:32 -0700
Subject: [PATCH 750/895] hwmon/pc87360 separate alarm files: add
 temp-min/max/crit/fault-alarms

Adds temp-min/max/crit/fault-alarm callbacks, sensor-device-attribute
declarations, and refs to those new decls in the macro used to initialize
the temp_group (of sysfs files)

[akpm@linux-foundation.org: cleanups]
Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: "Mark M. Hoffman" <mhoffman@lightlink.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/pc87360.c | 84 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 80 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 3a7d7fa64ebc..c80ee5f056ba 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -845,12 +845,76 @@ static ssize_t show_temp_alarms(struct device *dev, struct device_attribute *att
 }
 static DEVICE_ATTR(alarms_temp, S_IRUGO, show_temp_alarms, NULL);
 
+/* show_temp_min/max_alarm() reads data from the per-channel status
+   register (sec 12.3.7), not the temp event status registers (sec
+   12.3.2) that show_temp_alarm() reads (via data->temp_alarms) */
+
+static ssize_t show_temp_min_alarm(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->temp_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_temp_max_alarm(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->temp_status[nr] & CHAN_ALM_MAX));
+}
+static ssize_t show_temp_crit_alarm(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->temp_status[nr] & TEMP_ALM_CRIT));
+}
+
+static struct sensor_device_attribute temp_min_alarm[] = {
+	SENSOR_ATTR(temp1_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 0),
+	SENSOR_ATTR(temp2_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 1),
+	SENSOR_ATTR(temp3_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 2),
+};
+static struct sensor_device_attribute temp_max_alarm[] = {
+	SENSOR_ATTR(temp1_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 0),
+	SENSOR_ATTR(temp2_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 1),
+	SENSOR_ATTR(temp3_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 2),
+};
+static struct sensor_device_attribute temp_crit_alarm[] = {
+	SENSOR_ATTR(temp1_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 0),
+	SENSOR_ATTR(temp2_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 1),
+	SENSOR_ATTR(temp3_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 2),
+};
+
+#define TEMP_FAULT	0x40	/* open diode */
+static ssize_t show_temp_fault(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->temp_status[nr] & TEMP_FAULT));
+}
+static struct sensor_device_attribute temp_fault[] = {
+	SENSOR_ATTR(temp1_fault, S_IRUGO, show_temp_fault, NULL, 0),
+	SENSOR_ATTR(temp2_fault, S_IRUGO, show_temp_fault, NULL, 1),
+	SENSOR_ATTR(temp3_fault, S_IRUGO, show_temp_fault, NULL, 2),
+};
+
 #define TEMP_UNIT_ATTRS(X) \
 	&temp_input[X].dev_attr.attr,	\
 	&temp_status[X].dev_attr.attr,	\
 	&temp_min[X].dev_attr.attr,	\
 	&temp_max[X].dev_attr.attr,	\
-	&temp_crit[X].dev_attr.attr
+	&temp_crit[X].dev_attr.attr,	\
+	&temp_min_alarm[X].dev_attr.attr, \
+	&temp_max_alarm[X].dev_attr.attr, \
+	&temp_crit_alarm[X].dev_attr.attr, \
+	&temp_fault[X].dev_attr.attr
 
 static struct attribute * pc8736x_temp_attr_array[] = {
 	TEMP_UNIT_ATTRS(0),
@@ -864,8 +928,8 @@ static const struct attribute_group pc8736x_temp_group = {
 	.attrs = pc8736x_temp_attr_array,
 };
 
-static ssize_t show_name(struct device *dev, struct device_attribute
-			 *devattr, char *buf)
+static ssize_t show_name(struct device *dev,
+			struct device_attribute *devattr, char *buf)
 {
 	struct pc87360_data *data = dev_get_drvdata(dev);
 	return sprintf(buf, "%s\n", data->name);
@@ -1086,7 +1150,15 @@ static int __devinit pc87360_probe(struct platform_device *pdev)
 			    || (err = device_create_file(dev,
 					&temp_crit[i].dev_attr))
 			    || (err = device_create_file(dev,
-					&temp_status[i].dev_attr)))
+					&temp_status[i].dev_attr))
+			    || (err = device_create_file(dev,
+					&temp_min_alarm[i].dev_attr))
+			    || (err = device_create_file(dev,
+					&temp_max_alarm[i].dev_attr))
+			    || (err = device_create_file(dev,
+					&temp_crit_alarm[i].dev_attr))
+			    || (err = device_create_file(dev,
+					&temp_fault[i].dev_attr)))
 				goto ERROR3;
 		}
 		if ((err = device_create_file(dev, &dev_attr_alarms_temp)))
@@ -1192,6 +1264,10 @@ static void pc87360_write_value(struct pc87360_data *data, u8 ldi, u8 bank,
 #define CHAN_ALM_ENA	0x10	/* propagate to alarms-reg ?? (chk val!) */
 #define CHAN_READY	(CHAN_ENA|CHAN_CNVRTD) /* sample ready mask */
 
+#define TEMP_OTS_OE	0x20	/* OTS Output Enable */
+#define VIN_RW1C_MASK	(CHAN_READY|CHAN_ALM_MAX|CHAN_ALM_MIN)   /* 0x87 */
+#define TEMP_RW1C_MASK	(VIN_RW1C_MASK|TEMP_ALM_CRIT|TEMP_FAULT) /* 0xCF */
+
 static void pc87360_init_device(struct platform_device *pdev,
 				int use_thermistors)
 {
-- 
GitLab


From 2a32ec2500514109754e07721d9d5d431a706a31 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Sat, 18 Oct 2008 20:27:33 -0700
Subject: [PATCH 751/895] hwmon/pc87360 separate alarm files: define LDNI_MAX
 const

Driver handles 3 logical devices in fixed length array.  Give this a
define-d constant.

Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: "Mark M. Hoffman" <mhoffman@lightlink.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/pc87360.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index c80ee5f056ba..7153b630ad4c 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -75,7 +75,8 @@ MODULE_PARM_DESC(force_id, "Override the detected device ID");
 #define FSCM	0x09	/* Logical device: fans */
 #define VLM	0x0d	/* Logical device: voltages */
 #define TMS	0x0e	/* Logical device: temperatures */
-static const u8 logdev[3] = { FSCM, VLM, TMS };
+#define LDNI_MAX 3
+static const u8 logdev[LDNI_MAX] = { FSCM, VLM, TMS };
 
 #define LD_FAN		0
 #define LD_IN		1
@@ -1074,7 +1075,7 @@ static int __devinit pc87360_probe(struct platform_device *pdev)
 	mutex_init(&data->update_lock);
 	platform_set_drvdata(pdev, data);
 
-	for (i = 0; i < 3; i++) {
+	for (i = 0; i < LDNI_MAX; i++) {
 		if (((data->address[i] = extra_isa[i]))
 		 && !request_region(extra_isa[i], PC87360_EXTENT,
 		 		    pc87360_driver.driver.name)) {
-- 
GitLab


From 8ca136741e15e8546e787ae92a7f4aeca84be494 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Sat, 18 Oct 2008 20:27:34 -0700
Subject: [PATCH 752/895] hwmon/pc87360 separate alarm files: add dev_dbg help

temp and vin status register values may be set by chip specifications, set
again by bios, or by this previously loaded driver.  Debug output nicely
displays modprobe init=\d actions.

Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: "Mark M. Hoffman" <mhoffman@lightlink.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/pc87360.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 7153b630ad4c..6804a3570f39 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -1290,10 +1290,11 @@ static void pc87360_init_device(struct platform_device *pdev,
 
 	nr = data->innr < 11 ? data->innr : 11;
 	for (i = 0; i < nr; i++) {
+		reg = pc87360_read_value(data, LD_IN, i,
+					 PC87365_REG_IN_STATUS);
+		dev_dbg(&pdev->dev, "bios in%d status:0x%02x\n", i, reg);
 		if (init >= init_in[i]) {
 			/* Forcibly enable voltage channel */
-			reg = pc87360_read_value(data, LD_IN, i,
-						 PC87365_REG_IN_STATUS);
 			if (!(reg & CHAN_ENA)) {
 				dev_dbg(&pdev->dev, "Forcibly "
 					"enabling in%d\n", i);
@@ -1306,18 +1307,23 @@ static void pc87360_init_device(struct platform_device *pdev,
 
 	/* We can't blindly trust the Super-I/O space configuration bit,
 	   most BIOS won't set it properly */
+	dev_dbg(&pdev->dev, "bios thermistors:%d\n", use_thermistors);
 	for (i = 11; i < data->innr; i++) {
 		reg = pc87360_read_value(data, LD_IN, i,
 					 PC87365_REG_TEMP_STATUS);
 		use_thermistors = use_thermistors || (reg & CHAN_ENA);
+		/* thermistors are temp[4-6], measured on vin[11-14] */
+		dev_dbg(&pdev->dev, "bios temp%d_status:0x%02x\n", i-7, reg);
 	}
+	dev_dbg(&pdev->dev, "using thermistors:%d\n", use_thermistors);
 
 	i = use_thermistors ? 2 : 0;
 	for (; i < data->tempnr; i++) {
+		reg = pc87360_read_value(data, LD_TEMP, i,
+					 PC87365_REG_TEMP_STATUS);
+		dev_dbg(&pdev->dev, "bios temp%d_status:0x%02x\n", i+1, reg);
 		if (init >= init_temp[i]) {
 			/* Forcibly enable temperature channel */
-			reg = pc87360_read_value(data, LD_TEMP, i,
-						 PC87365_REG_TEMP_STATUS);
 			if (!(reg & CHAN_ENA)) {
 				dev_dbg(&pdev->dev, "Forcibly "
 					"enabling temp%d\n", i+1);
@@ -1359,6 +1365,7 @@ static void pc87360_init_device(struct platform_device *pdev,
 	if (data->innr) {
 		reg = pc87360_read_value(data, LD_IN, NO_BANK,
 					 PC87365_REG_IN_CONFIG);
+		dev_dbg(&pdev->dev, "bios vin-cfg:0x%02x\n", reg);
 		if (reg & CHAN_ENA) {
 			dev_dbg(&pdev->dev, "Forcibly "
 				"enabling monitoring (VLM)\n");
@@ -1371,6 +1378,7 @@ static void pc87360_init_device(struct platform_device *pdev,
 	if (data->tempnr) {
 		reg = pc87360_read_value(data, LD_TEMP, NO_BANK,
 					 PC87365_REG_TEMP_CONFIG);
+		dev_dbg(&pdev->dev, "bios temp-cfg:0x%02x\n", reg);
 		if (reg & CHAN_ENA) {
 			dev_dbg(&pdev->dev, "Forcibly enabling "
 				"monitoring (TMS)\n");
-- 
GitLab


From 865c295360f61c2f3634fb1387b4468fdc8287da Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Sat, 18 Oct 2008 20:27:35 -0700
Subject: [PATCH 753/895] hwmon/pc87360 separate alarm files: add
 therm-min/max/crit-alarms

Adds therm-min/max/crit-alarm callbacks, sensor-device-attribute
declarations, and refs to those new decls in the macro used to initialize
the therm_group (of sysfs files)

The thermistors use voltage channels to measure; so they don't have a
fault-alarm, but unlike the other voltages, they do have an overtemp,
which we call crit (by convention).

[akpm@linux-foundation.org: cleanup]
Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: "Mark M. Hoffman" <mhoffman@lightlink.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/pc87360.c | 58 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 6804a3570f39..5fbfa34c110e 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -714,12 +714,68 @@ static struct sensor_device_attribute therm_crit[] = {
 		    show_therm_crit, set_therm_crit, 2+11),
 };
 
+/* show_therm_min/max_alarm() reads data from the per-channel voltage
+   status register (sec 11.5.12) */
+
+static ssize_t show_therm_min_alarm(struct device *dev,
+				struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_therm_max_alarm(struct device *dev,
+				struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MAX));
+}
+static ssize_t show_therm_crit_alarm(struct device *dev,
+				struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->in_status[nr] & TEMP_ALM_CRIT));
+}
+
+static struct sensor_device_attribute therm_min_alarm[] = {
+	SENSOR_ATTR(temp4_min_alarm, S_IRUGO,
+		    show_therm_min_alarm, NULL, 0+11),
+	SENSOR_ATTR(temp5_min_alarm, S_IRUGO,
+		    show_therm_min_alarm, NULL, 1+11),
+	SENSOR_ATTR(temp6_min_alarm, S_IRUGO,
+		    show_therm_min_alarm, NULL, 2+11),
+};
+static struct sensor_device_attribute therm_max_alarm[] = {
+	SENSOR_ATTR(temp4_max_alarm, S_IRUGO,
+		    show_therm_max_alarm, NULL, 0+11),
+	SENSOR_ATTR(temp5_max_alarm, S_IRUGO,
+		    show_therm_max_alarm, NULL, 1+11),
+	SENSOR_ATTR(temp6_max_alarm, S_IRUGO,
+		    show_therm_max_alarm, NULL, 2+11),
+};
+static struct sensor_device_attribute therm_crit_alarm[] = {
+	SENSOR_ATTR(temp4_crit_alarm, S_IRUGO,
+		    show_therm_crit_alarm, NULL, 0+11),
+	SENSOR_ATTR(temp5_crit_alarm, S_IRUGO,
+		    show_therm_crit_alarm, NULL, 1+11),
+	SENSOR_ATTR(temp6_crit_alarm, S_IRUGO,
+		    show_therm_crit_alarm, NULL, 2+11),
+};
+
 #define THERM_UNIT_ATTRS(X) \
 	&therm_input[X].dev_attr.attr,	\
 	&therm_status[X].dev_attr.attr,	\
 	&therm_min[X].dev_attr.attr,	\
 	&therm_max[X].dev_attr.attr,	\
-	&therm_crit[X].dev_attr.attr
+	&therm_crit[X].dev_attr.attr,	\
+	&therm_min_alarm[X].dev_attr.attr, \
+	&therm_max_alarm[X].dev_attr.attr, \
+	&therm_crit_alarm[X].dev_attr.attr
 
 static struct attribute * pc8736x_therm_attr_array[] = {
 	THERM_UNIT_ATTRS(0),
-- 
GitLab


From 05224091af06177c0ce7c1fae1f498455b47a6be Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 18 Oct 2008 20:27:35 -0700
Subject: [PATCH 754/895] hwmon: applesmc: specified number of bytes to read
 should match actual

At one single place in the code, the specified number of bytes to read and
the actual number of bytes read differ by one.  This one-liner patch fixes
that inconsistency.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Cc: Nicolas Boichat <nicolas@boichat.ch>
Cc: Riki Oktarianto <rkoktarianto@gmail.com>
Cc: Mark M. Hoffman <mhoffman@lightlink.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/applesmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index b06b8e090a27..b401975bc0da 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -325,7 +325,7 @@ static int applesmc_get_key_type(char* key, char* type)
 			return -EIO;
 	}
 
-	outb(5, APPLESMC_DATA_PORT);
+	outb(6, APPLESMC_DATA_PORT);
 
 	for (i = 0; i < 6; i++) {
 		if (__wait_status(0x05))
-- 
GitLab


From 84d2d7f2ee98dc9bd799790c89b5eb8280af2d37 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 18 Oct 2008 20:27:38 -0700
Subject: [PATCH 755/895] hwmon: applesmc: fix the 'wait status failed: c != 8'
 problem

On many Macbooks since mid 2007, the Pro, C2D and Air models, applesmc
fails to read some or all SMC ports.  This problem has various effects,
such as flooded logfiles, malfunctioning temperature sensors,
accelerometers failing to initialize, and difficulties getting backlight
functionality to work properly.

The root of the problem seems to be the command protocol.  The current
code sends out a command byte, then repeatedly polls for an ack before
continuing to send or recieve data.  From experiments leading to this
patch, it seems the command protocol never quite worked or changed so that
one now sends a command byte, waits a little bit, polls for an ack, and if
it fails, repeats the whole thing by sending the command byte again.

This patch implements a send_command function according to the new
interpretation of the protocol, and should work also for earlier models.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Cc: Nicolas Boichat <nicolas@boichat.ch>
Cc: Riki Oktarianto <rkoktarianto@gmail.com>
Cc: Mark M. Hoffman <mhoffman@lightlink.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/applesmc.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index b401975bc0da..d5bd0cadbf31 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -190,6 +190,26 @@ static int __wait_status(u8 val)
 	return -EIO;
 }
 
+/*
+ * special treatment of command port - on newer macbooks, it seems necessary
+ * to resend the command byte before polling the status again. Callers must
+ * hold applesmc_lock.
+ */
+static int send_command(u8 cmd)
+{
+	int i;
+	for (i = 0; i < 1000; i++) {
+		outb(cmd, APPLESMC_CMD_PORT);
+		udelay(5);
+		if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == 0x0c)
+			return 0;
+		udelay(5);
+	}
+	printk(KERN_WARNING "applesmc: command failed: %x -> %x\n",
+		cmd, inb(APPLESMC_CMD_PORT));
+	return -EIO;
+}
+
 /*
  * applesmc_read_key - reads len bytes from a given key, and put them in buffer.
  * Returns zero on success or a negative error on failure. Callers must
@@ -205,8 +225,7 @@ static int applesmc_read_key(const char* key, u8* buffer, u8 len)
 		return -EINVAL;
 	}
 
-	outb(APPLESMC_READ_CMD, APPLESMC_CMD_PORT);
-	if (__wait_status(0x0c))
+	if (send_command(APPLESMC_READ_CMD))
 		return -EIO;
 
 	for (i = 0; i < 4; i++) {
@@ -249,8 +268,7 @@ static int applesmc_write_key(const char* key, u8* buffer, u8 len)
 		return -EINVAL;
 	}
 
-	outb(APPLESMC_WRITE_CMD, APPLESMC_CMD_PORT);
-	if (__wait_status(0x0c))
+	if (send_command(APPLESMC_WRITE_CMD))
 		return -EIO;
 
 	for (i = 0; i < 4; i++) {
@@ -284,8 +302,7 @@ static int applesmc_get_key_at_index(int index, char* key)
 	readkey[2] = index >> 8;
 	readkey[3] = index;
 
-	outb(APPLESMC_GET_KEY_BY_INDEX_CMD, APPLESMC_CMD_PORT);
-	if (__wait_status(0x0c))
+	if (send_command(APPLESMC_GET_KEY_BY_INDEX_CMD))
 		return -EIO;
 
 	for (i = 0; i < 4; i++) {
@@ -315,8 +332,7 @@ static int applesmc_get_key_type(char* key, char* type)
 {
 	int i;
 
-	outb(APPLESMC_GET_KEY_TYPE_CMD, APPLESMC_CMD_PORT);
-	if (__wait_status(0x0c))
+	if (send_command(APPLESMC_GET_KEY_TYPE_CMD))
 		return -EIO;
 
 	for (i = 0; i < 4; i++) {
-- 
GitLab


From 02fcbd144d684167aa67b1d3ad68f18d265f2d08 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 18 Oct 2008 20:27:39 -0700
Subject: [PATCH 756/895] hwmon: applesmc: prolong status wait

The time to wait for a status change while reading or writing to the SMC
ports is a balance between read reliability and system performance.  The
current setting yields rougly three errors in a thousand when
simultaneously reading three different temperature values on a Macbook
Air.  This patch increases the setting to a value yielding roughly one
error in ten thousand, with no noticable system performance degradation.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Cc: Nicolas Boichat <nicolas@boichat.ch>
Cc: Riki Oktarianto <rkoktarianto@gmail.com>
Cc: Mark M. Hoffman <mhoffman@lightlink.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/applesmc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index d5bd0cadbf31..9a43c4b60ea9 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -163,7 +163,7 @@ static unsigned int key_at_index;
 static struct workqueue_struct *applesmc_led_wq;
 
 /*
- * __wait_status - Wait up to 2ms for the status port to get a certain value
+ * __wait_status - Wait up to 10ms for the status port to get a certain value
  * (masked with 0x0f), returning zero if the value is obtained.  Callers must
  * hold applesmc_lock.
  */
@@ -173,7 +173,7 @@ static int __wait_status(u8 val)
 
 	val = val & APPLESMC_STATUS_MASK;
 
-	for (i = 0; i < 200; i++) {
+	for (i = 0; i < 1000; i++) {
 		if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == val) {
 			if (debug)
 				printk(KERN_DEBUG
-- 
GitLab


From 8bd1a12a51871f0c870612d8220b1485d6aefc73 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 18 Oct 2008 20:27:39 -0700
Subject: [PATCH 757/895] hwmon: applesmc: allow for variable ALV0 and ALV1
 package length

On some recent Macbooks, the package length for the light sensors ALV0 and
ALV1 has changed from 6 to 10.  This patch allows for a variable package
length encompassing both variants.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Cc: Nicolas Boichat <nicolas@boichat.ch>
Cc: Riki Oktarianto <rkoktarianto@gmail.com>
Cc: Mark M. Hoffman <mhoffman@lightlink.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/applesmc.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 9a43c4b60ea9..de696b67a4fc 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -57,8 +57,8 @@
 
 #define KEY_COUNT_KEY		"#KEY" /* r-o ui32 */
 
-#define LIGHT_SENSOR_LEFT_KEY	"ALV0" /* r-o {alv (6 bytes) */
-#define LIGHT_SENSOR_RIGHT_KEY	"ALV1" /* r-o {alv (6 bytes) */
+#define LIGHT_SENSOR_LEFT_KEY	"ALV0" /* r-o {alv (6-10 bytes) */
+#define LIGHT_SENSOR_RIGHT_KEY	"ALV1" /* r-o {alv (6-10 bytes) */
 #define BACKLIGHT_KEY		"LKSB" /* w-o {lkb (2 bytes) */
 
 #define CLAMSHELL_KEY		"MSLD" /* r-o ui8 (unused) */
@@ -543,17 +543,27 @@ out:
 static ssize_t applesmc_light_show(struct device *dev,
 				struct device_attribute *attr, char *sysfsbuf)
 {
+	static int data_length;
 	int ret;
 	u8 left = 0, right = 0;
-	u8 buffer[6];
+	u8 buffer[10], query[6];
 
 	mutex_lock(&applesmc_lock);
 
-	ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, 6);
+	if (!data_length) {
+		ret = applesmc_get_key_type(LIGHT_SENSOR_LEFT_KEY, query);
+		if (ret)
+			goto out;
+		data_length = clamp_val(query[0], 0, 10);
+		printk(KERN_INFO "applesmc: light sensor data length set to "
+			"%d\n", data_length);
+	}
+
+	ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length);
 	left = buffer[2];
 	if (ret)
 		goto out;
-	ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, 6);
+	ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, data_length);
 	right = buffer[2];
 
 out:
-- 
GitLab


From f5274c972bac2d14c092a9c634e03f51785c7b76 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 18 Oct 2008 20:27:40 -0700
Subject: [PATCH 758/895] hwmon: applesmc: add support for Macbook Air

This patch adds accelerometer, backlight and temperature sensor support
for the Macbook Air.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Cc: Nicolas Boichat <nicolas@boichat.ch>
Cc: Riki Oktarianto <rkoktarianto@gmail.com>
Cc: Mark M. Hoffman <mhoffman@lightlink.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/applesmc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index de696b67a4fc..b57bd82504a7 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -104,6 +104,9 @@ static const char* temperature_sensors_sets[][36] = {
 /* Set 6: Macbook3 set */
 	{ "TB0T", "TC0D", "TC0P", "TM0P", "TN0P", "TTF0", "TW0P", "Th0H",
 	  "Th0S", "Th1H", NULL },
+/* Set 7: Macbook Air */
+	{ "TB0T", "TB1S", "TB1T", "TB2S", "TB2T", "TC0D", "TC0P", "TCFP",
+	  "TTF0", "TW0P", "Th0H", "Tp0P", "TpFP", "Ts0P", "Ts0S", NULL },
 };
 
 /* List of keys used to read/write fan speeds */
@@ -1259,11 +1262,17 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = {
 	{ .accelerometer = 0, .light = 0, .temperature_set = 5 },
 /* MacBook3: accelerometer and temperature set 6 */
 	{ .accelerometer = 1, .light = 0, .temperature_set = 6 },
+/* MacBook Air: accelerometer, backlight and temperature set 7 */
+	{ .accelerometer = 1, .light = 1, .temperature_set = 7 },
 };
 
 /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1".
  * So we need to put "Apple MacBook Pro" before "Apple MacBook". */
 static __initdata struct dmi_system_id applesmc_whitelist[] = {
+	{ applesmc_dmi_match, "Apple MacBook Air", {
+	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") },
+		(void*)&applesmc_dmi_data[7]},
 	{ applesmc_dmi_match, "Apple MacBook Pro", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro") },
-- 
GitLab


From 7b5e3cb28f52f42d8cf2b36771be580f33bbc7ae Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sat, 18 Oct 2008 20:27:41 -0700
Subject: [PATCH 759/895] drivers/hwmon/applesmc.c: remove unneeded casts

dmi_system_id.driver_data is already void*.

Cc: Henrik Rydberg <rydberg@euromail.se>
Cc: Nicolas Boichat <nicolas@boichat.ch>
Cc: Riki Oktarianto <rkoktarianto@gmail.com>
Cc: Mark M. Hoffman <mhoffman@lightlink.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/applesmc.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index b57bd82504a7..208520c6eb0e 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -1272,35 +1272,35 @@ static __initdata struct dmi_system_id applesmc_whitelist[] = {
 	{ applesmc_dmi_match, "Apple MacBook Air", {
 	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") },
-		(void*)&applesmc_dmi_data[7]},
+		&applesmc_dmi_data[7]},
 	{ applesmc_dmi_match, "Apple MacBook Pro", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro") },
-		(void*)&applesmc_dmi_data[0]},
+		&applesmc_dmi_data[0]},
 	{ applesmc_dmi_match, "Apple MacBook (v2)", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBook2") },
-		(void*)&applesmc_dmi_data[1]},
+		&applesmc_dmi_data[1]},
 	{ applesmc_dmi_match, "Apple MacBook (v3)", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBook3") },
-		(void*)&applesmc_dmi_data[6]},
+		&applesmc_dmi_data[6]},
 	{ applesmc_dmi_match, "Apple MacBook", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBook") },
-		(void*)&applesmc_dmi_data[2]},
+		&applesmc_dmi_data[2]},
 	{ applesmc_dmi_match, "Apple Macmini", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"Macmini") },
-		(void*)&applesmc_dmi_data[3]},
+		&applesmc_dmi_data[3]},
 	{ applesmc_dmi_match, "Apple MacPro2", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacPro2") },
-		(void*)&applesmc_dmi_data[4]},
+		&applesmc_dmi_data[4]},
 	{ applesmc_dmi_match, "Apple iMac", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"iMac") },
-		(void*)&applesmc_dmi_data[5]},
+		&applesmc_dmi_data[5]},
 	{ .ident = NULL }
 };
 
-- 
GitLab


From d7549905f157f217b888edb0f2055f5090eaf1d8 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 18 Oct 2008 20:27:41 -0700
Subject: [PATCH 760/895] hwmon: applesmc: Add support for Macbook Pro 4

Adds temperature sensor support for the Macbook Pro 4.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Cc: Nicolas Boichat <nicolas@boichat.ch>
Cc: Riki Oktarianto <rkoktarianto@gmail.com>
Cc: Mark M. Hoffman <mhoffman@lightlink.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/applesmc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 208520c6eb0e..efca98a2e35f 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -107,6 +107,9 @@ static const char* temperature_sensors_sets[][36] = {
 /* Set 7: Macbook Air */
 	{ "TB0T", "TB1S", "TB1T", "TB2S", "TB2T", "TC0D", "TC0P", "TCFP",
 	  "TTF0", "TW0P", "Th0H", "Tp0P", "TpFP", "Ts0P", "Ts0S", NULL },
+/* Set 8: Macbook Pro 4,1 (Penryn) */
+	{ "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P", "Th0H",
+	  "Th1H", "Th2H", "Tm0P", "Ts0P", NULL },
 };
 
 /* List of keys used to read/write fan speeds */
@@ -1264,6 +1267,8 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = {
 	{ .accelerometer = 1, .light = 0, .temperature_set = 6 },
 /* MacBook Air: accelerometer, backlight and temperature set 7 */
 	{ .accelerometer = 1, .light = 1, .temperature_set = 7 },
+/* MacBook Pro 4: accelerometer, backlight and temperature set 8 */
+	{ .accelerometer = 1, .light = 1, .temperature_set = 8 },
 };
 
 /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1".
@@ -1273,6 +1278,10 @@ static __initdata struct dmi_system_id applesmc_whitelist[] = {
 	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") },
 		&applesmc_dmi_data[7]},
+	{ applesmc_dmi_match, "Apple MacBook Pro 4", {
+	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro4") },
+		&applesmc_dmi_data[8]},
 	{ applesmc_dmi_match, "Apple MacBook Pro", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro") },
-- 
GitLab


From 07e8dbd3ebbdedc71335049dd4b0d542cb038d7d Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 18 Oct 2008 20:27:42 -0700
Subject: [PATCH 761/895] hwmon: applesmc: Add support for Macbook Pro 3

Add temperature sensor support for Macbook Pro 3.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Cc: Nicolas Boichat <nicolas@boichat.ch>
Cc: Riki Oktarianto <rkoktarianto@gmail.com>
Cc: Mark M. Hoffman <mhoffman@lightlink.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/applesmc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index efca98a2e35f..59266ce60cc2 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -110,6 +110,9 @@ static const char* temperature_sensors_sets[][36] = {
 /* Set 8: Macbook Pro 4,1 (Penryn) */
 	{ "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P", "Th0H",
 	  "Th1H", "Th2H", "Tm0P", "Ts0P", NULL },
+/* Set 9: Macbook Pro 3,1 (Santa Rosa) */
+	{ "TALP", "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P",
+	  "Th0H", "Th1H", "Th2H", "Tm0P", "Ts0P", NULL },
 };
 
 /* List of keys used to read/write fan speeds */
@@ -1269,6 +1272,8 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = {
 	{ .accelerometer = 1, .light = 1, .temperature_set = 7 },
 /* MacBook Pro 4: accelerometer, backlight and temperature set 8 */
 	{ .accelerometer = 1, .light = 1, .temperature_set = 8 },
+/* MacBook Pro 3: accelerometer, backlight and temperature set 9 */
+	{ .accelerometer = 1, .light = 1, .temperature_set = 9 },
 };
 
 /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1".
@@ -1282,6 +1287,10 @@ static __initdata struct dmi_system_id applesmc_whitelist[] = {
 	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro4") },
 		&applesmc_dmi_data[8]},
+	{ applesmc_dmi_match, "Apple MacBook Pro 3", {
+	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro3") },
+		&applesmc_dmi_data[9]},
 	{ applesmc_dmi_match, "Apple MacBook Pro", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro") },
-- 
GitLab


From 8c9398d1e9766e3659e277acb2e8ca1c17684139 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 18 Oct 2008 20:27:43 -0700
Subject: [PATCH 762/895] hwmon: applesmc: lighter wait mechanism, drastic
 improvement

The read fail ratio is sensitive to the delay between the first byte
written and the first byte read; apparently the sensors cannot be rushed.
Increasing the minimum wait time, without changing the total wait time,
improves the fail ratio from a 8% chance that any of the sensors fails in
one read, down to 0.4%, on a Macbook Air.  On a Macbook Pro 3,1, the
effect is even more apparent.  By reducing the number of status polls, the
ratio is further improved to below 0.1%.  Finally, increasing the total
wait time brings the fail ratio down to virtually zero.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Tested-by: Bob McElrath <bob@mcelrath.org>
Cc: Nicolas Boichat <nicolas@boichat.ch>
Cc: "Mark M. Hoffman" <mhoffman@lightlink.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/applesmc.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 59266ce60cc2..bc011da79e14 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -49,6 +49,9 @@
 
 #define APPLESMC_MAX_DATA_LENGTH 32
 
+#define APPLESMC_MIN_WAIT	0x0040
+#define APPLESMC_MAX_WAIT	0x8000
+
 #define APPLESMC_STATUS_MASK	0x0f
 #define APPLESMC_READ_CMD	0x10
 #define APPLESMC_WRITE_CMD	0x11
@@ -172,25 +175,25 @@ static unsigned int key_at_index;
 static struct workqueue_struct *applesmc_led_wq;
 
 /*
- * __wait_status - Wait up to 10ms for the status port to get a certain value
+ * __wait_status - Wait up to 32ms for the status port to get a certain value
  * (masked with 0x0f), returning zero if the value is obtained.  Callers must
  * hold applesmc_lock.
  */
 static int __wait_status(u8 val)
 {
-	unsigned int i;
+	int us;
 
 	val = val & APPLESMC_STATUS_MASK;
 
-	for (i = 0; i < 1000; i++) {
+	for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
+		udelay(us);
 		if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == val) {
 			if (debug)
 				printk(KERN_DEBUG
-						"Waited %d us for status %x\n",
-						i*10, val);
+					"Waited %d us for status %x\n",
+					2 * us - APPLESMC_MIN_WAIT, val);
 			return 0;
 		}
-		udelay(10);
 	}
 
 	printk(KERN_WARNING "applesmc: wait status failed: %x != %x\n",
@@ -206,13 +209,12 @@ static int __wait_status(u8 val)
  */
 static int send_command(u8 cmd)
 {
-	int i;
-	for (i = 0; i < 1000; i++) {
+	int us;
+	for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
 		outb(cmd, APPLESMC_CMD_PORT);
-		udelay(5);
+		udelay(us);
 		if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == 0x0c)
 			return 0;
-		udelay(5);
 	}
 	printk(KERN_WARNING "applesmc: command failed: %x -> %x\n",
 		cmd, inb(APPLESMC_CMD_PORT));
-- 
GitLab


From da1cfe1ae48b2af7394718ec07194806db5dfe61 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Sat, 18 Oct 2008 20:27:44 -0700
Subject: [PATCH 763/895] Char: sx, remove bogus iomap

readl/writel are not expected to accept iomap return value. Replace
bogus mapping by standard ioremap.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: <R.E.Wolff@BitWizard.nl>
Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/sx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 5b8d7a1aa3e6..ba4e86281fbf 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -2504,7 +2504,7 @@ static void __devexit sx_remove_card(struct sx_board *board,
 		del_timer(&board->timer);
 		if (pdev) {
 #ifdef CONFIG_PCI
-			pci_iounmap(pdev, board->base2);
+			iounmap(board->base2);
 			pci_release_region(pdev, IS_CF_BOARD(board) ? 3 : 2);
 #endif
 		} else {
@@ -2677,7 +2677,7 @@ static int __devinit sx_pci_probe(struct pci_dev *pdev,
 	}
 	board->hw_base = pci_resource_start(pdev, reg);
 	board->base2 =
-	board->base = pci_iomap(pdev, reg, WINDOW_LEN(board));
+	board->base = ioremap_nocache(board->hw_base, WINDOW_LEN(board));
 	if (!board->base) {
 		dev_err(&pdev->dev, "ioremap failed\n");
 		goto err_reg;
@@ -2703,7 +2703,7 @@ static int __devinit sx_pci_probe(struct pci_dev *pdev,
 
 	return 0;
 err_unmap:
-	pci_iounmap(pdev, board->base2);
+	iounmap(board->base2);
 err_reg:
 	pci_release_region(pdev, reg);
 err_flag:
-- 
GitLab


From 9f561dfceaafaae73ecfc81b1156dde3056e1ec9 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:27:45 -0700
Subject: [PATCH 764/895] make probe_serial_gsc() static

This patch makes the needlessly global probe_serial_gsc() static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250_gsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/serial/8250_gsc.c b/drivers/serial/8250_gsc.c
index 0416ad3bc127..418b4fe9a0a1 100644
--- a/drivers/serial/8250_gsc.c
+++ b/drivers/serial/8250_gsc.c
@@ -111,7 +111,7 @@ static struct parisc_driver serial_driver = {
 	.probe		= serial_init_chip,
 };
 
-int __init probe_serial_gsc(void)
+static int __init probe_serial_gsc(void)
 {
 	register_parisc_driver(&lasi_driver);
 	register_parisc_driver(&serial_driver);
-- 
GitLab


From 1f3ccaed13944b9cfa9af7f6c70bfb292e42a347 Mon Sep 17 00:00:00 2001
From: roel kluin <roel.kluin@gmail.com>
Date: Sat, 18 Oct 2008 20:27:46 -0700
Subject: [PATCH 765/895] Altix serial: fix

In function sn_sal_switch_to_asynch(): drivers/serial/sn_console.c:713:

HZ * SN_SAL_UART_FIFO_DEPTH / SN_SAL_UART_FIFO_SPEED_CPS;

After preprocessing (see defines in patch) this becomes HZ * 16 / 9600 / 10
(associativity from left to right), not equivalent to HZ * 16 / 960.

Looks-obviously-right-to: Tony Luck <tony.luck@intel.com>
Cc: Jes Sorensen <jes@sgi.com>
Acked-by: Pat Gefre <pfg@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/sn_console.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c
index b73e3c0056cd..d5276c012f78 100644
--- a/drivers/serial/sn_console.c
+++ b/drivers/serial/sn_console.c
@@ -61,7 +61,7 @@
 #define SN_SAL_BUFFER_SIZE (64 * (1 << 10))
 
 #define SN_SAL_UART_FIFO_DEPTH 16
-#define SN_SAL_UART_FIFO_SPEED_CPS 9600/10
+#define SN_SAL_UART_FIFO_SPEED_CPS (9600/10)
 
 /* sn_transmit_chars() calling args */
 #define TRANSMIT_BUFFERED	0
-- 
GitLab


From c8fc40cd345bfd88d8a98e7916909b9143502999 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Sat, 18 Oct 2008 20:27:47 -0700
Subject: [PATCH 766/895] rtc-cmos: export second NVRAM bank

Teach rtc-cmos about the second bank of registers found on most modern x86
systems, giving access to 128 bytes more NVRAM.

This version only sees that extra NVRAM when both register banks are
provided as part of *one* PNP resource.  Since BIOS on some systems
presents them using two IO resources, and nothing merges them, this can't
always show all the NVRAM.  (We're supposed to be able to use PNP id
PNP0b01 too, but BIOS tables doesn't often seem to use that particular
option.)

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-cmos.c | 70 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 63 insertions(+), 7 deletions(-)

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 963ad0b6a4e9..f1695d7fa0fa 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -143,6 +143,43 @@ static inline int hpet_unregister_irq_handler(irq_handler_t handler)
 
 /*----------------------------------------------------------------*/
 
+#ifdef RTC_PORT
+
+/* Most newer x86 systems have two register banks, the first used
+ * for RTC and NVRAM and the second only for NVRAM.  Caller must
+ * own rtc_lock ... and we won't worry about access during NMI.
+ */
+#define can_bank2	true
+
+static inline unsigned char cmos_read_bank2(unsigned char addr)
+{
+	outb(addr, RTC_PORT(2));
+	return inb(RTC_PORT(3));
+}
+
+static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
+{
+	outb(addr, RTC_PORT(2));
+	outb(val, RTC_PORT(2));
+}
+
+#else
+
+#define can_bank2	false
+
+static inline unsigned char cmos_read_bank2(unsigned char addr)
+{
+	return 0;
+}
+
+static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
+{
+}
+
+#endif
+
+/*----------------------------------------------------------------*/
+
 static int cmos_read_time(struct device *dev, struct rtc_time *t)
 {
 	/* REVISIT:  if the clock has a "century" register, use
@@ -491,12 +528,21 @@ cmos_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
 
 	if (unlikely(off >= attr->size))
 		return 0;
+	if (unlikely(off < 0))
+		return -EINVAL;
 	if ((off + count) > attr->size)
 		count = attr->size - off;
 
+	off += NVRAM_OFFSET;
 	spin_lock_irq(&rtc_lock);
-	for (retval = 0, off += NVRAM_OFFSET; count--; retval++, off++)
-		*buf++ = CMOS_READ(off);
+	for (retval = 0; count; count--, off++, retval++) {
+		if (off < 128)
+			*buf++ = CMOS_READ(off);
+		else if (can_bank2)
+			*buf++ = cmos_read_bank2(off);
+		else
+			break;
+	}
 	spin_unlock_irq(&rtc_lock);
 
 	return retval;
@@ -512,6 +558,8 @@ cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
 	cmos = dev_get_drvdata(container_of(kobj, struct device, kobj));
 	if (unlikely(off >= attr->size))
 		return -EFBIG;
+	if (unlikely(off < 0))
+		return -EINVAL;
 	if ((off + count) > attr->size)
 		count = attr->size - off;
 
@@ -520,15 +568,20 @@ cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
 	 * here.  If userspace is smart enough to know what fields of
 	 * NVRAM to update, updating checksums is also part of its job.
 	 */
+	off += NVRAM_OFFSET;
 	spin_lock_irq(&rtc_lock);
-	for (retval = 0, off += NVRAM_OFFSET; count--; retval++, off++) {
+	for (retval = 0; count; count--, off++, retval++) {
 		/* don't trash RTC registers */
 		if (off == cmos->day_alrm
 				|| off == cmos->mon_alrm
 				|| off == cmos->century)
 			buf++;
-		else
+		else if (off < 128)
 			CMOS_WRITE(*buf++, off);
+		else if (can_bank2)
+			cmos_write_bank2(*buf++, off);
+		else
+			break;
 	}
 	spin_unlock_irq(&rtc_lock);
 
@@ -631,8 +684,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 
 	/* Heuristic to deduce NVRAM size ... do what the legacy NVRAM
 	 * driver did, but don't reject unknown configs.   Old hardware
-	 * won't address 128 bytes, and for now we ignore the way newer
-	 * chips can address 256 bytes (using two more i/o ports).
+	 * won't address 128 bytes.  Newer chips have multiple banks,
+	 * though they may not be listed in one I/O resource.
 	 */
 #if	defined(CONFIG_ATARI)
 	address_space = 64;
@@ -642,6 +695,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 #warning Assuming 128 bytes of RTC+NVRAM address space, not 64 bytes.
 	address_space = 128;
 #endif
+	if (can_bank2 && ports->end > (ports->start + 1))
+		address_space = 256;
 
 	/* For ACPI systems extension info comes from the FADT.  On others,
 	 * board specific setup provides it as appropriate.  Systems where
@@ -740,7 +795,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 		goto cleanup2;
 	}
 
-	pr_info("%s: alarms up to one %s%s%s\n",
+	pr_info("%s: alarms up to one %s%s, %zd bytes nvram, %s irqs\n",
 			cmos_rtc.rtc->dev.bus_id,
 			is_valid_irq(rtc_irq)
 				?  (cmos_rtc.mon_alrm
@@ -749,6 +804,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 						? "month" : "day"))
 				: "no",
 			cmos_rtc.century ? ", y3k" : "",
+			nvram.size,
 			is_hpet_enabled() ? ", hpet irqs" : "");
 
 	return 0;
-- 
GitLab


From e3274e915061a65717454106301d4a5ea8f71783 Mon Sep 17 00:00:00 2001
From: "Steven A. Falco" <sfalco@harris.com>
Date: Sat, 18 Oct 2008 20:27:48 -0700
Subject: [PATCH 767/895] gpio: modify sysfs gpio export so that "value"
 displays as 0 or 1

gpiolib can export GPIOs to userspace via sysfs.  This patch modifies the
gpio_value_show() so that any non-zero value is explicitly printed as "1",
rather than whatever numerical value the lower-level driver returns.

Signed-off-by: Steve Falco <sfalco@harris.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpiolib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 9112830107a5..70de72c443e4 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -248,7 +248,7 @@ static ssize_t gpio_value_show(struct device *dev,
 	if (!test_bit(FLAG_EXPORT, &desc->flags))
 		status = -EIO;
 	else
-		status = sprintf(buf, "%d\n", gpio_get_value_cansleep(gpio));
+		status = sprintf(buf, "%d\n", !!gpio_get_value_cansleep(gpio));
 
 	mutex_unlock(&sysfs_lock);
 	return status;
-- 
GitLab


From 978ccaa8ea5d8c7bf6b676209f2fc126eae6355b Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Sat, 18 Oct 2008 20:27:49 -0700
Subject: [PATCH 768/895] gpiolib: fix oops in gpio_get_value_cansleep()

We can get the following oops from gpio_get_value_cansleep() when a GPIO
controller doesn't provide a get() callback:

 Unable to handle kernel paging request for instruction fetch
 Faulting instruction address: 0x00000000
 Oops: Kernel access of bad area, sig: 11 [#1]
 [...]
 NIP [00000000] 0x0
 LR [c0182fb0] gpio_get_value_cansleep+0x40/0x50
 Call Trace:
 [c7b79e80] [c0183f28] gpio_value_show+0x5c/0x94
 [c7b79ea0] [c01a584c] dev_attr_show+0x30/0x7c
 [c7b79eb0] [c00d6b48] fill_read_buffer+0x68/0xe0
 [c7b79ed0] [c00d6c54] sysfs_read_file+0x94/0xbc
 [c7b79ef0] [c008f24c] vfs_read+0xb4/0x16c
 [c7b79f10] [c008f580] sys_read+0x4c/0x90
 [c7b79f40] [c0013a14] ret_from_syscall+0x0/0x38

It's OK to request the value of *any* GPIO; most GPIOs are bidirectional,
so configuring them as outputs just enables an output driver and doesn't
disable the input logic.

So the problem is that gpio_get_value_cansleep() isn't making the same
sanity check that gpio_get_value() does: making sure this GPIO isn't one
of the atypical "no input logic" cases.

Reported-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: <stable@kernel.org>		[2.6.27.x, 2.6.26.x, 2.6.25.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpiolib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 70de72c443e4..22edc4273ef6 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1105,7 +1105,7 @@ int gpio_get_value_cansleep(unsigned gpio)
 
 	might_sleep_if(extra_checks);
 	chip = gpio_to_chip(gpio);
-	return chip->get(chip, gpio - chip->base);
+	return chip->get ? chip->get(chip, gpio - chip->base) : 0;
 }
 EXPORT_SYMBOL_GPL(gpio_get_value_cansleep);
 
-- 
GitLab


From e53677113e32e6f118e31b8391a2eab7ee52c0a7 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Sat, 18 Oct 2008 20:27:51 -0700
Subject: [PATCH 769/895] fb: push down the BKL in the ioctl handler

Framebuffer is heavily BKL dependant at the moment so just wrap the ioctl
handler in the driver as we push down.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/fbmem.c | 141 ++++++++++++++++++++++++++----------------
 1 file changed, 86 insertions(+), 55 deletions(-)

diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 217c5118ae9e..2a0f013dc37b 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1002,101 +1002,132 @@ fb_blank(struct fb_info *info, int blank)
  	return ret;
 }
 
-static int 
-fb_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+static long
+fb_ioctl(struct file *file, unsigned int cmd,
 	 unsigned long arg)
 {
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int fbidx = iminor(inode);
-	struct fb_info *info = registered_fb[fbidx];
-	struct fb_ops *fb = info->fbops;
+	struct fb_info *info;
+	struct fb_ops *fb;
 	struct fb_var_screeninfo var;
 	struct fb_fix_screeninfo fix;
 	struct fb_con2fbmap con2fb;
 	struct fb_cmap_user cmap;
 	struct fb_event event;
 	void __user *argp = (void __user *)arg;
-	int i;
-	
-	if (!fb)
+	long ret = 0;
+
+	lock_kernel();
+	info = registered_fb[fbidx];
+	fb = info->fbops;
+
+	if (!fb) {
+		unlock_kernel();
 		return -ENODEV;
+	}
 	switch (cmd) {
 	case FBIOGET_VSCREENINFO:
-		return copy_to_user(argp, &info->var,
+		ret = copy_to_user(argp, &info->var,
 				    sizeof(var)) ? -EFAULT : 0;
+		break;
 	case FBIOPUT_VSCREENINFO:
-		if (copy_from_user(&var, argp, sizeof(var)))
-			return -EFAULT;
+		if (copy_from_user(&var, argp, sizeof(var))) {
+			ret =  -EFAULT;
+			break;
+		}
 		acquire_console_sem();
 		info->flags |= FBINFO_MISC_USEREVENT;
-		i = fb_set_var(info, &var);
+		ret = fb_set_var(info, &var);
 		info->flags &= ~FBINFO_MISC_USEREVENT;
 		release_console_sem();
-		if (i) return i;
-		if (copy_to_user(argp, &var, sizeof(var)))
-			return -EFAULT;
-		return 0;
+		if (ret == 0 && copy_to_user(argp, &var, sizeof(var)))
+			ret = -EFAULT;
+		break;
 	case FBIOGET_FSCREENINFO:
-		return copy_to_user(argp, &info->fix,
+		ret = copy_to_user(argp, &info->fix,
 				    sizeof(fix)) ? -EFAULT : 0;
+		break;
 	case FBIOPUTCMAP:
 		if (copy_from_user(&cmap, argp, sizeof(cmap)))
-			return -EFAULT;
-		return (fb_set_user_cmap(&cmap, info));
+			ret = -EFAULT;
+		else
+			ret = fb_set_user_cmap(&cmap, info);
+		break;
 	case FBIOGETCMAP:
 		if (copy_from_user(&cmap, argp, sizeof(cmap)))
-			return -EFAULT;
-		return fb_cmap_to_user(&info->cmap, &cmap);
+			ret = -EFAULT;
+		else
+			ret = fb_cmap_to_user(&info->cmap, &cmap);
+		break;
 	case FBIOPAN_DISPLAY:
-		if (copy_from_user(&var, argp, sizeof(var)))
-			return -EFAULT;
+		if (copy_from_user(&var, argp, sizeof(var))) {
+			ret = -EFAULT;
+			break;
+		}
 		acquire_console_sem();
-		i = fb_pan_display(info, &var);
+		ret = fb_pan_display(info, &var);
 		release_console_sem();
-		if (i)
-			return i;
-		if (copy_to_user(argp, &var, sizeof(var)))
-			return -EFAULT;
-		return 0;
+		if (ret == 0 && copy_to_user(argp, &var, sizeof(var)))
+			ret = -EFAULT;
+		break;
 	case FBIO_CURSOR:
-		return -EINVAL;
+		ret = -EINVAL;
+		break;
 	case FBIOGET_CON2FBMAP:
 		if (copy_from_user(&con2fb, argp, sizeof(con2fb)))
-			return -EFAULT;
-		if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
-		    return -EINVAL;
-		con2fb.framebuffer = -1;
-		event.info = info;
-		event.data = &con2fb;
-		fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event);
-		return copy_to_user(argp, &con2fb,
+			ret = -EFAULT;
+		else if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
+			ret = -EINVAL;
+		else {
+			con2fb.framebuffer = -1;
+			event.info = info;
+			event.data = &con2fb;
+			fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP,
+								&event);
+			ret = copy_to_user(argp, &con2fb,
 				    sizeof(con2fb)) ? -EFAULT : 0;
+		}
+		break;
 	case FBIOPUT_CON2FBMAP:
-		if (copy_from_user(&con2fb, argp, sizeof(con2fb)))
-			return - EFAULT;
-		if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
-		    return -EINVAL;
-		if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX)
-		    return -EINVAL;
-		if (!registered_fb[con2fb.framebuffer])
-		    request_module("fb%d", con2fb.framebuffer);
+		if (copy_from_user(&con2fb, argp, sizeof(con2fb))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) {
+			ret = -EINVAL;
+			break;
+		}
+		if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) {
+			ret = -EINVAL;
+			break;
+		}
 		if (!registered_fb[con2fb.framebuffer])
-		    return -EINVAL;
+			request_module("fb%d", con2fb.framebuffer);
+		if (!registered_fb[con2fb.framebuffer]) {
+			ret = -EINVAL;
+			break;
+		}
 		event.info = info;
 		event.data = &con2fb;
-		return fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP,
+		ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP,
 					      &event);
+		break;
 	case FBIOBLANK:
 		acquire_console_sem();
 		info->flags |= FBINFO_MISC_USEREVENT;
-		i = fb_blank(info, arg);
+		ret = fb_blank(info, arg);
 		info->flags &= ~FBINFO_MISC_USEREVENT;
 		release_console_sem();
-		return i;
+		break;;
 	default:
 		if (fb->fb_ioctl == NULL)
-			return -EINVAL;
-		return fb->fb_ioctl(info, cmd, arg);
+			ret = -ENOTTY;
+		else
+			ret = fb->fb_ioctl(info, cmd, arg);
 	}
+	unlock_kernel();
+	return ret;
 }
 
 #ifdef CONFIG_COMPAT
@@ -1150,7 +1181,7 @@ static int fb_getput_cmap(struct inode *inode, struct file *file,
 	    put_user(compat_ptr(data), &cmap->transp))
 		return -EFAULT;
 
-	err = fb_ioctl(inode, file, cmd, (unsigned long) cmap);
+	err = fb_ioctl(file, cmd, (unsigned long) cmap);
 
 	if (!err) {
 		if (copy_in_user(&cmap32->start,
@@ -1204,7 +1235,7 @@ static int fb_get_fscreeninfo(struct inode *inode, struct file *file,
 
 	old_fs = get_fs();
 	set_fs(KERNEL_DS);
-	err = fb_ioctl(inode, file, cmd, (unsigned long) &fix);
+	err = fb_ioctl(file, cmd, (unsigned long) &fix);
 	set_fs(old_fs);
 
 	if (!err)
@@ -1231,7 +1262,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case FBIOPUT_CON2FBMAP:
 		arg = (unsigned long) compat_ptr(arg);
 	case FBIOBLANK:
-		ret = fb_ioctl(inode, file, cmd, arg);
+		ret = fb_ioctl(file, cmd, arg);
 		break;
 
 	case FBIOGET_FSCREENINFO:
@@ -1358,7 +1389,7 @@ static const struct file_operations fb_fops = {
 	.owner =	THIS_MODULE,
 	.read =		fb_read,
 	.write =	fb_write,
-	.ioctl =	fb_ioctl,
+	.unlocked_ioctl = fb_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = fb_compat_ioctl,
 #endif
-- 
GitLab


From 3e680aae4e53ab54cdbb0c29257dae0cbb158e1c Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Sat, 18 Oct 2008 20:27:51 -0700
Subject: [PATCH 770/895] fb: convert lock/unlock_kernel() into local fb mutex

Change lock_kernel()/unlock_kernel() to local fb mutex.  Each frame buffer
instance has its own mutex.

The one line try_to_load() function is unrolled to request_module() in two
places for readability.

[righi.andrea@gmail.com: fb: fix NULL pointer BUG dereference in fb_open()]
Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/fbmem.c | 39 ++++++++++++++++++++-------------------
 include/linux/fb.h    |  1 +
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 2a0f013dc37b..cd5f20da738a 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1018,12 +1018,12 @@ fb_ioctl(struct file *file, unsigned int cmd,
 	void __user *argp = (void __user *)arg;
 	long ret = 0;
 
-	lock_kernel();
 	info = registered_fb[fbidx];
+	mutex_lock(&info->lock);
 	fb = info->fbops;
 
 	if (!fb) {
-		unlock_kernel();
+		mutex_unlock(&info->lock);
 		return -ENODEV;
 	}
 	switch (cmd) {
@@ -1126,7 +1126,7 @@ fb_ioctl(struct file *file, unsigned int cmd,
 		else
 			ret = fb->fb_ioctl(info, cmd, arg);
 	}
-	unlock_kernel();
+	mutex_unlock(&info->lock);
 	return ret;
 }
 
@@ -1253,7 +1253,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct fb_ops *fb = info->fbops;
 	long ret = -ENOIOCTLCMD;
 
-	lock_kernel();
+	mutex_lock(&info->lock);
 	switch(cmd) {
 	case FBIOGET_VSCREENINFO:
 	case FBIOPUT_VSCREENINFO:
@@ -1279,7 +1279,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			ret = fb->fb_compat_ioctl(info, cmd, arg);
 		break;
 	}
-	unlock_kernel();
+	mutex_unlock(&info->lock);
 	return ret;
 }
 #endif
@@ -1301,13 +1301,13 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
 		return -ENODEV;
 	if (fb->fb_mmap) {
 		int res;
-		lock_kernel();
+		mutex_lock(&info->lock);
 		res = fb->fb_mmap(info, vma);
-		unlock_kernel();
+		mutex_unlock(&info->lock);
 		return res;
 	}
 
-	lock_kernel();
+	mutex_lock(&info->lock);
 
 	/* frame buffer memory */
 	start = info->fix.smem_start;
@@ -1316,13 +1316,13 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
 		/* memory mapped io */
 		off -= len;
 		if (info->var.accel_flags) {
-			unlock_kernel();
+			mutex_unlock(&info->lock);
 			return -EINVAL;
 		}
 		start = info->fix.mmio_start;
 		len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.mmio_len);
 	}
-	unlock_kernel();
+	mutex_unlock(&info->lock);
 	start &= PAGE_MASK;
 	if ((vma->vm_end - vma->vm_start + off) > len)
 		return -EINVAL;
@@ -1346,13 +1346,13 @@ fb_open(struct inode *inode, struct file *file)
 
 	if (fbidx >= FB_MAX)
 		return -ENODEV;
-	lock_kernel();
-	if (!(info = registered_fb[fbidx]))
+	info = registered_fb[fbidx];
+	if (!info)
 		request_module("fb%d", fbidx);
-	if (!(info = registered_fb[fbidx])) {
-		res = -ENODEV;
-		goto out;
-	}
+	info = registered_fb[fbidx];
+	if (!info)
+		return -ENODEV;
+	mutex_lock(&info->lock);
 	if (!try_module_get(info->fbops->owner)) {
 		res = -ENODEV;
 		goto out;
@@ -1368,7 +1368,7 @@ fb_open(struct inode *inode, struct file *file)
 		fb_deferred_io_open(info, inode, file);
 #endif
 out:
-	unlock_kernel();
+	mutex_unlock(&info->lock);
 	return res;
 }
 
@@ -1377,11 +1377,11 @@ fb_release(struct inode *inode, struct file *file)
 {
 	struct fb_info * const info = file->private_data;
 
-	lock_kernel();
+	mutex_lock(&info->lock);
 	if (info->fbops->fb_release)
 		info->fbops->fb_release(info,1);
 	module_put(info->fbops->owner);
-	unlock_kernel();
+	mutex_unlock(&info->lock);
 	return 0;
 }
 
@@ -1460,6 +1460,7 @@ register_framebuffer(struct fb_info *fb_info)
 		if (!registered_fb[i])
 			break;
 	fb_info->node = i;
+	mutex_init(&fb_info->lock);
 
 	fb_info->dev = device_create(fb_class, fb_info->device,
 				     MKDEV(FB_MAJOR, i), NULL, "fb%d", i);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 531ccd5f5960..75a81eaf3430 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -808,6 +808,7 @@ struct fb_tile_ops {
 struct fb_info {
 	int node;
 	int flags;
+	struct mutex lock;		/* Lock for open/release/ioctl funcs */
 	struct fb_var_screeninfo var;	/* Current var */
 	struct fb_fix_screeninfo fix;	/* Current fix */
 	struct fb_monspecs monspecs;	/* Current Monitor specs */
-- 
GitLab


From 60c11d2abf4ef811d0ce3ea34279746729c4c6fc Mon Sep 17 00:00:00 2001
From: Richard Holden <aciddeath@gmail.com>
Date: Sat, 18 Oct 2008 20:27:53 -0700
Subject: [PATCH 771/895] phonedev: remove BKL

The phone_device array is covered by the phone_lock mutex in all cases and
request_module no longer needs the BKL so we can remove the only remaining
instance of the BKL from phonedev.

Signed-off-by: Richard Holden <aciddeath@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/telephony/phonedev.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/telephony/phonedev.c b/drivers/telephony/phonedev.c
index 4d74ba36c3a1..37caf4d69037 100644
--- a/drivers/telephony/phonedev.c
+++ b/drivers/telephony/phonedev.c
@@ -54,7 +54,6 @@ static int phone_open(struct inode *inode, struct file *file)
 	if (minor >= PHONE_NUM_DEVICES)
 		return -ENODEV;
 
-	lock_kernel();
 	mutex_lock(&phone_lock);
 	p = phone_device[minor];
 	if (p)
@@ -81,7 +80,6 @@ static int phone_open(struct inode *inode, struct file *file)
 	fops_put(old_fops);
 end:
 	mutex_unlock(&phone_lock);
-	unlock_kernel();
 	return err;
 }
 
-- 
GitLab


From d1645e526a1e5842c9ac433d73419ba886676cf3 Mon Sep 17 00:00:00 2001
From: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Date: Sat, 18 Oct 2008 20:27:53 -0700
Subject: [PATCH 772/895] jbd: abort when failed to log metadata buffers

If we failed to write metadata buffers to the journal space and succeeded
to write the commit record, stale data can be written back to the
filesystem as metadata in the recovery phase.

To avoid this, when we failed to write out metadata buffers, abort the
journal before writing the commit record.

Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Acked-by: Jan Kara <jack@suse.cz>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd/commit.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index ae08c057e751..f1ea861b9929 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -762,6 +762,9 @@ wait_for_iobuf:
 		/* AKPM: bforget here */
 	}
 
+	if (err)
+		journal_abort(journal, err);
+
 	jbd_debug(3, "JBD: commit phase 6\n");
 
 	if (journal_write_commit_record(journal, commit_transaction))
-- 
GitLab


From 885e353c7427db7b60692789741b34e605b0b69b Mon Sep 17 00:00:00 2001
From: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Date: Sat, 18 Oct 2008 20:27:54 -0700
Subject: [PATCH 773/895] jbd: don't dirty original metadata buffer on abort

Currently, original metadata buffers are dirtied when they are unfiled
whether the journal has aborted or not.  Eventually these buffers will be
written-back to the filesystem by pdflush.  This means some metadata
buffers are written to the filesystem without journaling if the journal
aborts.  So if both journal abort and system crash happen at the same
time, the filesystem would become inconsistent state.  Additionally,
replaying journaled metadata can overwrite the latest metadata on the
filesystem partly.  Because, if the journal aborts, journaled metadata are
preserved and replayed during the next mount not to lose uncheckpointed
metadata.  This would also break the consistency of the filesystem.

This patch prevents original metadata buffers from being dirtied on abort
by clearing BH_JBDDirty flag from those buffers.  Thus, no metadata
buffers are written to the filesystem without journaling.

Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Acked-by: Jan Kara <jack@suse.cz>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd/commit.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index f1ea861b9929..d6a6659f3e46 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -518,9 +518,10 @@ void journal_commit_transaction(journal_t *journal)
 		jh = commit_transaction->t_buffers;
 
 		/* If we're in abort mode, we just un-journal the buffer and
-		   release it for background writing. */
+		   release it. */
 
 		if (is_journal_aborted(journal)) {
+			clear_buffer_jbddirty(jh2bh(jh));
 			JBUFFER_TRACE(jh, "journal is aborting: refile");
 			journal_refile_buffer(journal, jh);
 			/* If that was the last one, we need to clean up
@@ -855,6 +856,8 @@ restart_loop:
 		if (buffer_jbddirty(bh)) {
 			JBUFFER_TRACE(jh, "add to new checkpointing trans");
 			__journal_insert_checkpoint(jh, commit_transaction);
+			if (is_journal_aborted(journal))
+				clear_buffer_jbddirty(bh);
 			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
 			__journal_refile_buffer(jh);
 			jbd_unlock_bh_state(bh);
-- 
GitLab


From 972fbf779832e5ad15effa7712789aeff9224c37 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@redhat.com>
Date: Sat, 18 Oct 2008 20:27:55 -0700
Subject: [PATCH 774/895] ext3: don't try to resize if there are no reserved
 gdt blocks left

When trying to resize a ext3 fs and you run out of reserved gdt blocks,
you get an error that doesn't actually tell you what went wrong, it just
says that the gdb it picked is not correct, which is the case since you
don't have any reserved gdt blocks left.  This patch adds a check to make
sure you have reserved gdt blocks to use, and if not prints out a more
relevant error.

Signed-off-by: Josef Bacik <jbacik@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Cc: Andreas Dilger <adilger@sun.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/resize.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 77278e947e94..78fdf3836370 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -790,7 +790,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 
 	if (reserved_gdb || gdb_off == 0) {
 		if (!EXT3_HAS_COMPAT_FEATURE(sb,
-					     EXT3_FEATURE_COMPAT_RESIZE_INODE)){
+					     EXT3_FEATURE_COMPAT_RESIZE_INODE)
+		    || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
 			ext3_warning(sb, __func__,
 				     "No reserved GDT blocks, can't resize");
 			return -EPERM;
-- 
GitLab


From 46d01a225e694f1a4343beea44f1e85105aedd7e Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Sat, 18 Oct 2008 20:27:56 -0700
Subject: [PATCH 775/895] ext3: fix ext3 block reservation early ENOSPC issue

We could run into ENOSPC error on ext3, even when there is free blocks on
the filesystem.

The problem is triggered in the case the goal block group has 0 free
blocks , and the rest block groups are skipped due to the check of
"free_blocks < windowsz/2".  Current code could fall back to non
reservation allocation to prevent early ENOSPC after examing all the block
groups with reservation on , but this code was bypassed if the reservation
window is turned off already, which is true in this case.

This patch fixed two issues:
1) We don't need to turn off block reservation if the goal block group has
0 free blocks left and continue search for the rest of block groups.

Current code the intention is to turn off the block reservation if the
goal allocation group has a few (some) free blocks left (not enough for
make the desired reservation window),to try to allocation in the goal
block group, to get better locality.  But if the goal blocks have 0 free
blocks, it should leave the block reservation on, and continues search for
the next block groups,rather than turn off block reservation completely.

2) we don't need to check the window size if the block reservation is off.

The problem was originally found and fixed in ext4.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/balloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 92fd0338a6eb..f5b57a2ca35a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1547,6 +1547,7 @@ retry_alloc:
 	 * turn off reservation for this allocation
 	 */
 	if (my_rsv && (free_blocks < windowsz)
+		&& (free_blocks > 0)
 		&& (rsv_is_empty(&my_rsv->rsv_window)))
 		my_rsv = NULL;
 
@@ -1585,7 +1586,7 @@ retry_alloc:
 		 * free blocks is less than half of the reservation
 		 * window size.
 		 */
-		if (free_blocks <= (windowsz/2))
+		if (my_rsv && (free_blocks <= (windowsz/2)))
 			continue;
 
 		brelse(bitmap_bh);
-- 
GitLab


From 0e4fb5e283870757024294bc4567a7c59d936f0b Mon Sep 17 00:00:00 2001
From: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Date: Sat, 18 Oct 2008 20:27:57 -0700
Subject: [PATCH 776/895] ext3: add an option to control error handling on file
 data

If the journal doesn't abort when it gets an IO error in file data blocks,
the file data corruption will spread silently.  Because most of
applications and commands do buffered writes without fsync(), they don't
notice the IO error.  It's scary for mission critical systems.  On the
other hand, if the journal aborts whenever it gets an IO error in file
data blocks, the system will easily become inoperable.  So this patch
introduces a filesystem option to determine whether it aborts the journal
or just call printk() when it gets an IO error in file data.

If you mount a ext3 fs with data_err=abort option, it aborts on file data
write error.  If you mount it with data_err=ignore, it doesn't abort, just
call printk().  data_err=ignore is the default.

Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Jan Kara <jack@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/ext3.txt |  5 +++++
 fs/ext3/super.c                    | 16 ++++++++++++++++
 fs/jbd/commit.c                    |  2 ++
 include/linux/ext3_fs.h            |  2 ++
 include/linux/jbd.h                |  3 +++
 5 files changed, 28 insertions(+)

diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 295f26cd895a..9dd2a3bb2acc 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -96,6 +96,11 @@ errors=remount-ro(*)	Remount the filesystem read-only on an error.
 errors=continue		Keep going on a filesystem error.
 errors=panic		Panic and halt the machine if an error occurs.
 
+data_err=ignore(*)	Just print an error message if an error occurs
+			in a file data buffer in ordered mode.
+data_err=abort		Abort the journal if an error occurs in a file
+			data buffer in ordered mode.
+
 grpid			Give objects the same group ID as their creator.
 bsdgroups
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 399a96a6c556..3a260af5544d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -625,6 +625,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
 		seq_puts(seq, ",data=writeback");
 
+	if (test_opt(sb, DATA_ERR_ABORT))
+		seq_puts(seq, ",data_err=abort");
+
 	ext3_show_quota_options(seq, sb);
 
 	return 0;
@@ -754,6 +757,7 @@ enum {
 	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
 	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+	Opt_data_err_abort, Opt_data_err_ignore,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
@@ -796,6 +800,8 @@ static const match_table_t tokens = {
 	{Opt_data_journal, "data=journal"},
 	{Opt_data_ordered, "data=ordered"},
 	{Opt_data_writeback, "data=writeback"},
+	{Opt_data_err_abort, "data_err=abort"},
+	{Opt_data_err_ignore, "data_err=ignore"},
 	{Opt_offusrjquota, "usrjquota="},
 	{Opt_usrjquota, "usrjquota=%s"},
 	{Opt_offgrpjquota, "grpjquota="},
@@ -1011,6 +1017,12 @@ static int parse_options (char *options, struct super_block *sb,
 				sbi->s_mount_opt |= data_opt;
 			}
 			break;
+		case Opt_data_err_abort:
+			set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+			break;
+		case Opt_data_err_ignore:
+			clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+			break;
 #ifdef CONFIG_QUOTA
 		case Opt_usrjquota:
 			qtype = USRQUOTA;
@@ -1986,6 +1998,10 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
 		journal->j_flags |= JFS_BARRIER;
 	else
 		journal->j_flags &= ~JFS_BARRIER;
+	if (test_opt(sb, DATA_ERR_ABORT))
+		journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
+	else
+		journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
 	spin_unlock(&journal->j_state_lock);
 }
 
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index d6a6659f3e46..25719d902c51 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -482,6 +482,8 @@ void journal_commit_transaction(journal_t *journal)
 		printk(KERN_WARNING
 			"JBD: Detected IO errors while flushing file data "
 			"on %s\n", bdevname(journal->j_fs_dev, b));
+		if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
+			journal_abort(journal, err);
 		err = 0;
 	}
 
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 159d9b476cd7..d14f02918483 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -380,6 +380,8 @@ struct ext3_inode {
 #define EXT3_MOUNT_QUOTA		0x80000 /* Some quota option set */
 #define EXT3_MOUNT_USRQUOTA		0x100000 /* "old" user quota */
 #define EXT3_MOUNT_GRPQUOTA		0x200000 /* "old" group quota */
+#define EXT3_MOUNT_DATA_ERR_ABORT	0x400000 /* Abort on file data write
+						  * error in ordered mode */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 7ebbcb1c9ba4..35d4f6342fac 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -816,6 +816,9 @@ struct journal_s
 #define JFS_FLUSHED	0x008	/* The journal superblock has been flushed */
 #define JFS_LOADED	0x010	/* The journal superblock has been loaded */
 #define JFS_BARRIER	0x020	/* Use IDE barriers */
+#define JFS_ABORT_ON_SYNCDATA_ERR	0x040  /* Abort the journal on file
+						* data write error in ordered
+						* mode */
 
 /*
  * Function declarations for the journaling transaction and buffer
-- 
GitLab


From 960a22ae60c8a723bd17da3b929fe0bcea6d007e Mon Sep 17 00:00:00 2001
From: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Date: Sat, 18 Oct 2008 20:27:58 -0700
Subject: [PATCH 777/895] jbd: ordered data integrity fix

In ordered mode, if a file data buffer being dirtied exists in the
committing transaction, we write the buffer to the disk, move it from the
committing transaction to the running transaction, then dirty it.  But we
don't have to remove the buffer from the committing transaction when the
buffer couldn't be written out, otherwise it would miss the error and the
committing transaction would not abort.

This patch adds an error check before removing the buffer from the
committing transaction.

Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd/transaction.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 0540ca27a446..d15cd6e7251e 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -954,9 +954,10 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 	journal_t *journal = handle->h_transaction->t_journal;
 	int need_brelse = 0;
 	struct journal_head *jh;
+	int ret = 0;
 
 	if (is_handle_aborted(handle))
-		return 0;
+		return ret;
 
 	jh = journal_add_journal_head(bh);
 	JBUFFER_TRACE(jh, "entry");
@@ -1067,7 +1068,16 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 				   time if it is redirtied */
 			}
 
-			/* journal_clean_data_list() may have got there first */
+			/*
+			 * We cannot remove the buffer with io error from the
+			 * committing transaction, because otherwise it would
+			 * miss the error and the commit would not abort.
+			 */
+			if (unlikely(!buffer_uptodate(bh))) {
+				ret = -EIO;
+				goto no_journal;
+			}
+
 			if (jh->b_transaction != NULL) {
 				JBUFFER_TRACE(jh, "unfile from commit");
 				__journal_temp_unlink_buffer(jh);
@@ -1108,7 +1118,7 @@ no_journal:
 	}
 	JBUFFER_TRACE(jh, "exit");
 	journal_put_journal_head(jh);
-	return 0;
+	return ret;
 }
 
 /**
-- 
GitLab


From 6a897cf447a83c9c3fd1b85a1e525c02d6eada7d Mon Sep 17 00:00:00 2001
From: Eugene Dashevsky <eugene@ibrix.com>
Date: Sat, 18 Oct 2008 20:27:59 -0700
Subject: [PATCH 778/895] ext3: fix ext3_dx_readdir hash collision handling

This fixes a bug where readdir() would return a directory entry twice
if there was a hash collision in an hash tree indexed directory.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Eugene Dashevsky <eugene@ibrix.com>
Signed-off-by: Mike Snitzer <msnitzer@ibrix.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/dir.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 2eea96ec78ed..28b681ef47e8 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -410,7 +410,7 @@ static int call_filldir(struct file * filp, void * dirent,
 				get_dtype(sb, fname->file_type));
 		if (error) {
 			filp->f_pos = curr_pos;
-			info->extra_fname = fname->next;
+			info->extra_fname = fname;
 			return error;
 		}
 		fname = fname->next;
@@ -449,11 +449,21 @@ static int ext3_dx_readdir(struct file * filp,
 	 * If there are any leftover names on the hash collision
 	 * chain, return them first.
 	 */
-	if (info->extra_fname &&
-	    call_filldir(filp, dirent, filldir, info->extra_fname))
-		goto finished;
+	if (info->extra_fname) {
+		if (call_filldir(filp, dirent, filldir, info->extra_fname))
+			goto finished;
 
-	if (!info->curr_node)
+		info->extra_fname = NULL;
+		info->curr_node = rb_next(info->curr_node);
+		if (!info->curr_node) {
+			if (info->next_hash == ~0) {
+				filp->f_pos = EXT3_HTREE_EOF;
+				goto finished;
+			}
+			info->curr_hash = info->next_hash;
+			info->curr_minor_hash = 0;
+		}
+	} else if (!info->curr_node)
 		info->curr_node = rb_first(&info->root);
 
 	while (1) {
-- 
GitLab


From 5ec8b75e3a2a94860ee99b5456fe1a963c8680e5 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Sat, 18 Oct 2008 20:28:00 -0700
Subject: [PATCH 779/895] ext3: truncate block allocated on a failed
 ext3_write_begin

For blocksize < pagesize we need to remove blocks that got allocated in
block_write_begin() if we fail with ENOSPC for later blocks.
block_write_begin() internally does this if it allocated page locally.
This makes sure we don't have blocks outside inode.i_size during ENOSPC.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/inode.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ebfec4d0148e..f8424ad89971 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1186,6 +1186,13 @@ write_begin_failed:
 		ext3_journal_stop(handle);
 		unlock_page(page);
 		page_cache_release(page);
+		/*
+		 * block_write_begin may have instantiated a few blocks
+		 * outside i_size.  Trim these off again. Don't need
+		 * i_size_read because we hold i_mutex.
+		 */
+		if (pos + len > inode->i_size)
+			vmtruncate(inode, inode->i_size);
 	}
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
-- 
GitLab


From cdbf6dba28e8e6268c8420857696309470009fd9 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sat, 18 Oct 2008 20:28:00 -0700
Subject: [PATCH 780/895] ext3: avoid printk floods in the face of directory
 corruption

A very large directory with many read failures (either due to storage
problems, or due to invalid size & blocks from corruption) will generate a
printk storm as the filesystem continues to try to read all the blocks.
This flood of messages can tie up the box until it is complete - which may
be a very long time, especially for very large corrupted values.

This is fixed by only reporting the corruption once each time we try to
read the directory.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Eugene Teo <eugeneteo@kernel.sg>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/dir.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 28b681ef47e8..4c82531ea0a8 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -102,6 +102,7 @@ static int ext3_readdir(struct file * filp,
 	int err;
 	struct inode *inode = filp->f_path.dentry->d_inode;
 	int ret = 0;
+	int dir_has_error = 0;
 
 	sb = inode->i_sb;
 
@@ -148,9 +149,12 @@ static int ext3_readdir(struct file * filp,
 		 * of recovering data when there's a bad sector
 		 */
 		if (!bh) {
-			ext3_error (sb, "ext3_readdir",
-				"directory #%lu contains a hole at offset %lu",
-				inode->i_ino, (unsigned long)filp->f_pos);
+			if (!dir_has_error) {
+				ext3_error(sb, __func__, "directory #%lu "
+					"contains a hole at offset %lld",
+					inode->i_ino, filp->f_pos);
+				dir_has_error = 1;
+			}
 			/* corrupt size?  Maybe no more blocks to read */
 			if (filp->f_pos > inode->i_blocks << 9)
 				break;
-- 
GitLab


From 6e7152944426be786c6c232990914e4565290d35 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Sat, 18 Oct 2008 20:28:01 -0700
Subject: [PATCH 781/895] hfsplus: missing O_LARGEFILE check

hfsplus: O_LARGEFILE checking is missing

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=8490

From: Alan Cox <alan@redhat.com>
Reported-by: didier <did447@gmail.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfsplus/inode.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index b085d64a2b67..963be644297a 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -254,6 +254,8 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
 {
 	if (HFSPLUS_IS_RSRC(inode))
 		inode = HFSPLUS_I(inode).rsrc_inode;
+	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+		return -EOVERFLOW;
 	atomic_inc(&HFSPLUS_I(inode).opencnt);
 	return 0;
 }
-- 
GitLab


From 248736c2a57206388c86f8cdd3392ee986e84f9f Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Sat, 18 Oct 2008 20:28:02 -0700
Subject: [PATCH 782/895] hfsplus: fix possible deadlock when handling
 corrupted extents

A corrupted extent for the extent file itself may try to get an impossible
extent, causing a deadlock if I see it correctly.

Check the inode number after the first_blocks checks and fail if it's the
extent file, as according to the spec the extent file should have no
extent for itself.

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfsplus/extents.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index fec8f61227ff..0022eec63cda 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,6 +199,9 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
 		goto done;
 	}
 
+	if (inode->i_ino == HFSPLUS_EXT_CNID)
+		return -EIO;
+
 	mutex_lock(&HFSPLUS_I(inode).extents_lock);
 	res = hfsplus_ext_read_extent(inode, ablock);
 	if (!res) {
-- 
GitLab


From 146aa1bd0511f88ddb4e92fafa2b8aad4f2f65f3 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:03 -0700
Subject: [PATCH 783/895] cgroups: fix probable race with
 put_css_set[_taskexit] and find_css_set

put_css_set_taskexit may be called when find_css_set is called on other
cpu.  And the race will occur:

put_css_set_taskexit side                    find_css_set side

                                        |
atomic_dec_and_test(&kref->refcount)    |
    /* kref->refcount = 0 */            |
....................................................................
                                        |  read_lock(&css_set_lock)
                                        |  find_existing_css_set
                                        |  get_css_set
                                        |  read_unlock(&css_set_lock);
....................................................................
__release_css_set                       |
....................................................................
                                        | /* use a released css_set */
                                        |

[put_css_set is the same. But in the current code, all put_css_set are
put into cgroup mutex critical region as the same as find_css_set.]

[akpm@linux-foundation.org: repair comments]
[menage@google.com: eliminate race in css_set refcounting]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h |  3 +--
 kernel/cgroup.c        | 43 ++++++++++++++++++++----------------------
 kernel/cgroup_debug.c  |  4 ++--
 3 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 30934e4bfaab..7166023e07d2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -9,7 +9,6 @@
  */
 
 #include <linux/sched.h>
-#include <linux/kref.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
@@ -149,7 +148,7 @@ struct cgroup {
 struct css_set {
 
 	/* Reference count */
-	struct kref ref;
+	atomic_t refcount;
 
 	/*
 	 * List running through all cgroup groups in the same hash
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c6e1c17e6d3..1e49218457e0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
 	struct cg_cgroup_link *link;
 	struct cg_cgroup_link *saved_link;
 
-	write_lock(&css_set_lock);
 	hlist_del(&cg->hlist);
 	css_set_count--;
 
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
 		list_del(&link->cgrp_link_list);
 		kfree(link);
 	}
-
-	write_unlock(&css_set_lock);
 }
 
-static void __release_css_set(struct kref *k, int taskexit)
+static void __put_css_set(struct css_set *cg, int taskexit)
 {
 	int i;
-	struct css_set *cg = container_of(k, struct css_set, ref);
-
+	/*
+	 * Ensure that the refcount doesn't hit zero while any readers
+	 * can see it. Similar to atomic_dec_and_lock(), but for an
+	 * rwlock
+	 */
+	if (atomic_add_unless(&cg->refcount, -1, 1))
+		return;
+	write_lock(&css_set_lock);
+	if (!atomic_dec_and_test(&cg->refcount)) {
+		write_unlock(&css_set_lock);
+		return;
+	}
 	unlink_css_set(cg);
+	write_unlock(&css_set_lock);
 
 	rcu_read_lock();
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
 	kfree(cg);
 }
 
-static void release_css_set(struct kref *k)
-{
-	__release_css_set(k, 0);
-}
-
-static void release_css_set_taskexit(struct kref *k)
-{
-	__release_css_set(k, 1);
-}
-
 /*
  * refcounted get/put for css_set objects
  */
 static inline void get_css_set(struct css_set *cg)
 {
-	kref_get(&cg->ref);
+	atomic_inc(&cg->refcount);
 }
 
 static inline void put_css_set(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set);
+	__put_css_set(cg, 0);
 }
 
 static inline void put_css_set_taskexit(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set_taskexit);
+	__put_css_set(cg, 1);
 }
 
 /*
@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
 		return NULL;
 	}
 
-	kref_init(&res->ref);
+	atomic_set(&res->refcount, 1);
 	INIT_LIST_HEAD(&res->cg_links);
 	INIT_LIST_HEAD(&res->tasks);
 	INIT_HLIST_NODE(&res->hlist);
@@ -1728,7 +1726,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
 
 	read_lock(&css_set_lock);
 	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
-		count += atomic_read(&link->cg->ref.refcount);
+		count += atomic_read(&link->cg->refcount);
 	}
 	read_unlock(&css_set_lock);
 	return count;
@@ -2495,8 +2493,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 int __init cgroup_init_early(void)
 {
 	int i;
-	kref_init(&init_css_set.ref);
-	kref_get(&init_css_set.ref);
+	atomic_set(&init_css_set.refcount, 1);
 	INIT_LIST_HEAD(&init_css_set.cg_links);
 	INIT_LIST_HEAD(&init_css_set.tasks);
 	INIT_HLIST_NODE(&init_css_set.hlist);
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
index c3dc3aba4c02..daca6209202d 100644
--- a/kernel/cgroup_debug.c
+++ b/kernel/cgroup_debug.c
@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
 	u64 count;
 
 	rcu_read_lock();
-	count = atomic_read(&current->cgroups->ref.refcount);
+	count = atomic_read(&current->cgroups->refcount);
 	rcu_read_unlock();
 	return count;
 }
@@ -90,7 +90,7 @@ static struct cftype files[] =  {
 	{
 		.name = "releasable",
 		.read_u64 = releasable_read,
-	}
+	},
 };
 
 static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
-- 
GitLab


From cc31edceee04a7b87f2be48f9489ebb72d264844 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Sat, 18 Oct 2008 20:28:04 -0700
Subject: [PATCH 784/895] cgroups: convert tasks file to use a seq_file with
 shared pid array

Rather than pre-generating the entire text for the "tasks" file each
time the file is opened, we instead just generate/update the array of
process ids and use a seq_file to report these to userspace.  All open
file handles on the same "tasks" file can share a pid array, which may
be updated any time that no thread is actively reading the array.  By
sharing the array, the potential for userspace to DoS the system by
opening many handles on the same "tasks" file is removed.

[Based on a patch by Lai Jiangshan, extended to use seq_file]

Signed-off-by: Paul Menage <menage@google.com>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h |  10 ++
 kernel/cgroup.c        | 222 ++++++++++++++++++++++++++---------------
 2 files changed, 149 insertions(+), 83 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7166023e07d2..8ab91880a0ad 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -14,6 +14,7 @@
 #include <linux/rcupdate.h>
 #include <linux/cgroupstats.h>
 #include <linux/prio_heap.h>
+#include <linux/rwsem.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -136,6 +137,15 @@ struct cgroup {
 	 * release_list_lock
 	 */
 	struct list_head release_list;
+
+	/* pids_mutex protects the fields below */
+	struct rw_semaphore pids_mutex;
+	/* Array of process ids in the cgroup */
+	pid_t *tasks_pids;
+	/* How many files are using the current tasks_pids array */
+	int pids_use_count;
+	/* Length of the current tasks_pids array */
+	int pids_length;
 };
 
 /* A css_set is a structure holding pointers to a set of
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1e49218457e0..046c1609606b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -868,6 +868,14 @@ static struct super_operations cgroup_ops = {
 	.remount_fs = cgroup_remount,
 };
 
+static void init_cgroup_housekeeping(struct cgroup *cgrp)
+{
+	INIT_LIST_HEAD(&cgrp->sibling);
+	INIT_LIST_HEAD(&cgrp->children);
+	INIT_LIST_HEAD(&cgrp->css_sets);
+	INIT_LIST_HEAD(&cgrp->release_list);
+	init_rwsem(&cgrp->pids_mutex);
+}
 static void init_cgroup_root(struct cgroupfs_root *root)
 {
 	struct cgroup *cgrp = &root->top_cgroup;
@@ -876,10 +884,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
 	root->number_of_cgroups = 1;
 	cgrp->root = root;
 	cgrp->top_cgroup = cgrp;
-	INIT_LIST_HEAD(&cgrp->sibling);
-	INIT_LIST_HEAD(&cgrp->children);
-	INIT_LIST_HEAD(&cgrp->css_sets);
-	INIT_LIST_HEAD(&cgrp->release_list);
+	init_cgroup_housekeeping(cgrp);
 }
 
 static int cgroup_test_super(struct super_block *sb, void *data)
@@ -1995,16 +2000,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
  * but we cannot guarantee that the information we produce is correct
  * unless we produce it entirely atomically.
  *
- * Upon tasks file open(), a struct ctr_struct is allocated, that
- * will have a pointer to an array (also allocated here).  The struct
- * ctr_struct * is stored in file->private_data.  Its resources will
- * be freed by release() when the file is closed.  The array is used
- * to sprintf the PIDs and then used by read().
  */
-struct ctr_struct {
-	char *buf;
-	int bufsz;
-};
 
 /*
  * Load into 'pidarray' up to 'npids' of the tasks using cgroup
@@ -2086,42 +2082,132 @@ static int cmppid(const void *a, const void *b)
 	return *(pid_t *)a - *(pid_t *)b;
 }
 
+
 /*
- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
- * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
- * count 'cnt' of how many chars would be written if buf were large enough.
+ * seq_file methods for the "tasks" file. The seq_file position is the
+ * next pid to display; the seq_file iterator is a pointer to the pid
+ * in the cgroup->tasks_pids array.
  */
-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+
+static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
 {
-	int cnt = 0;
-	int i;
+	/*
+	 * Initially we receive a position value that corresponds to
+	 * one more than the last pid shown (or 0 on the first call or
+	 * after a seek to the start). Use a binary-search to find the
+	 * next pid to display, if any
+	 */
+	struct cgroup *cgrp = s->private;
+	int index = 0, pid = *pos;
+	int *iter;
 
-	for (i = 0; i < npids; i++)
-		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
-	return cnt;
+	down_read(&cgrp->pids_mutex);
+	if (pid) {
+		int end = cgrp->pids_length;
+		int i;
+		while (index < end) {
+			int mid = (index + end) / 2;
+			if (cgrp->tasks_pids[mid] == pid) {
+				index = mid;
+				break;
+			} else if (cgrp->tasks_pids[mid] <= pid)
+				index = mid + 1;
+			else
+				end = mid;
+		}
+	}
+	/* If we're off the end of the array, we're done */
+	if (index >= cgrp->pids_length)
+		return NULL;
+	/* Update the abstract position to be the actual pid that we found */
+	iter = cgrp->tasks_pids + index;
+	*pos = *iter;
+	return iter;
+}
+
+static void cgroup_tasks_stop(struct seq_file *s, void *v)
+{
+	struct cgroup *cgrp = s->private;
+	up_read(&cgrp->pids_mutex);
 }
 
+static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct cgroup *cgrp = s->private;
+	int *p = v;
+	int *end = cgrp->tasks_pids + cgrp->pids_length;
+
+	/*
+	 * Advance to the next pid in the array. If this goes off the
+	 * end, we're done
+	 */
+	p++;
+	if (p >= end) {
+		return NULL;
+	} else {
+		*pos = *p;
+		return p;
+	}
+}
+
+static int cgroup_tasks_show(struct seq_file *s, void *v)
+{
+	return seq_printf(s, "%d\n", *(int *)v);
+}
+
+static struct seq_operations cgroup_tasks_seq_operations = {
+	.start = cgroup_tasks_start,
+	.stop = cgroup_tasks_stop,
+	.next = cgroup_tasks_next,
+	.show = cgroup_tasks_show,
+};
+
+static void release_cgroup_pid_array(struct cgroup *cgrp)
+{
+	down_write(&cgrp->pids_mutex);
+	BUG_ON(!cgrp->pids_use_count);
+	if (!--cgrp->pids_use_count) {
+		kfree(cgrp->tasks_pids);
+		cgrp->tasks_pids = NULL;
+		cgrp->pids_length = 0;
+	}
+	up_write(&cgrp->pids_mutex);
+}
+
+static int cgroup_tasks_release(struct inode *inode, struct file *file)
+{
+	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+	if (!(file->f_mode & FMODE_READ))
+		return 0;
+
+	release_cgroup_pid_array(cgrp);
+	return seq_release(inode, file);
+}
+
+static struct file_operations cgroup_tasks_operations = {
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.write = cgroup_file_write,
+	.release = cgroup_tasks_release,
+};
+
 /*
- * Handle an open on 'tasks' file.  Prepare a buffer listing the
+ * Handle an open on 'tasks' file.  Prepare an array containing the
  * process id's of tasks currently attached to the cgroup being opened.
- *
- * Does not require any specific cgroup mutexes, and does not take any.
  */
+
 static int cgroup_tasks_open(struct inode *unused, struct file *file)
 {
 	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
-	struct ctr_struct *ctr;
 	pid_t *pidarray;
 	int npids;
-	char c;
+	int retval;
 
+	/* Nothing to do for write-only files */
 	if (!(file->f_mode & FMODE_READ))
 		return 0;
 
-	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
-	if (!ctr)
-		goto err0;
-
 	/*
 	 * If cgroup gets more users after we read count, we won't have
 	 * enough space - tough.  This race is indistinguishable to the
@@ -2129,57 +2215,31 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
 	 * show up until sometime later on.
 	 */
 	npids = cgroup_task_count(cgrp);
-	if (npids) {
-		pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
-		if (!pidarray)
-			goto err1;
-
-		npids = pid_array_load(pidarray, npids, cgrp);
-		sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
-		/* Call pid_array_to_buf() twice, first just to get bufsz */
-		ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
-		ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
-		if (!ctr->buf)
-			goto err2;
-		ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
-
-		kfree(pidarray);
-	} else {
-		ctr->buf = NULL;
-		ctr->bufsz = 0;
-	}
-	file->private_data = ctr;
-	return 0;
-
-err2:
-	kfree(pidarray);
-err1:
-	kfree(ctr);
-err0:
-	return -ENOMEM;
-}
-
-static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
-				    struct cftype *cft,
-				    struct file *file, char __user *buf,
-				    size_t nbytes, loff_t *ppos)
-{
-	struct ctr_struct *ctr = file->private_data;
+	pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+	if (!pidarray)
+		return -ENOMEM;
+	npids = pid_array_load(pidarray, npids, cgrp);
+	sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
 
-	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
-}
+	/*
+	 * Store the array in the cgroup, freeing the old
+	 * array if necessary
+	 */
+	down_write(&cgrp->pids_mutex);
+	kfree(cgrp->tasks_pids);
+	cgrp->tasks_pids = pidarray;
+	cgrp->pids_length = npids;
+	cgrp->pids_use_count++;
+	up_write(&cgrp->pids_mutex);
 
-static int cgroup_tasks_release(struct inode *unused_inode,
-					struct file *file)
-{
-	struct ctr_struct *ctr;
+	file->f_op = &cgroup_tasks_operations;
 
-	if (file->f_mode & FMODE_READ) {
-		ctr = file->private_data;
-		kfree(ctr->buf);
-		kfree(ctr);
+	retval = seq_open(file, &cgroup_tasks_seq_operations);
+	if (retval) {
+		release_cgroup_pid_array(cgrp);
+		return retval;
 	}
+	((struct seq_file *)file->private_data)->private = cgrp;
 	return 0;
 }
 
@@ -2208,7 +2268,6 @@ static struct cftype files[] = {
 	{
 		.name = "tasks",
 		.open = cgroup_tasks_open,
-		.read = cgroup_tasks_read,
 		.write_u64 = cgroup_tasks_write,
 		.release = cgroup_tasks_release,
 		.private = FILE_TASKLIST,
@@ -2298,10 +2357,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
 	mutex_lock(&cgroup_mutex);
 
-	INIT_LIST_HEAD(&cgrp->sibling);
-	INIT_LIST_HEAD(&cgrp->children);
-	INIT_LIST_HEAD(&cgrp->css_sets);
-	INIT_LIST_HEAD(&cgrp->release_list);
+	init_cgroup_housekeeping(cgrp);
 
 	cgrp->parent = parent;
 	cgrp->root = parent->root;
-- 
GitLab


From 886465f407e57d6c3c81013c919ea670ce1ae0d0 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Sat, 18 Oct 2008 20:28:05 -0700
Subject: [PATCH 785/895] cgroups: fix declaration of cgroup_mm_owner_callbacks

The choice of real/dummy declaration for cgroup_mm_owner_callbacks()
shouldn't be based on CONFIG_MM_OWNER, but on CONFIG_CGROUPS.  Otherwise
kernel/exit.c fails to compile when something other than a cgroups
controller selects CONFIG_MM_OWNER

Signed-off-by: Paul Menage <menage@google.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8ab91880a0ad..8b00f6643e93 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -403,6 +403,9 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
 
+void cgroup_mm_owner_callbacks(struct task_struct *old,
+			       struct task_struct *new);
+
 #else /* !CONFIG_CGROUPS */
 
 static inline int cgroup_init_early(void) { return 0; }
@@ -421,15 +424,9 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
 	return -EINVAL;
 }
 
+static inline void cgroup_mm_owner_callbacks(struct task_struct *old,
+					     struct task_struct *new) {}
+
 #endif /* !CONFIG_CGROUPS */
 
-#ifdef CONFIG_MM_OWNER
-extern void
-cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new);
-#else /* !CONFIG_MM_OWNER */
-static inline void
-cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
-{
-}
-#endif /* CONFIG_MM_OWNER */
 #endif /* _LINUX_CGROUP_H */
-- 
GitLab


From 2cdc7241a290bb2b9ef4c2e2969a4a3ed92abb63 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:06 -0700
Subject: [PATCH 786/895] devcgroup: use kmemdup()

This saves 40 bytes on my x86_32 box.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/device_cgroup.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 46f23971f7e4..9c625f65ee56 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -84,13 +84,9 @@ static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig)
 	struct dev_whitelist_item *wh, *tmp, *new;
 
 	list_for_each_entry(wh, orig, list) {
-		new = kmalloc(sizeof(*wh), GFP_KERNEL);
+		new = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
 		if (!new)
 			goto free_and_exit;
-		new->major = wh->major;
-		new->minor = wh->minor;
-		new->type = wh->type;
-		new->access = wh->access;
 		list_add_tail(&new->list, dest);
 	}
 
@@ -114,11 +110,10 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 {
 	struct dev_whitelist_item *whcopy, *walk;
 
-	whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL);
+	whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
 	if (!whcopy)
 		return -ENOMEM;
 
-	memcpy(whcopy, wh, sizeof(*whcopy));
 	spin_lock(&dev_cgroup->lock);
 	list_for_each_entry(walk, &dev_cgroup->whitelist, list) {
 		if (walk->type != wh->type)
-- 
GitLab


From c012a54ae0b2ee2c73499f54596e0f5257288fec Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:07 -0700
Subject: [PATCH 787/895] devcgroup: remove unused variable

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/device_cgroup.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 9c625f65ee56..d5c15a7c67c3 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -355,7 +355,7 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	struct dev_cgroup *cur_devcgroup;
 	const char *b;
 	char *endp;
-	int retval = 0, count;
+	int count;
 	struct dev_whitelist_item wh;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -432,7 +432,6 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	}
 
 handle:
-	retval = 0;
 	switch (filetype) {
 	case DEVCG_ALLOW:
 		if (!parent_has_perm(devcgroup, &wh))
-- 
GitLab


From 47c59803becb55b72b26cdab3838d621a15badc8 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:07 -0700
Subject: [PATCH 788/895] devcgroup: remove spin_lock()

Since we introduced rcu for read side, spin_lock is used only for update.
But we always hold cgroup_lock() when update, so spin_lock() is not need.

Additional cleanup:
1) include linux/rcupdate.h explicitly
2) remove unused variable cur_devcgroup in devcgroup_update_access()

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: "Serge E. Hallyn" <serue@us.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/device_cgroup.c | 34 +++++-----------------------------
 1 file changed, 5 insertions(+), 29 deletions(-)

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index d5c15a7c67c3..5ba78701adc3 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -1,5 +1,5 @@
 /*
- * dev_cgroup.c - device cgroup subsystem
+ * device_cgroup.c - device cgroup subsystem
  *
  * Copyright 2007 IBM Corp
  */
@@ -10,6 +10,7 @@
 #include <linux/list.h>
 #include <linux/uaccess.h>
 #include <linux/seq_file.h>
+#include <linux/rcupdate.h>
 
 #define ACC_MKNOD 1
 #define ACC_READ  2
@@ -22,18 +23,8 @@
 
 /*
  * whitelist locking rules:
- * cgroup_lock() cannot be taken under dev_cgroup->lock.
- * dev_cgroup->lock can be taken with or without cgroup_lock().
- *
- * modifications always require cgroup_lock
- * modifications to a list which is visible require the
- *   dev_cgroup->lock *and* cgroup_lock()
- * walking the list requires dev_cgroup->lock or cgroup_lock().
- *
- * reasoning: dev_whitelist_copy() needs to kmalloc, so needs
- *   a mutex, which the cgroup_lock() is.  Since modifying
- *   a visible list requires both locks, either lock can be
- *   taken for walking the list.
+ * hold cgroup_lock() for update/read.
+ * hold rcu_read_lock() for read.
  */
 
 struct dev_whitelist_item {
@@ -47,7 +38,6 @@ struct dev_whitelist_item {
 struct dev_cgroup {
 	struct cgroup_subsys_state css;
 	struct list_head whitelist;
-	spinlock_t lock;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
@@ -103,7 +93,6 @@ free_and_exit:
 /* Stupid prototype - don't bother combining existing entries */
 /*
  * called under cgroup_lock()
- * since the list is visible to other tasks, we need the spinlock also
  */
 static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 			struct dev_whitelist_item *wh)
@@ -114,7 +103,6 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 	if (!whcopy)
 		return -ENOMEM;
 
-	spin_lock(&dev_cgroup->lock);
 	list_for_each_entry(walk, &dev_cgroup->whitelist, list) {
 		if (walk->type != wh->type)
 			continue;
@@ -130,7 +118,6 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 
 	if (whcopy != NULL)
 		list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
-	spin_unlock(&dev_cgroup->lock);
 	return 0;
 }
 
@@ -144,14 +131,12 @@ static void whitelist_item_free(struct rcu_head *rcu)
 
 /*
  * called under cgroup_lock()
- * since the list is visible to other tasks, we need the spinlock also
  */
 static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
 			struct dev_whitelist_item *wh)
 {
 	struct dev_whitelist_item *walk, *tmp;
 
-	spin_lock(&dev_cgroup->lock);
 	list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) {
 		if (walk->type == DEV_ALL)
 			goto remove;
@@ -169,7 +154,6 @@ remove:
 			call_rcu(&walk->rcu, whitelist_item_free);
 		}
 	}
-	spin_unlock(&dev_cgroup->lock);
 }
 
 /*
@@ -209,7 +193,6 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
 		}
 	}
 
-	spin_lock_init(&dev_cgroup->lock);
 	return &dev_cgroup->css;
 }
 
@@ -325,15 +308,11 @@ static int parent_has_perm(struct dev_cgroup *childcg,
 {
 	struct cgroup *pcg = childcg->css.cgroup->parent;
 	struct dev_cgroup *parent;
-	int ret;
 
 	if (!pcg)
 		return 1;
 	parent = cgroup_to_devcgroup(pcg);
-	spin_lock(&parent->lock);
-	ret = may_access_whitelist(parent, wh);
-	spin_unlock(&parent->lock);
-	return ret;
+	return may_access_whitelist(parent, wh);
 }
 
 /*
@@ -352,7 +331,6 @@ static int parent_has_perm(struct dev_cgroup *childcg,
 static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 				   int filetype, const char *buffer)
 {
-	struct dev_cgroup *cur_devcgroup;
 	const char *b;
 	char *endp;
 	int count;
@@ -361,8 +339,6 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	cur_devcgroup = task_devcgroup(current);
-
 	memset(&wh, 0, sizeof(wh));
 	b = buffer;
 
-- 
GitLab


From 073e587ec2cc377867e53d8b8959738a8e16cff6 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:08 -0700
Subject: [PATCH 789/895] memcg: move charge swapin under lock

While page-cache's charge/uncharge is done under page_lock(), swap-cache
isn't.  (anonymous page is charged when it's newly allocated.)

This patch moves do_swap_page()'s charge() call under lock.  I don't see
any bad problem *now* but this fix will be good for future for avoiding
unnecessary racy state.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index ba86b436b85f..54cf20ee0a83 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2326,16 +2326,17 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(PGMAJFAULT);
 	}
 
+	mark_page_accessed(page);
+
+	lock_page(page);
+	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+
 	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
-		delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 		ret = VM_FAULT_OOM;
+		unlock_page(page);
 		goto out;
 	}
 
-	mark_page_accessed(page);
-	lock_page(page);
-	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
-
 	/*
 	 * Back out if somebody else already faulted in this pte.
 	 */
-- 
GitLab


From b7abea9630bc8ffc663a751e46680db25c4cdf8d Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:09 -0700
Subject: [PATCH 790/895] memcg: make page->mapping NULL before uncharge

This patch tries to make page->mapping to be NULL before
mem_cgroup_uncharge_cache_page() is called.

"page->mapping == NULL" is a good check for "whether the page is still
radix-tree or not".  This patch also adds BUG_ON() to
mem_cgroup_uncharge_cache_page();

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/filemap.c    | 2 +-
 mm/memcontrol.c | 1 +
 mm/migrate.c    | 9 +++++++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index e1b23fda48de..ab8553658af3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -116,12 +116,12 @@ void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
-	mem_cgroup_uncharge_cache_page(page);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	BUG_ON(page_mapped(page));
+	mem_cgroup_uncharge_cache_page(page);
 
 	/*
 	 * Some filesystems seem to re-dirty the page even after
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e93a4db93fbe..6f8b5b3b38b2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -734,6 +734,7 @@ void mem_cgroup_uncharge_page(struct page *page)
 void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 	VM_BUG_ON(page_mapped(page));
+	VM_BUG_ON(page->mapping);
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 11c6c56ec017..6602941bfab0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -330,8 +330,6 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	__inc_zone_page_state(newpage, NR_FILE_PAGES);
 
 	spin_unlock_irq(&mapping->tree_lock);
-	if (!PageSwapCache(newpage))
-		mem_cgroup_uncharge_cache_page(page);
 
 	return 0;
 }
@@ -341,6 +339,8 @@ static int migrate_page_move_mapping(struct address_space *mapping,
  */
 static void migrate_page_copy(struct page *newpage, struct page *page)
 {
+	int anon;
+
 	copy_highpage(newpage, page);
 
 	if (PageError(page))
@@ -378,8 +378,13 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 #endif
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
+	/* page->mapping contains a flag for PageAnon() */
+	anon = PageAnon(page);
 	page->mapping = NULL;
 
+	if (!anon) /* This page was removed from radix-tree. */
+		mem_cgroup_uncharge_cache_page(page);
+
 	/*
 	 * If any waiters have accumulated on the new page then
 	 * wake them up.
-- 
GitLab


From 5b4e655e948d8b6e9b0d001616d4c9d7e7ffe924 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:10 -0700
Subject: [PATCH 791/895] memcg: avoid accounting special pages

There are not-on-LRU pages which can be mapped and they are not worth to
be accounted.  (becasue we can't shrink them and need dirty codes to
handle specical case) We'd like to make use of usual objrmap/radix-tree's
protcol and don't want to account out-of-vm's control pages.

When special_mapping_fault() is called, page->mapping is tend to be NULL
and it's charged as Anonymous page.  insert_page() also handles some
special pages from drivers.

This patch is for avoiding to account special pages.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/controllers/memory.txt | 24 ++++++++++++++++--------
 mm/memory.c                          | 25 +++++++++++--------------
 mm/rmap.c                            |  4 ++--
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 9b53d5827361..1c07547d3f81 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -112,14 +112,22 @@ the per cgroup LRU.
 
 2.2.1 Accounting details
 
-All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted.
-RSS pages are accounted at the time of page_add_*_rmap() unless they've already
-been accounted for earlier. A file page will be accounted for as Page Cache;
-it's mapped into the page tables of a process, duplicate accounting is carefully
-avoided. Page Cache pages are accounted at the time of add_to_page_cache().
-The corresponding routines that remove a page from the page tables or removes
-a page from Page Cache is used to decrement the accounting counters of the
-cgroup.
+All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
+(some pages which never be reclaimable and will not be on global LRU
+ are not accounted. we just accounts pages under usual vm management.)
+
+RSS pages are accounted at page_fault unless they've already been accounted
+for earlier. A file page will be accounted for as Page Cache when it's
+inserted into inode (radix-tree). While it's mapped into the page tables of
+processes, duplicate accounting is carefully avoided.
+
+A RSS page is unaccounted when it's fully unmapped. A PageCache page is
+unaccounted when it's removed from radix-tree.
+
+At page migration, accounting information is kept.
+
+Note: we just account pages-on-lru because our purpose is to control amount
+of used pages. not-on-lru pages are tend to be out-of-control from vm view.
 
 2.3 Shared Page Accounting
 
diff --git a/mm/memory.c b/mm/memory.c
index 54cf20ee0a83..3a6c4a658325 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1323,18 +1323,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
-	if (retval)
-		goto out;
-
 	retval = -EINVAL;
 	if (PageAnon(page))
-		goto out_uncharge;
+		goto out;
 	retval = -ENOMEM;
 	flush_dcache_page(page);
 	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
-		goto out_uncharge;
+		goto out;
 	retval = -EBUSY;
 	if (!pte_none(*pte))
 		goto out_unlock;
@@ -1350,8 +1346,6 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 	return retval;
 out_unlock:
 	pte_unmap_unlock(pte, ptl);
-out_uncharge:
-	mem_cgroup_uncharge_page(page);
 out:
 	return retval;
 }
@@ -2463,6 +2457,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	pte_t entry;
 	int anon = 0;
+	int charged = 0;
 	struct page *dirty_page = NULL;
 	struct vm_fault vmf;
 	int ret;
@@ -2503,6 +2498,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 				ret = VM_FAULT_OOM;
 				goto out;
 			}
+			if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+				ret = VM_FAULT_OOM;
+				page_cache_release(page);
+				goto out;
+			}
+			charged = 1;
 			/*
 			 * Don't let another task, with possibly unlocked vma,
 			 * keep the mlocked page.
@@ -2543,11 +2544,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	}
 
-	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
-		ret = VM_FAULT_OOM;
-		goto out;
-	}
-
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 
 	/*
@@ -2585,7 +2581,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* no need to invalidate: a not-present page won't be cached */
 		update_mmu_cache(vma, address, entry);
 	} else {
-		mem_cgroup_uncharge_page(page);
+		if (charged)
+			mem_cgroup_uncharge_page(page);
 		if (anon)
 			page_cache_release(page);
 		else
diff --git a/mm/rmap.c b/mm/rmap.c
index 7e90bebbeb6c..8701d5fce732 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -727,8 +727,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 			page_clear_dirty(page);
 			set_page_dirty(page);
 		}
-
-		mem_cgroup_uncharge_page(page);
+		if (PageAnon(page))
+			mem_cgroup_uncharge_page(page);
 		__dec_zone_page_state(page,
 			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
 		/*
-- 
GitLab


From addb9efebb2ee2202d324e75b593b39868528f68 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:10 -0700
Subject: [PATCH 792/895] memcg: optimize per-cpu statistics

Some obvious optimization to memcg.

I found mem_cgroup_charge_statistics() is a little big (in object) and
does unnecessary address calclation.  This patch is for optimization to
reduce the size of this function.

And res_counter_charge() is 'likely' to succeed.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6f8b5b3b38b2..10846b9656aa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -66,11 +66,10 @@ struct mem_cgroup_stat {
 /*
  * For accounting under irq disable, no need for increment preempt count.
  */
-static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat,
+static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat,
 		enum mem_cgroup_stat_index idx, int val)
 {
-	int cpu = smp_processor_id();
-	stat->cpustat[cpu].count[idx] += val;
+	stat->count[idx] += val;
 }
 
 static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
@@ -190,18 +189,21 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
 {
 	int val = (charge)? 1 : -1;
 	struct mem_cgroup_stat *stat = &mem->stat;
+	struct mem_cgroup_stat_cpu *cpustat;
 
 	VM_BUG_ON(!irqs_disabled());
+
+	cpustat = &stat->cpustat[smp_processor_id()];
 	if (flags & PAGE_CGROUP_FLAG_CACHE)
-		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val);
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
 	else
-		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
 
 	if (charge)
-		__mem_cgroup_stat_add_safe(stat,
+		__mem_cgroup_stat_add_safe(cpustat,
 				MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
 	else
-		__mem_cgroup_stat_add_safe(stat,
+		__mem_cgroup_stat_add_safe(cpustat,
 				MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
 }
 
@@ -558,7 +560,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		css_get(&memcg->css);
 	}
 
-	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
+	while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
 		if (!(gfp_mask & __GFP_WAIT))
 			goto out;
 
-- 
GitLab


From c05555b572921c464d064d9267f7f7bc06d424fa Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:11 -0700
Subject: [PATCH 793/895] memcg: atomic ops for page_cgroup->flags

This patch makes page_cgroup->flags to be atomic_ops and define functions
(and macros) to access it.

Before trying to modify memory resource controller, this atomic operation
on flags is necessary.  Most of flags in this patch is for LRU and modfied
under mz->lru_lock but we'll add another flags which is not for LRU soon.
For example, we'll place LOCK bit on flags field.  We need atomic
operation to modify LRU bit without LOCK.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 122 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 82 insertions(+), 40 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 10846b9656aa..031682e7ef0c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -157,12 +157,46 @@ struct page_cgroup {
 	struct list_head lru;		/* per cgroup LRU list */
 	struct page *page;
 	struct mem_cgroup *mem_cgroup;
-	int flags;
+	unsigned long flags;
+};
+
+enum {
+	/* flags for mem_cgroup */
+	PCG_CACHE, /* charged as cache */
+	/* flags for LRU placement */
+	PCG_ACTIVE, /* page is active in this cgroup */
+	PCG_FILE, /* page is file system backed */
+	PCG_UNEVICTABLE, /* page is unevictableable */
 };
-#define PAGE_CGROUP_FLAG_CACHE	   (0x1)	/* charged as cache */
-#define PAGE_CGROUP_FLAG_ACTIVE    (0x2)	/* page is active in this cgroup */
-#define PAGE_CGROUP_FLAG_FILE	   (0x4)	/* page is file system backed */
-#define PAGE_CGROUP_FLAG_UNEVICTABLE (0x8)	/* page is unevictableable */
+
+#define TESTPCGFLAG(uname, lname)			\
+static inline int PageCgroup##uname(struct page_cgroup *pc)	\
+	{ return test_bit(PCG_##lname, &pc->flags); }
+
+#define SETPCGFLAG(uname, lname)			\
+static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
+	{ set_bit(PCG_##lname, &pc->flags);  }
+
+#define CLEARPCGFLAG(uname, lname)			\
+static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
+	{ clear_bit(PCG_##lname, &pc->flags);  }
+
+
+/* Cache flag is set only once (at allocation) */
+TESTPCGFLAG(Cache, CACHE)
+
+/* LRU management flags (from global-lru definition) */
+TESTPCGFLAG(File, FILE)
+SETPCGFLAG(File, FILE)
+CLEARPCGFLAG(File, FILE)
+
+TESTPCGFLAG(Active, ACTIVE)
+SETPCGFLAG(Active, ACTIVE)
+CLEARPCGFLAG(Active, ACTIVE)
+
+TESTPCGFLAG(Unevictable, UNEVICTABLE)
+SETPCGFLAG(Unevictable, UNEVICTABLE)
+CLEARPCGFLAG(Unevictable, UNEVICTABLE)
 
 static int page_cgroup_nid(struct page_cgroup *pc)
 {
@@ -177,15 +211,25 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
-	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
 	MEM_CGROUP_CHARGE_TYPE_SHMEM,	/* used by page migration of shmem */
+	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
+	NR_CHARGE_TYPE,
+};
+
+static const unsigned long
+pcg_default_flags[NR_CHARGE_TYPE] = {
+	((1 << PCG_CACHE) | (1 << PCG_FILE)),
+	((1 << PCG_ACTIVE)),
+	((1 << PCG_ACTIVE) | (1 << PCG_CACHE)),
+	0,
 };
 
 /*
  * Always modified under lru lock. Then, not necessary to preempt_disable()
  */
-static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
-					bool charge)
+static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
+					 struct page_cgroup *pc,
+					 bool charge)
 {
 	int val = (charge)? 1 : -1;
 	struct mem_cgroup_stat *stat = &mem->stat;
@@ -194,7 +238,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
 	VM_BUG_ON(!irqs_disabled());
 
 	cpustat = &stat->cpustat[smp_processor_id()];
-	if (flags & PAGE_CGROUP_FLAG_CACHE)
+	if (PageCgroupCache(pc))
 		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
 	else
 		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
@@ -295,18 +339,18 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 {
 	int lru = LRU_BASE;
 
-	if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE)
+	if (PageCgroupUnevictable(pc))
 		lru = LRU_UNEVICTABLE;
 	else {
-		if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
+		if (PageCgroupActive(pc))
 			lru += LRU_ACTIVE;
-		if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+		if (PageCgroupFile(pc))
 			lru += LRU_FILE;
 	}
 
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
-	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
+	mem_cgroup_charge_statistics(pc->mem_cgroup, pc, false);
 	list_del(&pc->lru);
 }
 
@@ -315,27 +359,27 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 {
 	int lru = LRU_BASE;
 
-	if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE)
+	if (PageCgroupUnevictable(pc))
 		lru = LRU_UNEVICTABLE;
 	else {
-		if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
+		if (PageCgroupActive(pc))
 			lru += LRU_ACTIVE;
-		if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+		if (PageCgroupFile(pc))
 			lru += LRU_FILE;
 	}
 
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
 	list_add(&pc->lru, &mz->lists[lru]);
 
-	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
+	mem_cgroup_charge_statistics(pc->mem_cgroup, pc, true);
 }
 
 static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
 {
 	struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
-	int active    = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-	int file      = pc->flags & PAGE_CGROUP_FLAG_FILE;
-	int unevictable = pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE;
+	int active    = PageCgroupActive(pc);
+	int file      = PageCgroupFile(pc);
+	int unevictable = PageCgroupUnevictable(pc);
 	enum lru_list from = unevictable ? LRU_UNEVICTABLE :
 				(LRU_FILE * !!file + !!active);
 
@@ -343,16 +387,20 @@ static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
 		return;
 
 	MEM_CGROUP_ZSTAT(mz, from) -= 1;
-
+	/*
+	 * However this is done under mz->lru_lock, another flags, which
+	 * are not related to LRU, will be modified from out-of-lock.
+	 * We have to use atomic set/clear flags.
+	 */
 	if (is_unevictable_lru(lru)) {
-		pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
-		pc->flags |= PAGE_CGROUP_FLAG_UNEVICTABLE;
+		ClearPageCgroupActive(pc);
+		SetPageCgroupUnevictable(pc);
 	} else {
 		if (is_active_lru(lru))
-			pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
+			SetPageCgroupActive(pc);
 		else
-			pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
-		pc->flags &= ~PAGE_CGROUP_FLAG_UNEVICTABLE;
+			ClearPageCgroupActive(pc);
+		ClearPageCgroupUnevictable(pc);
 	}
 
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
@@ -589,16 +637,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	 * If a page is accounted as a page cache, insert to inactive list.
 	 * If anon, insert to active list.
 	 */
-	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) {
-		pc->flags = PAGE_CGROUP_FLAG_CACHE;
-		if (page_is_file_cache(page))
-			pc->flags |= PAGE_CGROUP_FLAG_FILE;
-		else
-			pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
-	} else if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
-		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
-	else /* MEM_CGROUP_CHARGE_TYPE_SHMEM */
-		pc->flags = PAGE_CGROUP_FLAG_CACHE | PAGE_CGROUP_FLAG_ACTIVE;
+	pc->flags = pcg_default_flags[ctype];
 
 	lock_page_cgroup(page);
 	if (unlikely(page_get_page_cgroup(page))) {
@@ -677,8 +716,12 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	if (unlikely(!mm))
 		mm = &init_mm;
 
-	return mem_cgroup_charge_common(page, mm, gfp_mask,
+	if (page_is_file_cache(page))
+		return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+	else
+		return mem_cgroup_charge_common(page, mm, gfp_mask,
+				MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
 }
 
 /*
@@ -706,8 +749,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	VM_BUG_ON(pc->page != page);
 
 	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
-	    && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
-		|| page_mapped(page)))
+	    && ((PageCgroupCache(pc) || page_mapped(page))))
 		goto unlock;
 
 	mz = page_cgroup_zoneinfo(pc);
@@ -758,7 +800,7 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	if (pc) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
-		if (pc->flags & PAGE_CGROUP_FLAG_CACHE) {
+		if (PageCgroupCache(pc)) {
 			if (page_is_file_cache(page))
 				ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
 			else
-- 
GitLab


From 52d4b9ac0b985168009c2a57098324e67bae171f Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:16 -0700
Subject: [PATCH 794/895] memcg: allocate all page_cgroup at boot

Allocate all page_cgroup at boot and remove page_cgroup poitner from
struct page.  This patch adds an interface as

 struct page_cgroup *lookup_page_cgroup(struct page*)

All FLATMEM/DISCONTIGMEM/SPARSEMEM  and MEMORY_HOTPLUG is supported.

Remove page_cgroup pointer reduces the amount of memory by
 - 4 bytes per PAGE_SIZE.
 - 8 bytes per PAGE_SIZE
if memory controller is disabled. (even if configured.)

On usual 8GB x86-32 server, this saves 8MB of NORMAL_ZONE memory.
On my x86-64 server with 48GB of memory, this saves 96MB of memory.
I think this reduction makes sense.

By pre-allocation, kmalloc/kfree in charge/uncharge are removed.
This means
  - we're not necessary to be afraid of kmalloc faiulre.
    (this can happen because of gfp_mask type.)
  - we can avoid calling kmalloc/kfree.
  - we can avoid allocating tons of small objects which can be fragmented.
  - we can know what amount of memory will be used for this extra-lru handling.

I added printk message as

	"allocated %ld bytes of page_cgroup"
        "please try cgroup_disable=memory option if you don't want"

maybe enough informative for users.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h  |  13 +-
 include/linux/mm_types.h    |   3 -
 include/linux/mmzone.h      |  14 +-
 include/linux/page_cgroup.h | 103 +++++++++++++++
 mm/Makefile                 |   3 +-
 mm/memcontrol.c             | 247 ++++++++++++------------------------
 mm/page_alloc.c             |  12 +-
 mm/page_cgroup.c            | 237 ++++++++++++++++++++++++++++++++++
 8 files changed, 438 insertions(+), 194 deletions(-)
 create mode 100644 include/linux/page_cgroup.h
 create mode 100644 mm/page_cgroup.c

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ee1b2fcb4410..1fbe14d39521 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -27,9 +27,6 @@ struct mm_struct;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
-#define page_reset_bad_cgroup(page)	((page)->page_cgroup = 0)
-
-extern struct page_cgroup *page_get_page_cgroup(struct page *page);
 extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
@@ -72,16 +69,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
 					int priority, enum lru_list lru);
 
-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
-static inline void page_reset_bad_cgroup(struct page *page)
-{
-}
-
-static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
-{
-	return NULL;
-}
 
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
 static inline int mem_cgroup_charge(struct page *page,
 					struct mm_struct *mm, gfp_t gfp_mask)
 {
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9d49fa36bbef..fe825471d5aa 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -94,9 +94,6 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-	unsigned long page_cgroup;
-#endif
 };
 
 /*
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index da2d053a95f1..35a7b5e19465 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -601,8 +601,11 @@ typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
 	struct zonelist node_zonelists[MAX_ZONELISTS];
 	int nr_zones;
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
 	struct page *node_mem_map;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	struct page_cgroup *node_page_cgroup;
+#endif
 #endif
 	struct bootmem_data *bdata;
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -931,6 +934,7 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 #endif
 
 struct page;
+struct page_cgroup;
 struct mem_section {
 	/*
 	 * This is, logically, a pointer to an array of struct
@@ -948,6 +952,14 @@ struct mem_section {
 
 	/* See declaration of similar field in struct zone */
 	unsigned long *pageblock_flags;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	/*
+	 * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
+	 * section. (see memcontrol.h/page_cgroup.h about this.)
+	 */
+	struct page_cgroup *page_cgroup;
+	unsigned long pad;
+#endif
 };
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
new file mode 100644
index 000000000000..0fd39f2231ec
--- /dev/null
+++ b/include/linux/page_cgroup.h
@@ -0,0 +1,103 @@
+#ifndef __LINUX_PAGE_CGROUP_H
+#define __LINUX_PAGE_CGROUP_H
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#include <linux/bit_spinlock.h>
+/*
+ * Page Cgroup can be considered as an extended mem_map.
+ * A page_cgroup page is associated with every page descriptor. The
+ * page_cgroup helps us identify information about the cgroup
+ * All page cgroups are allocated at boot or memory hotplug event,
+ * then the page cgroup for pfn always exists.
+ */
+struct page_cgroup {
+	unsigned long flags;
+	struct mem_cgroup *mem_cgroup;
+	struct page *page;
+	struct list_head lru;		/* per cgroup LRU list */
+};
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat);
+void __init page_cgroup_init(void);
+struct page_cgroup *lookup_page_cgroup(struct page *page);
+
+enum {
+	/* flags for mem_cgroup */
+	PCG_LOCK,  /* page cgroup is locked */
+	PCG_CACHE, /* charged as cache */
+	PCG_USED, /* this object is in use. */
+	/* flags for LRU placement */
+	PCG_ACTIVE, /* page is active in this cgroup */
+	PCG_FILE, /* page is file system backed */
+	PCG_UNEVICTABLE, /* page is unevictableable */
+};
+
+#define TESTPCGFLAG(uname, lname)			\
+static inline int PageCgroup##uname(struct page_cgroup *pc)	\
+	{ return test_bit(PCG_##lname, &pc->flags); }
+
+#define SETPCGFLAG(uname, lname)			\
+static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
+	{ set_bit(PCG_##lname, &pc->flags);  }
+
+#define CLEARPCGFLAG(uname, lname)			\
+static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
+	{ clear_bit(PCG_##lname, &pc->flags);  }
+
+/* Cache flag is set only once (at allocation) */
+TESTPCGFLAG(Cache, CACHE)
+
+TESTPCGFLAG(Used, USED)
+CLEARPCGFLAG(Used, USED)
+
+/* LRU management flags (from global-lru definition) */
+TESTPCGFLAG(File, FILE)
+SETPCGFLAG(File, FILE)
+CLEARPCGFLAG(File, FILE)
+
+TESTPCGFLAG(Active, ACTIVE)
+SETPCGFLAG(Active, ACTIVE)
+CLEARPCGFLAG(Active, ACTIVE)
+
+TESTPCGFLAG(Unevictable, UNEVICTABLE)
+SETPCGFLAG(Unevictable, UNEVICTABLE)
+CLEARPCGFLAG(Unevictable, UNEVICTABLE)
+
+static inline int page_cgroup_nid(struct page_cgroup *pc)
+{
+	return page_to_nid(pc->page);
+}
+
+static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
+{
+	return page_zonenum(pc->page);
+}
+
+static inline void lock_page_cgroup(struct page_cgroup *pc)
+{
+	bit_spin_lock(PCG_LOCK, &pc->flags);
+}
+
+static inline int trylock_page_cgroup(struct page_cgroup *pc)
+{
+	return bit_spin_trylock(PCG_LOCK, &pc->flags);
+}
+
+static inline void unlock_page_cgroup(struct page_cgroup *pc)
+{
+	bit_spin_unlock(PCG_LOCK, &pc->flags);
+}
+
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+struct page_cgroup;
+
+static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+}
+
+static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+	return NULL;
+}
+#endif
+#endif
diff --git a/mm/Makefile b/mm/Makefile
index da4ccf015aea..c06b45a1ff5f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -33,5 +33,4 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
-obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
-
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 031682e7ef0c..d4a92b63e98e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -33,11 +33,11 @@
 #include <linux/seq_file.h>
 #include <linux/vmalloc.h>
 #include <linux/mm_inline.h>
+#include <linux/page_cgroup.h>
 
 #include <asm/uaccess.h>
 
 struct cgroup_subsys mem_cgroup_subsys __read_mostly;
-static struct kmem_cache *page_cgroup_cache __read_mostly;
 #define MEM_CGROUP_RECLAIM_RETRIES	5
 
 /*
@@ -135,79 +135,6 @@ struct mem_cgroup {
 };
 static struct mem_cgroup init_mem_cgroup;
 
-/*
- * We use the lower bit of the page->page_cgroup pointer as a bit spin
- * lock.  We need to ensure that page->page_cgroup is at least two
- * byte aligned (based on comments from Nick Piggin).  But since
- * bit_spin_lock doesn't actually set that lock bit in a non-debug
- * uniprocessor kernel, we should avoid setting it here too.
- */
-#define PAGE_CGROUP_LOCK_BIT 	0x0
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-#define PAGE_CGROUP_LOCK 	(1 << PAGE_CGROUP_LOCK_BIT)
-#else
-#define PAGE_CGROUP_LOCK	0x0
-#endif
-
-/*
- * A page_cgroup page is associated with every page descriptor. The
- * page_cgroup helps us identify information about the cgroup
- */
-struct page_cgroup {
-	struct list_head lru;		/* per cgroup LRU list */
-	struct page *page;
-	struct mem_cgroup *mem_cgroup;
-	unsigned long flags;
-};
-
-enum {
-	/* flags for mem_cgroup */
-	PCG_CACHE, /* charged as cache */
-	/* flags for LRU placement */
-	PCG_ACTIVE, /* page is active in this cgroup */
-	PCG_FILE, /* page is file system backed */
-	PCG_UNEVICTABLE, /* page is unevictableable */
-};
-
-#define TESTPCGFLAG(uname, lname)			\
-static inline int PageCgroup##uname(struct page_cgroup *pc)	\
-	{ return test_bit(PCG_##lname, &pc->flags); }
-
-#define SETPCGFLAG(uname, lname)			\
-static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
-	{ set_bit(PCG_##lname, &pc->flags);  }
-
-#define CLEARPCGFLAG(uname, lname)			\
-static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
-	{ clear_bit(PCG_##lname, &pc->flags);  }
-
-
-/* Cache flag is set only once (at allocation) */
-TESTPCGFLAG(Cache, CACHE)
-
-/* LRU management flags (from global-lru definition) */
-TESTPCGFLAG(File, FILE)
-SETPCGFLAG(File, FILE)
-CLEARPCGFLAG(File, FILE)
-
-TESTPCGFLAG(Active, ACTIVE)
-SETPCGFLAG(Active, ACTIVE)
-CLEARPCGFLAG(Active, ACTIVE)
-
-TESTPCGFLAG(Unevictable, UNEVICTABLE)
-SETPCGFLAG(Unevictable, UNEVICTABLE)
-CLEARPCGFLAG(Unevictable, UNEVICTABLE)
-
-static int page_cgroup_nid(struct page_cgroup *pc)
-{
-	return page_to_nid(pc->page);
-}
-
-static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
-{
-	return page_zonenum(pc->page);
-}
-
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
@@ -216,12 +143,18 @@ enum charge_type {
 	NR_CHARGE_TYPE,
 };
 
+/* only for here (for easy reading.) */
+#define PCGF_CACHE	(1UL << PCG_CACHE)
+#define PCGF_USED	(1UL << PCG_USED)
+#define PCGF_ACTIVE	(1UL << PCG_ACTIVE)
+#define PCGF_LOCK	(1UL << PCG_LOCK)
+#define PCGF_FILE	(1UL << PCG_FILE)
 static const unsigned long
 pcg_default_flags[NR_CHARGE_TYPE] = {
-	((1 << PCG_CACHE) | (1 << PCG_FILE)),
-	((1 << PCG_ACTIVE)),
-	((1 << PCG_ACTIVE) | (1 << PCG_CACHE)),
-	0,
+	PCGF_CACHE | PCGF_FILE | PCGF_USED | PCGF_LOCK, /* File Cache */
+	PCGF_ACTIVE | PCGF_USED | PCGF_LOCK, /* Anon */
+	PCGF_ACTIVE | PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
+	0, /* FORCE */
 };
 
 /*
@@ -303,37 +236,6 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 				struct mem_cgroup, css);
 }
 
-static inline int page_cgroup_locked(struct page *page)
-{
-	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
-{
-	VM_BUG_ON(!page_cgroup_locked(page));
-	page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
-}
-
-struct page_cgroup *page_get_page_cgroup(struct page *page)
-{
-	return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK);
-}
-
-static void lock_page_cgroup(struct page *page)
-{
-	bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static int try_lock_page_cgroup(struct page *page)
-{
-	return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void unlock_page_cgroup(struct page *page)
-{
-	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
 static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 			struct page_cgroup *pc)
 {
@@ -436,17 +338,16 @@ void mem_cgroup_move_lists(struct page *page, enum lru_list lru)
 	 * safely get to page_cgroup without it, so just try_lock it:
 	 * mem_cgroup_isolate_pages allows for page left on wrong list.
 	 */
-	if (!try_lock_page_cgroup(page))
+	pc = lookup_page_cgroup(page);
+	if (!trylock_page_cgroup(pc))
 		return;
-
-	pc = page_get_page_cgroup(page);
-	if (pc) {
+	if (pc && PageCgroupUsed(pc)) {
 		mz = page_cgroup_zoneinfo(pc);
 		spin_lock_irqsave(&mz->lru_lock, flags);
 		__mem_cgroup_move_lists(pc, lru);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
 	}
-	unlock_page_cgroup(page);
+	unlock_page_cgroup(pc);
 }
 
 /*
@@ -533,6 +434,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
 		if (scan >= nr_to_scan)
 			break;
+		if (unlikely(!PageCgroupUsed(pc)))
+			continue;
 		page = pc->page;
 
 		if (unlikely(!PageLRU(page)))
@@ -576,26 +479,27 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 {
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc;
-	unsigned long flags;
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup_per_zone *mz;
+	unsigned long flags;
 
-	pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
-	if (unlikely(pc == NULL))
-		goto err;
-
+	pc = lookup_page_cgroup(page);
+	/* can happen at boot */
+	if (unlikely(!pc))
+		return 0;
+	prefetchw(pc);
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
+
 	if (likely(!memcg)) {
 		rcu_read_lock();
 		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
 		if (unlikely(!mem)) {
 			rcu_read_unlock();
-			kmem_cache_free(page_cgroup_cache, pc);
 			return 0;
 		}
 		/*
@@ -631,36 +535,33 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		}
 	}
 
+
+	lock_page_cgroup(pc);
+	if (unlikely(PageCgroupUsed(pc))) {
+		unlock_page_cgroup(pc);
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
+		css_put(&mem->css);
+
+		goto done;
+	}
 	pc->mem_cgroup = mem;
-	pc->page = page;
 	/*
 	 * If a page is accounted as a page cache, insert to inactive list.
 	 * If anon, insert to active list.
 	 */
 	pc->flags = pcg_default_flags[ctype];
 
-	lock_page_cgroup(page);
-	if (unlikely(page_get_page_cgroup(page))) {
-		unlock_page_cgroup(page);
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
-		kmem_cache_free(page_cgroup_cache, pc);
-		goto done;
-	}
-	page_assign_page_cgroup(page, pc);
-
 	mz = page_cgroup_zoneinfo(pc);
+
 	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_add_list(mz, pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	unlock_page_cgroup(pc);
 
-	unlock_page_cgroup(page);
 done:
 	return 0;
 out:
 	css_put(&mem->css);
-	kmem_cache_free(page_cgroup_cache, pc);
-err:
 	return -ENOMEM;
 }
 
@@ -668,7 +569,8 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
 	if (mem_cgroup_subsys.disabled)
 		return 0;
-
+	if (PageCompound(page))
+		return 0;
 	/*
 	 * If already mapped, we don't have to account.
 	 * If page cache, page->mapping has address_space.
@@ -689,7 +591,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 {
 	if (mem_cgroup_subsys.disabled)
 		return 0;
-
+	if (PageCompound(page))
+		return 0;
 	/*
 	 * Corner case handling. This is called from add_to_page_cache()
 	 * in usual. But some FS (shmem) precharges this page before calling it
@@ -702,15 +605,16 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	if (!(gfp_mask & __GFP_WAIT)) {
 		struct page_cgroup *pc;
 
-		lock_page_cgroup(page);
-		pc = page_get_page_cgroup(page);
-		if (pc) {
-			VM_BUG_ON(pc->page != page);
-			VM_BUG_ON(!pc->mem_cgroup);
-			unlock_page_cgroup(page);
+
+		pc = lookup_page_cgroup(page);
+		if (!pc)
+			return 0;
+		lock_page_cgroup(pc);
+		if (PageCgroupUsed(pc)) {
+			unlock_page_cgroup(pc);
 			return 0;
 		}
-		unlock_page_cgroup(page);
+		unlock_page_cgroup(pc);
 	}
 
 	if (unlikely(!mm))
@@ -741,37 +645,39 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	/*
 	 * Check if our page_cgroup is valid
 	 */
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (unlikely(!pc))
-		goto unlock;
-
-	VM_BUG_ON(pc->page != page);
+	pc = lookup_page_cgroup(page);
+	if (unlikely(!pc || !PageCgroupUsed(pc)))
+		return;
 
-	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
-	    && ((PageCgroupCache(pc) || page_mapped(page))))
-		goto unlock;
+	lock_page_cgroup(pc);
+	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED && page_mapped(page))
+	     || !PageCgroupUsed(pc)) {
+		/* This happens at race in zap_pte_range() and do_swap_page()*/
+		unlock_page_cgroup(pc);
+		return;
+	}
+	ClearPageCgroupUsed(pc);
+	mem = pc->mem_cgroup;
 
 	mz = page_cgroup_zoneinfo(pc);
 	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_remove_list(mz, pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	unlock_page_cgroup(pc);
 
-	page_assign_page_cgroup(page, NULL);
-	unlock_page_cgroup(page);
-
-	mem = pc->mem_cgroup;
 	res_counter_uncharge(&mem->res, PAGE_SIZE);
 	css_put(&mem->css);
 
-	kmem_cache_free(page_cgroup_cache, pc);
 	return;
-unlock:
-	unlock_page_cgroup(page);
 }
 
 void mem_cgroup_uncharge_page(struct page *page)
 {
+	/* early check. */
+	if (page_mapped(page))
+		return;
+	if (page->mapping && !PageAnon(page))
+		return;
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
@@ -795,9 +701,9 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	if (mem_cgroup_subsys.disabled)
 		return 0;
 
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (pc) {
+	pc = lookup_page_cgroup(page);
+	lock_page_cgroup(pc);
+	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
 		if (PageCgroupCache(pc)) {
@@ -807,7 +713,7 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 				ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
 		}
 	}
-	unlock_page_cgroup(page);
+	unlock_page_cgroup(pc);
 	if (mem) {
 		ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
 			ctype, mem);
@@ -832,7 +738,7 @@ void mem_cgroup_end_migration(struct page *newpage)
 	 */
 	if (!newpage->mapping)
 		__mem_cgroup_uncharge_common(newpage,
-					 MEM_CGROUP_CHARGE_TYPE_FORCE);
+				MEM_CGROUP_CHARGE_TYPE_FORCE);
 	else if (PageAnon(newpage))
 		mem_cgroup_uncharge_page(newpage);
 }
@@ -918,6 +824,8 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 	while (!list_empty(list)) {
 		pc = list_entry(list->prev, struct page_cgroup, lru);
 		page = pc->page;
+		if (!PageCgroupUsed(pc))
+			break;
 		get_page(page);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
 		/*
@@ -932,8 +840,10 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 				count = FORCE_UNCHARGE_BATCH;
 				cond_resched();
 			}
-		} else
-			cond_resched();
+		} else {
+			spin_lock_irqsave(&mz->lru_lock, flags);
+			break;
+		}
 		spin_lock_irqsave(&mz->lru_lock, flags);
 	}
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -957,6 +867,8 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
 	while (mem->res.usage > 0) {
 		if (atomic_read(&mem->css.cgroup->count) > 0)
 			goto out;
+		/* This is for making all *used* pages to be on LRU. */
+		lru_add_drain_all();
 		for_each_node_state(node, N_POSSIBLE)
 			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
 				struct mem_cgroup_per_zone *mz;
@@ -965,6 +877,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
 				for_each_lru(l)
 					mem_cgroup_force_empty_list(mem, mz, l);
 			}
+		cond_resched();
 	}
 	ret = 0;
 out:
@@ -1175,8 +1088,8 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 	int node;
 
 	if (unlikely((cont->parent) == NULL)) {
+		page_cgroup_init();
 		mem = &init_mem_cgroup;
-		page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC);
 	} else {
 		mem = mem_cgroup_alloc();
 		if (!mem)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f2fc44ec1d44..d0a240fbb8bf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -44,7 +44,7 @@
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
-#include <linux/memcontrol.h>
+#include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 
 #include <asm/tlbflush.h>
@@ -223,17 +223,12 @@ static inline int bad_range(struct zone *zone, struct page *page)
 
 static void bad_page(struct page *page)
 {
-	void *pc = page_get_page_cgroup(page);
-
 	printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
 		"page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
 		current->comm, page, (int)(2*sizeof(unsigned long)),
 		(unsigned long)page->flags, page->mapping,
 		page_mapcount(page), page_count(page));
-	if (pc) {
-		printk(KERN_EMERG "cgroup:%p\n", pc);
-		page_reset_bad_cgroup(page);
-	}
+
 	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
 		KERN_EMERG "Backtrace:\n");
 	dump_stack();
@@ -457,7 +452,6 @@ static inline int free_pages_check(struct page *page)
 	free_page_mlock(page);
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
-		(page_get_page_cgroup(page) != NULL) |
 		(page_count(page) != 0)  |
 		(page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
 		bad_page(page);
@@ -603,7 +597,6 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 {
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
-		(page_get_page_cgroup(page) != NULL) |
 		(page_count(page) != 0)  |
 		(page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
 		bad_page(page);
@@ -3438,6 +3431,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 	pgdat->nr_zones = 0;
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	pgdat->kswapd_max_order = 0;
+	pgdat_page_cgroup_init(pgdat);
 	
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
new file mode 100644
index 000000000000..5d86550701f2
--- /dev/null
+++ b/mm/page_cgroup.c
@@ -0,0 +1,237 @@
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/bit_spinlock.h>
+#include <linux/page_cgroup.h>
+#include <linux/hash.h>
+#include <linux/memory.h>
+
+static void __meminit
+__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+{
+	pc->flags = 0;
+	pc->mem_cgroup = NULL;
+	pc->page = pfn_to_page(pfn);
+}
+static unsigned long total_usage;
+
+#if !defined(CONFIG_SPARSEMEM)
+
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+	pgdat->node_page_cgroup = NULL;
+}
+
+struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long offset;
+	struct page_cgroup *base;
+
+	base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
+	if (unlikely(!base))
+		return NULL;
+
+	offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
+	return base + offset;
+}
+
+static int __init alloc_node_page_cgroup(int nid)
+{
+	struct page_cgroup *base, *pc;
+	unsigned long table_size;
+	unsigned long start_pfn, nr_pages, index;
+
+	start_pfn = NODE_DATA(nid)->node_start_pfn;
+	nr_pages = NODE_DATA(nid)->node_spanned_pages;
+
+	table_size = sizeof(struct page_cgroup) * nr_pages;
+
+	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
+			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	if (!base)
+		return -ENOMEM;
+	for (index = 0; index < nr_pages; index++) {
+		pc = base + index;
+		__init_page_cgroup(pc, start_pfn + index);
+	}
+	NODE_DATA(nid)->node_page_cgroup = base;
+	total_usage += table_size;
+	return 0;
+}
+
+void __init page_cgroup_init(void)
+{
+
+	int nid, fail;
+
+	for_each_online_node(nid)  {
+		fail = alloc_node_page_cgroup(nid);
+		if (fail)
+			goto fail;
+	}
+	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
+	printk(KERN_INFO "please try cgroup_disable=memory option if you"
+	" don't want\n");
+	return;
+fail:
+	printk(KERN_CRIT "allocation of page_cgroup was failed.\n");
+	printk(KERN_CRIT "please try cgroup_disable=memory boot option\n");
+	panic("Out of memory");
+}
+
+#else /* CONFIG_FLAT_NODE_MEM_MAP */
+
+struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+	unsigned long pfn = page_to_pfn(page);
+	struct mem_section *section = __pfn_to_section(pfn);
+
+	return section->page_cgroup + pfn;
+}
+
+int __meminit init_section_page_cgroup(unsigned long pfn)
+{
+	struct mem_section *section;
+	struct page_cgroup *base, *pc;
+	unsigned long table_size;
+	int nid, index;
+
+	section = __pfn_to_section(pfn);
+
+	if (section->page_cgroup)
+		return 0;
+
+	nid = page_to_nid(pfn_to_page(pfn));
+
+	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+	base = kmalloc_node(table_size, GFP_KERNEL, nid);
+	if (!base)
+		base = vmalloc_node(table_size, nid);
+
+	if (!base) {
+		printk(KERN_ERR "page cgroup allocation failure\n");
+		return -ENOMEM;
+	}
+
+	for (index = 0; index < PAGES_PER_SECTION; index++) {
+		pc = base + index;
+		__init_page_cgroup(pc, pfn + index);
+	}
+
+	section = __pfn_to_section(pfn);
+	section->page_cgroup = base - pfn;
+	total_usage += table_size;
+	return 0;
+}
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __free_page_cgroup(unsigned long pfn)
+{
+	struct mem_section *ms;
+	struct page_cgroup *base;
+
+	ms = __pfn_to_section(pfn);
+	if (!ms || !ms->page_cgroup)
+		return;
+	base = ms->page_cgroup + pfn;
+	ms->page_cgroup = NULL;
+	if (is_vmalloc_addr(base))
+		vfree(base);
+	else
+		kfree(base);
+}
+
+int online_page_cgroup(unsigned long start_pfn,
+			unsigned long nr_pages,
+			int nid)
+{
+	unsigned long start, end, pfn;
+	int fail = 0;
+
+	start = start_pfn & (PAGES_PER_SECTION - 1);
+	end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
+
+	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
+		if (!pfn_present(pfn))
+			continue;
+		fail = init_section_page_cgroup(pfn);
+	}
+	if (!fail)
+		return 0;
+
+	/* rollback */
+	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
+		__free_page_cgroup(pfn);
+
+	return -ENOMEM;
+}
+
+int offline_page_cgroup(unsigned long start_pfn,
+		unsigned long nr_pages, int nid)
+{
+	unsigned long start, end, pfn;
+
+	start = start_pfn & (PAGES_PER_SECTION - 1);
+	end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
+
+	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
+		__free_page_cgroup(pfn);
+	return 0;
+
+}
+
+static int page_cgroup_callback(struct notifier_block *self,
+			       unsigned long action, void *arg)
+{
+	struct memory_notify *mn = arg;
+	int ret = 0;
+	switch (action) {
+	case MEM_GOING_ONLINE:
+		ret = online_page_cgroup(mn->start_pfn,
+				   mn->nr_pages, mn->status_change_nid);
+		break;
+	case MEM_CANCEL_ONLINE:
+	case MEM_OFFLINE:
+		offline_page_cgroup(mn->start_pfn,
+				mn->nr_pages, mn->status_change_nid);
+		break;
+	case MEM_GOING_OFFLINE:
+		break;
+	case MEM_ONLINE:
+	case MEM_CANCEL_OFFLINE:
+		break;
+	}
+	ret = notifier_from_errno(ret);
+	return ret;
+}
+
+#endif
+
+void __init page_cgroup_init(void)
+{
+	unsigned long pfn;
+	int fail = 0;
+
+	for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
+		if (!pfn_present(pfn))
+			continue;
+		fail = init_section_page_cgroup(pfn);
+	}
+	if (fail) {
+		printk(KERN_CRIT "try cgroup_disable=memory boot option\n");
+		panic("Out of memory");
+	} else {
+		hotplug_memory_notifier(page_cgroup_callback, 0);
+	}
+	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
+	printk(KERN_INFO "please try cgroup_disable=memory option if you don't"
+	" want\n");
+}
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+	return;
+}
+
+#endif
-- 
GitLab


From 40b6a76237563c70466ec7315f644ba87d57dbe5 Mon Sep 17 00:00:00 2001
From: Rakib Mullick <rakib.mullick@gmail.com>
Date: Sat, 18 Oct 2008 20:28:18 -0700
Subject: [PATCH 795/895] cpuset.c: remove extra variable

Remove the use of int cpus_nonempty variable from 'update_flag' function.

Signed-off-by: Md.Rakib H. Mullick <rakib.mullick@gmail.com>
Acked-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpuset.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index eab7bd6628e0..81d4d2426fd4 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1172,7 +1172,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 {
 	struct cpuset trialcs;
 	int err;
-	int cpus_nonempty, balance_flag_changed;
+	int balance_flag_changed;
 
 	trialcs = *cs;
 	if (turning_on)
@@ -1184,7 +1184,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	if (err < 0)
 		return err;
 
-	cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
 	balance_flag_changed = (is_sched_load_balance(cs) !=
 		 			is_sched_load_balance(&trialcs));
 
@@ -1192,7 +1191,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	cs->flags = trialcs.flags;
 	mutex_unlock(&callback_mutex);
 
-	if (cpus_nonempty && balance_flag_changed)
+	if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed)
 		async_rebuild_sched_domains();
 
 	return 0;
-- 
GitLab


From 85dd030edb174376ef43bc95e5fae4755af1ec98 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:18 -0700
Subject: [PATCH 796/895] seq_file: don't call bitmap_scnprintf_len()

"m->count + len < m->size" is true commonly, so bitmap_scnprintf()
is commonly called. this fix saves a call to bitmap_scnprintf_len().

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/seq_file.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index bd20f7f5a933..11c85fec6b4f 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -452,17 +452,18 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
 
 int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
 {
-	size_t len = bitmap_scnprintf_len(nr_bits);
-
-	if (m->count + len < m->size) {
-		bitmap_scnprintf(m->buf + m->count, m->size - m->count,
-				 bits, nr_bits);
-		m->count += len;
-		return 0;
+	if (m->count < m->size) {
+		int len = bitmap_scnprintf(m->buf + m->count,
+				m->size - m->count, bits, nr_bits);
+		if (m->count + len < m->size) {
+			m->count += len;
+			return 0;
+		}
 	}
 	m->count = m->size;
 	return -1;
 }
+EXPORT_SYMBOL(seq_bitmap);
 
 static void *single_start(struct seq_file *p, loff_t *pos)
 {
-- 
GitLab


From 3eda20118000941e7e8994fc5fac8706d8c10f00 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:19 -0700
Subject: [PATCH 797/895] seq_file: add seq_cpumask_list(), seq_nodemask_list()

seq_cpumask_list(), seq_nodemask_list() are very like seq_cpumask(),
seq_nodemask(), but they print human readable string.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/seq_file.c            | 16 ++++++++++++++++
 include/linux/seq_file.h | 13 +++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 11c85fec6b4f..eba2eabcd2b8 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -465,6 +465,22 @@ int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
 }
 EXPORT_SYMBOL(seq_bitmap);
 
+int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
+		unsigned int nr_bits)
+{
+	if (m->count < m->size) {
+		int len = bitmap_scnlistprintf(m->buf + m->count,
+				m->size - m->count, bits, nr_bits);
+		if (m->count + len < m->size) {
+			m->count += len;
+			return 0;
+		}
+	}
+	m->count = m->size;
+	return -1;
+}
+EXPORT_SYMBOL(seq_bitmap_list);
+
 static void *single_start(struct seq_file *p, loff_t *pos)
 {
 	return NULL + (*pos == 0);
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index a1783b229ef4..dc50bcc282a8 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -60,6 +60,19 @@ static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask)
 	return seq_bitmap(m, mask->bits, MAX_NUMNODES);
 }
 
+int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
+		unsigned int nr_bits);
+
+static inline int seq_cpumask_list(struct seq_file *m, cpumask_t *mask)
+{
+	return seq_bitmap_list(m, mask->bits, NR_CPUS);
+}
+
+static inline int seq_nodemask_list(struct seq_file *m, nodemask_t *mask)
+{
+	return seq_bitmap_list(m, mask->bits, MAX_NUMNODES);
+}
+
 int single_open(struct file *, int (*)(struct seq_file *, void *), void *);
 int single_release(struct inode *, struct file *);
 void *__seq_open_private(struct file *, const struct seq_operations *, int);
-- 
GitLab


From 30e8e13603c247c301fdacabef2a765c84840994 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:20 -0700
Subject: [PATCH 798/895] cpuset: use seq_*mask_* to print masks

1) seq_file excepts that m->count == m->size when it's buf is full,
   so current code will causes bugs when buf is overflow.

2) There is not too good that cpuset accesses struct seq_file's
   fields directly.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpuset.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 81d4d2426fd4..3e00526f52ec 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2436,19 +2436,15 @@ const struct file_operations proc_cpuset_operations = {
 void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
 {
 	seq_printf(m, "Cpus_allowed:\t");
-	m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
-					task->cpus_allowed);
+	seq_cpumask(m, &task->cpus_allowed);
 	seq_printf(m, "\n");
 	seq_printf(m, "Cpus_allowed_list:\t");
-	m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count,
-					task->cpus_allowed);
+	seq_cpumask_list(m, &task->cpus_allowed);
 	seq_printf(m, "\n");
 	seq_printf(m, "Mems_allowed:\t");
-	m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
-					task->mems_allowed);
+	seq_nodemask(m, &task->mems_allowed);
 	seq_printf(m, "\n");
 	seq_printf(m, "Mems_allowed_list:\t");
-	m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count,
-					task->mems_allowed);
+	seq_nodemask_list(m, &task->mems_allowed);
 	seq_printf(m, "\n");
 }
-- 
GitLab


From c4596435404976b0ded9cdf18b456ca2e1408ddd Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Sat, 18 Oct 2008 20:28:21 -0700
Subject: [PATCH 799/895] bitmask: remove bitmap_scnprintf_len()

bitmap_scnprintf_len() is not used now, so we remove it.

Otherwise we have to maintain it and make its return
value always equal to bitmap_scnprintf()'s return value.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h |  1 -
 lib/bitmap.c           | 11 -----------
 2 files changed, 12 deletions(-)

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 89781fd48859..1abfe664c444 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -110,7 +110,6 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits);
 
 extern int bitmap_scnprintf(char *buf, unsigned int len,
 			const unsigned long *src, int nbits);
-extern int bitmap_scnprintf_len(unsigned int nr_bits);
 extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
 			unsigned long *dst, int nbits);
 extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06fb57c86de0..482df94ea21e 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -315,17 +315,6 @@ int bitmap_scnprintf(char *buf, unsigned int buflen,
 }
 EXPORT_SYMBOL(bitmap_scnprintf);
 
-/**
- * bitmap_scnprintf_len - return buffer length needed to convert
- * bitmap to an ASCII hex string
- * @nr_bits: number of bits to be converted
- */
-int bitmap_scnprintf_len(unsigned int nr_bits)
-{
-	unsigned int nr_nibbles = ALIGN(nr_bits, 4) / 4;
-	return nr_nibbles + ALIGN(nr_nibbles, CHUNKSZ / 4) / (CHUNKSZ / 4) - 1;
-}
-
 /**
  * __bitmap_parse - convert an ASCII hex string into a bitmap.
  * @buf: pointer to buffer containing string.
-- 
GitLab


From b747c8c102cc0677a7a8056a093f58d7c9b500e7 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:21 -0700
Subject: [PATCH 800/895] make ptrace_untrace() static

ptrace_untrace() can now become static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h | 1 -
 kernel/ptrace.c        | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index ea7416c901d1..22641d5d45df 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -94,7 +94,6 @@ extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
 			  struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
-extern void ptrace_untrace(struct task_struct *child);
 #define PTRACE_MODE_READ   1
 #define PTRACE_MODE_ATTACH 2
 /* Returns 0 on success, -errno on denial. */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 356699a96d56..1e68e4c39e2c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -45,7 +45,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
  * TASK_TRACED, resume it now.
  * Requires that irqs be disabled.
  */
-void ptrace_untrace(struct task_struct *child)
+static void ptrace_untrace(struct task_struct *child)
 {
 	spin_lock(&child->sighand->siglock);
 	if (task_is_traced(child)) {
-- 
GitLab


From 6409324b385f3f63a03645b4422e3be67348d922 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Sat, 18 Oct 2008 20:28:22 -0700
Subject: [PATCH 801/895] coredump: format_corename: don't append .%pid if
 multi-threaded

If the coredumping is multi-threaded, format_corename() appends .%pid to
the corename.  This was needed before the proper multi-thread core dump
support, now all the threads in the mm go into a single unified core file.

Remove this special case, it is not even documented and we have "%p"
and core_uses_pid.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Roland McGrath <roland@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: La Monte Yarroll <piggy@laurelnetworks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index a41e7902ed0b..4e834f16d9da 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1386,7 +1386,7 @@ EXPORT_SYMBOL(set_binfmt);
  * name into corename, which must have space for at least
  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
  */
-static int format_corename(char *corename, int nr_threads, long signr)
+static int format_corename(char *corename, long signr)
 {
 	const char *pat_ptr = core_pattern;
 	int ispipe = (*pat_ptr == '|');
@@ -1493,8 +1493,7 @@ static int format_corename(char *corename, int nr_threads, long signr)
 	 * If core_pattern does not include a %p (as is the default)
 	 * and core_uses_pid is set, then .%pid will be appended to
 	 * the filename. Do not do this for piped commands. */
-	if (!ispipe && !pid_in_pattern
-	    && (core_uses_pid || nr_threads)) {
+	if (!ispipe && !pid_in_pattern && core_uses_pid) {
 		rc = snprintf(out_ptr, out_end - out_ptr,
 			      ".%d", task_tgid_vnr(current));
 		if (rc > out_end - out_ptr)
@@ -1757,7 +1756,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	 * uses lock_kernel()
 	 */
  	lock_kernel();
-	ispipe = format_corename(corename, retval, signr);
+	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 	/*
 	 * Don't bother to check the RLIMIT_CORE value if core_pattern points
-- 
GitLab


From 656eb2cd5da153762f2e8419ca117ce12ef522c3 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Sat, 18 Oct 2008 20:28:23 -0700
Subject: [PATCH 802/895] add CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS

This adds a kconfig option to change the /proc/PID/coredump_filter default.
Fedora has been carrying a trivial patch to change the hard-wired value for
this default, since Fedora 8.  The default default can't change safely
because there are old GDB versions out there (all before 6.7) that are
confused by the core dump files created by the MMF_DUMP_ELF_HEADERS setting.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Kawai Hidehiro <hidehiro.kawai.ez@hitachi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: David Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Kconfig.binfmt     | 22 ++++++++++++++++++++++
 include/linux/sched.h |  8 +++++++-
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 801db1341811..ce9fb3fbfae4 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -40,6 +40,28 @@ config BINFMT_ELF_FDPIC
 
 	  It is also possible to run FDPIC ELF binaries on MMU linux also.
 
+config CORE_DUMP_DEFAULT_ELF_HEADERS
+	bool "Write ELF core dumps with partial segments"
+	default n
+	depends on BINFMT_ELF
+	help
+	  ELF core dump files describe each memory mapping of the crashed
+	  process, and can contain or omit the memory contents of each one.
+	  The contents of an unmodified text mapping are omitted by default.
+
+	  For an unmodified text mapping of an ELF object, including just
+	  the first page of the file in a core dump makes it possible to
+	  identify the build ID bits in the file, without paying the i/o
+	  cost and disk space to dump all the text.  However, versions of
+	  GDB before 6.7 are confused by ELF core dump files in this format.
+
+	  The core dump behavior can be controlled per process using
+	  the /proc/PID/coredump_filter pseudo-file; this setting is
+	  inherited.  See Documentation/filesystems/proc.txt for details.
+
+	  This config option changes the default setting of coredump_filter
+	  seen at boot time.  If unsure, say N.
+
 config BINFMT_FLAT
 	bool "Kernel support for flat binaries"
 	depends on !MMU && (!FRV || BROKEN)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 017cc914ef1f..f52dbd3587a7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -411,7 +411,13 @@ extern int get_dumpable(struct mm_struct *mm);
 	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
 #define MMF_DUMP_FILTER_DEFAULT \
 	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED) |\
-	 (1 << MMF_DUMP_HUGETLB_PRIVATE))
+	 (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
+
+#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
+# define MMF_DUMP_MASK_DEFAULT_ELF	(1 << MMF_DUMP_ELF_HEADERS)
+#else
+# define MMF_DUMP_MASK_DEFAULT_ELF	0
+#endif
 
 struct sighand_struct {
 	atomic_t		count;
-- 
GitLab


From 293adee601bcd4cdb5076a9bda187137de17e96e Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Sat, 18 Oct 2008 20:28:24 -0700
Subject: [PATCH 803/895] kthread_bind: use
 wait_task_inactive(TASK_UNINTERRUPTIBLE)

Now that wait_task_inactive(task, state) checks task->state == state,
we can simplify the code and make this debugging check more robust.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kthread.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 96cff2f8710b..14ec64fe175a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -171,12 +171,11 @@ EXPORT_SYMBOL(kthread_create);
  */
 void kthread_bind(struct task_struct *k, unsigned int cpu)
 {
-	if (k->state != TASK_UNINTERRUPTIBLE) {
+	/* Must have done schedule() in kthread() before we set_task_cpu */
+	if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) {
 		WARN_ON(1);
 		return;
 	}
-	/* Must have done schedule() in kthread() before we set_task_cpu */
-	wait_task_inactive(k, 0);
 	set_task_cpu(k, cpu);
 	k->cpus_allowed = cpumask_of_cpu(cpu);
 	k->rt.nr_cpus_allowed = 1;
-- 
GitLab


From 57cac4d1880527e0baf6c2fda529d2ad1d815aec Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Sat, 18 Oct 2008 20:28:25 -0700
Subject: [PATCH 804/895] kdump: make elfcorehdr_addr independent of
 CONFIG_PROC_VMCORE

o elfcorehdr_addr is used by not only the code under CONFIG_PROC_VMCORE
  but also by the code which is not inside CONFIG_PROC_VMCORE.  For
  example, is_kdump_kernel() is used by powerpc code to determine if
  kernel is booting after a panic then use previous kernel's TCE table.
  So even if CONFIG_PROC_VMCORE is not set in second kernel, one should be
  able to correctly determine that we are booting after a panic and setup
  calgary iommu accordingly.

o So remove the assumption that elfcorehdr_addr is under
  CONFIG_PROC_VMCORE.

o Move definition of elfcorehdr_addr to arch dependent crash files.
  (Unfortunately crash dump does not have an arch independent file
  otherwise that would have been the best place).

o kexec.c is not the right place as one can Have CRASH_DUMP enabled in
  second kernel without KEXEC being enabled.

o I don't see sh setup code parsing the command line for
  elfcorehdr_addr.  I am wondering how does vmcore interface work on sh.
  Anyway, I am atleast defining elfcoredhr_addr so that compilation is not
  broken on sh.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Simon Horman <horms@verge.net.au>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/crash_dump.c    |  4 ++++
 arch/ia64/kernel/setup.c         |  9 ++++++++-
 arch/powerpc/kernel/crash_dump.c | 10 ++++++++--
 arch/sh/kernel/crash_dump.c      |  3 +++
 arch/x86/kernel/crash_dump_32.c  |  3 +++
 arch/x86/kernel/crash_dump_64.c  |  3 +++
 arch/x86/kernel/setup.c          |  8 +++++++-
 fs/proc/vmcore.c                 |  3 ---
 include/linux/crash_dump.h       | 14 ++++++++++----
 9 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c
index da60e90eeeb1..23e91290e41f 100644
--- a/arch/ia64/kernel/crash_dump.c
+++ b/arch/ia64/kernel/crash_dump.c
@@ -8,10 +8,14 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <linux/crash_dump.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index de636b215677..a0286be6c235 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -478,7 +478,12 @@ static __init int setup_nomca(char *s)
 }
 early_param("nomca", setup_nomca);
 
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+#ifdef CONFIG_CRASH_DUMP
 /* elfcorehdr= specifies the location of elf core header
  * stored by the crashed kernel.
  */
@@ -491,7 +496,9 @@ static int __init parse_elfcorehdr(char *arg)
 	return 0;
 }
 early_param("elfcorehdr", parse_elfcorehdr);
+#endif
 
+#ifdef CONFIG_PROC_VMCORE
 int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
 {
 	unsigned long length;
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index a323c9b32ee1..97e056379728 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -27,6 +27,9 @@
 #define DBG(fmt...)
 #endif
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 void __init reserve_kdump_trampoline(void)
 {
 	lmb_reserve(0, KDUMP_RESERVE_LIMIT);
@@ -66,7 +69,11 @@ void __init setup_kdump_trampoline(void)
 	DBG(" <- setup_kdump_trampoline()\n");
 }
 
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
 static int __init parse_elfcorehdr(char *p)
 {
 	if (p)
@@ -75,7 +82,6 @@ static int __init parse_elfcorehdr(char *p)
 	return 1;
 }
 __setup("elfcorehdr=", parse_elfcorehdr);
-#endif
 
 static int __init parse_savemaxmem(char *p)
 {
diff --git a/arch/sh/kernel/crash_dump.c b/arch/sh/kernel/crash_dump.c
index 4a2ecbe27d8e..95d216255565 100644
--- a/arch/sh/kernel/crash_dump.c
+++ b/arch/sh/kernel/crash_dump.c
@@ -10,6 +10,9 @@
 #include <linux/io.h>
 #include <asm/uaccess.h>
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 72d0c56c1b48..f7cdb3b457aa 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -13,6 +13,9 @@
 
 static void *kdump_buf_page;
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index e90a60ef10c2..045b36cada65 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -10,6 +10,9 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2255782e8d4b..b2c97874ec0f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -561,7 +561,13 @@ static void __init reserve_standard_io_resources(void)
 
 }
 
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+
+#ifdef CONFIG_CRASH_DUMP
 /* elfcorehdr= specifies the location of elf core header
  * stored by the crashed kernel. This option will be passed
  * by kexec loader to the capture kernel.
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 841368b87a29..4c65ca432d30 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -32,9 +32,6 @@ static size_t elfcorebuf_sz;
 /* Total size of vmcore file. */
 static u64 vmcore_size;
 
-/* Stores the physical address of elf header of crash image. */
-unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-
 struct proc_dir_entry *proc_vmcore = NULL;
 
 /* Reads a page from the oldmem device from given offset. */
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 025e4f575103..de027d1db745 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -9,11 +9,7 @@
 
 #define ELFCORE_ADDR_MAX	(-1ULL)
 
-#ifdef CONFIG_PROC_VMCORE
 extern unsigned long long elfcorehdr_addr;
-#else
-static const unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-#endif
 
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
@@ -28,6 +24,16 @@ extern struct proc_dir_entry *proc_vmcore;
 
 #define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
 
+/*
+ * is_kdump_kernel() checks whether this kernel is booting after a panic of
+ * previous kernel or not. This is determined by checking if previous kernel
+ * has passed the elf core header address on command line.
+ *
+ * This is not just a test if CONFIG_CRASH_DUMP is enabled or not. It will
+ * return 1 if CONFIG_CRASH_DUMP=y and if kernel is booting after a panic of
+ * previous kernel.
+ */
+
 static inline int is_kdump_kernel(void)
 {
 	return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
-- 
GitLab


From e515a0d60066c802cc605a5d9c446948f7691519 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Sat, 18 Oct 2008 20:28:27 -0700
Subject: [PATCH 805/895] kdump: update elfcorehdr documentation to reflect
 supported architectures

IA64, PPC and SH also support the elfcorehdr command line.

Signed-off-by: Simon Horman <horms@verge.net.au>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d4f4875fc7c6..b49fcee5e9cc 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -690,7 +690,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			See Documentation/block/as-iosched.txt and
 			Documentation/block/deadline-iosched.txt for details.
 
-	elfcorehdr=	[X86-32, X86_64]
+	elfcorehdr=	[IA64,PPC,SH,X86-32,X86_64]
 			Specifies physical address of start of kernel core
 			image elf header. Generally kexec loader will
 			pass this option to capture kernel.
-- 
GitLab


From 630bf20747e27391b20f137a5be2edb4235ca8fa Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Sat, 18 Oct 2008 20:28:28 -0700
Subject: [PATCH 806/895] kdump: use is_kdump_kernel() in sba_init()

o Make use of is_kdump_kernel() rather than checking elfcorehdr_addr directly.

o Remove CONFIG_CRASH_DUMP as is_kdump_kernel() is safe to call anywhere

o Remove CONFIG_PROC_FS as it is bogus, the check
  should occur regardless of if CONFIG_PROC_FS is set or not.

Signed-off-by: Simon Horman <horms@verge.net.au>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/hp/common/sba_iommu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 4956be40d7b5..d98f0f4ff83f 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -2070,14 +2070,13 @@ sba_init(void)
 	if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
 		return 0;
 
-#if defined(CONFIG_IA64_GENERIC) && defined(CONFIG_CRASH_DUMP) && \
-        defined(CONFIG_PROC_FS)
+#if defined(CONFIG_IA64_GENERIC)
 	/* If we are booting a kdump kernel, the sba_iommu will
 	 * cause devices that were not shutdown properly to MCA
 	 * as soon as they are turned back on.  Our only option for
 	 * a successful kdump kernel boot is to use the swiotlb.
 	 */
-	if (elfcorehdr_addr < ELFCORE_ADDR_MAX) {
+	if (is_kdump_kernel()) {
 		if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
 			panic("Unable to initialize software I/O TLB:"
 				  " Try machvec=dig boot option");
-- 
GitLab


From 85a0ee342e0c06c19d78fdf48307211c6cf18fcb Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Sat, 18 Oct 2008 20:28:29 -0700
Subject: [PATCH 807/895] kdump: add is_vmcore_usable() and vmcore_unusable()

The usage of elfcorehdr_addr has changed recently such that being set to
ELFCORE_ADDR_MAX is used by is_kdump_kernel() to indicate if the code is
executing in a kernel executed as a crash kernel.

However, arch/ia64/kernel/setup.c:reserve_elfcorehdr will rest
elfcorehdr_addr to ELFCORE_ADDR_MAX on error, which means any subsequent
calls to is_kdump_kernel() will return 0, even though they should return
1.

Ok, at this point in time there are no subsequent calls, but I think its
fair to say that there is ample scope for error or at the very least
confusion.

This patch add an extra state, ELFCORE_ADDR_ERR, which indicates that
elfcorehdr_addr was passed on the command line, and thus execution is
taking place in a crashdump kernel, but vmcore can't be used for some
reason.  This is tested for using is_vmcore_usable() and set using
vmcore_unusable().  A subsequent patch makes use of this new code.

To summarise, the states that elfcorehdr_addr can now be in are as follows:

ELFCORE_ADDR_MAX: not a crashdump kernel
ELFCORE_ADDR_ERR: crashdump kernel but vmcore is unusable
any other value:  crash dump kernel and vmcore is usable

Signed-off-by: Simon Horman <horms@verge.net.au>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/setup.c   |  4 ++--
 fs/proc/vmcore.c           |  2 +-
 include/linux/crash_dump.h | 24 ++++++++++++++++++++++++
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index a0286be6c235..60286522d54a 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -509,11 +509,11 @@ int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
 	 * to work properly.
 	 */
 
-	if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
+	if (!is_vmcore_usable())
 		return -EINVAL;
 
 	if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
-		elfcorehdr_addr = ELFCORE_ADDR_MAX;
+		vmcore_unusable();
 		return -EINVAL;
 	}
 
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 4c65ca432d30..cd9ca67f841b 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -644,7 +644,7 @@ static int __init vmcore_init(void)
 	int rc = 0;
 
 	/* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
-	if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX))
+	if (!(is_vmcore_usable()))
 		return rc;
 	rc = parse_crash_elf_headers();
 	if (rc) {
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index de027d1db745..0acf3b737e2e 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -8,6 +8,7 @@
 #include <linux/proc_fs.h>
 
 #define ELFCORE_ADDR_MAX	(-1ULL)
+#define ELFCORE_ADDR_ERR	(-2ULL)
 
 extern unsigned long long elfcorehdr_addr;
 
@@ -38,6 +39,29 @@ static inline int is_kdump_kernel(void)
 {
 	return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
 }
+
+/* is_vmcore_usable() checks if the kernel is booting after a panic and
+ * the vmcore region is usable.
+ *
+ * This makes use of the fact that due to alignment -2ULL is not
+ * a valid pointer, much in the vain of IS_ERR(), except
+ * dealing directly with an unsigned long long rather than a pointer.
+ */
+
+static inline int is_vmcore_usable(void)
+{
+	return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
+}
+
+/* vmcore_unusable() marks the vmcore as unusable,
+ * without disturbing the logic of is_kdump_kernel()
+ */
+
+static inline void vmcore_unusable(void)
+{
+	if (is_kdump_kernel())
+		elfcorehdr_addr = ELFCORE_ADDR_ERR;
+}
 #else /* !CONFIG_CRASH_DUMP */
 static inline int is_kdump_kernel(void) { return 0; }
 #endif /* CONFIG_CRASH_DUMP */
-- 
GitLab


From d9a9855d0b06ca6d6cc92596fedcc03f8512e062 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Sat, 18 Oct 2008 20:28:29 -0700
Subject: [PATCH 808/895] always reserve elfcore header memory in crash kernel

elfcore header memory needs to be reserved in a crash kernel.  This means
that the relevant code should be protected by CONFIG_CRASH_DUMP rather
than CONFIG_PROC_VMCORE.

Signed-off-by: Simon Horman <horms@verge.net.au>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/efi.c   | 2 +-
 arch/ia64/kernel/setup.c | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 51b75cea7018..efaff15d8cf1 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1335,7 +1335,7 @@ kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n)
 }
 #endif
 
-#ifdef CONFIG_PROC_VMCORE
+#ifdef CONFIG_CRASH_DUMP
 /* locate the size find a the descriptor at a certain address */
 unsigned long __init
 vmcore_find_descriptor_size (unsigned long address)
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 60286522d54a..916ba898237f 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -352,7 +352,7 @@ reserve_memory (void)
 	}
 #endif
 
-#ifdef CONFIG_PROC_VMCORE
+#ifdef CONFIG_CRASH_KERNEL
 	if (reserve_elfcorehdr(&rsvd_region[n].start,
 			       &rsvd_region[n].end) == 0)
 		n++;
@@ -496,9 +496,7 @@ static int __init parse_elfcorehdr(char *arg)
 	return 0;
 }
 early_param("elfcorehdr", parse_elfcorehdr);
-#endif
 
-#ifdef CONFIG_PROC_VMCORE
 int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
 {
 	unsigned long length;
-- 
GitLab


From acd99dbf54020f5c80b9aa2f2ea86f43cb285b02 Mon Sep 17 00:00:00 2001
From: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
Date: Sat, 18 Oct 2008 20:28:30 -0700
Subject: [PATCH 809/895] kdump: add vmlist.addr to vmcoreinfo for x86 vmalloc
 translation.

Add the symbols 'vmlist' and offset 'vm_struct.addr' to the vmcoreinfo[1]
data for i386 vmalloc translation.

makedumpfile[2] needs VMALLOC_START value for distinguishing a vmalloc
address or not, because it should choose suitable translation method.  If
applying this patch, makedumpfile will be able to take VMALLOC_START value
from 'vmlist.addr'.

vmcoreinfo[1]:
The vmcoreinfo data has the minimum debugging information only for dump
filtering. makedumpfile[2] uses it to distinguish unnecessary pages and
creates a small dumpfile.

makedumpfile[2]:
dump filtering command
https://sourceforge.net/projects/makedumpfile/

Signed-off-by: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kexec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/kexec.c b/kernel/kexec.c
index aef265325cd3..777ac458ac99 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1371,6 +1371,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_SYMBOL(node_online_map);
 	VMCOREINFO_SYMBOL(swapper_pg_dir);
 	VMCOREINFO_SYMBOL(_stext);
+	VMCOREINFO_SYMBOL(vmlist);
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 	VMCOREINFO_SYMBOL(mem_map);
@@ -1406,6 +1407,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_OFFSET(free_area, free_list);
 	VMCOREINFO_OFFSET(list_head, next);
 	VMCOREINFO_OFFSET(list_head, prev);
+	VMCOREINFO_OFFSET(vm_struct, addr);
 	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
 	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
 	VMCOREINFO_NUMBER(NR_FREE_PAGES);
-- 
GitLab


From b231cca4381ee15ec99afbfb244fbc0324869927 Mon Sep 17 00:00:00 2001
From: Joe Korty <joe.korty@ccur.com>
Date: Sat, 18 Oct 2008 20:28:32 -0700
Subject: [PATCH 810/895] message queues: increase range limits

Increase the range of various posix message queue limits.

Posix gives the message queue user the ability to 'trade off' the maximum
size of messages with the number of possible messages that can be 'in
flight'.  Linux currently makes this trade off more restrictive than it
needs to be.

In particular, the maximum message size today can be made no smaller than
8192.  This greatly restricts those applications that would like to have
the ability to post large numbers of very small messages.

So this task lowers the limit that the maximum message size can be set to,
from 8192 to 128.  It also lowers the limit that the maximum #number of
messages in flight can be set to, from 10 to 1.

With these changes the message queue user can make better trade offs
between #messages and message size, in order to get everything to fit
within the setrlimit(RLIMIT_MSGQUEUE) limit for that particular user.

This patch also applies the values in

	/proc/sys/fs/mqueue/msg_max
	/proc/sys/fs/mqueue/msgsize_max

as the defaults for the max #messages allowed and the max message size
allowed, respectively, for those applications that do not supply these.
Previously, the defaults were hardwired to 10 and 8192, respectively.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Joe Korty <joe.korty@ccur.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/mqueue.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 96fb36cd9874..68eb857cfdea 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -52,6 +52,14 @@
 #define HARD_MSGMAX 	(131072/sizeof(void*))
 #define DFLT_MSGSIZEMAX 8192	/* max message size */
 
+/*
+ * Define the ranges various user-specified maximum values can
+ * be set to.
+ */
+#define MIN_MSGMAX	1		/* min value for msg_max */
+#define MAX_MSGMAX	HARD_MSGMAX	/* max value for msg_max */
+#define MIN_MSGSIZEMAX	128		/* min value for msgsize_max */
+#define MAX_MSGSIZEMAX	(8192*128)	/* max value for msgsize_max */
 
 struct ext_wait_queue {		/* queue of sleeping tasks */
 	struct task_struct *task;
@@ -134,8 +142,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 			info->qsize = 0;
 			info->user = NULL;	/* set when all is ok */
 			memset(&info->attr, 0, sizeof(info->attr));
-			info->attr.mq_maxmsg = DFLT_MSGMAX;
-			info->attr.mq_msgsize = DFLT_MSGSIZEMAX;
+			info->attr.mq_maxmsg = msg_max;
+			info->attr.mq_msgsize = msgsize_max;
 			if (attr) {
 				info->attr.mq_maxmsg = attr->mq_maxmsg;
 				info->attr.mq_msgsize = attr->mq_msgsize;
@@ -1191,11 +1199,11 @@ static struct file_system_type mqueue_fs_type = {
 	.kill_sb = kill_litter_super,
 };
 
-static int msg_max_limit_min = DFLT_MSGMAX;
-static int msg_max_limit_max = HARD_MSGMAX;
+static int msg_max_limit_min = MIN_MSGMAX;
+static int msg_max_limit_max = MAX_MSGMAX;
 
-static int msg_maxsize_limit_min = DFLT_MSGSIZEMAX;
-static int msg_maxsize_limit_max = INT_MAX;
+static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
+static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
 
 static ctl_table mq_sysctls[] = {
 	{
-- 
GitLab


From b64fd291acd8c921b4757faed1d4dded31c27edf Mon Sep 17 00:00:00 2001
From: Andre Haupt <andre@bitwigglers.org>
Date: Sat, 18 Oct 2008 20:28:33 -0700
Subject: [PATCH 811/895] pc8736x_gpio: add support for PC87365 chips

This is only compile tested, because I do not own appropriate hardware.

Signed-off-by: Andre Haupt <andre@bitwigglers.org>
Cc: Jim Cromie <jim.cromie@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/pc8736x_gpio.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c
index b930de50407a..3f7da8cf3a80 100644
--- a/drivers/char/pc8736x_gpio.c
+++ b/drivers/char/pc8736x_gpio.c
@@ -41,7 +41,8 @@ static u8 pc8736x_gpio_shadow[4];
 #define SIO_BASE2       0x4E	/* alt command-reg to check */
 
 #define SIO_SID		0x20	/* SuperI/O ID Register */
-#define SIO_SID_VALUE	0xe9	/* Expected value in SuperI/O ID Register */
+#define SIO_SID_PC87365	0xe5	/* Expected value in ID Register for PC87365 */
+#define SIO_SID_PC87366	0xe9	/* Expected value in ID Register for PC87366 */
 
 #define SIO_CF1		0x21	/* chip config, bit0 is chip enable */
 
@@ -91,13 +92,17 @@ static inline int superio_inb(int addr)
 
 static int pc8736x_superio_present(void)
 {
+	int id;
+
 	/* try the 2 possible values, read a hardware reg to verify */
 	superio_cmd = SIO_BASE1;
-	if (superio_inb(SIO_SID) == SIO_SID_VALUE)
+	id = superio_inb(SIO_SID);
+	if (id == SIO_SID_PC87365 || id == SIO_SID_PC87366)
 		return superio_cmd;
 
 	superio_cmd = SIO_BASE2;
-	if (superio_inb(SIO_SID) == SIO_SID_VALUE)
+	id = superio_inb(SIO_SID);
+	if (id == SIO_SID_PC87365 || id == SIO_SID_PC87366)
 		return superio_cmd;
 
 	return 0;
-- 
GitLab


From 3b274f44d2ca05f719fe39947b6a5293a2dbd8fd Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Sat, 18 Oct 2008 20:28:35 -0700
Subject: [PATCH 812/895] edac cell: fix incorrect edac_mode

The cell_edac driver is setting the edac_mode field of the csrow's to an
incorrect value, causing the sysfs show routine for that field to go out
of an array bound and Oopsing the kernel when used.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Cc: <stable@kernel.org>		[2.6.27.x, 2.6.26.x. 2.6.25.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/cell_edac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index 0e024fe2d8c4..887072f5dc8b 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -142,7 +142,7 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
 		csrow->nr_pages = (r.end - r.start + 1) >> PAGE_SHIFT;
 		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
 		csrow->mtype = MEM_XDR;
-		csrow->edac_mode = EDAC_FLAG_EC | EDAC_FLAG_SECDED;
+		csrow->edac_mode = EDAC_SECDED;
 		dev_dbg(mci->dev,
 			"Initialized on node %d, chanmask=0x%x,"
 			" first_page=0x%lx, nr_pages=0x%x\n",
-- 
GitLab


From b8e465f4945bc0e9f324e3bbe15f5180a8e9a6fe Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sat, 18 Oct 2008 20:28:35 -0700
Subject: [PATCH 813/895] byteorder: add new headers for make headers-install

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/Kbuild | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index bf9aca548f14..e531783e5d78 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -183,6 +183,7 @@ unifdef-y += auto_fs.h
 unifdef-y += auxvec.h
 unifdef-y += binfmts.h
 unifdef-y += blktrace_api.h
+unifdef-y += byteorder.h
 unifdef-y += capability.h
 unifdef-y += capi.h
 unifdef-y += cciss_ioctl.h
@@ -340,6 +341,7 @@ unifdef-y += soundcard.h
 unifdef-y += stat.h
 unifdef-y += stddef.h
 unifdef-y += string.h
+unifdef-y += swab.h
 unifdef-y += synclink.h
 unifdef-y += sysctl.h
 unifdef-y += tcp.h
-- 
GitLab


From acf0108a84edae22b99655eb2f6f6c9f7ec4d449 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sat, 18 Oct 2008 20:28:36 -0700
Subject: [PATCH 814/895] byteorder: use generic C version for value
 byteswapping

This makes the new implementation of the byteorder helpers match the old
in how it degraded when an arch-defined version was not available:

1) swab()
	- look for arch defined
	- if not, use generic c version

2) swabp()
	- look for arch-defined
	- if not, deref pointer and use swab()

3) swabs()
	- look for arch defined
	- if not, use swabp

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swab.h | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/include/linux/swab.h b/include/linux/swab.h
index 270d5c208a89..bbed279f3b32 100644
--- a/include/linux/swab.h
+++ b/include/linux/swab.h
@@ -47,8 +47,6 @@ static inline __attribute_const__ __u16 ___swab16(__u16 val)
 {
 #ifdef __arch_swab16
 	return __arch_swab16(val);
-#elif defined(__arch_swab16p)
-	return __arch_swab16p(&val);
 #else
 	return __const_swab16(val);
 #endif
@@ -58,8 +56,6 @@ static inline __attribute_const__ __u32 ___swab32(__u32 val)
 {
 #ifdef __arch_swab32
 	return __arch_swab32(val);
-#elif defined(__arch_swab32p)
-	return __arch_swab32p(&val);
 #else
 	return __const_swab32(val);
 #endif
@@ -69,8 +65,6 @@ static inline __attribute_const__ __u64 ___swab64(__u64 val)
 {
 #ifdef __arch_swab64
 	return __arch_swab64(val);
-#elif defined(__arch_swab64p)
-	return __arch_swab64p(&val);
 #elif defined(__SWAB_64_THRU_32__)
 	__u32 h = val >> 32;
 	__u32 l = val & ((1ULL << 32) - 1);
@@ -84,8 +78,6 @@ static inline __attribute_const__ __u32 ___swahw32(__u32 val)
 {
 #ifdef __arch_swahw32
 	return __arch_swahw32(val);
-#elif defined(__arch_swahw32p)
-	return __arch_swahw32p(&val);
 #else
 	return __const_swahw32(val);
 #endif
@@ -95,8 +87,6 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val)
 {
 #ifdef __arch_swahb32
 	return __arch_swahb32(val);
-#elif defined(__arch_swahb32p)
-	return __arch_swahb32p(&val);
 #else
 	return __const_swahb32(val);
 #endif
-- 
GitLab


From 1d8cca44b6a244b7e378546d719041819049a0f9 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sat, 18 Oct 2008 20:28:37 -0700
Subject: [PATCH 815/895] byteorder: provide swabb.h generically in
 asm/byteorder.h

This is needed during the transition to the new byteorder headers as the
swabb.h functionality will be provided from asm/byteorder.h in the new
version.  To avoid breakage on arches still using the old implementation,
provide swabb.h from asm/byteorder.h as well.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/byteorder/Kbuild          | 1 +
 include/linux/byteorder/big_endian.h    | 1 +
 include/linux/byteorder/little_endian.h | 1 +
 3 files changed, 3 insertions(+)

diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild
index 1133d5f9d818..fbaa7f9cee32 100644
--- a/include/linux/byteorder/Kbuild
+++ b/include/linux/byteorder/Kbuild
@@ -1,3 +1,4 @@
 unifdef-y += big_endian.h
 unifdef-y += little_endian.h
 unifdef-y += swab.h
+unifdef-y += swabb.h
diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h
index 44f95b92393b..1cba3f3efe5f 100644
--- a/include/linux/byteorder/big_endian.h
+++ b/include/linux/byteorder/big_endian.h
@@ -10,6 +10,7 @@
 
 #include <linux/types.h>
 #include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
 
 #define __constant_htonl(x) ((__force __be32)(__u32)(x))
 #define __constant_ntohl(x) ((__force __u32)(__be32)(x))
diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h
index 4cc170a31762..cedc1b5a289c 100644
--- a/include/linux/byteorder/little_endian.h
+++ b/include/linux/byteorder/little_endian.h
@@ -10,6 +10,7 @@
 
 #include <linux/types.h>
 #include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
 
 #define __constant_htonl(x) ((__force __be32)___constant_swab32((x)))
 #define __constant_ntohl(x) ___constant_swab32((__force __be32)(x))
-- 
GitLab


From 1a651a00e20fd4997f0b91258f6f95b7d96edcd9 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sat, 18 Oct 2008 20:28:37 -0700
Subject: [PATCH 816/895] byteorder: remove direct includes of
 linux/byteorder/swab[b].h

A consolidated implementation will provide this generically through
asm/byteorder, remove direct includes to avoid breakage when the
changeover to the new implementation occurs.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: Mauro Carvalho Chehab <mchehab@infradead.org>
Acked-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/media/dvb/ttpci/av7110.c       | 2 +-
 drivers/media/video/cx18/cx18-driver.h | 1 +
 drivers/media/video/ivtv/ivtv-driver.h | 1 +
 kernel/rcupreempt.c                    | 2 +-
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/media/dvb/ttpci/av7110.c b/drivers/media/dvb/ttpci/av7110.c
index c7c770c28988..aa1ff524256e 100644
--- a/drivers/media/dvb/ttpci/av7110.c
+++ b/drivers/media/dvb/ttpci/av7110.c
@@ -36,7 +36,6 @@
 #include <linux/fs.h>
 #include <linux/timer.h>
 #include <linux/poll.h>
-#include <linux/byteorder/swabb.h>
 #include <linux/smp_lock.h>
 
 #include <linux/kernel.h>
@@ -52,6 +51,7 @@
 #include <linux/i2c.h>
 #include <linux/kthread.h>
 #include <asm/unaligned.h>
+#include <asm/byteorder.h>
 
 #include <asm/system.h>
 
diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h
index fa8be0731a3f..a4b1708fafe7 100644
--- a/drivers/media/video/cx18/cx18-driver.h
+++ b/drivers/media/video/cx18/cx18-driver.h
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <asm/byteorder.h>
 
 #include <linux/dvb/video.h>
 #include <linux/dvb/audio.h>
diff --git a/drivers/media/video/ivtv/ivtv-driver.h b/drivers/media/video/ivtv/ivtv-driver.h
index bc29436e8a3c..3733b2afec5f 100644
--- a/drivers/media/video/ivtv/ivtv-driver.h
+++ b/drivers/media/video/ivtv/ivtv-driver.h
@@ -55,6 +55,7 @@
 #include <linux/mutex.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
+#include <asm/byteorder.h>
 
 #include <linux/dvb/video.h>
 #include <linux/dvb/audio.h>
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index ca4bbbe04aa4..59236e8b9daa 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -54,9 +54,9 @@
 #include <linux/cpu.h>
 #include <linux/random.h>
 #include <linux/delay.h>
-#include <linux/byteorder/swabb.h>
 #include <linux/cpumask.h>
 #include <linux/rcupreempt_trace.h>
+#include <asm/byteorder.h>
 
 /*
  * PREEMPT_RCU data structures.
-- 
GitLab


From fdd2e5f88a259a537bb239e0c03c973cb6ea402a Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:38 -0700
Subject: [PATCH 817/895] make mm/rmap.c:anon_vma_cachep static

This patch makes the needlessly global anon_vma_cachep static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h | 12 ------------
 mm/rmap.c            | 12 +++++++++++-
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1da48db8db09..89f0564b10c8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -39,18 +39,6 @@ struct anon_vma {
 
 #ifdef CONFIG_MMU
 
-extern struct kmem_cache *anon_vma_cachep;
-
-static inline struct anon_vma *anon_vma_alloc(void)
-{
-	return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
-}
-
-static inline void anon_vma_free(struct anon_vma *anon_vma)
-{
-	kmem_cache_free(anon_vma_cachep, anon_vma);
-}
-
 static inline void anon_vma_lock(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
diff --git a/mm/rmap.c b/mm/rmap.c
index 8701d5fce732..10993942d6c9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -55,7 +55,17 @@
 
 #include "internal.h"
 
-struct kmem_cache *anon_vma_cachep;
+static struct kmem_cache *anon_vma_cachep;
+
+static inline struct anon_vma *anon_vma_alloc(void)
+{
+	return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
+}
+
+static inline void anon_vma_free(struct anon_vma *anon_vma)
+{
+	kmem_cache_free(anon_vma_cachep, anon_vma);
+}
 
 /**
  * anon_vma_prepare - attach an anon_vma to a memory region
-- 
GitLab


From ab527d75787c16114bcf2bc1b98bef18c67bb57c Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:39 -0700
Subject: [PATCH 818/895] acpi: use bcd2bin/bin2bcd

Change ACPI to use the new bcd2bin/bin2bcd functions instead of the
obsolete BCD_TO_BIN/BIN_TO_BCD macros.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/acpi/sleep/proc.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c
index bf5b04de02d1..631ee2ee2ca0 100644
--- a/drivers/acpi/sleep/proc.c
+++ b/drivers/acpi/sleep/proc.c
@@ -120,13 +120,13 @@ static int acpi_system_alarm_seq_show(struct seq_file *seq, void *offset)
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		BCD_TO_BIN(sec);
-		BCD_TO_BIN(min);
-		BCD_TO_BIN(hr);
-		BCD_TO_BIN(day);
-		BCD_TO_BIN(mo);
-		BCD_TO_BIN(yr);
-		BCD_TO_BIN(cent);
+		sec = bcd2bin(sec);
+		min = bcd2bin(min);
+		hr = bcd2bin(hr);
+		day = bcd2bin(day);
+		mo = bcd2bin(mo);
+		yr = bcd2bin(yr);
+		cent = bcd2bin(cent);
 	}
 
 	/* we're trusting the FADT (see above) */
@@ -204,7 +204,7 @@ static u32 cmos_bcd_read(int offset, int rtc_control)
 {
 	u32 val = CMOS_READ(offset);
 	if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(val);
+		val = bcd2bin(val);
 	return val;
 }
 
@@ -212,7 +212,7 @@ static u32 cmos_bcd_read(int offset, int rtc_control)
 static void cmos_bcd_write(u32 val, int offset, int rtc_control)
 {
 	if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BIN_TO_BCD(val);
+		val = bin2bcd(val);
 	CMOS_WRITE(val, offset);
 }
 
-- 
GitLab


From 18b1bd054991266d19413e155e371b5e25c98cb7 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:39 -0700
Subject: [PATCH 819/895] alpha: use bcd2bin/bin2bcd

Change alpha to use the new bcd2bin/bin2bcd functions instead of the
obsolete BCD_TO_BIN/BIN_TO_BCD macros.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/core_marvel.c |  4 ++--
 arch/alpha/kernel/time.c        | 18 +++++++++---------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index 04dcc5e5d4c1..9cd8dca742a7 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -655,7 +655,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
 
 	case 0x71:					/* RTC_PORT(1) */
 		rtc_access.index = index;
-		rtc_access.data = BCD_TO_BIN(b);
+		rtc_access.data = bcd2bin(b);
 		rtc_access.function = 0x48 + !write;	/* GET/PUT_TOY */
 
 #ifdef CONFIG_SMP
@@ -668,7 +668,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
 #else
 		__marvel_access_rtc(&rtc_access);
 #endif
-		ret = BIN_TO_BCD(rtc_access.data);
+		ret = bin2bcd(rtc_access.data);
 		break;
 
 	default:
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 75480cab0893..e6a231435cba 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -346,12 +346,12 @@ time_init(void)
 	year = CMOS_READ(RTC_YEAR);
 
 	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		BCD_TO_BIN(sec);
-		BCD_TO_BIN(min);
-		BCD_TO_BIN(hour);
-		BCD_TO_BIN(day);
-		BCD_TO_BIN(mon);
-		BCD_TO_BIN(year);
+		sec = bcd2bin(sec);
+		min = bcd2bin(min);
+		hour = bcd2bin(hour);
+		day = bcd2bin(day);
+		mon = bcd2bin(mon);
+		year = bcd2bin(year);
 	}
 
 	/* PC-like is standard; used for year >= 70 */
@@ -525,7 +525,7 @@ set_rtc_mmss(unsigned long nowtime)
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
 	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(cmos_minutes);
+		cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -543,8 +543,8 @@ set_rtc_mmss(unsigned long nowtime)
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
 		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			BIN_TO_BCD(real_seconds);
-			BIN_TO_BCD(real_minutes);
+			real_seconds = bin2bcd(real_seconds);
+			real_minutes = bin2bcd(real_minutes);
 		}
 		CMOS_WRITE(real_seconds,RTC_SECONDS);
 		CMOS_WRITE(real_minutes,RTC_MINUTES);
-- 
GitLab


From 4110a0d6206bd175419cc5503f80cc296d184cbf Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:40 -0700
Subject: [PATCH 820/895] cris: use bcd2bin/bin2bcd

Change cris to use the new bcd2bin/bin2bcd functions instead of the
obsolete BCD_TO_BIN/BIN_TO_BCD macros.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Chris Zankel <zankel@tensilica.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/cris/arch-v10/drivers/ds1302.c  | 24 ++++++++++++------------
 arch/cris/arch-v10/drivers/pcf8563.c | 24 ++++++++++++------------
 arch/cris/arch-v32/drivers/pcf8563.c | 24 ++++++++++++------------
 arch/cris/kernel/time.c              | 18 +++++++++---------
 4 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/arch/cris/arch-v10/drivers/ds1302.c b/arch/cris/arch-v10/drivers/ds1302.c
index c9aa3904be05..3bdfaf43390c 100644
--- a/arch/cris/arch-v10/drivers/ds1302.c
+++ b/arch/cris/arch-v10/drivers/ds1302.c
@@ -215,12 +215,12 @@ get_rtc_time(struct rtc_time *rtc_tm)
 
 	local_irq_restore(flags);
 	
-	BCD_TO_BIN(rtc_tm->tm_sec);
-	BCD_TO_BIN(rtc_tm->tm_min);
-	BCD_TO_BIN(rtc_tm->tm_hour);
-	BCD_TO_BIN(rtc_tm->tm_mday);
-	BCD_TO_BIN(rtc_tm->tm_mon);
-	BCD_TO_BIN(rtc_tm->tm_year);
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
 	/*
 	 * Account for differences between how the RTC uses the values
@@ -295,12 +295,12 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			else
 				yrs -= 1900;	/* RTC (70, 71, ... 99) */
 
-			BIN_TO_BCD(sec);
-			BIN_TO_BCD(min);
-			BIN_TO_BCD(hrs);
-			BIN_TO_BCD(day);
-			BIN_TO_BCD(mon);
-			BIN_TO_BCD(yrs);
+			sec = bin2bcd(sec);
+			min = bin2bcd(min);
+			hrs = bin2bcd(hrs);
+			day = bin2bcd(day);
+			mon = bin2bcd(mon);
+			yrs = bin2bcd(yrs);
 
 			local_irq_save(flags);
 			CMOS_WRITE(yrs, RTC_YEAR);
diff --git a/arch/cris/arch-v10/drivers/pcf8563.c b/arch/cris/arch-v10/drivers/pcf8563.c
index 8769dc914073..1e90c1a9c849 100644
--- a/arch/cris/arch-v10/drivers/pcf8563.c
+++ b/arch/cris/arch-v10/drivers/pcf8563.c
@@ -122,7 +122,7 @@ get_rtc_time(struct rtc_time *tm)
 		       "information is no longer guaranteed!\n", PCF8563_NAME);
 	}
 
-	tm->tm_year  = BCD_TO_BIN(tm->tm_year) +
+	tm->tm_year  = bcd2bin(tm->tm_year) +
 		       ((tm->tm_mon & 0x80) ? 100 : 0);
 	tm->tm_sec  &= 0x7F;
 	tm->tm_min  &= 0x7F;
@@ -131,11 +131,11 @@ get_rtc_time(struct rtc_time *tm)
 	tm->tm_wday &= 0x07; /* Not coded in BCD. */
 	tm->tm_mon  &= 0x1F;
 
-	BCD_TO_BIN(tm->tm_sec);
-	BCD_TO_BIN(tm->tm_min);
-	BCD_TO_BIN(tm->tm_hour);
-	BCD_TO_BIN(tm->tm_mday);
-	BCD_TO_BIN(tm->tm_mon);
+	tm->tm_sec = bcd2bin(tm->tm_sec);
+	tm->tm_min = bcd2bin(tm->tm_min);
+	tm->tm_hour = bcd2bin(tm->tm_hour);
+	tm->tm_mday = bcd2bin(tm->tm_mday);
+	tm->tm_mon = bcd2bin(tm->tm_mon);
 	tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
 }
 
@@ -282,12 +282,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 		century = (tm.tm_year >= 2000) ? 0x80 : 0;
 		tm.tm_year = tm.tm_year % 100;
 
-		BIN_TO_BCD(tm.tm_year);
-		BIN_TO_BCD(tm.tm_mon);
-		BIN_TO_BCD(tm.tm_mday);
-		BIN_TO_BCD(tm.tm_hour);
-		BIN_TO_BCD(tm.tm_min);
-		BIN_TO_BCD(tm.tm_sec);
+		tm.tm_year = bin2bcd(tm.tm_year);
+		tm.tm_mon = bin2bcd(tm.tm_mon);
+		tm.tm_mday = bin2bcd(tm.tm_mday);
+		tm.tm_hour = bin2bcd(tm.tm_hour);
+		tm.tm_min = bin2bcd(tm.tm_min);
+		tm.tm_sec = bin2bcd(tm.tm_sec);
 		tm.tm_mon |= century;
 
 		mutex_lock(&rtc_lock);
diff --git a/arch/cris/arch-v32/drivers/pcf8563.c b/arch/cris/arch-v32/drivers/pcf8563.c
index f263ab571221..f4478506e52c 100644
--- a/arch/cris/arch-v32/drivers/pcf8563.c
+++ b/arch/cris/arch-v32/drivers/pcf8563.c
@@ -118,7 +118,7 @@ get_rtc_time(struct rtc_time *tm)
 		       "information is no longer guaranteed!\n", PCF8563_NAME);
 	}
 
-	tm->tm_year  = BCD_TO_BIN(tm->tm_year) +
+	tm->tm_year  = bcd2bin(tm->tm_year) +
 		       ((tm->tm_mon & 0x80) ? 100 : 0);
 	tm->tm_sec  &= 0x7F;
 	tm->tm_min  &= 0x7F;
@@ -127,11 +127,11 @@ get_rtc_time(struct rtc_time *tm)
 	tm->tm_wday &= 0x07; /* Not coded in BCD. */
 	tm->tm_mon  &= 0x1F;
 
-	BCD_TO_BIN(tm->tm_sec);
-	BCD_TO_BIN(tm->tm_min);
-	BCD_TO_BIN(tm->tm_hour);
-	BCD_TO_BIN(tm->tm_mday);
-	BCD_TO_BIN(tm->tm_mon);
+	tm->tm_sec = bcd2bin(tm->tm_sec);
+	tm->tm_min = bcd2bin(tm->tm_min);
+	tm->tm_hour = bcd2bin(tm->tm_hour);
+	tm->tm_mday = bcd2bin(tm->tm_mday);
+	tm->tm_mon = bcd2bin(tm->tm_mon);
 	tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
 }
 
@@ -279,12 +279,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 		century = (tm.tm_year >= 2000) ? 0x80 : 0;
 		tm.tm_year = tm.tm_year % 100;
 
-		BIN_TO_BCD(tm.tm_year);
-		BIN_TO_BCD(tm.tm_mon);
-		BIN_TO_BCD(tm.tm_mday);
-		BIN_TO_BCD(tm.tm_hour);
-		BIN_TO_BCD(tm.tm_min);
-		BIN_TO_BCD(tm.tm_sec);
+		tm.tm_year = bin2bcd(tm.tm_year);
+		tm.tm_mon = bin2bcd(tm.tm_mon);
+		tm.tm_mday = bin2bcd(tm.tm_mday);
+		tm.tm_hour = bin2bcd(tm.tm_hour);
+		tm.tm_min = bin2bcd(tm.tm_min);
+		tm.tm_sec = bin2bcd(tm.tm_sec);
 		tm.tm_mon |= century;
 
 		mutex_lock(&rtc_lock);
diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c
index ff4c6aa75def..074fe7dea96b 100644
--- a/arch/cris/kernel/time.c
+++ b/arch/cris/kernel/time.c
@@ -127,7 +127,7 @@ int set_rtc_mmss(unsigned long nowtime)
 		return 0;
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
-	BCD_TO_BIN(cmos_minutes);
+	cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -142,8 +142,8 @@ int set_rtc_mmss(unsigned long nowtime)
 	real_minutes %= 60;
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
-		BIN_TO_BCD(real_seconds);
-		BIN_TO_BCD(real_minutes);
+		real_seconds = bin2bcd(real_seconds);
+		real_minutes = bin2bcd(real_minutes);
 		CMOS_WRITE(real_seconds,RTC_SECONDS);
 		CMOS_WRITE(real_minutes,RTC_MINUTES);
 	} else {
@@ -170,12 +170,12 @@ get_cmos_time(void)
 	mon = CMOS_READ(RTC_MONTH);
 	year = CMOS_READ(RTC_YEAR);
 
-	BCD_TO_BIN(sec);
-	BCD_TO_BIN(min);
-	BCD_TO_BIN(hour);
-	BCD_TO_BIN(day);
-	BCD_TO_BIN(mon);
-	BCD_TO_BIN(year);
+	sec = bcd2bin(sec);
+	min = bcd2bin(min);
+	hour = bcd2bin(hour);
+	day = bcd2bin(day);
+	mon = bcd2bin(mon);
+	year = bcd2bin(year);
 
 	if ((year += 1900) < 1970)
 		year += 100;
-- 
GitLab


From fe20ba70abf7d6e5855c3dacc729490b3d0d077f Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:41 -0700
Subject: [PATCH 821/895] drivers/rtc/: use bcd2bin/bin2bcd

Change drivers/rtc/ to use the new bcd2bin/bin2bcd functions instead of
the obsolete BCD_TO_BIN/BIN_TO_BCD/BCD2BIN/BIN2BCD macros.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-at91rm9200.c | 42 +++++++++++++--------------
 drivers/rtc/rtc-cmos.c       | 20 ++++++-------
 drivers/rtc/rtc-ds1216.c     | 26 ++++++++---------
 drivers/rtc/rtc-ds1302.c     | 28 +++++++++---------
 drivers/rtc/rtc-ds1305.c     | 38 ++++++++++++------------
 drivers/rtc/rtc-ds1307.c     | 40 +++++++++++++-------------
 drivers/rtc/rtc-ds1511.c     | 42 +++++++++++++--------------
 drivers/rtc/rtc-ds1553.c     | 38 ++++++++++++------------
 drivers/rtc/rtc-ds1742.c     | 30 +++++++++----------
 drivers/rtc/rtc-fm3130.c     | 56 ++++++++++++++++++------------------
 drivers/rtc/rtc-isl1208.c    | 42 +++++++++++++--------------
 drivers/rtc/rtc-m41t80.c     | 44 ++++++++++++++--------------
 drivers/rtc/rtc-m41t94.c     | 28 +++++++++---------
 drivers/rtc/rtc-m48t59.c     | 48 +++++++++++++++----------------
 drivers/rtc/rtc-m48t86.c     | 28 +++++++++---------
 drivers/rtc/rtc-max6900.c    | 32 ++++++++++-----------
 drivers/rtc/rtc-max6902.c    | 32 ++++++++++-----------
 drivers/rtc/rtc-omap.c       | 24 ++++++++--------
 drivers/rtc/rtc-pcf8563.c    | 24 ++++++++--------
 drivers/rtc/rtc-pcf8583.c    | 20 ++++++-------
 drivers/rtc/rtc-r9701.c      | 24 ++++++++--------
 drivers/rtc/rtc-rs5c313.c    | 28 +++++++++---------
 drivers/rtc/rtc-rs5c348.c    | 30 +++++++++----------
 drivers/rtc/rtc-rs5c372.c    | 42 +++++++++++++--------------
 drivers/rtc/rtc-s35390a.c    | 34 +++++++++++-----------
 drivers/rtc/rtc-s3c.c        | 42 +++++++++++++--------------
 drivers/rtc/rtc-sh.c         | 40 +++++++++++++-------------
 drivers/rtc/rtc-stk17ta8.c   | 38 ++++++++++++------------
 drivers/rtc/rtc-v3020.c      | 28 +++++++++---------
 drivers/rtc/rtc-x1205.c      | 30 +++++++++----------
 30 files changed, 509 insertions(+), 509 deletions(-)

diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 37082616482b..b5bf93706913 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -53,21 +53,21 @@ static void at91_rtc_decodetime(unsigned int timereg, unsigned int calreg,
 	} while ((time != at91_sys_read(timereg)) ||
 			(date != at91_sys_read(calreg)));
 
-	tm->tm_sec  = BCD2BIN((time & AT91_RTC_SEC) >> 0);
-	tm->tm_min  = BCD2BIN((time & AT91_RTC_MIN) >> 8);
-	tm->tm_hour = BCD2BIN((time & AT91_RTC_HOUR) >> 16);
+	tm->tm_sec  = bcd2bin((time & AT91_RTC_SEC) >> 0);
+	tm->tm_min  = bcd2bin((time & AT91_RTC_MIN) >> 8);
+	tm->tm_hour = bcd2bin((time & AT91_RTC_HOUR) >> 16);
 
 	/*
 	 * The Calendar Alarm register does not have a field for
 	 * the year - so these will return an invalid value.  When an
 	 * alarm is set, at91_alarm_year wille store the current year.
 	 */
-	tm->tm_year  = BCD2BIN(date & AT91_RTC_CENT) * 100;	/* century */
-	tm->tm_year += BCD2BIN((date & AT91_RTC_YEAR) >> 8);	/* year */
+	tm->tm_year  = bcd2bin(date & AT91_RTC_CENT) * 100;	/* century */
+	tm->tm_year += bcd2bin((date & AT91_RTC_YEAR) >> 8);	/* year */
 
-	tm->tm_wday = BCD2BIN((date & AT91_RTC_DAY) >> 21) - 1;	/* day of the week [0-6], Sunday=0 */
-	tm->tm_mon  = BCD2BIN((date & AT91_RTC_MONTH) >> 16) - 1;
-	tm->tm_mday = BCD2BIN((date & AT91_RTC_DATE) >> 24);
+	tm->tm_wday = bcd2bin((date & AT91_RTC_DAY) >> 21) - 1;	/* day of the week [0-6], Sunday=0 */
+	tm->tm_mon  = bcd2bin((date & AT91_RTC_MONTH) >> 16) - 1;
+	tm->tm_mday = bcd2bin((date & AT91_RTC_DATE) >> 24);
 }
 
 /*
@@ -106,16 +106,16 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm)
 	at91_sys_write(AT91_RTC_IDR, AT91_RTC_ACKUPD);
 
 	at91_sys_write(AT91_RTC_TIMR,
-			  BIN2BCD(tm->tm_sec) << 0
-			| BIN2BCD(tm->tm_min) << 8
-			| BIN2BCD(tm->tm_hour) << 16);
+			  bin2bcd(tm->tm_sec) << 0
+			| bin2bcd(tm->tm_min) << 8
+			| bin2bcd(tm->tm_hour) << 16);
 
 	at91_sys_write(AT91_RTC_CALR,
-			  BIN2BCD((tm->tm_year + 1900) / 100)	/* century */
-			| BIN2BCD(tm->tm_year % 100) << 8	/* year */
-			| BIN2BCD(tm->tm_mon + 1) << 16		/* tm_mon starts at zero */
-			| BIN2BCD(tm->tm_wday + 1) << 21	/* day of the week [0-6], Sunday=0 */
-			| BIN2BCD(tm->tm_mday) << 24);
+			  bin2bcd((tm->tm_year + 1900) / 100)	/* century */
+			| bin2bcd(tm->tm_year % 100) << 8	/* year */
+			| bin2bcd(tm->tm_mon + 1) << 16		/* tm_mon starts at zero */
+			| bin2bcd(tm->tm_wday + 1) << 21	/* day of the week [0-6], Sunday=0 */
+			| bin2bcd(tm->tm_mday) << 24);
 
 	/* Restart Time/Calendar */
 	cr = at91_sys_read(AT91_RTC_CR);
@@ -162,13 +162,13 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 	at91_sys_write(AT91_RTC_IDR, AT91_RTC_ALARM);
 	at91_sys_write(AT91_RTC_TIMALR,
-		  BIN2BCD(tm.tm_sec) << 0
-		| BIN2BCD(tm.tm_min) << 8
-		| BIN2BCD(tm.tm_hour) << 16
+		  bin2bcd(tm.tm_sec) << 0
+		| bin2bcd(tm.tm_min) << 8
+		| bin2bcd(tm.tm_hour) << 16
 		| AT91_RTC_HOUREN | AT91_RTC_MINEN | AT91_RTC_SECEN);
 	at91_sys_write(AT91_RTC_CALALR,
-		  BIN2BCD(tm.tm_mon + 1) << 16		/* tm_mon starts at zero */
-		| BIN2BCD(tm.tm_mday) << 24
+		  bin2bcd(tm.tm_mon + 1) << 16		/* tm_mon starts at zero */
+		| bin2bcd(tm.tm_mday) << 24
 		| AT91_RTC_DATEEN | AT91_RTC_MTHEN);
 
 	if (alrm->enabled) {
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index f1695d7fa0fa..957365e4a746 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -240,26 +240,26 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 	/* REVISIT this assumes PC style usage:  always BCD */
 
 	if (((unsigned)t->time.tm_sec) < 0x60)
-		t->time.tm_sec = BCD2BIN(t->time.tm_sec);
+		t->time.tm_sec = bcd2bin(t->time.tm_sec);
 	else
 		t->time.tm_sec = -1;
 	if (((unsigned)t->time.tm_min) < 0x60)
-		t->time.tm_min = BCD2BIN(t->time.tm_min);
+		t->time.tm_min = bcd2bin(t->time.tm_min);
 	else
 		t->time.tm_min = -1;
 	if (((unsigned)t->time.tm_hour) < 0x24)
-		t->time.tm_hour = BCD2BIN(t->time.tm_hour);
+		t->time.tm_hour = bcd2bin(t->time.tm_hour);
 	else
 		t->time.tm_hour = -1;
 
 	if (cmos->day_alrm) {
 		if (((unsigned)t->time.tm_mday) <= 0x31)
-			t->time.tm_mday = BCD2BIN(t->time.tm_mday);
+			t->time.tm_mday = bcd2bin(t->time.tm_mday);
 		else
 			t->time.tm_mday = -1;
 		if (cmos->mon_alrm) {
 			if (((unsigned)t->time.tm_mon) <= 0x12)
-				t->time.tm_mon = BCD2BIN(t->time.tm_mon) - 1;
+				t->time.tm_mon = bcd2bin(t->time.tm_mon) - 1;
 			else
 				t->time.tm_mon = -1;
 		}
@@ -331,19 +331,19 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	/* Writing 0xff means "don't care" or "match all".  */
 
 	mon = t->time.tm_mon + 1;
-	mon = (mon <= 12) ? BIN2BCD(mon) : 0xff;
+	mon = (mon <= 12) ? bin2bcd(mon) : 0xff;
 
 	mday = t->time.tm_mday;
-	mday = (mday >= 1 && mday <= 31) ? BIN2BCD(mday) : 0xff;
+	mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
 
 	hrs = t->time.tm_hour;
-	hrs = (hrs < 24) ? BIN2BCD(hrs) : 0xff;
+	hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff;
 
 	min = t->time.tm_min;
-	min = (min < 60) ? BIN2BCD(min) : 0xff;
+	min = (min < 60) ? bin2bcd(min) : 0xff;
 
 	sec = t->time.tm_sec;
-	sec = (sec < 60) ? BIN2BCD(sec) : 0xff;
+	sec = (sec < 60) ? bin2bcd(sec) : 0xff;
 
 	spin_lock_irq(&rtc_lock);
 
diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c
index 0b17770b032b..9a234a4ec06d 100644
--- a/drivers/rtc/rtc-ds1216.c
+++ b/drivers/rtc/rtc-ds1216.c
@@ -86,19 +86,19 @@ static int ds1216_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	ds1216_switch_ds_to_clock(priv->ioaddr);
 	ds1216_read(priv->ioaddr, (u8 *)&regs);
 
-	tm->tm_sec = BCD2BIN(regs.sec);
-	tm->tm_min = BCD2BIN(regs.min);
+	tm->tm_sec = bcd2bin(regs.sec);
+	tm->tm_min = bcd2bin(regs.min);
 	if (regs.hour & DS1216_HOUR_1224) {
 		/* AM/PM mode */
-		tm->tm_hour = BCD2BIN(regs.hour & 0x1f);
+		tm->tm_hour = bcd2bin(regs.hour & 0x1f);
 		if (regs.hour & DS1216_HOUR_AMPM)
 			tm->tm_hour += 12;
 	} else
-		tm->tm_hour = BCD2BIN(regs.hour & 0x3f);
+		tm->tm_hour = bcd2bin(regs.hour & 0x3f);
 	tm->tm_wday = (regs.wday & 7) - 1;
-	tm->tm_mday = BCD2BIN(regs.mday & 0x3f);
-	tm->tm_mon = BCD2BIN(regs.month & 0x1f);
-	tm->tm_year = BCD2BIN(regs.year);
+	tm->tm_mday = bcd2bin(regs.mday & 0x3f);
+	tm->tm_mon = bcd2bin(regs.month & 0x1f);
+	tm->tm_year = bcd2bin(regs.year);
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;
 	return 0;
@@ -114,19 +114,19 @@ static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	ds1216_read(priv->ioaddr, (u8 *)&regs);
 
 	regs.tsec = 0; /* clear 0.1 and 0.01 seconds */
-	regs.sec = BIN2BCD(tm->tm_sec);
-	regs.min = BIN2BCD(tm->tm_min);
+	regs.sec = bin2bcd(tm->tm_sec);
+	regs.min = bin2bcd(tm->tm_min);
 	regs.hour &= DS1216_HOUR_1224;
 	if (regs.hour && tm->tm_hour > 12) {
 		regs.hour |= DS1216_HOUR_AMPM;
 		tm->tm_hour -= 12;
 	}
-	regs.hour |= BIN2BCD(tm->tm_hour);
+	regs.hour |= bin2bcd(tm->tm_hour);
 	regs.wday &= ~7;
 	regs.wday |= tm->tm_wday;
-	regs.mday = BIN2BCD(tm->tm_mday);
-	regs.month = BIN2BCD(tm->tm_mon);
-	regs.year = BIN2BCD(tm->tm_year % 100);
+	regs.mday = bin2bcd(tm->tm_mday);
+	regs.month = bin2bcd(tm->tm_mon);
+	regs.year = bin2bcd(tm->tm_year % 100);
 
 	ds1216_switch_ds_to_clock(priv->ioaddr);
 	ds1216_write(priv->ioaddr, (u8 *)&regs);
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index b9397818f73a..16bdba6a6b0c 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -107,13 +107,13 @@ static int ds1302_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	spin_lock_irq(&rtc->lock);
 
-	tm->tm_sec	= BCD2BIN(ds1302_readbyte(RTC_ADDR_SEC));
-	tm->tm_min	= BCD2BIN(ds1302_readbyte(RTC_ADDR_MIN));
-	tm->tm_hour	= BCD2BIN(ds1302_readbyte(RTC_ADDR_HOUR));
-	tm->tm_wday	= BCD2BIN(ds1302_readbyte(RTC_ADDR_DAY));
-	tm->tm_mday	= BCD2BIN(ds1302_readbyte(RTC_ADDR_DATE));
-	tm->tm_mon	= BCD2BIN(ds1302_readbyte(RTC_ADDR_MON)) - 1;
-	tm->tm_year	= BCD2BIN(ds1302_readbyte(RTC_ADDR_YEAR));
+	tm->tm_sec	= bcd2bin(ds1302_readbyte(RTC_ADDR_SEC));
+	tm->tm_min	= bcd2bin(ds1302_readbyte(RTC_ADDR_MIN));
+	tm->tm_hour	= bcd2bin(ds1302_readbyte(RTC_ADDR_HOUR));
+	tm->tm_wday	= bcd2bin(ds1302_readbyte(RTC_ADDR_DAY));
+	tm->tm_mday	= bcd2bin(ds1302_readbyte(RTC_ADDR_DATE));
+	tm->tm_mon	= bcd2bin(ds1302_readbyte(RTC_ADDR_MON)) - 1;
+	tm->tm_year	= bcd2bin(ds1302_readbyte(RTC_ADDR_YEAR));
 
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;
@@ -141,13 +141,13 @@ static int ds1302_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	/* Stop RTC */
 	ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) | 0x80);
 
-	ds1302_writebyte(RTC_ADDR_SEC, BIN2BCD(tm->tm_sec));
-	ds1302_writebyte(RTC_ADDR_MIN, BIN2BCD(tm->tm_min));
-	ds1302_writebyte(RTC_ADDR_HOUR, BIN2BCD(tm->tm_hour));
-	ds1302_writebyte(RTC_ADDR_DAY, BIN2BCD(tm->tm_wday));
-	ds1302_writebyte(RTC_ADDR_DATE, BIN2BCD(tm->tm_mday));
-	ds1302_writebyte(RTC_ADDR_MON, BIN2BCD(tm->tm_mon + 1));
-	ds1302_writebyte(RTC_ADDR_YEAR, BIN2BCD(tm->tm_year % 100));
+	ds1302_writebyte(RTC_ADDR_SEC, bin2bcd(tm->tm_sec));
+	ds1302_writebyte(RTC_ADDR_MIN, bin2bcd(tm->tm_min));
+	ds1302_writebyte(RTC_ADDR_HOUR, bin2bcd(tm->tm_hour));
+	ds1302_writebyte(RTC_ADDR_DAY, bin2bcd(tm->tm_wday));
+	ds1302_writebyte(RTC_ADDR_DATE, bin2bcd(tm->tm_mday));
+	ds1302_writebyte(RTC_ADDR_MON, bin2bcd(tm->tm_mon + 1));
+	ds1302_writebyte(RTC_ADDR_YEAR, bin2bcd(tm->tm_year % 100));
 
 	/* Start RTC */
 	ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) & ~0x80);
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index b91d02a3ace9..57b470f3fc0b 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -114,10 +114,10 @@ static unsigned bcd2hour(u8 bcd)
 			hour = 12;
 			bcd &= ~DS1305_HR_PM;
 		}
-		hour += BCD2BIN(bcd);
+		hour += bcd2bin(bcd);
 		return hour - 1;
 	}
-	return BCD2BIN(bcd);
+	return bcd2bin(bcd);
 }
 
 static u8 hour2bcd(bool hr12, int hour)
@@ -125,11 +125,11 @@ static u8 hour2bcd(bool hr12, int hour)
 	if (hr12) {
 		hour++;
 		if (hour <= 12)
-			return DS1305_HR_12 | BIN2BCD(hour);
+			return DS1305_HR_12 | bin2bcd(hour);
 		hour -= 12;
-		return DS1305_HR_12 | DS1305_HR_PM | BIN2BCD(hour);
+		return DS1305_HR_12 | DS1305_HR_PM | bin2bcd(hour);
 	}
-	return BIN2BCD(hour);
+	return bin2bcd(hour);
 }
 
 /*----------------------------------------------------------------------*/
@@ -206,13 +206,13 @@ static int ds1305_get_time(struct device *dev, struct rtc_time *time)
 		buf[4], buf[5], buf[6]);
 
 	/* Decode the registers */
-	time->tm_sec = BCD2BIN(buf[DS1305_SEC]);
-	time->tm_min = BCD2BIN(buf[DS1305_MIN]);
+	time->tm_sec = bcd2bin(buf[DS1305_SEC]);
+	time->tm_min = bcd2bin(buf[DS1305_MIN]);
 	time->tm_hour = bcd2hour(buf[DS1305_HOUR]);
 	time->tm_wday = buf[DS1305_WDAY] - 1;
-	time->tm_mday = BCD2BIN(buf[DS1305_MDAY]);
-	time->tm_mon = BCD2BIN(buf[DS1305_MON]) - 1;
-	time->tm_year = BCD2BIN(buf[DS1305_YEAR]) + 100;
+	time->tm_mday = bcd2bin(buf[DS1305_MDAY]);
+	time->tm_mon = bcd2bin(buf[DS1305_MON]) - 1;
+	time->tm_year = bcd2bin(buf[DS1305_YEAR]) + 100;
 
 	dev_vdbg(dev, "%s secs=%d, mins=%d, "
 		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -239,13 +239,13 @@ static int ds1305_set_time(struct device *dev, struct rtc_time *time)
 	/* Write registers starting at the first time/date address. */
 	*bp++ = DS1305_WRITE | DS1305_SEC;
 
-	*bp++ = BIN2BCD(time->tm_sec);
-	*bp++ = BIN2BCD(time->tm_min);
+	*bp++ = bin2bcd(time->tm_sec);
+	*bp++ = bin2bcd(time->tm_min);
 	*bp++ = hour2bcd(ds1305->hr12, time->tm_hour);
 	*bp++ = (time->tm_wday < 7) ? (time->tm_wday + 1) : 1;
-	*bp++ = BIN2BCD(time->tm_mday);
-	*bp++ = BIN2BCD(time->tm_mon + 1);
-	*bp++ = BIN2BCD(time->tm_year - 100);
+	*bp++ = bin2bcd(time->tm_mday);
+	*bp++ = bin2bcd(time->tm_mon + 1);
+	*bp++ = bin2bcd(time->tm_year - 100);
 
 	dev_dbg(dev, "%s: %02x %02x %02x, %02x %02x %02x %02x\n",
 		"write", buf[1], buf[2], buf[3],
@@ -329,8 +329,8 @@ static int ds1305_get_alarm(struct device *dev, struct rtc_wkalrm *alm)
 	 * fill in the rest ... and also handle rollover to tomorrow when
 	 * that's needed.
 	 */
-	alm->time.tm_sec = BCD2BIN(buf[DS1305_SEC]);
-	alm->time.tm_min = BCD2BIN(buf[DS1305_MIN]);
+	alm->time.tm_sec = bcd2bin(buf[DS1305_SEC]);
+	alm->time.tm_min = bcd2bin(buf[DS1305_MIN]);
 	alm->time.tm_hour = bcd2hour(buf[DS1305_HOUR]);
 	alm->time.tm_mday = -1;
 	alm->time.tm_mon = -1;
@@ -387,8 +387,8 @@ static int ds1305_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 
 	/* write alarm */
 	buf[0] = DS1305_WRITE | DS1305_ALM0(DS1305_SEC);
-	buf[1 + DS1305_SEC] = BIN2BCD(alm->time.tm_sec);
-	buf[1 + DS1305_MIN] = BIN2BCD(alm->time.tm_min);
+	buf[1 + DS1305_SEC] = bin2bcd(alm->time.tm_sec);
+	buf[1 + DS1305_MIN] = bin2bcd(alm->time.tm_min);
 	buf[1 + DS1305_HOUR] = hour2bcd(ds1305->hr12, alm->time.tm_hour);
 	buf[1 + DS1305_WDAY] = DS1305_ALM_DISABLE;
 
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 4fcf0734a6ef..cad23bcfebd4 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -222,17 +222,17 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 			ds1307->regs[4], ds1307->regs[5],
 			ds1307->regs[6]);
 
-	t->tm_sec = BCD2BIN(ds1307->regs[DS1307_REG_SECS] & 0x7f);
-	t->tm_min = BCD2BIN(ds1307->regs[DS1307_REG_MIN] & 0x7f);
+	t->tm_sec = bcd2bin(ds1307->regs[DS1307_REG_SECS] & 0x7f);
+	t->tm_min = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
 	tmp = ds1307->regs[DS1307_REG_HOUR] & 0x3f;
-	t->tm_hour = BCD2BIN(tmp);
-	t->tm_wday = BCD2BIN(ds1307->regs[DS1307_REG_WDAY] & 0x07) - 1;
-	t->tm_mday = BCD2BIN(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
+	t->tm_hour = bcd2bin(tmp);
+	t->tm_wday = bcd2bin(ds1307->regs[DS1307_REG_WDAY] & 0x07) - 1;
+	t->tm_mday = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
 	tmp = ds1307->regs[DS1307_REG_MONTH] & 0x1f;
-	t->tm_mon = BCD2BIN(tmp) - 1;
+	t->tm_mon = bcd2bin(tmp) - 1;
 
 	/* assume 20YY not 19YY, and ignore DS1337_BIT_CENTURY */
-	t->tm_year = BCD2BIN(ds1307->regs[DS1307_REG_YEAR]) + 100;
+	t->tm_year = bcd2bin(ds1307->regs[DS1307_REG_YEAR]) + 100;
 
 	dev_dbg(dev, "%s secs=%d, mins=%d, "
 		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -258,16 +258,16 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 		t->tm_mon, t->tm_year, t->tm_wday);
 
 	*buf++ = 0;		/* first register addr */
-	buf[DS1307_REG_SECS] = BIN2BCD(t->tm_sec);
-	buf[DS1307_REG_MIN] = BIN2BCD(t->tm_min);
-	buf[DS1307_REG_HOUR] = BIN2BCD(t->tm_hour);
-	buf[DS1307_REG_WDAY] = BIN2BCD(t->tm_wday + 1);
-	buf[DS1307_REG_MDAY] = BIN2BCD(t->tm_mday);
-	buf[DS1307_REG_MONTH] = BIN2BCD(t->tm_mon + 1);
+	buf[DS1307_REG_SECS] = bin2bcd(t->tm_sec);
+	buf[DS1307_REG_MIN] = bin2bcd(t->tm_min);
+	buf[DS1307_REG_HOUR] = bin2bcd(t->tm_hour);
+	buf[DS1307_REG_WDAY] = bin2bcd(t->tm_wday + 1);
+	buf[DS1307_REG_MDAY] = bin2bcd(t->tm_mday);
+	buf[DS1307_REG_MONTH] = bin2bcd(t->tm_mon + 1);
 
 	/* assume 20YY not 19YY */
 	tmp = t->tm_year - 100;
-	buf[DS1307_REG_YEAR] = BIN2BCD(tmp);
+	buf[DS1307_REG_YEAR] = bin2bcd(tmp);
 
 	switch (ds1307->type) {
 	case ds_1337:
@@ -709,18 +709,18 @@ read_rtc:
 	}
 
 	tmp = ds1307->regs[DS1307_REG_SECS];
-	tmp = BCD2BIN(tmp & 0x7f);
+	tmp = bcd2bin(tmp & 0x7f);
 	if (tmp > 60)
 		goto exit_bad;
-	tmp = BCD2BIN(ds1307->regs[DS1307_REG_MIN] & 0x7f);
+	tmp = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
 	if (tmp > 60)
 		goto exit_bad;
 
-	tmp = BCD2BIN(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
+	tmp = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
 	if (tmp == 0 || tmp > 31)
 		goto exit_bad;
 
-	tmp = BCD2BIN(ds1307->regs[DS1307_REG_MONTH] & 0x1f);
+	tmp = bcd2bin(ds1307->regs[DS1307_REG_MONTH] & 0x1f);
 	if (tmp == 0 || tmp > 12)
 		goto exit_bad;
 
@@ -739,14 +739,14 @@ read_rtc:
 		/* Be sure we're in 24 hour mode.  Multi-master systems
 		 * take note...
 		 */
-		tmp = BCD2BIN(tmp & 0x1f);
+		tmp = bcd2bin(tmp & 0x1f);
 		if (tmp == 12)
 			tmp = 0;
 		if (ds1307->regs[DS1307_REG_HOUR] & DS1307_BIT_PM)
 			tmp += 12;
 		i2c_smbus_write_byte_data(client,
 				DS1307_REG_HOUR,
-				BIN2BCD(tmp));
+				bin2bcd(tmp));
 	}
 
 	ds1307->rtc = rtc_device_register(client->name, &client->dev,
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 86981d34fbb6..7bb0a962ad21 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -153,8 +153,8 @@ ds1511_wdog_set(unsigned long deciseconds)
 	/*
 	 * set the wdog values in the wdog registers
 	 */
-	rtc_write(BIN2BCD(deciseconds % 100), DS1511_WD_MSEC);
-	rtc_write(BIN2BCD(deciseconds / 100), DS1511_WD_SEC);
+	rtc_write(bin2bcd(deciseconds % 100), DS1511_WD_MSEC);
+	rtc_write(bin2bcd(deciseconds / 100), DS1511_WD_SEC);
 	/*
 	 * set wdog enable and wdog 'steering' bit to issue a reset
 	 */
@@ -220,13 +220,13 @@ static int ds1511_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm)
 	/*
 	 * each register is a different number of valid bits
 	 */
-	sec = BIN2BCD(sec) & 0x7f;
-	min = BIN2BCD(min) & 0x7f;
-	hrs = BIN2BCD(hrs) & 0x3f;
-	day = BIN2BCD(day) & 0x3f;
-	mon = BIN2BCD(mon) & 0x1f;
-	yrs = BIN2BCD(yrs) & 0xff;
-	cen = BIN2BCD(cen) & 0xff;
+	sec = bin2bcd(sec) & 0x7f;
+	min = bin2bcd(min) & 0x7f;
+	hrs = bin2bcd(hrs) & 0x3f;
+	day = bin2bcd(day) & 0x3f;
+	mon = bin2bcd(mon) & 0x1f;
+	yrs = bin2bcd(yrs) & 0xff;
+	cen = bin2bcd(cen) & 0xff;
 
 	spin_lock_irqsave(&ds1511_lock, flags);
 	rtc_disable_update();
@@ -264,14 +264,14 @@ static int ds1511_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
 	rtc_enable_update();
 	spin_unlock_irqrestore(&ds1511_lock, flags);
 
-	rtc_tm->tm_sec = BCD2BIN(rtc_tm->tm_sec);
-	rtc_tm->tm_min = BCD2BIN(rtc_tm->tm_min);
-	rtc_tm->tm_hour = BCD2BIN(rtc_tm->tm_hour);
-	rtc_tm->tm_mday = BCD2BIN(rtc_tm->tm_mday);
-	rtc_tm->tm_wday = BCD2BIN(rtc_tm->tm_wday);
-	rtc_tm->tm_mon = BCD2BIN(rtc_tm->tm_mon);
-	rtc_tm->tm_year = BCD2BIN(rtc_tm->tm_year);
-	century = BCD2BIN(century) * 100;
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_wday = bcd2bin(rtc_tm->tm_wday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
+	century = bcd2bin(century) * 100;
 
 	/*
 	 * Account for differences between how the RTC uses the values
@@ -304,16 +304,16 @@ ds1511_rtc_update_alarm(struct rtc_plat_data *pdata)
 
 	spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
 	rtc_write(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_mday) & 0x3f,
+	       0x80 : bin2bcd(pdata->alrm_mday) & 0x3f,
 	       RTC_ALARM_DATE);
 	rtc_write(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_hour) & 0x3f,
+	       0x80 : bin2bcd(pdata->alrm_hour) & 0x3f,
 	       RTC_ALARM_HOUR);
 	rtc_write(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_min) & 0x7f,
+	       0x80 : bin2bcd(pdata->alrm_min) & 0x7f,
 	       RTC_ALARM_MIN);
 	rtc_write(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_sec) & 0x7f,
+	       0x80 : bin2bcd(pdata->alrm_sec) & 0x7f,
 	       RTC_ALARM_SEC);
 	rtc_write(rtc_read(RTC_CMD) | (pdata->irqen ? RTC_TIE : 0), RTC_CMD);
 	rtc_read(RTC_CMD1);	/* clear interrupts */
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 4ef59285b489..b9475cd20210 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -78,17 +78,17 @@ static int ds1553_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	void __iomem *ioaddr = pdata->ioaddr;
 	u8 century;
 
-	century = BIN2BCD((tm->tm_year + 1900) / 100);
+	century = bin2bcd((tm->tm_year + 1900) / 100);
 
 	writeb(RTC_WRITE, pdata->ioaddr + RTC_CONTROL);
 
-	writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
-	writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
-	writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
-	writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
-	writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
-	writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
-	writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+	writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+	writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+	writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+	writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+	writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+	writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+	writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
 
 	/* RTC_CENTURY and RTC_CONTROL share same register */
 	writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY);
@@ -118,14 +118,14 @@ static int ds1553_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	year = readb(ioaddr + RTC_YEAR);
 	century = readb(ioaddr + RTC_CENTURY) & RTC_CENTURY_MASK;
 	writeb(0, ioaddr + RTC_CONTROL);
-	tm->tm_sec = BCD2BIN(second);
-	tm->tm_min = BCD2BIN(minute);
-	tm->tm_hour = BCD2BIN(hour);
-	tm->tm_mday = BCD2BIN(day);
-	tm->tm_wday = BCD2BIN(week);
-	tm->tm_mon = BCD2BIN(month) - 1;
+	tm->tm_sec = bcd2bin(second);
+	tm->tm_min = bcd2bin(minute);
+	tm->tm_hour = bcd2bin(hour);
+	tm->tm_mday = bcd2bin(day);
+	tm->tm_wday = bcd2bin(week);
+	tm->tm_mon = bcd2bin(month) - 1;
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
 	if (rtc_valid_tm(tm) < 0) {
 		dev_err(dev, "retrieved date/time is not valid.\n");
@@ -141,16 +141,16 @@ static void ds1553_rtc_update_alarm(struct rtc_plat_data *pdata)
 
 	spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
 	writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_mday),
+	       0x80 : bin2bcd(pdata->alrm_mday),
 	       ioaddr + RTC_DATE_ALARM);
 	writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_hour),
+	       0x80 : bin2bcd(pdata->alrm_hour),
 	       ioaddr + RTC_HOURS_ALARM);
 	writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_min),
+	       0x80 : bin2bcd(pdata->alrm_min),
 	       ioaddr + RTC_MINUTES_ALARM);
 	writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_sec),
+	       0x80 : bin2bcd(pdata->alrm_sec),
 	       ioaddr + RTC_SECONDS_ALARM);
 	writeb(pdata->irqen ? RTC_INTS_AE : 0, ioaddr + RTC_INTERRUPTS);
 	readb(ioaddr + RTC_FLAGS);	/* clear interrupts */
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 24d35ede2dbf..8bc8501bffc8 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -66,17 +66,17 @@ static int ds1742_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	void __iomem *ioaddr = pdata->ioaddr_rtc;
 	u8 century;
 
-	century = BIN2BCD((tm->tm_year + 1900) / 100);
+	century = bin2bcd((tm->tm_year + 1900) / 100);
 
 	writeb(RTC_WRITE, ioaddr + RTC_CONTROL);
 
-	writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
-	writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
-	writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
-	writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
-	writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
-	writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
-	writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+	writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+	writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+	writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+	writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+	writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+	writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+	writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
 
 	/* RTC_CENTURY and RTC_CONTROL share same register */
 	writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY);
@@ -106,14 +106,14 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	year = readb(ioaddr + RTC_YEAR);
 	century = readb(ioaddr + RTC_CENTURY) & RTC_CENTURY_MASK;
 	writeb(0, ioaddr + RTC_CONTROL);
-	tm->tm_sec = BCD2BIN(second);
-	tm->tm_min = BCD2BIN(minute);
-	tm->tm_hour = BCD2BIN(hour);
-	tm->tm_mday = BCD2BIN(day);
-	tm->tm_wday = BCD2BIN(week);
-	tm->tm_mon = BCD2BIN(month) - 1;
+	tm->tm_sec = bcd2bin(second);
+	tm->tm_min = bcd2bin(minute);
+	tm->tm_hour = bcd2bin(hour);
+	tm->tm_mday = bcd2bin(day);
+	tm->tm_wday = bcd2bin(week);
+	tm->tm_mon = bcd2bin(month) - 1;
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
 	if (rtc_valid_tm(tm) < 0) {
 		dev_err(dev, "retrieved date/time is not valid.\n");
diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c
index abfdfcbaa059..3a7be11cc6b9 100644
--- a/drivers/rtc/rtc-fm3130.c
+++ b/drivers/rtc/rtc-fm3130.c
@@ -131,17 +131,17 @@ static int fm3130_get_time(struct device *dev, struct rtc_time *t)
 			fm3130->regs[0xc], fm3130->regs[0xd],
 			fm3130->regs[0xe]);
 
-	t->tm_sec = BCD2BIN(fm3130->regs[FM3130_RTC_SECONDS] & 0x7f);
-	t->tm_min = BCD2BIN(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
+	t->tm_sec = bcd2bin(fm3130->regs[FM3130_RTC_SECONDS] & 0x7f);
+	t->tm_min = bcd2bin(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
 	tmp = fm3130->regs[FM3130_RTC_HOURS] & 0x3f;
-	t->tm_hour = BCD2BIN(tmp);
-	t->tm_wday = BCD2BIN(fm3130->regs[FM3130_RTC_DAY] & 0x07) - 1;
-	t->tm_mday = BCD2BIN(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
+	t->tm_hour = bcd2bin(tmp);
+	t->tm_wday = bcd2bin(fm3130->regs[FM3130_RTC_DAY] & 0x07) - 1;
+	t->tm_mday = bcd2bin(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
 	tmp = fm3130->regs[FM3130_RTC_MONTHS] & 0x1f;
-	t->tm_mon = BCD2BIN(tmp) - 1;
+	t->tm_mon = bcd2bin(tmp) - 1;
 
 	/* assume 20YY not 19YY, and ignore CF bit */
-	t->tm_year = BCD2BIN(fm3130->regs[FM3130_RTC_YEARS]) + 100;
+	t->tm_year = bcd2bin(fm3130->regs[FM3130_RTC_YEARS]) + 100;
 
 	dev_dbg(dev, "%s secs=%d, mins=%d, "
 		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -167,16 +167,16 @@ static int fm3130_set_time(struct device *dev, struct rtc_time *t)
 		t->tm_mon, t->tm_year, t->tm_wday);
 
 	/* first register addr */
-	buf[FM3130_RTC_SECONDS] = BIN2BCD(t->tm_sec);
-	buf[FM3130_RTC_MINUTES] = BIN2BCD(t->tm_min);
-	buf[FM3130_RTC_HOURS] = BIN2BCD(t->tm_hour);
-	buf[FM3130_RTC_DAY] = BIN2BCD(t->tm_wday + 1);
-	buf[FM3130_RTC_DATE] = BIN2BCD(t->tm_mday);
-	buf[FM3130_RTC_MONTHS] = BIN2BCD(t->tm_mon + 1);
+	buf[FM3130_RTC_SECONDS] = bin2bcd(t->tm_sec);
+	buf[FM3130_RTC_MINUTES] = bin2bcd(t->tm_min);
+	buf[FM3130_RTC_HOURS] = bin2bcd(t->tm_hour);
+	buf[FM3130_RTC_DAY] = bin2bcd(t->tm_wday + 1);
+	buf[FM3130_RTC_DATE] = bin2bcd(t->tm_mday);
+	buf[FM3130_RTC_MONTHS] = bin2bcd(t->tm_mon + 1);
 
 	/* assume 20YY not 19YY */
 	tmp = t->tm_year - 100;
-	buf[FM3130_RTC_YEARS] = BIN2BCD(tmp);
+	buf[FM3130_RTC_YEARS] = bin2bcd(tmp);
 
 	dev_dbg(dev, "%s: %02x %02x %02x %02x %02x %02x %02x"
 		"%02x %02x %02x %02x %02x %02x %02x %02x\n",
@@ -222,11 +222,11 @@ static int fm3130_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 			fm3130->regs[FM3130_ALARM_MONTHS]);
 
 
-	tm->tm_sec	= BCD2BIN(fm3130->regs[FM3130_ALARM_SECONDS] & 0x7F);
-	tm->tm_min	= BCD2BIN(fm3130->regs[FM3130_ALARM_MINUTES] & 0x7F);
-	tm->tm_hour	= BCD2BIN(fm3130->regs[FM3130_ALARM_HOURS] & 0x3F);
-	tm->tm_mday	= BCD2BIN(fm3130->regs[FM3130_ALARM_DATE] & 0x3F);
-	tm->tm_mon	= BCD2BIN(fm3130->regs[FM3130_ALARM_MONTHS] & 0x1F);
+	tm->tm_sec	= bcd2bin(fm3130->regs[FM3130_ALARM_SECONDS] & 0x7F);
+	tm->tm_min	= bcd2bin(fm3130->regs[FM3130_ALARM_MINUTES] & 0x7F);
+	tm->tm_hour	= bcd2bin(fm3130->regs[FM3130_ALARM_HOURS] & 0x3F);
+	tm->tm_mday	= bcd2bin(fm3130->regs[FM3130_ALARM_DATE] & 0x3F);
+	tm->tm_mon	= bcd2bin(fm3130->regs[FM3130_ALARM_MONTHS] & 0x1F);
 	if (tm->tm_mon > 0)
 		tm->tm_mon -= 1; /* RTC is 1-12, tm_mon is 0-11 */
 	dev_dbg(dev, "%s secs=%d, mins=%d, "
@@ -252,23 +252,23 @@ static int fm3130_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 	if (tm->tm_sec != -1)
 		fm3130->regs[FM3130_ALARM_SECONDS] =
-			BIN2BCD(tm->tm_sec) | 0x80;
+			bin2bcd(tm->tm_sec) | 0x80;
 
 	if (tm->tm_min != -1)
 		fm3130->regs[FM3130_ALARM_MINUTES] =
-			BIN2BCD(tm->tm_min) | 0x80;
+			bin2bcd(tm->tm_min) | 0x80;
 
 	if (tm->tm_hour != -1)
 		fm3130->regs[FM3130_ALARM_HOURS] =
-			BIN2BCD(tm->tm_hour) | 0x80;
+			bin2bcd(tm->tm_hour) | 0x80;
 
 	if (tm->tm_mday != -1)
 		fm3130->regs[FM3130_ALARM_DATE] =
-			BIN2BCD(tm->tm_mday) | 0x80;
+			bin2bcd(tm->tm_mday) | 0x80;
 
 	if (tm->tm_mon != -1)
 		fm3130->regs[FM3130_ALARM_MONTHS] =
-			BIN2BCD(tm->tm_mon + 1) | 0x80;
+			bin2bcd(tm->tm_mon + 1) | 0x80;
 
 	dev_dbg(dev, "alarm write %02x %02x %02x %02x %02x\n",
 			fm3130->regs[FM3130_ALARM_SECONDS],
@@ -414,18 +414,18 @@ static int __devinit fm3130_probe(struct i2c_client *client,
 	/* TODO */
 	/* TODO need to sanity check alarm */
 	tmp = fm3130->regs[FM3130_RTC_SECONDS];
-	tmp = BCD2BIN(tmp & 0x7f);
+	tmp = bcd2bin(tmp & 0x7f);
 	if (tmp > 60)
 		goto exit_bad;
-	tmp = BCD2BIN(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
+	tmp = bcd2bin(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
 	if (tmp > 60)
 		goto exit_bad;
 
-	tmp = BCD2BIN(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
+	tmp = bcd2bin(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
 	if (tmp == 0 || tmp > 31)
 		goto exit_bad;
 
-	tmp = BCD2BIN(fm3130->regs[FM3130_RTC_MONTHS] & 0x1f);
+	tmp = bcd2bin(fm3130->regs[FM3130_RTC_MONTHS] & 0x1f);
 	if (tmp == 0 || tmp > 12)
 		goto exit_bad;
 
diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index a81adab6e515..2cd77ab8fc66 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -259,26 +259,26 @@ isl1208_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
 		return sr;
 	}
 
-	tm->tm_sec = BCD2BIN(regs[ISL1208_REG_SC]);
-	tm->tm_min = BCD2BIN(regs[ISL1208_REG_MN]);
+	tm->tm_sec = bcd2bin(regs[ISL1208_REG_SC]);
+	tm->tm_min = bcd2bin(regs[ISL1208_REG_MN]);
 
 	/* HR field has a more complex interpretation */
 	{
 		const u8 _hr = regs[ISL1208_REG_HR];
 		if (_hr & ISL1208_REG_HR_MIL)	/* 24h format */
-			tm->tm_hour = BCD2BIN(_hr & 0x3f);
+			tm->tm_hour = bcd2bin(_hr & 0x3f);
 		else {
 			/* 12h format */
-			tm->tm_hour = BCD2BIN(_hr & 0x1f);
+			tm->tm_hour = bcd2bin(_hr & 0x1f);
 			if (_hr & ISL1208_REG_HR_PM)	/* PM flag set */
 				tm->tm_hour += 12;
 		}
 	}
 
-	tm->tm_mday = BCD2BIN(regs[ISL1208_REG_DT]);
-	tm->tm_mon = BCD2BIN(regs[ISL1208_REG_MO]) - 1;	/* rtc starts at 1 */
-	tm->tm_year = BCD2BIN(regs[ISL1208_REG_YR]) + 100;
-	tm->tm_wday = BCD2BIN(regs[ISL1208_REG_DW]);
+	tm->tm_mday = bcd2bin(regs[ISL1208_REG_DT]);
+	tm->tm_mon = bcd2bin(regs[ISL1208_REG_MO]) - 1;	/* rtc starts at 1 */
+	tm->tm_year = bcd2bin(regs[ISL1208_REG_YR]) + 100;
+	tm->tm_wday = bcd2bin(regs[ISL1208_REG_DW]);
 
 	return 0;
 }
@@ -305,13 +305,13 @@ isl1208_i2c_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alarm)
 	}
 
 	/* MSB of each alarm register is an enable bit */
-	tm->tm_sec = BCD2BIN(regs[ISL1208_REG_SCA - ISL1208_REG_SCA] & 0x7f);
-	tm->tm_min = BCD2BIN(regs[ISL1208_REG_MNA - ISL1208_REG_SCA] & 0x7f);
-	tm->tm_hour = BCD2BIN(regs[ISL1208_REG_HRA - ISL1208_REG_SCA] & 0x3f);
-	tm->tm_mday = BCD2BIN(regs[ISL1208_REG_DTA - ISL1208_REG_SCA] & 0x3f);
+	tm->tm_sec = bcd2bin(regs[ISL1208_REG_SCA - ISL1208_REG_SCA] & 0x7f);
+	tm->tm_min = bcd2bin(regs[ISL1208_REG_MNA - ISL1208_REG_SCA] & 0x7f);
+	tm->tm_hour = bcd2bin(regs[ISL1208_REG_HRA - ISL1208_REG_SCA] & 0x3f);
+	tm->tm_mday = bcd2bin(regs[ISL1208_REG_DTA - ISL1208_REG_SCA] & 0x3f);
 	tm->tm_mon =
-		BCD2BIN(regs[ISL1208_REG_MOA - ISL1208_REG_SCA] & 0x1f) - 1;
-	tm->tm_wday = BCD2BIN(regs[ISL1208_REG_DWA - ISL1208_REG_SCA] & 0x03);
+		bcd2bin(regs[ISL1208_REG_MOA - ISL1208_REG_SCA] & 0x1f) - 1;
+	tm->tm_wday = bcd2bin(regs[ISL1208_REG_DWA - ISL1208_REG_SCA] & 0x03);
 
 	return 0;
 }
@@ -328,15 +328,15 @@ isl1208_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
 	int sr;
 	u8 regs[ISL1208_RTC_SECTION_LEN] = { 0, };
 
-	regs[ISL1208_REG_SC] = BIN2BCD(tm->tm_sec);
-	regs[ISL1208_REG_MN] = BIN2BCD(tm->tm_min);
-	regs[ISL1208_REG_HR] = BIN2BCD(tm->tm_hour) | ISL1208_REG_HR_MIL;
+	regs[ISL1208_REG_SC] = bin2bcd(tm->tm_sec);
+	regs[ISL1208_REG_MN] = bin2bcd(tm->tm_min);
+	regs[ISL1208_REG_HR] = bin2bcd(tm->tm_hour) | ISL1208_REG_HR_MIL;
 
-	regs[ISL1208_REG_DT] = BIN2BCD(tm->tm_mday);
-	regs[ISL1208_REG_MO] = BIN2BCD(tm->tm_mon + 1);
-	regs[ISL1208_REG_YR] = BIN2BCD(tm->tm_year - 100);
+	regs[ISL1208_REG_DT] = bin2bcd(tm->tm_mday);
+	regs[ISL1208_REG_MO] = bin2bcd(tm->tm_mon + 1);
+	regs[ISL1208_REG_YR] = bin2bcd(tm->tm_year - 100);
 
-	regs[ISL1208_REG_DW] = BIN2BCD(tm->tm_wday & 7);
+	regs[ISL1208_REG_DW] = bin2bcd(tm->tm_wday & 7);
 
 	sr = isl1208_i2c_get_sr(client);
 	if (sr < 0) {
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 470fb2d29545..893f7dece239 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -110,15 +110,15 @@ static int m41t80_get_datetime(struct i2c_client *client,
 		return -EIO;
 	}
 
-	tm->tm_sec = BCD2BIN(buf[M41T80_REG_SEC] & 0x7f);
-	tm->tm_min = BCD2BIN(buf[M41T80_REG_MIN] & 0x7f);
-	tm->tm_hour = BCD2BIN(buf[M41T80_REG_HOUR] & 0x3f);
-	tm->tm_mday = BCD2BIN(buf[M41T80_REG_DAY] & 0x3f);
+	tm->tm_sec = bcd2bin(buf[M41T80_REG_SEC] & 0x7f);
+	tm->tm_min = bcd2bin(buf[M41T80_REG_MIN] & 0x7f);
+	tm->tm_hour = bcd2bin(buf[M41T80_REG_HOUR] & 0x3f);
+	tm->tm_mday = bcd2bin(buf[M41T80_REG_DAY] & 0x3f);
 	tm->tm_wday = buf[M41T80_REG_WDAY] & 0x07;
-	tm->tm_mon = BCD2BIN(buf[M41T80_REG_MON] & 0x1f) - 1;
+	tm->tm_mon = bcd2bin(buf[M41T80_REG_MON] & 0x1f) - 1;
 
 	/* assume 20YY not 19YY, and ignore the Century Bit */
-	tm->tm_year = BCD2BIN(buf[M41T80_REG_YEAR]) + 100;
+	tm->tm_year = bcd2bin(buf[M41T80_REG_YEAR]) + 100;
 	return 0;
 }
 
@@ -161,19 +161,19 @@ static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 	/* Merge time-data and register flags into buf[0..7] */
 	buf[M41T80_REG_SSEC] = 0;
 	buf[M41T80_REG_SEC] =
-		BIN2BCD(tm->tm_sec) | (buf[M41T80_REG_SEC] & ~0x7f);
+		bin2bcd(tm->tm_sec) | (buf[M41T80_REG_SEC] & ~0x7f);
 	buf[M41T80_REG_MIN] =
-		BIN2BCD(tm->tm_min) | (buf[M41T80_REG_MIN] & ~0x7f);
+		bin2bcd(tm->tm_min) | (buf[M41T80_REG_MIN] & ~0x7f);
 	buf[M41T80_REG_HOUR] =
-		BIN2BCD(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f) ;
+		bin2bcd(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f) ;
 	buf[M41T80_REG_WDAY] =
 		(tm->tm_wday & 0x07) | (buf[M41T80_REG_WDAY] & ~0x07);
 	buf[M41T80_REG_DAY] =
-		BIN2BCD(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
+		bin2bcd(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
 	buf[M41T80_REG_MON] =
-		BIN2BCD(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
+		bin2bcd(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
 	/* assume 20YY not 19YY */
-	buf[M41T80_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+	buf[M41T80_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 
 	if (i2c_transfer(client->adapter, msgs, 1) != 1) {
 		dev_err(&client->dev, "write error\n");
@@ -288,15 +288,15 @@ static int m41t80_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 	wbuf[0] = M41T80_REG_ALARM_MON; /* offset into rtc's regs */
 	reg[M41T80_REG_ALARM_SEC] |= t->time.tm_sec >= 0 ?
-		BIN2BCD(t->time.tm_sec) : 0x80;
+		bin2bcd(t->time.tm_sec) : 0x80;
 	reg[M41T80_REG_ALARM_MIN] |= t->time.tm_min >= 0 ?
-		BIN2BCD(t->time.tm_min) : 0x80;
+		bin2bcd(t->time.tm_min) : 0x80;
 	reg[M41T80_REG_ALARM_HOUR] |= t->time.tm_hour >= 0 ?
-		BIN2BCD(t->time.tm_hour) : 0x80;
+		bin2bcd(t->time.tm_hour) : 0x80;
 	reg[M41T80_REG_ALARM_DAY] |= t->time.tm_mday >= 0 ?
-		BIN2BCD(t->time.tm_mday) : 0x80;
+		bin2bcd(t->time.tm_mday) : 0x80;
 	if (t->time.tm_mon >= 0)
-		reg[M41T80_REG_ALARM_MON] |= BIN2BCD(t->time.tm_mon + 1);
+		reg[M41T80_REG_ALARM_MON] |= bin2bcd(t->time.tm_mon + 1);
 	else
 		reg[M41T80_REG_ALARM_DAY] |= 0x40;
 
@@ -347,15 +347,15 @@ static int m41t80_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 	t->time.tm_mday = -1;
 	t->time.tm_mon = -1;
 	if (!(reg[M41T80_REG_ALARM_SEC] & 0x80))
-		t->time.tm_sec = BCD2BIN(reg[M41T80_REG_ALARM_SEC] & 0x7f);
+		t->time.tm_sec = bcd2bin(reg[M41T80_REG_ALARM_SEC] & 0x7f);
 	if (!(reg[M41T80_REG_ALARM_MIN] & 0x80))
-		t->time.tm_min = BCD2BIN(reg[M41T80_REG_ALARM_MIN] & 0x7f);
+		t->time.tm_min = bcd2bin(reg[M41T80_REG_ALARM_MIN] & 0x7f);
 	if (!(reg[M41T80_REG_ALARM_HOUR] & 0x80))
-		t->time.tm_hour = BCD2BIN(reg[M41T80_REG_ALARM_HOUR] & 0x3f);
+		t->time.tm_hour = bcd2bin(reg[M41T80_REG_ALARM_HOUR] & 0x3f);
 	if (!(reg[M41T80_REG_ALARM_DAY] & 0x80))
-		t->time.tm_mday = BCD2BIN(reg[M41T80_REG_ALARM_DAY] & 0x3f);
+		t->time.tm_mday = bcd2bin(reg[M41T80_REG_ALARM_DAY] & 0x3f);
 	if (!(reg[M41T80_REG_ALARM_DAY] & 0x40))
-		t->time.tm_mon = BCD2BIN(reg[M41T80_REG_ALARM_MON] & 0x1f) - 1;
+		t->time.tm_mon = bcd2bin(reg[M41T80_REG_ALARM_MON] & 0x1f) - 1;
 	t->time.tm_year = -1;
 	t->time.tm_wday = -1;
 	t->time.tm_yday = -1;
diff --git a/drivers/rtc/rtc-m41t94.c b/drivers/rtc/rtc-m41t94.c
index 9b19499c829e..c3a18c58daf6 100644
--- a/drivers/rtc/rtc-m41t94.c
+++ b/drivers/rtc/rtc-m41t94.c
@@ -41,17 +41,17 @@ static int m41t94_set_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_mon, tm->tm_year, tm->tm_wday);
 
 	buf[0] = 0x80 | M41T94_REG_SECONDS; /* write time + date */
-	buf[M41T94_REG_SECONDS] = BIN2BCD(tm->tm_sec);
-	buf[M41T94_REG_MINUTES] = BIN2BCD(tm->tm_min);
-	buf[M41T94_REG_HOURS]   = BIN2BCD(tm->tm_hour);
-	buf[M41T94_REG_WDAY]    = BIN2BCD(tm->tm_wday + 1);
-	buf[M41T94_REG_DAY]     = BIN2BCD(tm->tm_mday);
-	buf[M41T94_REG_MONTH]   = BIN2BCD(tm->tm_mon + 1);
+	buf[M41T94_REG_SECONDS] = bin2bcd(tm->tm_sec);
+	buf[M41T94_REG_MINUTES] = bin2bcd(tm->tm_min);
+	buf[M41T94_REG_HOURS]   = bin2bcd(tm->tm_hour);
+	buf[M41T94_REG_WDAY]    = bin2bcd(tm->tm_wday + 1);
+	buf[M41T94_REG_DAY]     = bin2bcd(tm->tm_mday);
+	buf[M41T94_REG_MONTH]   = bin2bcd(tm->tm_mon + 1);
 
 	buf[M41T94_REG_HOURS] |= M41T94_BIT_CEB;
 	if (tm->tm_year >= 100)
 		buf[M41T94_REG_HOURS] |= M41T94_BIT_CB;
-	buf[M41T94_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+	buf[M41T94_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 
 	return spi_write(spi, buf, 8);
 }
@@ -82,14 +82,14 @@ static int m41t94_read_time(struct device *dev, struct rtc_time *tm)
 		spi_write(spi, buf, 2);
 	}
 
-	tm->tm_sec  = BCD2BIN(spi_w8r8(spi, M41T94_REG_SECONDS));
-	tm->tm_min  = BCD2BIN(spi_w8r8(spi, M41T94_REG_MINUTES));
+	tm->tm_sec  = bcd2bin(spi_w8r8(spi, M41T94_REG_SECONDS));
+	tm->tm_min  = bcd2bin(spi_w8r8(spi, M41T94_REG_MINUTES));
 	hour = spi_w8r8(spi, M41T94_REG_HOURS);
-	tm->tm_hour = BCD2BIN(hour & 0x3f);
-	tm->tm_wday = BCD2BIN(spi_w8r8(spi, M41T94_REG_WDAY)) - 1;
-	tm->tm_mday = BCD2BIN(spi_w8r8(spi, M41T94_REG_DAY));
-	tm->tm_mon  = BCD2BIN(spi_w8r8(spi, M41T94_REG_MONTH)) - 1;
-	tm->tm_year = BCD2BIN(spi_w8r8(spi, M41T94_REG_YEAR));
+	tm->tm_hour = bcd2bin(hour & 0x3f);
+	tm->tm_wday = bcd2bin(spi_w8r8(spi, M41T94_REG_WDAY)) - 1;
+	tm->tm_mday = bcd2bin(spi_w8r8(spi, M41T94_REG_DAY));
+	tm->tm_mon  = bcd2bin(spi_w8r8(spi, M41T94_REG_MONTH)) - 1;
+	tm->tm_year = bcd2bin(spi_w8r8(spi, M41T94_REG_YEAR));
 	if ((hour & M41T94_BIT_CB) || !(hour & M41T94_BIT_CEB))
 		tm->tm_year += 100;
 
diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c
index ce4eff6a8d51..ed671e29e07d 100644
--- a/drivers/rtc/rtc-m48t59.c
+++ b/drivers/rtc/rtc-m48t59.c
@@ -76,10 +76,10 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* Issue the READ command */
 	M48T59_SET_BITS(M48T59_CNTL_READ, M48T59_CNTL);
 
-	tm->tm_year	= BCD2BIN(M48T59_READ(M48T59_YEAR));
+	tm->tm_year	= bcd2bin(M48T59_READ(M48T59_YEAR));
 	/* tm_mon is 0-11 */
-	tm->tm_mon	= BCD2BIN(M48T59_READ(M48T59_MONTH)) - 1;
-	tm->tm_mday	= BCD2BIN(M48T59_READ(M48T59_MDAY));
+	tm->tm_mon	= bcd2bin(M48T59_READ(M48T59_MONTH)) - 1;
+	tm->tm_mday	= bcd2bin(M48T59_READ(M48T59_MDAY));
 
 	val = M48T59_READ(M48T59_WDAY);
 	if ((pdata->type == M48T59RTC_TYPE_M48T59) &&
@@ -88,10 +88,10 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_year += 100;	/* one century */
 	}
 
-	tm->tm_wday	= BCD2BIN(val & 0x07);
-	tm->tm_hour	= BCD2BIN(M48T59_READ(M48T59_HOUR) & 0x3F);
-	tm->tm_min	= BCD2BIN(M48T59_READ(M48T59_MIN) & 0x7F);
-	tm->tm_sec	= BCD2BIN(M48T59_READ(M48T59_SEC) & 0x7F);
+	tm->tm_wday	= bcd2bin(val & 0x07);
+	tm->tm_hour	= bcd2bin(M48T59_READ(M48T59_HOUR) & 0x3F);
+	tm->tm_min	= bcd2bin(M48T59_READ(M48T59_MIN) & 0x7F);
+	tm->tm_sec	= bcd2bin(M48T59_READ(M48T59_SEC) & 0x7F);
 
 	/* Clear the READ bit */
 	M48T59_CLEAR_BITS(M48T59_CNTL_READ, M48T59_CNTL);
@@ -119,17 +119,17 @@ static int m48t59_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	/* Issue the WRITE command */
 	M48T59_SET_BITS(M48T59_CNTL_WRITE, M48T59_CNTL);
 
-	M48T59_WRITE((BIN2BCD(tm->tm_sec) & 0x7F), M48T59_SEC);
-	M48T59_WRITE((BIN2BCD(tm->tm_min) & 0x7F), M48T59_MIN);
-	M48T59_WRITE((BIN2BCD(tm->tm_hour) & 0x3F), M48T59_HOUR);
-	M48T59_WRITE((BIN2BCD(tm->tm_mday) & 0x3F), M48T59_MDAY);
+	M48T59_WRITE((bin2bcd(tm->tm_sec) & 0x7F), M48T59_SEC);
+	M48T59_WRITE((bin2bcd(tm->tm_min) & 0x7F), M48T59_MIN);
+	M48T59_WRITE((bin2bcd(tm->tm_hour) & 0x3F), M48T59_HOUR);
+	M48T59_WRITE((bin2bcd(tm->tm_mday) & 0x3F), M48T59_MDAY);
 	/* tm_mon is 0-11 */
-	M48T59_WRITE((BIN2BCD(tm->tm_mon + 1) & 0x1F), M48T59_MONTH);
-	M48T59_WRITE(BIN2BCD(tm->tm_year % 100), M48T59_YEAR);
+	M48T59_WRITE((bin2bcd(tm->tm_mon + 1) & 0x1F), M48T59_MONTH);
+	M48T59_WRITE(bin2bcd(tm->tm_year % 100), M48T59_YEAR);
 
 	if (pdata->type == M48T59RTC_TYPE_M48T59 && (tm->tm_year / 100))
 		val = (M48T59_WDAY_CEB | M48T59_WDAY_CB);
-	val |= (BIN2BCD(tm->tm_wday) & 0x07);
+	val |= (bin2bcd(tm->tm_wday) & 0x07);
 	M48T59_WRITE(val, M48T59_WDAY);
 
 	/* Clear the WRITE bit */
@@ -158,18 +158,18 @@ static int m48t59_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	/* Issue the READ command */
 	M48T59_SET_BITS(M48T59_CNTL_READ, M48T59_CNTL);
 
-	tm->tm_year = BCD2BIN(M48T59_READ(M48T59_YEAR));
+	tm->tm_year = bcd2bin(M48T59_READ(M48T59_YEAR));
 	/* tm_mon is 0-11 */
-	tm->tm_mon = BCD2BIN(M48T59_READ(M48T59_MONTH)) - 1;
+	tm->tm_mon = bcd2bin(M48T59_READ(M48T59_MONTH)) - 1;
 
 	val = M48T59_READ(M48T59_WDAY);
 	if ((val & M48T59_WDAY_CEB) && (val & M48T59_WDAY_CB))
 		tm->tm_year += 100;	/* one century */
 
-	tm->tm_mday = BCD2BIN(M48T59_READ(M48T59_ALARM_DATE));
-	tm->tm_hour = BCD2BIN(M48T59_READ(M48T59_ALARM_HOUR));
-	tm->tm_min = BCD2BIN(M48T59_READ(M48T59_ALARM_MIN));
-	tm->tm_sec = BCD2BIN(M48T59_READ(M48T59_ALARM_SEC));
+	tm->tm_mday = bcd2bin(M48T59_READ(M48T59_ALARM_DATE));
+	tm->tm_hour = bcd2bin(M48T59_READ(M48T59_ALARM_HOUR));
+	tm->tm_min = bcd2bin(M48T59_READ(M48T59_ALARM_MIN));
+	tm->tm_sec = bcd2bin(M48T59_READ(M48T59_ALARM_SEC));
 
 	/* Clear the READ bit */
 	M48T59_CLEAR_BITS(M48T59_CNTL_READ, M48T59_CNTL);
@@ -201,18 +201,18 @@ static int m48t59_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	 * 0xff means "always match"
 	 */
 	mday = tm->tm_mday;
-	mday = (mday >= 1 && mday <= 31) ? BIN2BCD(mday) : 0xff;
+	mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
 	if (mday == 0xff)
 		mday = M48T59_READ(M48T59_MDAY);
 
 	hour = tm->tm_hour;
-	hour = (hour < 24) ? BIN2BCD(hour) : 0x00;
+	hour = (hour < 24) ? bin2bcd(hour) : 0x00;
 
 	min = tm->tm_min;
-	min = (min < 60) ? BIN2BCD(min) : 0x00;
+	min = (min < 60) ? bin2bcd(min) : 0x00;
 
 	sec = tm->tm_sec;
-	sec = (sec < 60) ? BIN2BCD(sec) : 0x00;
+	sec = (sec < 60) ? bin2bcd(sec) : 0x00;
 
 	spin_lock_irqsave(&m48t59->lock, flags);
 	/* Issue the WRITE command */
diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c
index 3f7f99a5d96a..7c045cffa9ff 100644
--- a/drivers/rtc/rtc-m48t86.c
+++ b/drivers/rtc/rtc-m48t86.c
@@ -62,14 +62,14 @@ static int m48t86_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_wday	= ops->readbyte(M48T86_REG_DOW);
 	} else {
 		/* bcd mode */
-		tm->tm_sec	= BCD2BIN(ops->readbyte(M48T86_REG_SEC));
-		tm->tm_min	= BCD2BIN(ops->readbyte(M48T86_REG_MIN));
-		tm->tm_hour	= BCD2BIN(ops->readbyte(M48T86_REG_HOUR) & 0x3F);
-		tm->tm_mday	= BCD2BIN(ops->readbyte(M48T86_REG_DOM));
+		tm->tm_sec	= bcd2bin(ops->readbyte(M48T86_REG_SEC));
+		tm->tm_min	= bcd2bin(ops->readbyte(M48T86_REG_MIN));
+		tm->tm_hour	= bcd2bin(ops->readbyte(M48T86_REG_HOUR) & 0x3F);
+		tm->tm_mday	= bcd2bin(ops->readbyte(M48T86_REG_DOM));
 		/* tm_mon is 0-11 */
-		tm->tm_mon	= BCD2BIN(ops->readbyte(M48T86_REG_MONTH)) - 1;
-		tm->tm_year	= BCD2BIN(ops->readbyte(M48T86_REG_YEAR)) + 100;
-		tm->tm_wday	= BCD2BIN(ops->readbyte(M48T86_REG_DOW));
+		tm->tm_mon	= bcd2bin(ops->readbyte(M48T86_REG_MONTH)) - 1;
+		tm->tm_year	= bcd2bin(ops->readbyte(M48T86_REG_YEAR)) + 100;
+		tm->tm_wday	= bcd2bin(ops->readbyte(M48T86_REG_DOW));
 	}
 
 	/* correct the hour if the clock is in 12h mode */
@@ -103,13 +103,13 @@ static int m48t86_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		ops->writebyte(tm->tm_wday, M48T86_REG_DOW);
 	} else {
 		/* bcd mode */
-		ops->writebyte(BIN2BCD(tm->tm_sec), M48T86_REG_SEC);
-		ops->writebyte(BIN2BCD(tm->tm_min), M48T86_REG_MIN);
-		ops->writebyte(BIN2BCD(tm->tm_hour), M48T86_REG_HOUR);
-		ops->writebyte(BIN2BCD(tm->tm_mday), M48T86_REG_DOM);
-		ops->writebyte(BIN2BCD(tm->tm_mon + 1), M48T86_REG_MONTH);
-		ops->writebyte(BIN2BCD(tm->tm_year % 100), M48T86_REG_YEAR);
-		ops->writebyte(BIN2BCD(tm->tm_wday), M48T86_REG_DOW);
+		ops->writebyte(bin2bcd(tm->tm_sec), M48T86_REG_SEC);
+		ops->writebyte(bin2bcd(tm->tm_min), M48T86_REG_MIN);
+		ops->writebyte(bin2bcd(tm->tm_hour), M48T86_REG_HOUR);
+		ops->writebyte(bin2bcd(tm->tm_mday), M48T86_REG_DOM);
+		ops->writebyte(bin2bcd(tm->tm_mon + 1), M48T86_REG_MONTH);
+		ops->writebyte(bin2bcd(tm->tm_year % 100), M48T86_REG_YEAR);
+		ops->writebyte(bin2bcd(tm->tm_wday), M48T86_REG_DOW);
 	}
 
 	/* update ended */
diff --git a/drivers/rtc/rtc-max6900.c b/drivers/rtc/rtc-max6900.c
index 12c9cd25cad8..80782798763f 100644
--- a/drivers/rtc/rtc-max6900.c
+++ b/drivers/rtc/rtc-max6900.c
@@ -150,14 +150,14 @@ static int max6900_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
 	if (rc < 0)
 		return rc;
 
-	tm->tm_sec = BCD2BIN(regs[MAX6900_REG_SC]);
-	tm->tm_min = BCD2BIN(regs[MAX6900_REG_MN]);
-	tm->tm_hour = BCD2BIN(regs[MAX6900_REG_HR] & 0x3f);
-	tm->tm_mday = BCD2BIN(regs[MAX6900_REG_DT]);
-	tm->tm_mon = BCD2BIN(regs[MAX6900_REG_MO]) - 1;
-	tm->tm_year = BCD2BIN(regs[MAX6900_REG_YR]) +
-	    BCD2BIN(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
-	tm->tm_wday = BCD2BIN(regs[MAX6900_REG_DW]);
+	tm->tm_sec = bcd2bin(regs[MAX6900_REG_SC]);
+	tm->tm_min = bcd2bin(regs[MAX6900_REG_MN]);
+	tm->tm_hour = bcd2bin(regs[MAX6900_REG_HR] & 0x3f);
+	tm->tm_mday = bcd2bin(regs[MAX6900_REG_DT]);
+	tm->tm_mon = bcd2bin(regs[MAX6900_REG_MO]) - 1;
+	tm->tm_year = bcd2bin(regs[MAX6900_REG_YR]) +
+		      bcd2bin(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
+	tm->tm_wday = bcd2bin(regs[MAX6900_REG_DW]);
 
 	return 0;
 }
@@ -184,14 +184,14 @@ max6900_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
 	if (rc < 0)
 		return rc;
 
-	regs[MAX6900_REG_SC] = BIN2BCD(tm->tm_sec);
-	regs[MAX6900_REG_MN] = BIN2BCD(tm->tm_min);
-	regs[MAX6900_REG_HR] = BIN2BCD(tm->tm_hour);
-	regs[MAX6900_REG_DT] = BIN2BCD(tm->tm_mday);
-	regs[MAX6900_REG_MO] = BIN2BCD(tm->tm_mon + 1);
-	regs[MAX6900_REG_DW] = BIN2BCD(tm->tm_wday);
-	regs[MAX6900_REG_YR] = BIN2BCD(tm->tm_year % 100);
-	regs[MAX6900_REG_CENTURY] = BIN2BCD((tm->tm_year + 1900) / 100);
+	regs[MAX6900_REG_SC] = bin2bcd(tm->tm_sec);
+	regs[MAX6900_REG_MN] = bin2bcd(tm->tm_min);
+	regs[MAX6900_REG_HR] = bin2bcd(tm->tm_hour);
+	regs[MAX6900_REG_DT] = bin2bcd(tm->tm_mday);
+	regs[MAX6900_REG_MO] = bin2bcd(tm->tm_mon + 1);
+	regs[MAX6900_REG_DW] = bin2bcd(tm->tm_wday);
+	regs[MAX6900_REG_YR] = bin2bcd(tm->tm_year % 100);
+	regs[MAX6900_REG_CENTURY] = bin2bcd((tm->tm_year + 1900) / 100);
 	/* set write protect */
 	regs[MAX6900_REG_CT] = MAX6900_REG_CT_WP;
 
diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c
index 78b2551fb19d..2f6507df7b49 100644
--- a/drivers/rtc/rtc-max6902.c
+++ b/drivers/rtc/rtc-max6902.c
@@ -124,15 +124,15 @@ static int max6902_get_datetime(struct device *dev, struct rtc_time *dt)
 
 	/* The chip sends data in this order:
 	 * Seconds, Minutes, Hours, Date, Month, Day, Year */
-	dt->tm_sec	= BCD2BIN(chip->buf[1]);
-	dt->tm_min	= BCD2BIN(chip->buf[2]);
-	dt->tm_hour	= BCD2BIN(chip->buf[3]);
-	dt->tm_mday	= BCD2BIN(chip->buf[4]);
-	dt->tm_mon	= BCD2BIN(chip->buf[5]) - 1;
-	dt->tm_wday	= BCD2BIN(chip->buf[6]);
-	dt->tm_year = BCD2BIN(chip->buf[7]);
+	dt->tm_sec	= bcd2bin(chip->buf[1]);
+	dt->tm_min	= bcd2bin(chip->buf[2]);
+	dt->tm_hour	= bcd2bin(chip->buf[3]);
+	dt->tm_mday	= bcd2bin(chip->buf[4]);
+	dt->tm_mon	= bcd2bin(chip->buf[5]) - 1;
+	dt->tm_wday	= bcd2bin(chip->buf[6]);
+	dt->tm_year = bcd2bin(chip->buf[7]);
 
-	century = BCD2BIN(tmp) * 100;
+	century = bcd2bin(tmp) * 100;
 
 	dt->tm_year += century;
 	dt->tm_year -= 1900;
@@ -168,15 +168,15 @@ static int max6902_set_datetime(struct device *dev, struct rtc_time *dt)
 	/* Remove write protection */
 	max6902_set_reg(dev, 0xF, 0);
 
-	max6902_set_reg(dev, 0x01, BIN2BCD(dt->tm_sec));
-	max6902_set_reg(dev, 0x03, BIN2BCD(dt->tm_min));
-	max6902_set_reg(dev, 0x05, BIN2BCD(dt->tm_hour));
+	max6902_set_reg(dev, 0x01, bin2bcd(dt->tm_sec));
+	max6902_set_reg(dev, 0x03, bin2bcd(dt->tm_min));
+	max6902_set_reg(dev, 0x05, bin2bcd(dt->tm_hour));
 
-	max6902_set_reg(dev, 0x07, BIN2BCD(dt->tm_mday));
-	max6902_set_reg(dev, 0x09, BIN2BCD(dt->tm_mon+1));
-	max6902_set_reg(dev, 0x0B, BIN2BCD(dt->tm_wday));
-	max6902_set_reg(dev, 0x0D, BIN2BCD(dt->tm_year%100));
-	max6902_set_reg(dev, 0x13, BIN2BCD(dt->tm_year/100));
+	max6902_set_reg(dev, 0x07, bin2bcd(dt->tm_mday));
+	max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon+1));
+	max6902_set_reg(dev, 0x0B, bin2bcd(dt->tm_wday));
+	max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year%100));
+	max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year/100));
 
 	/* Compulab used a delay here. However, the datasheet
 	 * does not mention a delay being required anywhere... */
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 8876605d4d4b..2cbeb0794f14 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -186,30 +186,30 @@ static int tm2bcd(struct rtc_time *tm)
 	if (rtc_valid_tm(tm) != 0)
 		return -EINVAL;
 
-	tm->tm_sec = BIN2BCD(tm->tm_sec);
-	tm->tm_min = BIN2BCD(tm->tm_min);
-	tm->tm_hour = BIN2BCD(tm->tm_hour);
-	tm->tm_mday = BIN2BCD(tm->tm_mday);
+	tm->tm_sec = bin2bcd(tm->tm_sec);
+	tm->tm_min = bin2bcd(tm->tm_min);
+	tm->tm_hour = bin2bcd(tm->tm_hour);
+	tm->tm_mday = bin2bcd(tm->tm_mday);
 
-	tm->tm_mon = BIN2BCD(tm->tm_mon + 1);
+	tm->tm_mon = bin2bcd(tm->tm_mon + 1);
 
 	/* epoch == 1900 */
 	if (tm->tm_year < 100 || tm->tm_year > 199)
 		return -EINVAL;
-	tm->tm_year = BIN2BCD(tm->tm_year - 100);
+	tm->tm_year = bin2bcd(tm->tm_year - 100);
 
 	return 0;
 }
 
 static void bcd2tm(struct rtc_time *tm)
 {
-	tm->tm_sec = BCD2BIN(tm->tm_sec);
-	tm->tm_min = BCD2BIN(tm->tm_min);
-	tm->tm_hour = BCD2BIN(tm->tm_hour);
-	tm->tm_mday = BCD2BIN(tm->tm_mday);
-	tm->tm_mon = BCD2BIN(tm->tm_mon) - 1;
+	tm->tm_sec = bcd2bin(tm->tm_sec);
+	tm->tm_min = bcd2bin(tm->tm_min);
+	tm->tm_hour = bcd2bin(tm->tm_hour);
+	tm->tm_mday = bcd2bin(tm->tm_mday);
+	tm->tm_mon = bcd2bin(tm->tm_mon) - 1;
 	/* epoch == 1900 */
-	tm->tm_year = BCD2BIN(tm->tm_year) + 100;
+	tm->tm_year = bcd2bin(tm->tm_year) + 100;
 }
 
 
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index a829f20ad6d6..b725913ccbe8 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -97,13 +97,13 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 		buf[8]);
 
 
-	tm->tm_sec = BCD2BIN(buf[PCF8563_REG_SC] & 0x7F);
-	tm->tm_min = BCD2BIN(buf[PCF8563_REG_MN] & 0x7F);
-	tm->tm_hour = BCD2BIN(buf[PCF8563_REG_HR] & 0x3F); /* rtc hr 0-23 */
-	tm->tm_mday = BCD2BIN(buf[PCF8563_REG_DM] & 0x3F);
+	tm->tm_sec = bcd2bin(buf[PCF8563_REG_SC] & 0x7F);
+	tm->tm_min = bcd2bin(buf[PCF8563_REG_MN] & 0x7F);
+	tm->tm_hour = bcd2bin(buf[PCF8563_REG_HR] & 0x3F); /* rtc hr 0-23 */
+	tm->tm_mday = bcd2bin(buf[PCF8563_REG_DM] & 0x3F);
 	tm->tm_wday = buf[PCF8563_REG_DW] & 0x07;
-	tm->tm_mon = BCD2BIN(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
-	tm->tm_year = BCD2BIN(buf[PCF8563_REG_YR]);
+	tm->tm_mon = bcd2bin(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
+	tm->tm_year = bcd2bin(buf[PCF8563_REG_YR]);
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;	/* assume we are in 1970...2069 */
 	/* detect the polarity heuristically. see note above. */
@@ -138,17 +138,17 @@ static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
 	/* hours, minutes and seconds */
-	buf[PCF8563_REG_SC] = BIN2BCD(tm->tm_sec);
-	buf[PCF8563_REG_MN] = BIN2BCD(tm->tm_min);
-	buf[PCF8563_REG_HR] = BIN2BCD(tm->tm_hour);
+	buf[PCF8563_REG_SC] = bin2bcd(tm->tm_sec);
+	buf[PCF8563_REG_MN] = bin2bcd(tm->tm_min);
+	buf[PCF8563_REG_HR] = bin2bcd(tm->tm_hour);
 
-	buf[PCF8563_REG_DM] = BIN2BCD(tm->tm_mday);
+	buf[PCF8563_REG_DM] = bin2bcd(tm->tm_mday);
 
 	/* month, 1 - 12 */
-	buf[PCF8563_REG_MO] = BIN2BCD(tm->tm_mon + 1);
+	buf[PCF8563_REG_MO] = bin2bcd(tm->tm_mon + 1);
 
 	/* year and century */
-	buf[PCF8563_REG_YR] = BIN2BCD(tm->tm_year % 100);
+	buf[PCF8563_REG_YR] = bin2bcd(tm->tm_year % 100);
 	if (pcf8563->c_polarity ? (tm->tm_year >= 100) : (tm->tm_year < 100))
 		buf[PCF8563_REG_MO] |= PCF8563_MO_C;
 
diff --git a/drivers/rtc/rtc-pcf8583.c b/drivers/rtc/rtc-pcf8583.c
index d388c662bf4b..7d33cda3f8f6 100644
--- a/drivers/rtc/rtc-pcf8583.c
+++ b/drivers/rtc/rtc-pcf8583.c
@@ -76,11 +76,11 @@ static int pcf8583_get_datetime(struct i2c_client *client, struct rtc_time *dt)
 		buf[4] &= 0x3f;
 		buf[5] &= 0x1f;
 
-		dt->tm_sec = BCD2BIN(buf[1]);
-		dt->tm_min = BCD2BIN(buf[2]);
-		dt->tm_hour = BCD2BIN(buf[3]);
-		dt->tm_mday = BCD2BIN(buf[4]);
-		dt->tm_mon = BCD2BIN(buf[5]) - 1;
+		dt->tm_sec = bcd2bin(buf[1]);
+		dt->tm_min = bcd2bin(buf[2]);
+		dt->tm_hour = bcd2bin(buf[3]);
+		dt->tm_mday = bcd2bin(buf[4]);
+		dt->tm_mon = bcd2bin(buf[5]) - 1;
 	}
 
 	return ret == 2 ? 0 : -EIO;
@@ -94,14 +94,14 @@ static int pcf8583_set_datetime(struct i2c_client *client, struct rtc_time *dt,
 	buf[0] = 0;
 	buf[1] = get_ctrl(client) | 0x80;
 	buf[2] = 0;
-	buf[3] = BIN2BCD(dt->tm_sec);
-	buf[4] = BIN2BCD(dt->tm_min);
-	buf[5] = BIN2BCD(dt->tm_hour);
+	buf[3] = bin2bcd(dt->tm_sec);
+	buf[4] = bin2bcd(dt->tm_min);
+	buf[5] = bin2bcd(dt->tm_hour);
 
 	if (datetoo) {
 		len = 8;
-		buf[6] = BIN2BCD(dt->tm_mday) | (dt->tm_year << 6);
-		buf[7] = BIN2BCD(dt->tm_mon + 1)  | (dt->tm_wday << 5);
+		buf[6] = bin2bcd(dt->tm_mday) | (dt->tm_year << 6);
+		buf[7] = bin2bcd(dt->tm_mon + 1)  | (dt->tm_wday << 5);
 	}
 
 	ret = i2c_master_send(client, (char *)buf, len);
diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c
index 395985b339c9..42028f233bef 100644
--- a/drivers/rtc/rtc-r9701.c
+++ b/drivers/rtc/rtc-r9701.c
@@ -80,13 +80,13 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt)
 
 	memset(dt, 0, sizeof(*dt));
 
-	dt->tm_sec = BCD2BIN(buf[0]); /* RSECCNT */
-	dt->tm_min = BCD2BIN(buf[1]); /* RMINCNT */
-	dt->tm_hour = BCD2BIN(buf[2]); /* RHRCNT */
+	dt->tm_sec = bcd2bin(buf[0]); /* RSECCNT */
+	dt->tm_min = bcd2bin(buf[1]); /* RMINCNT */
+	dt->tm_hour = bcd2bin(buf[2]); /* RHRCNT */
 
-	dt->tm_mday = BCD2BIN(buf[3]); /* RDAYCNT */
-	dt->tm_mon = BCD2BIN(buf[4]) - 1; /* RMONCNT */
-	dt->tm_year = BCD2BIN(buf[5]) + 100; /* RYRCNT */
+	dt->tm_mday = bcd2bin(buf[3]); /* RDAYCNT */
+	dt->tm_mon = bcd2bin(buf[4]) - 1; /* RMONCNT */
+	dt->tm_year = bcd2bin(buf[5]) + 100; /* RYRCNT */
 
 	/* the rtc device may contain illegal values on power up
 	 * according to the data sheet. make sure they are valid.
@@ -103,12 +103,12 @@ static int r9701_set_datetime(struct device *dev, struct rtc_time *dt)
 	if (year >= 2100 || year < 2000)
 		return -EINVAL;
 
-	ret = write_reg(dev, RHRCNT, BIN2BCD(dt->tm_hour));
-	ret = ret ? ret : write_reg(dev, RMINCNT, BIN2BCD(dt->tm_min));
-	ret = ret ? ret : write_reg(dev, RSECCNT, BIN2BCD(dt->tm_sec));
-	ret = ret ? ret : write_reg(dev, RDAYCNT, BIN2BCD(dt->tm_mday));
-	ret = ret ? ret : write_reg(dev, RMONCNT, BIN2BCD(dt->tm_mon + 1));
-	ret = ret ? ret : write_reg(dev, RYRCNT, BIN2BCD(dt->tm_year - 100));
+	ret = write_reg(dev, RHRCNT, bin2bcd(dt->tm_hour));
+	ret = ret ? ret : write_reg(dev, RMINCNT, bin2bcd(dt->tm_min));
+	ret = ret ? ret : write_reg(dev, RSECCNT, bin2bcd(dt->tm_sec));
+	ret = ret ? ret : write_reg(dev, RDAYCNT, bin2bcd(dt->tm_mday));
+	ret = ret ? ret : write_reg(dev, RMONCNT, bin2bcd(dt->tm_mon + 1));
+	ret = ret ? ret : write_reg(dev, RYRCNT, bin2bcd(dt->tm_year - 100));
 	ret = ret ? ret : write_reg(dev, RWKCNT, 1 << dt->tm_wday);
 
 	return ret;
diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c
index 1c14d4497c4d..e6ea3f5ee1eb 100644
--- a/drivers/rtc/rtc-rs5c313.c
+++ b/drivers/rtc/rtc-rs5c313.c
@@ -235,33 +235,33 @@ static int rs5c313_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	data = rs5c313_read_reg(RS5C313_ADDR_SEC);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_SEC10) << 4);
-	tm->tm_sec = BCD2BIN(data);
+	tm->tm_sec = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_MIN);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_MIN10) << 4);
-	tm->tm_min = BCD2BIN(data);
+	tm->tm_min = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_HOUR);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_HOUR10) << 4);
-	tm->tm_hour = BCD2BIN(data);
+	tm->tm_hour = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_DAY);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_DAY10) << 4);
-	tm->tm_mday = BCD2BIN(data);
+	tm->tm_mday = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_MON);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_MON10) << 4);
-	tm->tm_mon = BCD2BIN(data) - 1;
+	tm->tm_mon = bcd2bin(data) - 1;
 
 	data = rs5c313_read_reg(RS5C313_ADDR_YEAR);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_YEAR10) << 4);
-	tm->tm_year = BCD2BIN(data);
+	tm->tm_year = bcd2bin(data);
 
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;
 
 	data = rs5c313_read_reg(RS5C313_ADDR_WEEK);
-	tm->tm_wday = BCD2BIN(data);
+	tm->tm_wday = bcd2bin(data);
 
 	RS5C313_CEDISABLE;
 	ndelay(700);		/* CE:L */
@@ -294,31 +294,31 @@ static int rs5c313_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		}
 	}
 
-	data = BIN2BCD(tm->tm_sec);
+	data = bin2bcd(tm->tm_sec);
 	rs5c313_write_reg(RS5C313_ADDR_SEC, data);
 	rs5c313_write_reg(RS5C313_ADDR_SEC10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_min);
+	data = bin2bcd(tm->tm_min);
 	rs5c313_write_reg(RS5C313_ADDR_MIN, data );
 	rs5c313_write_reg(RS5C313_ADDR_MIN10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_hour);
+	data = bin2bcd(tm->tm_hour);
 	rs5c313_write_reg(RS5C313_ADDR_HOUR, data);
 	rs5c313_write_reg(RS5C313_ADDR_HOUR10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_mday);
+	data = bin2bcd(tm->tm_mday);
 	rs5c313_write_reg(RS5C313_ADDR_DAY, data);
 	rs5c313_write_reg(RS5C313_ADDR_DAY10, (data>> 4));
 
-	data = BIN2BCD(tm->tm_mon + 1);
+	data = bin2bcd(tm->tm_mon + 1);
 	rs5c313_write_reg(RS5C313_ADDR_MON, data);
 	rs5c313_write_reg(RS5C313_ADDR_MON10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_year % 100);
+	data = bin2bcd(tm->tm_year % 100);
 	rs5c313_write_reg(RS5C313_ADDR_YEAR, data);
 	rs5c313_write_reg(RS5C313_ADDR_YEAR10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_wday);
+	data = bin2bcd(tm->tm_wday);
 	rs5c313_write_reg(RS5C313_ADDR_WEEK, data);
 
 	RS5C313_CEDISABLE;	/* CE:H */
diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c
index 839462659afa..dd1e2bc7a472 100644
--- a/drivers/rtc/rtc-rs5c348.c
+++ b/drivers/rtc/rtc-rs5c348.c
@@ -74,20 +74,20 @@ rs5c348_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	txbuf[3] = 0;	/* dummy */
 	txbuf[4] = RS5C348_CMD_MW(RS5C348_REG_SECS); /* cmd, sec, ... */
 	txp = &txbuf[5];
-	txp[RS5C348_REG_SECS] = BIN2BCD(tm->tm_sec);
-	txp[RS5C348_REG_MINS] = BIN2BCD(tm->tm_min);
+	txp[RS5C348_REG_SECS] = bin2bcd(tm->tm_sec);
+	txp[RS5C348_REG_MINS] = bin2bcd(tm->tm_min);
 	if (pdata->rtc_24h) {
-		txp[RS5C348_REG_HOURS] = BIN2BCD(tm->tm_hour);
+		txp[RS5C348_REG_HOURS] = bin2bcd(tm->tm_hour);
 	} else {
 		/* hour 0 is AM12, noon is PM12 */
-		txp[RS5C348_REG_HOURS] = BIN2BCD((tm->tm_hour + 11) % 12 + 1) |
+		txp[RS5C348_REG_HOURS] = bin2bcd((tm->tm_hour + 11) % 12 + 1) |
 			(tm->tm_hour >= 12 ? RS5C348_BIT_PM : 0);
 	}
-	txp[RS5C348_REG_WDAY] = BIN2BCD(tm->tm_wday);
-	txp[RS5C348_REG_DAY] = BIN2BCD(tm->tm_mday);
-	txp[RS5C348_REG_MONTH] = BIN2BCD(tm->tm_mon + 1) |
+	txp[RS5C348_REG_WDAY] = bin2bcd(tm->tm_wday);
+	txp[RS5C348_REG_DAY] = bin2bcd(tm->tm_mday);
+	txp[RS5C348_REG_MONTH] = bin2bcd(tm->tm_mon + 1) |
 		(tm->tm_year >= 100 ? RS5C348_BIT_Y2K : 0);
-	txp[RS5C348_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+	txp[RS5C348_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 	/* write in one transfer to avoid data inconsistency */
 	ret = spi_write_then_read(spi, txbuf, sizeof(txbuf), NULL, 0);
 	udelay(62);	/* Tcsr 62us */
@@ -116,20 +116,20 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	if (ret < 0)
 		return ret;
 
-	tm->tm_sec = BCD2BIN(rxbuf[RS5C348_REG_SECS] & RS5C348_SECS_MASK);
-	tm->tm_min = BCD2BIN(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
-	tm->tm_hour = BCD2BIN(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
+	tm->tm_sec = bcd2bin(rxbuf[RS5C348_REG_SECS] & RS5C348_SECS_MASK);
+	tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
+	tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
 	if (!pdata->rtc_24h) {
 		tm->tm_hour %= 12;
 		if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM)
 			tm->tm_hour += 12;
 	}
-	tm->tm_wday = BCD2BIN(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
-	tm->tm_mday = BCD2BIN(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
+	tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
+	tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
 	tm->tm_mon =
-		BCD2BIN(rxbuf[RS5C348_REG_MONTH] & RS5C348_MONTH_MASK) - 1;
+		bcd2bin(rxbuf[RS5C348_REG_MONTH] & RS5C348_MONTH_MASK) - 1;
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(rxbuf[RS5C348_REG_YEAR]) +
+	tm->tm_year = bcd2bin(rxbuf[RS5C348_REG_YEAR]) +
 		((rxbuf[RS5C348_REG_MONTH] & RS5C348_BIT_Y2K) ? 100 : 0);
 
 	if (rtc_valid_tm(tm) < 0) {
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 8b561958fb1e..2f2c68d476da 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -148,9 +148,9 @@ static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
 	unsigned	hour;
 
 	if (rs5c->time24)
-		return BCD2BIN(reg & 0x3f);
+		return bcd2bin(reg & 0x3f);
 
-	hour = BCD2BIN(reg & 0x1f);
+	hour = bcd2bin(reg & 0x1f);
 	if (hour == 12)
 		hour = 0;
 	if (reg & 0x20)
@@ -161,15 +161,15 @@ static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
 static unsigned rs5c_hr2reg(struct rs5c372 *rs5c, unsigned hour)
 {
 	if (rs5c->time24)
-		return BIN2BCD(hour);
+		return bin2bcd(hour);
 
 	if (hour > 12)
-		return 0x20 | BIN2BCD(hour - 12);
+		return 0x20 | bin2bcd(hour - 12);
 	if (hour == 12)
-		return 0x20 | BIN2BCD(12);
+		return 0x20 | bin2bcd(12);
 	if (hour == 0)
-		return BIN2BCD(12);
-	return BIN2BCD(hour);
+		return bin2bcd(12);
+	return bin2bcd(hour);
 }
 
 static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
@@ -180,18 +180,18 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 	if (status < 0)
 		return status;
 
-	tm->tm_sec = BCD2BIN(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
-	tm->tm_min = BCD2BIN(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
+	tm->tm_sec = bcd2bin(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
+	tm->tm_min = bcd2bin(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
 	tm->tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C372_REG_HOURS]);
 
-	tm->tm_wday = BCD2BIN(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
-	tm->tm_mday = BCD2BIN(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
+	tm->tm_wday = bcd2bin(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
+	tm->tm_mday = bcd2bin(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
 
 	/* tm->tm_mon is zero-based */
-	tm->tm_mon = BCD2BIN(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
+	tm->tm_mon = bcd2bin(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
 
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(rs5c->regs[RS5C372_REG_YEAR]) + 100;
+	tm->tm_year = bcd2bin(rs5c->regs[RS5C372_REG_YEAR]) + 100;
 
 	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
 		"mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -216,13 +216,13 @@ static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
 	addr   = RS5C_ADDR(RS5C372_REG_SECS);
-	buf[0] = BIN2BCD(tm->tm_sec);
-	buf[1] = BIN2BCD(tm->tm_min);
+	buf[0] = bin2bcd(tm->tm_sec);
+	buf[1] = bin2bcd(tm->tm_min);
 	buf[2] = rs5c_hr2reg(rs5c, tm->tm_hour);
-	buf[3] = BIN2BCD(tm->tm_wday);
-	buf[4] = BIN2BCD(tm->tm_mday);
-	buf[5] = BIN2BCD(tm->tm_mon + 1);
-	buf[6] = BIN2BCD(tm->tm_year - 100);
+	buf[3] = bin2bcd(tm->tm_wday);
+	buf[4] = bin2bcd(tm->tm_mday);
+	buf[5] = bin2bcd(tm->tm_mon + 1);
+	buf[6] = bin2bcd(tm->tm_year - 100);
 
 	if (i2c_smbus_write_i2c_block_data(client, addr, sizeof(buf), buf) < 0) {
 		dev_err(&client->dev, "%s: write error\n", __func__);
@@ -367,7 +367,7 @@ static int rs5c_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 	/* report alarm time */
 	t->time.tm_sec = 0;
-	t->time.tm_min = BCD2BIN(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
+	t->time.tm_min = bcd2bin(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
 	t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]);
 	t->time.tm_mday = -1;
 	t->time.tm_mon = -1;
@@ -413,7 +413,7 @@ static int rs5c_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	}
 
 	/* set alarm */
-	buf[0] = BIN2BCD(t->time.tm_min);
+	buf[0] = bin2bcd(t->time.tm_min);
 	buf[1] = rs5c_hr2reg(rs5c, t->time.tm_hour);
 	buf[2] = 0x7f;	/* any/all days */
 
diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index a6fa1f2f2ca6..def4d396d0b0 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -104,12 +104,12 @@ static int s35390a_disable_test_mode(struct s35390a *s35390a)
 static char s35390a_hr2reg(struct s35390a *s35390a, int hour)
 {
 	if (s35390a->twentyfourhour)
-		return BIN2BCD(hour);
+		return bin2bcd(hour);
 
 	if (hour < 12)
-		return BIN2BCD(hour);
+		return bin2bcd(hour);
 
-	return 0x40 | BIN2BCD(hour - 12);
+	return 0x40 | bin2bcd(hour - 12);
 }
 
 static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
@@ -117,9 +117,9 @@ static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
 	unsigned hour;
 
 	if (s35390a->twentyfourhour)
-		return BCD2BIN(reg & 0x3f);
+		return bcd2bin(reg & 0x3f);
 
-	hour = BCD2BIN(reg & 0x3f);
+	hour = bcd2bin(reg & 0x3f);
 	if (reg & 0x40)
 		hour += 12;
 
@@ -137,13 +137,13 @@ static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year,
 		tm->tm_wday);
 
-	buf[S35390A_BYTE_YEAR] = BIN2BCD(tm->tm_year - 100);
-	buf[S35390A_BYTE_MONTH] = BIN2BCD(tm->tm_mon + 1);
-	buf[S35390A_BYTE_DAY] = BIN2BCD(tm->tm_mday);
-	buf[S35390A_BYTE_WDAY] = BIN2BCD(tm->tm_wday);
+	buf[S35390A_BYTE_YEAR] = bin2bcd(tm->tm_year - 100);
+	buf[S35390A_BYTE_MONTH] = bin2bcd(tm->tm_mon + 1);
+	buf[S35390A_BYTE_DAY] = bin2bcd(tm->tm_mday);
+	buf[S35390A_BYTE_WDAY] = bin2bcd(tm->tm_wday);
 	buf[S35390A_BYTE_HOURS] = s35390a_hr2reg(s35390a, tm->tm_hour);
-	buf[S35390A_BYTE_MINS] = BIN2BCD(tm->tm_min);
-	buf[S35390A_BYTE_SECS] = BIN2BCD(tm->tm_sec);
+	buf[S35390A_BYTE_MINS] = bin2bcd(tm->tm_min);
+	buf[S35390A_BYTE_SECS] = bin2bcd(tm->tm_sec);
 
 	/* This chip expects the bits of each byte to be in reverse order */
 	for (i = 0; i < 7; ++i)
@@ -168,13 +168,13 @@ static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 	for (i = 0; i < 7; ++i)
 		buf[i] = bitrev8(buf[i]);
 
-	tm->tm_sec = BCD2BIN(buf[S35390A_BYTE_SECS]);
-	tm->tm_min = BCD2BIN(buf[S35390A_BYTE_MINS]);
+	tm->tm_sec = bcd2bin(buf[S35390A_BYTE_SECS]);
+	tm->tm_min = bcd2bin(buf[S35390A_BYTE_MINS]);
 	tm->tm_hour = s35390a_reg2hr(s35390a, buf[S35390A_BYTE_HOURS]);
-	tm->tm_wday = BCD2BIN(buf[S35390A_BYTE_WDAY]);
-	tm->tm_mday = BCD2BIN(buf[S35390A_BYTE_DAY]);
-	tm->tm_mon = BCD2BIN(buf[S35390A_BYTE_MONTH]) - 1;
-	tm->tm_year = BCD2BIN(buf[S35390A_BYTE_YEAR]) + 100;
+	tm->tm_wday = bcd2bin(buf[S35390A_BYTE_WDAY]);
+	tm->tm_mday = bcd2bin(buf[S35390A_BYTE_DAY]);
+	tm->tm_mon = bcd2bin(buf[S35390A_BYTE_MONTH]) - 1;
+	tm->tm_year = bcd2bin(buf[S35390A_BYTE_YEAR]) + 100;
 
 	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, mday=%d, "
 		"mon=%d, year=%d, wday=%d\n", __func__, tm->tm_sec,
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index e7d19b6c265a..910bc704939c 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -134,12 +134,12 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 		 rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
 		 rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
 
-	BCD_TO_BIN(rtc_tm->tm_sec);
-	BCD_TO_BIN(rtc_tm->tm_min);
-	BCD_TO_BIN(rtc_tm->tm_hour);
-	BCD_TO_BIN(rtc_tm->tm_mday);
-	BCD_TO_BIN(rtc_tm->tm_mon);
-	BCD_TO_BIN(rtc_tm->tm_year);
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
 	rtc_tm->tm_year += 100;
 	rtc_tm->tm_mon -= 1;
@@ -163,12 +163,12 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
 		return -EINVAL;
 	}
 
-	writeb(BIN2BCD(tm->tm_sec),  base + S3C2410_RTCSEC);
-	writeb(BIN2BCD(tm->tm_min),  base + S3C2410_RTCMIN);
-	writeb(BIN2BCD(tm->tm_hour), base + S3C2410_RTCHOUR);
-	writeb(BIN2BCD(tm->tm_mday), base + S3C2410_RTCDATE);
-	writeb(BIN2BCD(tm->tm_mon + 1), base + S3C2410_RTCMON);
-	writeb(BIN2BCD(year), base + S3C2410_RTCYEAR);
+	writeb(bin2bcd(tm->tm_sec),  base + S3C2410_RTCSEC);
+	writeb(bin2bcd(tm->tm_min),  base + S3C2410_RTCMIN);
+	writeb(bin2bcd(tm->tm_hour), base + S3C2410_RTCHOUR);
+	writeb(bin2bcd(tm->tm_mday), base + S3C2410_RTCDATE);
+	writeb(bin2bcd(tm->tm_mon + 1), base + S3C2410_RTCMON);
+	writeb(bin2bcd(year), base + S3C2410_RTCYEAR);
 
 	return 0;
 }
@@ -199,34 +199,34 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	/* decode the alarm enable field */
 
 	if (alm_en & S3C2410_RTCALM_SECEN)
-		BCD_TO_BIN(alm_tm->tm_sec);
+		alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec);
 	else
 		alm_tm->tm_sec = 0xff;
 
 	if (alm_en & S3C2410_RTCALM_MINEN)
-		BCD_TO_BIN(alm_tm->tm_min);
+		alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
 	else
 		alm_tm->tm_min = 0xff;
 
 	if (alm_en & S3C2410_RTCALM_HOUREN)
-		BCD_TO_BIN(alm_tm->tm_hour);
+		alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
 	else
 		alm_tm->tm_hour = 0xff;
 
 	if (alm_en & S3C2410_RTCALM_DAYEN)
-		BCD_TO_BIN(alm_tm->tm_mday);
+		alm_tm->tm_mday = bcd2bin(alm_tm->tm_mday);
 	else
 		alm_tm->tm_mday = 0xff;
 
 	if (alm_en & S3C2410_RTCALM_MONEN) {
-		BCD_TO_BIN(alm_tm->tm_mon);
+		alm_tm->tm_mon = bcd2bin(alm_tm->tm_mon);
 		alm_tm->tm_mon -= 1;
 	} else {
 		alm_tm->tm_mon = 0xff;
 	}
 
 	if (alm_en & S3C2410_RTCALM_YEAREN)
-		BCD_TO_BIN(alm_tm->tm_year);
+		alm_tm->tm_year = bcd2bin(alm_tm->tm_year);
 	else
 		alm_tm->tm_year = 0xffff;
 
@@ -250,17 +250,17 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 	if (tm->tm_sec < 60 && tm->tm_sec >= 0) {
 		alrm_en |= S3C2410_RTCALM_SECEN;
-		writeb(BIN2BCD(tm->tm_sec), base + S3C2410_ALMSEC);
+		writeb(bin2bcd(tm->tm_sec), base + S3C2410_ALMSEC);
 	}
 
 	if (tm->tm_min < 60 && tm->tm_min >= 0) {
 		alrm_en |= S3C2410_RTCALM_MINEN;
-		writeb(BIN2BCD(tm->tm_min), base + S3C2410_ALMMIN);
+		writeb(bin2bcd(tm->tm_min), base + S3C2410_ALMMIN);
 	}
 
 	if (tm->tm_hour < 24 && tm->tm_hour >= 0) {
 		alrm_en |= S3C2410_RTCALM_HOUREN;
-		writeb(BIN2BCD(tm->tm_hour), base + S3C2410_ALMHOUR);
+		writeb(bin2bcd(tm->tm_hour), base + S3C2410_ALMHOUR);
 	}
 
 	pr_debug("setting S3C2410_RTCALM to %08x\n", alrm_en);
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index fcead4c4cd1f..1ec297128ea6 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -324,23 +324,23 @@ static int sh_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 		sec128 = readb(rtc->regbase + R64CNT);
 
-		tm->tm_sec	= BCD2BIN(readb(rtc->regbase + RSECCNT));
-		tm->tm_min	= BCD2BIN(readb(rtc->regbase + RMINCNT));
-		tm->tm_hour	= BCD2BIN(readb(rtc->regbase + RHRCNT));
-		tm->tm_wday	= BCD2BIN(readb(rtc->regbase + RWKCNT));
-		tm->tm_mday	= BCD2BIN(readb(rtc->regbase + RDAYCNT));
-		tm->tm_mon	= BCD2BIN(readb(rtc->regbase + RMONCNT)) - 1;
+		tm->tm_sec	= bcd2bin(readb(rtc->regbase + RSECCNT));
+		tm->tm_min	= bcd2bin(readb(rtc->regbase + RMINCNT));
+		tm->tm_hour	= bcd2bin(readb(rtc->regbase + RHRCNT));
+		tm->tm_wday	= bcd2bin(readb(rtc->regbase + RWKCNT));
+		tm->tm_mday	= bcd2bin(readb(rtc->regbase + RDAYCNT));
+		tm->tm_mon	= bcd2bin(readb(rtc->regbase + RMONCNT)) - 1;
 
 		if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
 			yr  = readw(rtc->regbase + RYRCNT);
-			yr100 = BCD2BIN(yr >> 8);
+			yr100 = bcd2bin(yr >> 8);
 			yr &= 0xff;
 		} else {
 			yr  = readb(rtc->regbase + RYRCNT);
-			yr100 = BCD2BIN((yr == 0x99) ? 0x19 : 0x20);
+			yr100 = bcd2bin((yr == 0x99) ? 0x19 : 0x20);
 		}
 
-		tm->tm_year = (yr100 * 100 + BCD2BIN(yr)) - 1900;
+		tm->tm_year = (yr100 * 100 + bcd2bin(yr)) - 1900;
 
 		sec2 = readb(rtc->regbase + R64CNT);
 		cf_bit = readb(rtc->regbase + RCR1) & RCR1_CF;
@@ -382,20 +382,20 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	tmp &= ~RCR2_START;
 	writeb(tmp, rtc->regbase + RCR2);
 
-	writeb(BIN2BCD(tm->tm_sec),  rtc->regbase + RSECCNT);
-	writeb(BIN2BCD(tm->tm_min),  rtc->regbase + RMINCNT);
-	writeb(BIN2BCD(tm->tm_hour), rtc->regbase + RHRCNT);
-	writeb(BIN2BCD(tm->tm_wday), rtc->regbase + RWKCNT);
-	writeb(BIN2BCD(tm->tm_mday), rtc->regbase + RDAYCNT);
-	writeb(BIN2BCD(tm->tm_mon + 1), rtc->regbase + RMONCNT);
+	writeb(bin2bcd(tm->tm_sec),  rtc->regbase + RSECCNT);
+	writeb(bin2bcd(tm->tm_min),  rtc->regbase + RMINCNT);
+	writeb(bin2bcd(tm->tm_hour), rtc->regbase + RHRCNT);
+	writeb(bin2bcd(tm->tm_wday), rtc->regbase + RWKCNT);
+	writeb(bin2bcd(tm->tm_mday), rtc->regbase + RDAYCNT);
+	writeb(bin2bcd(tm->tm_mon + 1), rtc->regbase + RMONCNT);
 
 	if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
-		year = (BIN2BCD((tm->tm_year + 1900) / 100) << 8) |
-			BIN2BCD(tm->tm_year % 100);
+		year = (bin2bcd((tm->tm_year + 1900) / 100) << 8) |
+			bin2bcd(tm->tm_year % 100);
 		writew(year, rtc->regbase + RYRCNT);
 	} else {
 		year = tm->tm_year % 100;
-		writeb(BIN2BCD(year), rtc->regbase + RYRCNT);
+		writeb(bin2bcd(year), rtc->regbase + RYRCNT);
 	}
 
 	/* Start RTC */
@@ -417,7 +417,7 @@ static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off)
 	byte = readb(rtc->regbase + reg_off);
 	if (byte & AR_ENB) {
 		byte &= ~AR_ENB;	/* strip the enable bit */
-		value = BCD2BIN(byte);
+		value = bcd2bin(byte);
 	}
 
 	return value;
@@ -455,7 +455,7 @@ static inline void sh_rtc_write_alarm_value(struct sh_rtc *rtc,
 	if (value < 0)
 		writeb(0, rtc->regbase + reg_off);
 	else
-		writeb(BIN2BCD(value) | AR_ENB,  rtc->regbase + reg_off);
+		writeb(bin2bcd(value) | AR_ENB,  rtc->regbase + reg_off);
 }
 
 static int sh_rtc_check_alarm(struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index 9a7e920315fa..0947f8c23957 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -82,14 +82,14 @@ static int stk17ta8_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	flags = readb(pdata->ioaddr + RTC_FLAGS);
 	writeb(flags | RTC_WRITE, pdata->ioaddr + RTC_FLAGS);
 
-	writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
-	writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
-	writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
-	writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
-	writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
-	writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
-	writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
-	writeb(BIN2BCD((tm->tm_year + 1900) / 100), ioaddr + RTC_CENTURY);
+	writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+	writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+	writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+	writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+	writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+	writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+	writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+	writeb(bin2bcd((tm->tm_year + 1900) / 100), ioaddr + RTC_CENTURY);
 
 	writeb(flags & ~RTC_WRITE, pdata->ioaddr + RTC_FLAGS);
 	return 0;
@@ -120,14 +120,14 @@ static int stk17ta8_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	year = readb(ioaddr + RTC_YEAR);
 	century = readb(ioaddr + RTC_CENTURY);
 	writeb(flags & ~RTC_READ, ioaddr + RTC_FLAGS);
-	tm->tm_sec = BCD2BIN(second);
-	tm->tm_min = BCD2BIN(minute);
-	tm->tm_hour = BCD2BIN(hour);
-	tm->tm_mday = BCD2BIN(day);
-	tm->tm_wday = BCD2BIN(week);
-	tm->tm_mon = BCD2BIN(month) - 1;
+	tm->tm_sec = bcd2bin(second);
+	tm->tm_min = bcd2bin(minute);
+	tm->tm_hour = bcd2bin(hour);
+	tm->tm_mday = bcd2bin(day);
+	tm->tm_wday = bcd2bin(week);
+	tm->tm_mon = bcd2bin(month) - 1;
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
 	if (rtc_valid_tm(tm) < 0) {
 		dev_err(dev, "retrieved date/time is not valid.\n");
@@ -148,16 +148,16 @@ static void stk17ta8_rtc_update_alarm(struct rtc_plat_data *pdata)
 	writeb(flags | RTC_WRITE, ioaddr + RTC_FLAGS);
 
 	writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_mday),
+	       0x80 : bin2bcd(pdata->alrm_mday),
 	       ioaddr + RTC_DATE_ALARM);
 	writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_hour),
+	       0x80 : bin2bcd(pdata->alrm_hour),
 	       ioaddr + RTC_HOURS_ALARM);
 	writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_min),
+	       0x80 : bin2bcd(pdata->alrm_min),
 	       ioaddr + RTC_MINUTES_ALARM);
 	writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_sec),
+	       0x80 : bin2bcd(pdata->alrm_sec),
 	       ioaddr + RTC_SECONDS_ALARM);
 	writeb(pdata->irqen ? RTC_INTS_AIE : 0, ioaddr + RTC_INTERRUPTS);
 	readb(ioaddr + RTC_FLAGS);	/* clear interrupts */
diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c
index 10025d840268..14d4f036a768 100644
--- a/drivers/rtc/rtc-v3020.c
+++ b/drivers/rtc/rtc-v3020.c
@@ -92,19 +92,19 @@ static int v3020_read_time(struct device *dev, struct rtc_time *dt)
 
 	/* ...and then read constant values. */
 	tmp = v3020_get_reg(chip, V3020_SECONDS);
-	dt->tm_sec	= BCD2BIN(tmp);
+	dt->tm_sec	= bcd2bin(tmp);
 	tmp = v3020_get_reg(chip, V3020_MINUTES);
-	dt->tm_min	= BCD2BIN(tmp);
+	dt->tm_min	= bcd2bin(tmp);
 	tmp = v3020_get_reg(chip, V3020_HOURS);
-	dt->tm_hour	= BCD2BIN(tmp);
+	dt->tm_hour	= bcd2bin(tmp);
 	tmp = v3020_get_reg(chip, V3020_MONTH_DAY);
-	dt->tm_mday	= BCD2BIN(tmp);
+	dt->tm_mday	= bcd2bin(tmp);
 	tmp = v3020_get_reg(chip, V3020_MONTH);
-	dt->tm_mon    = BCD2BIN(tmp) - 1;
+	dt->tm_mon    = bcd2bin(tmp) - 1;
 	tmp = v3020_get_reg(chip, V3020_WEEK_DAY);
-	dt->tm_wday	= BCD2BIN(tmp);
+	dt->tm_wday	= bcd2bin(tmp);
 	tmp = v3020_get_reg(chip, V3020_YEAR);
-	dt->tm_year = BCD2BIN(tmp)+100;
+	dt->tm_year = bcd2bin(tmp)+100;
 
 #ifdef DEBUG
 	printk("\n%s : Read RTC values\n",__func__);
@@ -136,13 +136,13 @@ static int v3020_set_time(struct device *dev, struct rtc_time *dt)
 #endif
 
 	/* Write all the values to ram... */
-	v3020_set_reg(chip, V3020_SECONDS, 	BIN2BCD(dt->tm_sec));
-	v3020_set_reg(chip, V3020_MINUTES, 	BIN2BCD(dt->tm_min));
-	v3020_set_reg(chip, V3020_HOURS, 	BIN2BCD(dt->tm_hour));
-	v3020_set_reg(chip, V3020_MONTH_DAY,	BIN2BCD(dt->tm_mday));
-	v3020_set_reg(chip, V3020_MONTH,     BIN2BCD(dt->tm_mon + 1));
-	v3020_set_reg(chip, V3020_WEEK_DAY, 	BIN2BCD(dt->tm_wday));
-	v3020_set_reg(chip, V3020_YEAR, 	BIN2BCD(dt->tm_year % 100));
+	v3020_set_reg(chip, V3020_SECONDS, 	bin2bcd(dt->tm_sec));
+	v3020_set_reg(chip, V3020_MINUTES, 	bin2bcd(dt->tm_min));
+	v3020_set_reg(chip, V3020_HOURS, 	bin2bcd(dt->tm_hour));
+	v3020_set_reg(chip, V3020_MONTH_DAY,	bin2bcd(dt->tm_mday));
+	v3020_set_reg(chip, V3020_MONTH,     bin2bcd(dt->tm_mon + 1));
+	v3020_set_reg(chip, V3020_WEEK_DAY, 	bin2bcd(dt->tm_wday));
+	v3020_set_reg(chip, V3020_YEAR, 	bin2bcd(dt->tm_year % 100));
 
 	/* ...and set the clock. */
 	v3020_set_reg(chip, V3020_CMD_RAM2CLOCK, 0);
diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c
index 7dcfba1bbfe1..310c10795e9a 100644
--- a/drivers/rtc/rtc-x1205.c
+++ b/drivers/rtc/rtc-x1205.c
@@ -118,13 +118,13 @@ static int x1205_get_datetime(struct i2c_client *client, struct rtc_time *tm,
 		for (i = 0; i <= 4; i++)
 			buf[i] &= 0x7F;
 
-	tm->tm_sec = BCD2BIN(buf[CCR_SEC]);
-	tm->tm_min = BCD2BIN(buf[CCR_MIN]);
-	tm->tm_hour = BCD2BIN(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */
-	tm->tm_mday = BCD2BIN(buf[CCR_MDAY]);
-	tm->tm_mon = BCD2BIN(buf[CCR_MONTH]) - 1; /* mon is 0-11 */
-	tm->tm_year = BCD2BIN(buf[CCR_YEAR])
-			+ (BCD2BIN(buf[CCR_Y2K]) * 100) - 1900;
+	tm->tm_sec = bcd2bin(buf[CCR_SEC]);
+	tm->tm_min = bcd2bin(buf[CCR_MIN]);
+	tm->tm_hour = bcd2bin(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */
+	tm->tm_mday = bcd2bin(buf[CCR_MDAY]);
+	tm->tm_mon = bcd2bin(buf[CCR_MONTH]) - 1; /* mon is 0-11 */
+	tm->tm_year = bcd2bin(buf[CCR_YEAR])
+			+ (bcd2bin(buf[CCR_Y2K]) * 100) - 1900;
 	tm->tm_wday = buf[CCR_WDAY];
 
 	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
@@ -174,11 +174,11 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
 		__func__,
 		tm->tm_sec, tm->tm_min, tm->tm_hour);
 
-	buf[CCR_SEC] = BIN2BCD(tm->tm_sec);
-	buf[CCR_MIN] = BIN2BCD(tm->tm_min);
+	buf[CCR_SEC] = bin2bcd(tm->tm_sec);
+	buf[CCR_MIN] = bin2bcd(tm->tm_min);
 
 	/* set hour and 24hr bit */
-	buf[CCR_HOUR] = BIN2BCD(tm->tm_hour) | X1205_HR_MIL;
+	buf[CCR_HOUR] = bin2bcd(tm->tm_hour) | X1205_HR_MIL;
 
 	/* should we also set the date? */
 	if (datetoo) {
@@ -187,15 +187,15 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
 			__func__,
 			tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-		buf[CCR_MDAY] = BIN2BCD(tm->tm_mday);
+		buf[CCR_MDAY] = bin2bcd(tm->tm_mday);
 
 		/* month, 1 - 12 */
-		buf[CCR_MONTH] = BIN2BCD(tm->tm_mon + 1);
+		buf[CCR_MONTH] = bin2bcd(tm->tm_mon + 1);
 
 		/* year, since the rtc epoch*/
-		buf[CCR_YEAR] = BIN2BCD(tm->tm_year % 100);
+		buf[CCR_YEAR] = bin2bcd(tm->tm_year % 100);
 		buf[CCR_WDAY] = tm->tm_wday & 0x07;
-		buf[CCR_Y2K] = BIN2BCD(tm->tm_year / 100);
+		buf[CCR_Y2K] = bin2bcd(tm->tm_year / 100);
 	}
 
 	/* If writing alarm registers, set compare bits on registers 0-4 */
@@ -437,7 +437,7 @@ static int x1205_validate_client(struct i2c_client *client)
 			return -EIO;
 		}
 
-		value = BCD2BIN(reg & probe_limits_pattern[i].mask);
+		value = bcd2bin(reg & probe_limits_pattern[i].mask);
 
 		if (value > probe_limits_pattern[i].max ||
 			value < probe_limits_pattern[i].min) {
-- 
GitLab


From 357c6e63590895dc87cc9300f5a1c27544ea69e8 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:42 -0700
Subject: [PATCH 822/895] rtc: use bcd2bin/bin2bcd

Change various rtc related code to use the new bcd2bin/bin2bcd functions
instead of the obsolete BCD_TO_BIN/BIN_TO_BCD/BCD2BIN/BIN2BCD macros.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/rtc.c     | 20 ++++++++++----------
 drivers/char/ds1286.c     | 34 ++++++++++++++++-----------------
 drivers/char/ds1302.c     | 24 +++++++++++------------
 drivers/char/ip27-rtc.c   | 24 +++++++++++------------
 drivers/char/rtc.c        | 40 +++++++++++++++++++--------------------
 include/asm-generic/rtc.h | 24 +++++++++++------------
 6 files changed, 83 insertions(+), 83 deletions(-)

diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 0a23b5795b25..dd6f2b71561b 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -52,7 +52,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
 	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(cmos_minutes);
+		cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -69,8 +69,8 @@ int mach_set_rtc_mmss(unsigned long nowtime)
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
 		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			BIN_TO_BCD(real_seconds);
-			BIN_TO_BCD(real_minutes);
+			real_seconds = bin2bcd(real_seconds);
+			real_minutes = bin2bcd(real_minutes);
 		}
 		CMOS_WRITE(real_seconds,RTC_SECONDS);
 		CMOS_WRITE(real_minutes,RTC_MINUTES);
@@ -124,16 +124,16 @@ unsigned long mach_get_cmos_time(void)
 	WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY));
 
 	if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) {
-		BCD_TO_BIN(sec);
-		BCD_TO_BIN(min);
-		BCD_TO_BIN(hour);
-		BCD_TO_BIN(day);
-		BCD_TO_BIN(mon);
-		BCD_TO_BIN(year);
+		sec = bcd2bin(sec);
+		min = bcd2bin(min);
+		hour = bcd2bin(hour);
+		day = bcd2bin(day);
+		mon = bcd2bin(mon);
+		year = bcd2bin(year);
 	}
 
 	if (century) {
-		BCD_TO_BIN(century);
+		century = bcd2bin(century);
 		year += century * 100;
 		printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
 	} else
diff --git a/drivers/char/ds1286.c b/drivers/char/ds1286.c
index 5329d482b582..0a826d7be10e 100644
--- a/drivers/char/ds1286.c
+++ b/drivers/char/ds1286.c
@@ -210,8 +210,8 @@ static int ds1286_ioctl(struct inode *inode, struct file *file,
 		if (sec != 0)
 			return -EINVAL;
 
-		min = BIN2BCD(min);
-		min = BIN2BCD(hrs);
+		min = bin2bcd(min);
+		min = bin2bcd(hrs);
 
 		spin_lock(&ds1286_lock);
 		rtc_write(hrs, RTC_HOURS_ALARM);
@@ -353,7 +353,7 @@ static int ds1286_proc_output(char *buf)
 
 	ds1286_get_time(&tm);
 	hundredth = rtc_read(RTC_HUNDREDTH_SECOND);
-	BCD_TO_BIN(hundredth);
+	hundredth = bcd2bin(hundredth);
 
 	p += sprintf(p,
 	             "rtc_time\t: %02d:%02d:%02d.%02d\n"
@@ -477,12 +477,12 @@ static void ds1286_get_time(struct rtc_time *rtc_tm)
 	rtc_write(save_control, RTC_CMD);
 	spin_unlock_irqrestore(&ds1286_lock, flags);
 
-	BCD_TO_BIN(rtc_tm->tm_sec);
-	BCD_TO_BIN(rtc_tm->tm_min);
-	BCD_TO_BIN(rtc_tm->tm_hour);
-	BCD_TO_BIN(rtc_tm->tm_mday);
-	BCD_TO_BIN(rtc_tm->tm_mon);
-	BCD_TO_BIN(rtc_tm->tm_year);
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
 	/*
 	 * Account for differences between how the RTC uses the values
@@ -531,12 +531,12 @@ static int ds1286_set_time(struct rtc_time *rtc_tm)
 	if (yrs >= 100)
 		yrs -= 100;
 
-	BIN_TO_BCD(sec);
-	BIN_TO_BCD(min);
-	BIN_TO_BCD(hrs);
-	BIN_TO_BCD(day);
-	BIN_TO_BCD(mon);
-	BIN_TO_BCD(yrs);
+	sec = bin2bcd(sec);
+	min = bin2bcd(min);
+	hrs = bin2bcd(hrs);
+	day = bin2bcd(day);
+	mon = bin2bcd(mon);
+	yrs = bin2bcd(yrs);
 
 	spin_lock_irqsave(&ds1286_lock, flags);
 	save_control = rtc_read(RTC_CMD);
@@ -572,8 +572,8 @@ static void ds1286_get_alm_time(struct rtc_time *alm_tm)
 	cmd = rtc_read(RTC_CMD);
 	spin_unlock_irqrestore(&ds1286_lock, flags);
 
-	BCD_TO_BIN(alm_tm->tm_min);
-	BCD_TO_BIN(alm_tm->tm_hour);
+	alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
+	alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
 	alm_tm->tm_sec = 0;
 }
 
diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c
index c5e67a623951..170693c93c73 100644
--- a/drivers/char/ds1302.c
+++ b/drivers/char/ds1302.c
@@ -131,12 +131,12 @@ get_rtc_time(struct rtc_time *rtc_tm)
 
 	local_irq_restore(flags);
 
-	BCD_TO_BIN(rtc_tm->tm_sec);
-	BCD_TO_BIN(rtc_tm->tm_min);
-	BCD_TO_BIN(rtc_tm->tm_hour);
-	BCD_TO_BIN(rtc_tm->tm_mday);
-	BCD_TO_BIN(rtc_tm->tm_mon);
-	BCD_TO_BIN(rtc_tm->tm_year);
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
 	/*
 	 * Account for differences between how the RTC uses the values
@@ -211,12 +211,12 @@ static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			else
 				yrs -= 1900;	/* RTC (70, 71, ... 99) */
 
-			BIN_TO_BCD(sec);
-			BIN_TO_BCD(min);
-			BIN_TO_BCD(hrs);
-			BIN_TO_BCD(day);
-			BIN_TO_BCD(mon);
-			BIN_TO_BCD(yrs);
+			sec = bin2bcd(sec);
+			min = bin2bcd(min);
+			hrs = bin2bcd(hrs);
+			day = bin2bcd(day);
+			mon = bin2bcd(mon);
+			yrs = bin2bcd(yrs);
 
 			lock_kernel();
 			local_irq_save(flags);
diff --git a/drivers/char/ip27-rtc.c b/drivers/char/ip27-rtc.c
index ec9d0443d92c..2abd881b4cbc 100644
--- a/drivers/char/ip27-rtc.c
+++ b/drivers/char/ip27-rtc.c
@@ -130,12 +130,12 @@ static long rtc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (yrs >= 100)
 			yrs -= 100;
 
-		sec = BIN2BCD(sec);
-		min = BIN2BCD(min);
-		hrs = BIN2BCD(hrs);
-		day = BIN2BCD(day);
-		mon = BIN2BCD(mon);
-		yrs = BIN2BCD(yrs);
+		sec = bin2bcd(sec);
+		min = bin2bcd(min);
+		hrs = bin2bcd(hrs);
+		day = bin2bcd(day);
+		mon = bin2bcd(mon);
+		yrs = bin2bcd(yrs);
 
 		spin_lock_irq(&rtc_lock);
 		rtc->control |= M48T35_RTC_SET;
@@ -311,12 +311,12 @@ static void get_rtc_time(struct rtc_time *rtc_tm)
 	rtc->control &= ~M48T35_RTC_READ;
 	spin_unlock_irq(&rtc_lock);
 
-	rtc_tm->tm_sec = BCD2BIN(rtc_tm->tm_sec);
-	rtc_tm->tm_min = BCD2BIN(rtc_tm->tm_min);
-	rtc_tm->tm_hour = BCD2BIN(rtc_tm->tm_hour);
-	rtc_tm->tm_mday = BCD2BIN(rtc_tm->tm_mday);
-	rtc_tm->tm_mon = BCD2BIN(rtc_tm->tm_mon);
-	rtc_tm->tm_year = BCD2BIN(rtc_tm->tm_year);
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
 	/*
 	 * Account for differences between how the RTC uses the values
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 17683de95717..32dc89720d58 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -518,17 +518,17 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 		if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) ||
 							RTC_ALWAYS_BCD) {
 			if (sec < 60)
-				BIN_TO_BCD(sec);
+				sec = bin2bcd(sec);
 			else
 				sec = 0xff;
 
 			if (min < 60)
-				BIN_TO_BCD(min);
+				min = bin2bcd(min);
 			else
 				min = 0xff;
 
 			if (hrs < 24)
-				BIN_TO_BCD(hrs);
+				hrs = bin2bcd(hrs);
 			else
 				hrs = 0xff;
 		}
@@ -614,12 +614,12 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 
 		if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
 		    || RTC_ALWAYS_BCD) {
-			BIN_TO_BCD(sec);
-			BIN_TO_BCD(min);
-			BIN_TO_BCD(hrs);
-			BIN_TO_BCD(day);
-			BIN_TO_BCD(mon);
-			BIN_TO_BCD(yrs);
+			sec = bin2bcd(sec);
+			min = bin2bcd(min);
+			hrs = bin2bcd(hrs);
+			day = bin2bcd(day);
+			mon = bin2bcd(mon);
+			yrs = bin2bcd(yrs);
 		}
 
 		save_control = CMOS_READ(RTC_CONTROL);
@@ -1099,7 +1099,7 @@ no_irq:
 	spin_unlock_irq(&rtc_lock);
 
 	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(year);       /* This should never happen... */
+		year = bcd2bin(year);       /* This should never happen... */
 
 	if (year < 20) {
 		epoch = 2000;
@@ -1352,13 +1352,13 @@ static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		BCD_TO_BIN(rtc_tm->tm_sec);
-		BCD_TO_BIN(rtc_tm->tm_min);
-		BCD_TO_BIN(rtc_tm->tm_hour);
-		BCD_TO_BIN(rtc_tm->tm_mday);
-		BCD_TO_BIN(rtc_tm->tm_mon);
-		BCD_TO_BIN(rtc_tm->tm_year);
-		BCD_TO_BIN(rtc_tm->tm_wday);
+		rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+		rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+		rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+		rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+		rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+		rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
+		rtc_tm->tm_wday = bcd2bin(rtc_tm->tm_wday);
 	}
 
 #ifdef CONFIG_MACH_DECSTATION
@@ -1392,9 +1392,9 @@ static void get_rtc_alm_time(struct rtc_time *alm_tm)
 	spin_unlock_irq(&rtc_lock);
 
 	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		BCD_TO_BIN(alm_tm->tm_sec);
-		BCD_TO_BIN(alm_tm->tm_min);
-		BCD_TO_BIN(alm_tm->tm_hour);
+		alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec);
+		alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
+		alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
 	}
 }
 
diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h
index 71ef3f0b9685..89061c1a67d4 100644
--- a/include/asm-generic/rtc.h
+++ b/include/asm-generic/rtc.h
@@ -84,12 +84,12 @@ static inline unsigned int get_rtc_time(struct rtc_time *time)
 
 	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
 	{
-		BCD_TO_BIN(time->tm_sec);
-		BCD_TO_BIN(time->tm_min);
-		BCD_TO_BIN(time->tm_hour);
-		BCD_TO_BIN(time->tm_mday);
-		BCD_TO_BIN(time->tm_mon);
-		BCD_TO_BIN(time->tm_year);
+		time->tm_sec = bcd2bin(time->tm_sec);
+		time->tm_min = bcd2bin(time->tm_min);
+		time->tm_hour = bcd2bin(time->tm_hour);
+		time->tm_mday = bcd2bin(time->tm_mday);
+		time->tm_mon = bcd2bin(time->tm_mon);
+		time->tm_year = bcd2bin(time->tm_year);
 	}
 
 #ifdef CONFIG_MACH_DECSTATION
@@ -159,12 +159,12 @@ static inline int set_rtc_time(struct rtc_time *time)
 
 	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
 	    || RTC_ALWAYS_BCD) {
-		BIN_TO_BCD(sec);
-		BIN_TO_BCD(min);
-		BIN_TO_BCD(hrs);
-		BIN_TO_BCD(day);
-		BIN_TO_BCD(mon);
-		BIN_TO_BCD(yrs);
+		sec = bin2bcd(sec);
+		min = bin2bcd(min);
+		hrs = bin2bcd(hrs);
+		day = bin2bcd(day);
+		mon = bin2bcd(mon);
+		yrs = bin2bcd(yrs);
 	}
 
 	save_control = CMOS_READ(RTC_CONTROL);
-- 
GitLab


From e232cfdc3d087d435c9ee767ddd4e6f52457d02a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sat, 18 Oct 2008 20:28:43 -0700
Subject: [PATCH 823/895] drivers/rtc/rtc-bq4802.c: don't use BIN_2_BCD and
 BCD_2_BIN

These are going away.

Cc: Takashi Iwai <tiwai@suse.de>
Cc: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-bq4802.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/rtc/rtc-bq4802.c b/drivers/rtc/rtc-bq4802.c
index 189a018bdf34..d00a274df8fc 100644
--- a/drivers/rtc/rtc-bq4802.c
+++ b/drivers/rtc/rtc-bq4802.c
@@ -71,14 +71,14 @@ static int bq4802_read_time(struct device *dev, struct rtc_time *tm)
 
 	spin_unlock_irqrestore(&p->lock, flags);
 
-	BCD_TO_BIN(tm->tm_sec);
-	BCD_TO_BIN(tm->tm_min);
-	BCD_TO_BIN(tm->tm_hour);
-	BCD_TO_BIN(tm->tm_mday);
-	BCD_TO_BIN(tm->tm_mon);
-	BCD_TO_BIN(tm->tm_year);
-	BCD_TO_BIN(tm->tm_wday);
-	BCD_TO_BIN(century);
+	tm->tm_sec = bcd2bin(tm->tm_sec);
+	tm->tm_min = bcd2bin(tm->tm_min);
+	tm->tm_hour = bcd2bin(tm->tm_hour);
+	tm->tm_mday = bcd2bin(tm->tm_mday);
+	tm->tm_mon = bcd2bin(tm->tm_mon);
+	tm->tm_year = bcd2bin(tm->tm_year);
+	tm->tm_wday = bcd2bin(tm->tm_wday);
+	century = bcd2bin(century);
 
 	tm->tm_year += (century * 100);
 	tm->tm_year -= 1900;
@@ -106,13 +106,13 @@ static int bq4802_set_time(struct device *dev, struct rtc_time *tm)
 	min = tm->tm_min;
 	sec = tm->tm_sec;
 
-	BIN_TO_BCD(sec);
-	BIN_TO_BCD(min);
-	BIN_TO_BCD(hrs);
-	BIN_TO_BCD(day);
-	BIN_TO_BCD(mon);
-	BIN_TO_BCD(yrs);
-	BIN_TO_BCD(century);
+	sec = bin2bcd(sec);
+	min = bin2bcd(min);
+	hrs = bin2bcd(hrs);
+	day = bin2bcd(day);
+	mon = bin2bcd(mon);
+	yrs = bin2bcd(yrs);
+	century = bin2bcd(century);
 
 	spin_lock_irqsave(&p->lock, flags);
 
-- 
GitLab


From 02112dbc925f664bc4d24ff098686b9d21bfbfb1 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:44 -0700
Subject: [PATCH 824/895] mips: use bcd2bin/bin2bcd

Changes mips to use the new bcd2bin/bin2bcd functions instead of the
obsolete BCD_TO_BIN/BIN_TO_BCD/BCD2BIN/BIN2BCD macros.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/dec/time.c                   | 18 ++++++++--------
 arch/mips/include/asm/mc146818-time.h  | 18 ++++++++--------
 arch/mips/pmc-sierra/yosemite/setup.c  | 30 +++++++++++++-------------
 arch/mips/sibyte/swarm/rtc_m41t81.c    | 26 +++++++++++-----------
 arch/mips/sibyte/swarm/rtc_xicor1241.c | 26 +++++++++++-----------
 5 files changed, 59 insertions(+), 59 deletions(-)

diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index 3965fda94a89..1359c03ded51 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -45,12 +45,12 @@ unsigned long read_persistent_clock(void)
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		sec = BCD2BIN(sec);
-		min = BCD2BIN(min);
-		hour = BCD2BIN(hour);
-		day = BCD2BIN(day);
-		mon = BCD2BIN(mon);
-		year = BCD2BIN(year);
+		sec = bcd2bin(sec);
+		min = bcd2bin(min);
+		hour = bcd2bin(hour);
+		day = bcd2bin(day);
+		mon = bcd2bin(mon);
+		year = bcd2bin(year);
 	}
 
 	year += real_year - 72 + 2000;
@@ -83,7 +83,7 @@ int rtc_mips_set_mmss(unsigned long nowtime)
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
 	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		cmos_minutes = BCD2BIN(cmos_minutes);
+		cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -99,8 +99,8 @@ int rtc_mips_set_mmss(unsigned long nowtime)
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
 		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			real_seconds = BIN2BCD(real_seconds);
-			real_minutes = BIN2BCD(real_minutes);
+			real_seconds = bin2bcd(real_seconds);
+			real_minutes = bin2bcd(real_minutes);
 		}
 		CMOS_WRITE(real_seconds, RTC_SECONDS);
 		CMOS_WRITE(real_minutes, RTC_MINUTES);
diff --git a/arch/mips/include/asm/mc146818-time.h b/arch/mips/include/asm/mc146818-time.h
index cdc379a0a94e..199b45733a95 100644
--- a/arch/mips/include/asm/mc146818-time.h
+++ b/arch/mips/include/asm/mc146818-time.h
@@ -44,7 +44,7 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
 	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(cmos_minutes);
+		cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -60,8 +60,8 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
 		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			BIN_TO_BCD(real_seconds);
-			BIN_TO_BCD(real_minutes);
+			real_seconds = bin2bcd(real_seconds);
+			real_minutes = bin2bcd(real_minutes);
 		}
 		CMOS_WRITE(real_seconds, RTC_SECONDS);
 		CMOS_WRITE(real_minutes, RTC_MINUTES);
@@ -103,12 +103,12 @@ static inline unsigned long mc146818_get_cmos_time(void)
 	} while (sec != CMOS_READ(RTC_SECONDS));
 
 	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		BCD_TO_BIN(sec);
-		BCD_TO_BIN(min);
-		BCD_TO_BIN(hour);
-		BCD_TO_BIN(day);
-		BCD_TO_BIN(mon);
-		BCD_TO_BIN(year);
+		sec = bcd2bin(sec);
+		min = bcd2bin(min);
+		hour = bcd2bin(hour);
+		day = bcd2bin(day);
+		mon = bcd2bin(mon);
+		year = bcd2bin(year);
 	}
 	spin_unlock_irqrestore(&rtc_lock, flags);
 	year = mc146818_decode_year(year);
diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c
index 6537d90a25bb..2d3c0dca275d 100644
--- a/arch/mips/pmc-sierra/yosemite/setup.c
+++ b/arch/mips/pmc-sierra/yosemite/setup.c
@@ -79,14 +79,14 @@ unsigned long read_persistent_clock(void)
 	/* Stop the update to the time */
 	m48t37_base->control = 0x40;
 
-	year = BCD2BIN(m48t37_base->year);
-	year += BCD2BIN(m48t37_base->century) * 100;
+	year = bcd2bin(m48t37_base->year);
+	year += bcd2bin(m48t37_base->century) * 100;
 
-	month = BCD2BIN(m48t37_base->month);
-	day = BCD2BIN(m48t37_base->date);
-	hour = BCD2BIN(m48t37_base->hour);
-	min = BCD2BIN(m48t37_base->min);
-	sec = BCD2BIN(m48t37_base->sec);
+	month = bcd2bin(m48t37_base->month);
+	day = bcd2bin(m48t37_base->date);
+	hour = bcd2bin(m48t37_base->hour);
+	min = bcd2bin(m48t37_base->min);
+	sec = bcd2bin(m48t37_base->sec);
 
 	/* Start the update to the time again */
 	m48t37_base->control = 0x00;
@@ -113,22 +113,22 @@ int rtc_mips_set_time(unsigned long tim)
 	m48t37_base->control = 0x80;
 
 	/* year */
-	m48t37_base->year = BIN2BCD(tm.tm_year % 100);
-	m48t37_base->century = BIN2BCD(tm.tm_year / 100);
+	m48t37_base->year = bin2bcd(tm.tm_year % 100);
+	m48t37_base->century = bin2bcd(tm.tm_year / 100);
 
 	/* month */
-	m48t37_base->month = BIN2BCD(tm.tm_mon);
+	m48t37_base->month = bin2bcd(tm.tm_mon);
 
 	/* day */
-	m48t37_base->date = BIN2BCD(tm.tm_mday);
+	m48t37_base->date = bin2bcd(tm.tm_mday);
 
 	/* hour/min/sec */
-	m48t37_base->hour = BIN2BCD(tm.tm_hour);
-	m48t37_base->min = BIN2BCD(tm.tm_min);
-	m48t37_base->sec = BIN2BCD(tm.tm_sec);
+	m48t37_base->hour = bin2bcd(tm.tm_hour);
+	m48t37_base->min = bin2bcd(tm.tm_min);
+	m48t37_base->sec = bin2bcd(tm.tm_sec);
 
 	/* day of week -- not really used, but let's keep it up-to-date */
-	m48t37_base->day = BIN2BCD(tm.tm_wday + 1);
+	m48t37_base->day = bin2bcd(tm.tm_wday + 1);
 
 	/* disable writing */
 	m48t37_base->control = 0x00;
diff --git a/arch/mips/sibyte/swarm/rtc_m41t81.c b/arch/mips/sibyte/swarm/rtc_m41t81.c
index 26fbff4c15b1..b732600b47f5 100644
--- a/arch/mips/sibyte/swarm/rtc_m41t81.c
+++ b/arch/mips/sibyte/swarm/rtc_m41t81.c
@@ -156,32 +156,32 @@ int m41t81_set_time(unsigned long t)
 	 */
 
 	spin_lock_irqsave(&rtc_lock, flags);
-	tm.tm_sec = BIN2BCD(tm.tm_sec);
+	tm.tm_sec = bin2bcd(tm.tm_sec);
 	m41t81_write(M41T81REG_SC, tm.tm_sec);
 
-	tm.tm_min = BIN2BCD(tm.tm_min);
+	tm.tm_min = bin2bcd(tm.tm_min);
 	m41t81_write(M41T81REG_MN, tm.tm_min);
 
-	tm.tm_hour = BIN2BCD(tm.tm_hour);
+	tm.tm_hour = bin2bcd(tm.tm_hour);
 	tm.tm_hour = (tm.tm_hour & 0x3f) | (m41t81_read(M41T81REG_HR) & 0xc0);
 	m41t81_write(M41T81REG_HR, tm.tm_hour);
 
 	/* tm_wday starts from 0 to 6 */
 	if (tm.tm_wday == 0) tm.tm_wday = 7;
-	tm.tm_wday = BIN2BCD(tm.tm_wday);
+	tm.tm_wday = bin2bcd(tm.tm_wday);
 	m41t81_write(M41T81REG_DY, tm.tm_wday);
 
-	tm.tm_mday = BIN2BCD(tm.tm_mday);
+	tm.tm_mday = bin2bcd(tm.tm_mday);
 	m41t81_write(M41T81REG_DT, tm.tm_mday);
 
 	/* tm_mon starts from 0, *ick* */
 	tm.tm_mon ++;
-	tm.tm_mon = BIN2BCD(tm.tm_mon);
+	tm.tm_mon = bin2bcd(tm.tm_mon);
 	m41t81_write(M41T81REG_MO, tm.tm_mon);
 
 	/* we don't do century, everything is beyond 2000 */
 	tm.tm_year %= 100;
-	tm.tm_year = BIN2BCD(tm.tm_year);
+	tm.tm_year = bin2bcd(tm.tm_year);
 	m41t81_write(M41T81REG_YR, tm.tm_year);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
@@ -209,12 +209,12 @@ unsigned long m41t81_get_time(void)
 	year = m41t81_read(M41T81REG_YR);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	sec = BCD2BIN(sec);
-	min = BCD2BIN(min);
-	hour = BCD2BIN(hour);
-	day = BCD2BIN(day);
-	mon = BCD2BIN(mon);
-	year = BCD2BIN(year);
+	sec = bcd2bin(sec);
+	min = bcd2bin(min);
+	hour = bcd2bin(hour);
+	day = bcd2bin(day);
+	mon = bcd2bin(mon);
+	year = bcd2bin(year);
 
 	year += 2000;
 
diff --git a/arch/mips/sibyte/swarm/rtc_xicor1241.c b/arch/mips/sibyte/swarm/rtc_xicor1241.c
index ff3e5dabb348..4438b2195c44 100644
--- a/arch/mips/sibyte/swarm/rtc_xicor1241.c
+++ b/arch/mips/sibyte/swarm/rtc_xicor1241.c
@@ -124,18 +124,18 @@ int xicor_set_time(unsigned long t)
 	xicor_write(X1241REG_SR, X1241REG_SR_WEL | X1241REG_SR_RWEL);
 
 	/* trivial ones */
-	tm.tm_sec = BIN2BCD(tm.tm_sec);
+	tm.tm_sec = bin2bcd(tm.tm_sec);
 	xicor_write(X1241REG_SC, tm.tm_sec);
 
-	tm.tm_min = BIN2BCD(tm.tm_min);
+	tm.tm_min = bin2bcd(tm.tm_min);
 	xicor_write(X1241REG_MN, tm.tm_min);
 
-	tm.tm_mday = BIN2BCD(tm.tm_mday);
+	tm.tm_mday = bin2bcd(tm.tm_mday);
 	xicor_write(X1241REG_DT, tm.tm_mday);
 
 	/* tm_mon starts from 0, *ick* */
 	tm.tm_mon ++;
-	tm.tm_mon = BIN2BCD(tm.tm_mon);
+	tm.tm_mon = bin2bcd(tm.tm_mon);
 	xicor_write(X1241REG_MO, tm.tm_mon);
 
 	/* year is split */
@@ -148,7 +148,7 @@ int xicor_set_time(unsigned long t)
 	tmp = xicor_read(X1241REG_HR);
 	if (tmp & X1241REG_HR_MIL) {
 		/* 24 hour format */
-		tm.tm_hour = BIN2BCD(tm.tm_hour);
+		tm.tm_hour = bin2bcd(tm.tm_hour);
 		tmp = (tmp & ~0x3f) | (tm.tm_hour & 0x3f);
 	} else {
 		/* 12 hour format, with 0x2 for pm */
@@ -157,7 +157,7 @@ int xicor_set_time(unsigned long t)
 			tmp |= 0x20;
 			tm.tm_hour -= 12;
 		}
-		tm.tm_hour = BIN2BCD(tm.tm_hour);
+		tm.tm_hour = bin2bcd(tm.tm_hour);
 		tmp |= tm.tm_hour;
 	}
 	xicor_write(X1241REG_HR, tmp);
@@ -191,13 +191,13 @@ unsigned long xicor_get_time(void)
 	y2k = xicor_read(X1241REG_Y2K);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	sec = BCD2BIN(sec);
-	min = BCD2BIN(min);
-	hour = BCD2BIN(hour);
-	day = BCD2BIN(day);
-	mon = BCD2BIN(mon);
-	year = BCD2BIN(year);
-	y2k = BCD2BIN(y2k);
+	sec = bcd2bin(sec);
+	min = bcd2bin(min);
+	hour = bcd2bin(hour);
+	day = bcd2bin(day);
+	mon = bcd2bin(mon);
+	year = bcd2bin(year);
+	y2k = bcd2bin(y2k);
 
 	year += (y2k * 100);
 
-- 
GitLab


From f6a2298c5fd1f074aa3626febea33f48cd3b5a94 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:45 -0700
Subject: [PATCH 825/895] mn10300: use bcd2bin/bin2bcd

Change mn10300 to use the new bcd2bin/bin2bcd functions instead of the
obsolete BCD_TO_BIN/BIN_TO_BCD macros.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mn10300/kernel/rtc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/mn10300/kernel/rtc.c b/arch/mn10300/kernel/rtc.c
index 042f792d8430..7978470b5749 100644
--- a/arch/mn10300/kernel/rtc.c
+++ b/arch/mn10300/kernel/rtc.c
@@ -67,7 +67,7 @@ static int set_rtc_mmss(unsigned long nowtime)
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
 	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(cmos_minutes);
+		cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -84,8 +84,8 @@ static int set_rtc_mmss(unsigned long nowtime)
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
 		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			BIN_TO_BCD(real_seconds);
-			BIN_TO_BCD(real_minutes);
+			real_seconds = bin2bcd(real_seconds);
+			real_minutes = bin2bcd(real_minutes);
 		}
 		CMOS_WRITE(real_seconds, RTC_SECONDS);
 		CMOS_WRITE(real_minutes, RTC_MINUTES);
-- 
GitLab


From e4d33969c2d0a9b92c7a2853e3f890dcb4ea37d1 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:46 -0700
Subject: [PATCH 826/895] i2c: use bcd2bin/bin2bcd

Change i2c to use the new bcd2bin/bin2bcd functions instead of the
obsolete BCD2BIN/BIN2BCD macros.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/i2c/chips/menelaus.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/i2c/chips/menelaus.c b/drivers/i2c/chips/menelaus.c
index 176126d3a01d..4b364bae6b3e 100644
--- a/drivers/i2c/chips/menelaus.c
+++ b/drivers/i2c/chips/menelaus.c
@@ -832,52 +832,52 @@ static irqreturn_t menelaus_irq(int irq, void *_menelaus)
 
 static void menelaus_to_time(char *regs, struct rtc_time *t)
 {
-	t->tm_sec = BCD2BIN(regs[0]);
-	t->tm_min = BCD2BIN(regs[1]);
+	t->tm_sec = bcd2bin(regs[0]);
+	t->tm_min = bcd2bin(regs[1]);
 	if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
-		t->tm_hour = BCD2BIN(regs[2] & 0x1f) - 1;
+		t->tm_hour = bcd2bin(regs[2] & 0x1f) - 1;
 		if (regs[2] & RTC_HR_PM)
 			t->tm_hour += 12;
 	} else
-		t->tm_hour = BCD2BIN(regs[2] & 0x3f);
-	t->tm_mday = BCD2BIN(regs[3]);
-	t->tm_mon = BCD2BIN(regs[4]) - 1;
-	t->tm_year = BCD2BIN(regs[5]) + 100;
+		t->tm_hour = bcd2bin(regs[2] & 0x3f);
+	t->tm_mday = bcd2bin(regs[3]);
+	t->tm_mon = bcd2bin(regs[4]) - 1;
+	t->tm_year = bcd2bin(regs[5]) + 100;
 }
 
 static int time_to_menelaus(struct rtc_time *t, int regnum)
 {
 	int	hour, status;
 
-	status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_sec));
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_sec));
 	if (status < 0)
 		goto fail;
 
-	status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_min));
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_min));
 	if (status < 0)
 		goto fail;
 
 	if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
 		hour = t->tm_hour + 1;
 		if (hour > 12)
-			hour = RTC_HR_PM | BIN2BCD(hour - 12);
+			hour = RTC_HR_PM | bin2bcd(hour - 12);
 		else
-			hour = BIN2BCD(hour);
+			hour = bin2bcd(hour);
 	} else
-		hour = BIN2BCD(t->tm_hour);
+		hour = bin2bcd(t->tm_hour);
 	status = menelaus_write_reg(regnum++, hour);
 	if (status < 0)
 		goto fail;
 
-	status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_mday));
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mday));
 	if (status < 0)
 		goto fail;
 
-	status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_mon + 1));
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mon + 1));
 	if (status < 0)
 		goto fail;
 
-	status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_year - 100));
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_year - 100));
 	if (status < 0)
 		goto fail;
 
@@ -914,7 +914,7 @@ static int menelaus_read_time(struct device *dev, struct rtc_time *t)
 	}
 
 	menelaus_to_time(regs, t);
-	t->tm_wday = BCD2BIN(regs[6]);
+	t->tm_wday = bcd2bin(regs[6]);
 
 	return 0;
 }
@@ -927,7 +927,7 @@ static int menelaus_set_time(struct device *dev, struct rtc_time *t)
 	status = time_to_menelaus(t, MENELAUS_RTC_SEC);
 	if (status < 0)
 		return status;
-	status = menelaus_write_reg(MENELAUS_RTC_WKDAY, BIN2BCD(t->tm_wday));
+	status = menelaus_write_reg(MENELAUS_RTC_WKDAY, bin2bcd(t->tm_wday));
 	if (status < 0) {
 		dev_err(&the_menelaus->client->dev, "rtc write reg %02x "
 				"err %d\n", MENELAUS_RTC_WKDAY, status);
-- 
GitLab


From 2cee5dfa8b01524a1d937d56ee518677b7acfb38 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:47 -0700
Subject: [PATCH 827/895] drivers/scsi/sr_vendor.c: use bcd2bin

Change sr_vendor.c to use the new bcd2bin function instead of the obsolete
BCD2BIN macro.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/scsi/sr_vendor.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/sr_vendor.c b/drivers/scsi/sr_vendor.c
index 4eb3da996b36..4ad3e017213f 100644
--- a/drivers/scsi/sr_vendor.c
+++ b/drivers/scsi/sr_vendor.c
@@ -223,9 +223,9 @@ int sr_cd_check(struct cdrom_device_info *cdi)
 				no_multi = 1;
 				break;
 			}
-			min = BCD2BIN(buffer[15]);
-			sec = BCD2BIN(buffer[16]);
-			frame = BCD2BIN(buffer[17]);
+			min = bcd2bin(buffer[15]);
+			sec = bcd2bin(buffer[16]);
+			frame = bcd2bin(buffer[17]);
 			sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;
 			break;
 		}
@@ -252,9 +252,9 @@ int sr_cd_check(struct cdrom_device_info *cdi)
 			}
 			if (rc != 0)
 				break;
-			min = BCD2BIN(buffer[1]);
-			sec = BCD2BIN(buffer[2]);
-			frame = BCD2BIN(buffer[3]);
+			min = bcd2bin(buffer[1]);
+			sec = bcd2bin(buffer[2]);
+			frame = bcd2bin(buffer[3]);
 			sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;
 			if (sector)
 				sector -= CD_MSF_OFFSET;
-- 
GitLab


From a0098efd6ee4e8c04d82d761aa1bb9ec7a0aa32d Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:47 -0700
Subject: [PATCH 828/895] remove the obsolete BCD*BIN/BIN*BCD macros

Remove the following obsolete macros:

- BCD2BIN
- BIN2BCD
- BCD_TO_BIN
- BIN_TO_BCD

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bcd.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/include/linux/bcd.h b/include/linux/bcd.h
index 7ac518e3c152..75f6d6e699a0 100644
--- a/include/linux/bcd.h
+++ b/include/linux/bcd.h
@@ -15,11 +15,4 @@
 unsigned bcd2bin(unsigned char val) __attribute_const__;
 unsigned char bin2bcd(unsigned val) __attribute_const__;
 
-#define BCD2BIN(val)	bcd2bin(val)
-#define BIN2BCD(val)	bin2bcd(val)
-
-/* backwards compat */
-#define BCD_TO_BIN(val) ((val)=BCD2BIN(val))
-#define BIN_TO_BCD(val) ((val)=BIN2BCD(val))
-
 #endif /* _BCD_H */
-- 
GitLab


From 5a85a7dda15f88b7f9c96c67fe826b5d0486d601 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 18 Oct 2008 20:28:48 -0700
Subject: [PATCH 829/895] include/linux/bcd.h: remove comments

- the macros are gone
- there's no more code in this file,
  LGPL + GPL = GPL,
  and the code that was moved to lib/bcd.c is anyway trivial

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bcd.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/include/linux/bcd.h b/include/linux/bcd.h
index 75f6d6e699a0..22ea563ba3eb 100644
--- a/include/linux/bcd.h
+++ b/include/linux/bcd.h
@@ -1,12 +1,3 @@
-/* Permission is hereby granted to copy, modify and redistribute this code
- * in terms of the GNU Library General Public License, Version 2 or later,
- * at your option.
- */
-
-/* macros to translate to/from binary and binary-coded decimal (frequently
- * found in RTC chips).
- */
-
 #ifndef _BCD_H
 #define _BCD_H
 
-- 
GitLab


From bb26b963d8343bb1bde842fba0b6e00cad841f31 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 18 Oct 2008 20:28:49 -0700
Subject: [PATCH 830/895] fs/Kconfig: move CIFS out

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Steven French <sfrench@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Kconfig      | 143 +-----------------------------------------------
 fs/cifs/Kconfig | 142 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+), 142 deletions(-)
 create mode 100644 fs/cifs/Kconfig

diff --git a/fs/Kconfig b/fs/Kconfig
index d0a1174fb516..c189089f35a5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1913,148 +1913,7 @@ config SMB_NLS_REMOTE
 
 	  smbmount from samba 2.2.0 or later supports this.
 
-config CIFS
-	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
-	depends on INET
-	select NLS
-	help
-	  This is the client VFS module for the Common Internet File System
-	  (CIFS) protocol which is the successor to the Server Message Block 
-	  (SMB) protocol, the native file sharing mechanism for most early
-	  PC operating systems.  The CIFS protocol is fully supported by 
-	  file servers such as Windows 2000 (including Windows 2003, NT 4  
-	  and Windows XP) as well by Samba (which provides excellent CIFS
-	  server support for Linux and many other operating systems). Limited
-	  support for OS/2 and Windows ME and similar servers is provided as
-	  well.
-
-	  The cifs module provides an advanced network file system
-	  client for mounting to CIFS compliant servers.  It includes
-	  support for DFS (hierarchical name space), secure per-user
-	  session establishment via Kerberos or NTLM or NTLMv2,
-	  safe distributed caching (oplock), optional packet
-	  signing, Unicode and other internationalization improvements.
-	  If you need to mount to Samba or Windows from this machine, say Y.
-
-config CIFS_STATS
-        bool "CIFS statistics"
-        depends on CIFS
-        help
-          Enabling this option will cause statistics for each server share
-	  mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
-
-config CIFS_STATS2
-	bool "Extended statistics"
-	depends on CIFS_STATS
-	help
-	  Enabling this option will allow more detailed statistics on SMB
-	  request timing to be displayed in /proc/fs/cifs/DebugData and also
-	  allow optional logging of slow responses to dmesg (depending on the
-	  value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
-	  These additional statistics may have a minor effect on performance
-	  and memory utilization.
-
-	  Unless you are a developer or are doing network performance analysis
-	  or tuning, say N.
-
-config CIFS_WEAK_PW_HASH
-	bool "Support legacy servers which use weaker LANMAN security"
-	depends on CIFS
-	help
-	  Modern CIFS servers including Samba and most Windows versions
-	  (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
-	  security mechanisms. These hash the password more securely
-	  than the mechanisms used in the older LANMAN version of the
-	  SMB protocol but LANMAN based authentication is needed to
-	  establish sessions with some old SMB servers.
-
-	  Enabling this option allows the cifs module to mount to older
-	  LANMAN based servers such as OS/2 and Windows 95, but such
-	  mounts may be less secure than mounts using NTLM or more recent
-	  security mechanisms if you are on a public network.  Unless you
-	  have a need to access old SMB servers (and are on a private
-	  network) you probably want to say N.  Even if this support
-	  is enabled in the kernel build, LANMAN authentication will not be
-	  used automatically. At runtime LANMAN mounts are disabled but
-	  can be set to required (or optional) either in
-	  /proc/fs/cifs (see fs/cifs/README for more detail) or via an
-	  option on the mount command. This support is disabled by
-	  default in order to reduce the possibility of a downgrade
-	  attack.
-
-	  If unsure, say N.
-
-config CIFS_UPCALL
-	  bool "Kerberos/SPNEGO advanced session setup"
-	  depends on CIFS && KEYS
-	  help
-	    Enables an upcall mechanism for CIFS which accesses
-	    userspace helper utilities to provide SPNEGO packaged (RFC 4178)
-	    Kerberos tickets which are needed to mount to certain secure servers
-	    (for which more secure Kerberos authentication is required). If
-	    unsure, say N.
-
-config CIFS_XATTR
-        bool "CIFS extended attributes"
-        depends on CIFS
-        help
-          Extended attributes are name:value pairs associated with inodes by
-          the kernel or by users (see the attr(5) manual page, or visit
-          <http://acl.bestbits.at/> for details).  CIFS maps the name of
-          extended attributes beginning with the user namespace prefix
-          to SMB/CIFS EAs. EAs are stored on Windows servers without the
-          user namespace prefix, but their names are seen by Linux cifs clients
-          prefaced by the user namespace prefix. The system namespace
-          (used by some filesystems to store ACLs) is not supported at
-          this time.
-
-          If unsure, say N.
-
-config CIFS_POSIX
-        bool "CIFS POSIX Extensions"
-        depends on CIFS_XATTR
-        help
-          Enabling this option will cause the cifs client to attempt to
-	  negotiate a newer dialect with servers, such as Samba 3.0.5
-	  or later, that optionally can handle more POSIX like (rather
-	  than Windows like) file behavior.  It also enables
-	  support for POSIX ACLs (getfacl and setfacl) to servers
-	  (such as Samba 3.10 and later) which can negotiate
-	  CIFS POSIX ACL support.  If unsure, say N.
-
-config CIFS_DEBUG2
-	bool "Enable additional CIFS debugging routines"
-	depends on CIFS
-	help
-	   Enabling this option adds a few more debugging routines
-	   to the cifs code which slightly increases the size of
-	   the cifs module and can cause additional logging of debug
-	   messages in some error paths, slowing performance. This
-	   option can be turned off unless you are debugging
-	   cifs problems.  If unsure, say N.
-
-config CIFS_EXPERIMENTAL
-	  bool "CIFS Experimental Features (EXPERIMENTAL)"
-	  depends on CIFS && EXPERIMENTAL
-	  help
-	    Enables cifs features under testing. These features are
-	    experimental and currently include DFS support and directory 
-	    change notification ie fcntl(F_DNOTIFY), as well as the upcall
-	    mechanism which will be used for Kerberos session negotiation
-	    and uid remapping.  Some of these features also may depend on 
-	    setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
-	    (which is disabled by default). See the file fs/cifs/README 
-	    for more details.  If unsure, say N.
-
-config CIFS_DFS_UPCALL
-	  bool "DFS feature support (EXPERIMENTAL)"
-	  depends on CIFS_EXPERIMENTAL
-	  depends on KEYS
-	  help
-	    Enables an upcall mechanism for CIFS which contacts userspace
-	    helper utilities to provide server name resolution (host names to
-	    IP addresses) which is needed for implicit mounts of DFS junction
-	    points. If unsure, say N.
+source "fs/cifs/Kconfig"
 
 config NCP_FS
 	tristate "NCP file system support (to mount NetWare volumes)"
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
new file mode 100644
index 000000000000..341a98965bd0
--- /dev/null
+++ b/fs/cifs/Kconfig
@@ -0,0 +1,142 @@
+config CIFS
+	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
+	depends on INET
+	select NLS
+	help
+	  This is the client VFS module for the Common Internet File System
+	  (CIFS) protocol which is the successor to the Server Message Block
+	  (SMB) protocol, the native file sharing mechanism for most early
+	  PC operating systems.  The CIFS protocol is fully supported by
+	  file servers such as Windows 2000 (including Windows 2003, NT 4
+	  and Windows XP) as well by Samba (which provides excellent CIFS
+	  server support for Linux and many other operating systems). Limited
+	  support for OS/2 and Windows ME and similar servers is provided as
+	  well.
+
+	  The cifs module provides an advanced network file system
+	  client for mounting to CIFS compliant servers.  It includes
+	  support for DFS (hierarchical name space), secure per-user
+	  session establishment via Kerberos or NTLM or NTLMv2,
+	  safe distributed caching (oplock), optional packet
+	  signing, Unicode and other internationalization improvements.
+	  If you need to mount to Samba or Windows from this machine, say Y.
+
+config CIFS_STATS
+        bool "CIFS statistics"
+        depends on CIFS
+        help
+          Enabling this option will cause statistics for each server share
+	  mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
+
+config CIFS_STATS2
+	bool "Extended statistics"
+	depends on CIFS_STATS
+	help
+	  Enabling this option will allow more detailed statistics on SMB
+	  request timing to be displayed in /proc/fs/cifs/DebugData and also
+	  allow optional logging of slow responses to dmesg (depending on the
+	  value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
+	  These additional statistics may have a minor effect on performance
+	  and memory utilization.
+
+	  Unless you are a developer or are doing network performance analysis
+	  or tuning, say N.
+
+config CIFS_WEAK_PW_HASH
+	bool "Support legacy servers which use weaker LANMAN security"
+	depends on CIFS
+	help
+	  Modern CIFS servers including Samba and most Windows versions
+	  (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
+	  security mechanisms. These hash the password more securely
+	  than the mechanisms used in the older LANMAN version of the
+	  SMB protocol but LANMAN based authentication is needed to
+	  establish sessions with some old SMB servers.
+
+	  Enabling this option allows the cifs module to mount to older
+	  LANMAN based servers such as OS/2 and Windows 95, but such
+	  mounts may be less secure than mounts using NTLM or more recent
+	  security mechanisms if you are on a public network.  Unless you
+	  have a need to access old SMB servers (and are on a private
+	  network) you probably want to say N.  Even if this support
+	  is enabled in the kernel build, LANMAN authentication will not be
+	  used automatically. At runtime LANMAN mounts are disabled but
+	  can be set to required (or optional) either in
+	  /proc/fs/cifs (see fs/cifs/README for more detail) or via an
+	  option on the mount command. This support is disabled by
+	  default in order to reduce the possibility of a downgrade
+	  attack.
+
+	  If unsure, say N.
+
+config CIFS_UPCALL
+	  bool "Kerberos/SPNEGO advanced session setup"
+	  depends on CIFS && KEYS
+	  help
+	    Enables an upcall mechanism for CIFS which accesses
+	    userspace helper utilities to provide SPNEGO packaged (RFC 4178)
+	    Kerberos tickets which are needed to mount to certain secure servers
+	    (for which more secure Kerberos authentication is required). If
+	    unsure, say N.
+
+config CIFS_XATTR
+        bool "CIFS extended attributes"
+        depends on CIFS
+        help
+          Extended attributes are name:value pairs associated with inodes by
+          the kernel or by users (see the attr(5) manual page, or visit
+          <http://acl.bestbits.at/> for details).  CIFS maps the name of
+          extended attributes beginning with the user namespace prefix
+          to SMB/CIFS EAs. EAs are stored on Windows servers without the
+          user namespace prefix, but their names are seen by Linux cifs clients
+          prefaced by the user namespace prefix. The system namespace
+          (used by some filesystems to store ACLs) is not supported at
+          this time.
+
+          If unsure, say N.
+
+config CIFS_POSIX
+        bool "CIFS POSIX Extensions"
+        depends on CIFS_XATTR
+        help
+          Enabling this option will cause the cifs client to attempt to
+	  negotiate a newer dialect with servers, such as Samba 3.0.5
+	  or later, that optionally can handle more POSIX like (rather
+	  than Windows like) file behavior.  It also enables
+	  support for POSIX ACLs (getfacl and setfacl) to servers
+	  (such as Samba 3.10 and later) which can negotiate
+	  CIFS POSIX ACL support.  If unsure, say N.
+
+config CIFS_DEBUG2
+	bool "Enable additional CIFS debugging routines"
+	depends on CIFS
+	help
+	   Enabling this option adds a few more debugging routines
+	   to the cifs code which slightly increases the size of
+	   the cifs module and can cause additional logging of debug
+	   messages in some error paths, slowing performance. This
+	   option can be turned off unless you are debugging
+	   cifs problems.  If unsure, say N.
+
+config CIFS_EXPERIMENTAL
+	  bool "CIFS Experimental Features (EXPERIMENTAL)"
+	  depends on CIFS && EXPERIMENTAL
+	  help
+	    Enables cifs features under testing. These features are
+	    experimental and currently include DFS support and directory
+	    change notification ie fcntl(F_DNOTIFY), as well as the upcall
+	    mechanism which will be used for Kerberos session negotiation
+	    and uid remapping.  Some of these features also may depend on
+	    setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
+	    (which is disabled by default). See the file fs/cifs/README
+	    for more details.  If unsure, say N.
+
+config CIFS_DFS_UPCALL
+	  bool "DFS feature support (EXPERIMENTAL)"
+	  depends on CIFS_EXPERIMENTAL
+	  depends on KEYS
+	  help
+	    Enables an upcall mechanism for CIFS which contacts userspace
+	    helper utilities to provide server name resolution (host names to
+	    IP addresses) which is needed for implicit mounts of DFS junction
+	    points. If unsure, say N.
-- 
GitLab


From 01e8ef11bc1a74e65678ed55795f59266d4add01 Mon Sep 17 00:00:00 2001
From: Parag Warudkar <parag.lkml@gmail.com>
Date: Sat, 18 Oct 2008 20:28:50 -0700
Subject: [PATCH 831/895] x86: sysfs: kill owner field from attribute

Tejun's commit 7b595756ec1f49e0049a9e01a1298d53a7faaa15 made sysfs
attribute->owner unnecessary.  But the field was left in the structure to
ease the merge.  It's been over a year since that change and it is now
time to start killing attribute->owner along with its users - one arch at
a time!

This patch is attempt #1 to get rid of attribute->owner only for
CONFIG_X86_64 or CONFIG_X86_32 .  We will deal with other arches later on
as and when possible - avr32 will be the next since that is something I
can test.  Compile (make allyesconfig / make allmodconfig / custom config)
and boot tested.

akpm: the idea is that we put the declaration of sttribute.owner inside
`#ifndef CONFIG_X86'.  But that proved to be too ambitious for now because
new usages kept on turning up in subsystem trees.

[akpm: remove the ifdef for now]
Signed-off-by: Parag Warudkar <parag.lkml@gmail.com>
Cc: Greg KH <greg@kroah.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Tejun Heo <htejun@gmail.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Roland Dreier <rolandd@cisco.com>
Cc: David Brownell <david-b@pacbell.net>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/acpi/battery.c              | 2 +-
 drivers/acpi/sbs.c                  | 2 +-
 drivers/acpi/system.c               | 1 -
 drivers/block/aoe/aoeblk.c          | 2 +-
 drivers/block/nbd.c                 | 2 +-
 drivers/firmware/iscsi_ibft.c       | 1 -
 drivers/i2c/chips/at24.c            | 1 -
 drivers/i2c/chips/ds1682.c          | 1 -
 drivers/infiniband/core/cm.c        | 2 +-
 drivers/memstick/core/mspro_block.c | 1 -
 drivers/power/power_supply_sysfs.c  | 2 +-
 drivers/rtc/rtc-cmos.c              | 1 -
 drivers/rtc/rtc-ds1305.c            | 1 -
 drivers/rtc/rtc-ds1307.c            | 1 -
 drivers/rtc/rtc-ds1511.c            | 1 -
 drivers/rtc/rtc-m48t59.c            | 1 -
 drivers/rtc/rtc-stk17ta8.c          | 1 -
 drivers/scsi/arcmsr/arcmsr_attr.c   | 3 ---
 drivers/w1/slaves/w1_ds2760.c       | 1 -
 include/linux/sysfs.h               | 5 +++--
 20 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index b1c723f9f58d..70f7f60929ca 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -431,7 +431,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev,
 }
 
 static struct device_attribute alarm_attr = {
-	.attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+	.attr = {.name = "alarm", .mode = 0644},
 	.show = acpi_battery_alarm_show,
 	.store = acpi_battery_alarm_store,
 };
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 10a36512647c..7b011e7e29fe 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -463,7 +463,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev,
 }
 
 static struct device_attribute alarm_attr = {
-	.attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+	.attr = {.name = "alarm", .mode = 0644},
 	.show = acpi_battery_alarm_show,
 	.store = acpi_battery_alarm_store,
 };
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index 91dec448b3ed..24e80fd927e2 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -115,7 +115,6 @@ static void acpi_table_attr_init(struct acpi_table_attr *table_attr,
 	table_attr->attr.read = acpi_table_show;
 	table_attr->attr.attr.name = table_attr->name;
 	table_attr->attr.attr.mode = 0444;
-	table_attr->attr.attr.owner = THIS_MODULE;
 
 	return;
 }
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index b82654e883a7..d876ad861237 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -90,7 +90,7 @@ static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
 static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
 static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
 static struct device_attribute dev_attr_firmware_version = {
-	.attr = { .name = "firmware-version", .mode = S_IRUGO, .owner = THIS_MODULE },
+	.attr = { .name = "firmware-version", .mode = S_IRUGO },
 	.show = aoedisk_show_fwver,
 };
 
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7b3351260d56..9034ca585afd 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -391,7 +391,7 @@ static ssize_t pid_show(struct device *dev,
 }
 
 static struct device_attribute pid_attr = {
-	.attr = { .name = "pid", .mode = S_IRUGO, .owner = THIS_MODULE },
+	.attr = { .name = "pid", .mode = S_IRUGO},
 	.show = pid_show,
 };
 
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index deb154aa47c4..4353414a0b77 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -732,7 +732,6 @@ static int __init ibft_create_attribute(struct ibft_kobject *kobj_data,
 
 	attr->attr.name = name;
 	attr->attr.mode = S_IRUSR;
-	attr->attr.owner = THIS_MODULE;
 
 	attr->hdr = hdr;
 	attr->show = show;
diff --git a/drivers/i2c/chips/at24.c b/drivers/i2c/chips/at24.c
index 2a4acb269569..d4775528abc6 100644
--- a/drivers/i2c/chips/at24.c
+++ b/drivers/i2c/chips/at24.c
@@ -460,7 +460,6 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	 */
 	at24->bin.attr.name = "eeprom";
 	at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR;
-	at24->bin.attr.owner = THIS_MODULE;
 	at24->bin.read = at24_bin_read;
 	at24->bin.size = chip.byte_len;
 
diff --git a/drivers/i2c/chips/ds1682.c b/drivers/i2c/chips/ds1682.c
index 23be4d42cb02..f3ee4a1abb77 100644
--- a/drivers/i2c/chips/ds1682.c
+++ b/drivers/i2c/chips/ds1682.c
@@ -190,7 +190,6 @@ static struct bin_attribute ds1682_eeprom_attr = {
 	.attr = {
 		.name = "eeprom",
 		.mode = S_IRUGO | S_IWUSR,
-		.owner = THIS_MODULE,
 	},
 	.size = DS1682_EEPROM_SIZE,
 	.read = ds1682_eeprom_read,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index a78d35aecee3..f1e82a92e61e 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -122,7 +122,7 @@ struct cm_counter_attribute {
 
 #define CM_COUNTER_ATTR(_name, _index) \
 struct cm_counter_attribute cm_##_name##_counter_attr = { \
-	.attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \
+	.attr = { .name = __stringify(_name), .mode = 0444 }, \
 	.index = _index \
 }
 
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 6e291bf8237a..5263913e0c69 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1044,7 +1044,6 @@ static int mspro_block_read_attributes(struct memstick_dev *card)
 
 		s_attr->dev_attr.attr.name = s_attr->name;
 		s_attr->dev_attr.attr.mode = S_IRUGO;
-		s_attr->dev_attr.attr.owner = THIS_MODULE;
 		s_attr->dev_attr.show = mspro_block_attr_show(s_attr->id);
 
 		if (!rc)
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index fe2aeb11939b..23ae8460f5c1 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -30,7 +30,7 @@
 
 #define POWER_SUPPLY_ATTR(_name)					\
 {									\
-	.attr = { .name = #_name, .mode = 0444, .owner = THIS_MODULE },	\
+	.attr = { .name = #_name, .mode = 0444 },	\
 	.show = power_supply_show_property,				\
 	.store = NULL,							\
 }
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 957365e4a746..5549231179a2 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -592,7 +592,6 @@ static struct bin_attribute nvram = {
 	.attr = {
 		.name	= "nvram",
 		.mode	= S_IRUGO | S_IWUSR,
-		.owner	= THIS_MODULE,
 	},
 
 	.read	= cmos_nvram_read,
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index 57b470f3fc0b..fc372df6534b 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -606,7 +606,6 @@ ds1305_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
 static struct bin_attribute nvram = {
 	.attr.name	= "nvram",
 	.attr.mode	= S_IRUGO | S_IWUSR,
-	.attr.owner	= THIS_MODULE,
 	.read		= ds1305_nvram_read,
 	.write		= ds1305_nvram_write,
 	.size		= DS1305_NVRAM_LEN,
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index cad23bcfebd4..162330b9d1dc 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -551,7 +551,6 @@ static struct bin_attribute nvram = {
 	.attr = {
 		.name	= "nvram",
 		.mode	= S_IRUGO | S_IWUSR,
-		.owner	= THIS_MODULE,
 	},
 
 	.read	= ds1307_nvram_read,
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 7bb0a962ad21..25caada78398 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -481,7 +481,6 @@ static struct bin_attribute ds1511_nvram_attr = {
 	.attr = {
 		.name = "nvram",
 		.mode = S_IRUGO | S_IWUGO,
-		.owner = THIS_MODULE,
 	},
 	.size = DS1511_RAM_MAX,
 	.read = ds1511_nvram_read,
diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c
index ed671e29e07d..04b63dab6932 100644
--- a/drivers/rtc/rtc-m48t59.c
+++ b/drivers/rtc/rtc-m48t59.c
@@ -360,7 +360,6 @@ static struct bin_attribute m48t59_nvram_attr = {
 	.attr = {
 		.name = "nvram",
 		.mode = S_IRUGO | S_IWUSR,
-		.owner = THIS_MODULE,
 	},
 	.read = m48t59_nvram_read,
 	.write = m48t59_nvram_write,
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index 0947f8c23957..f4cd46e15af9 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -280,7 +280,6 @@ static struct bin_attribute stk17ta8_nvram_attr = {
 	.attr = {
 		.name = "nvram",
 		.mode = S_IRUGO | S_IWUSR,
-		.owner = THIS_MODULE,
 	},
 	.size = RTC_OFFSET,
 	.read = stk17ta8_nvram_read,
diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c
index 69f8346aa288..5877f29a6005 100644
--- a/drivers/scsi/arcmsr/arcmsr_attr.c
+++ b/drivers/scsi/arcmsr/arcmsr_attr.c
@@ -189,7 +189,6 @@ static struct bin_attribute arcmsr_sysfs_message_read_attr = {
 	.attr = {
 		.name = "mu_read",
 		.mode = S_IRUSR ,
-		.owner = THIS_MODULE,
 	},
 	.size = 1032,
 	.read = arcmsr_sysfs_iop_message_read,
@@ -199,7 +198,6 @@ static struct bin_attribute arcmsr_sysfs_message_write_attr = {
 	.attr = {
 		.name = "mu_write",
 		.mode = S_IWUSR,
-		.owner = THIS_MODULE,
 	},
 	.size = 1032,
 	.write = arcmsr_sysfs_iop_message_write,
@@ -209,7 +207,6 @@ static struct bin_attribute arcmsr_sysfs_message_clear_attr = {
 	.attr = {
 		.name = "mu_clear",
 		.mode = S_IWUSR,
-		.owner = THIS_MODULE,
 	},
 	.size = 1,
 	.write = arcmsr_sysfs_iop_message_clear,
diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c
index ed6b0576208c..1f09d4e4144c 100644
--- a/drivers/w1/slaves/w1_ds2760.c
+++ b/drivers/w1/slaves/w1_ds2760.c
@@ -80,7 +80,6 @@ static struct bin_attribute w1_ds2760_bin_attr = {
 	.attr = {
 		.name = "w1_slave",
 		.mode = S_IRUGO,
-		.owner = THIS_MODULE,
 	},
 	.size = DS2760_DATA_SIZE,
 	.read = w1_ds2760_read_bin,
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b330e289d71f..9d68fed50f11 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -21,8 +21,9 @@ struct kobject;
 struct module;
 
 /* FIXME
- * The *owner field is no longer used, but leave around
- * until the tree gets cleaned up fully.
+ * The *owner field is no longer used.
+ * x86 tree has been cleaned up. The owner
+ * attribute is still left for other arches.
  */
 struct attribute {
 	const char		*name;
-- 
GitLab


From a1cf64944f6f2ffe5ef30a0ee4d529db771f7db2 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 20 Oct 2008 22:59:18 +1100
Subject: [PATCH 832/895] staging: sxg depends on X86

sxghif.h has code that explicitly will not build fo other architecures.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/staging/sxg/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/sxg/Kconfig b/drivers/staging/sxg/Kconfig
index 1ae350806600..6e6cf0b9ef99 100644
--- a/drivers/staging/sxg/Kconfig
+++ b/drivers/staging/sxg/Kconfig
@@ -1,6 +1,7 @@
 config SXG
 	tristate "Alacritech SLIC Technology Non-Accelerated 10Gbe support"
 	depends on PCI && NETDEV_10000
+	depends on X86
 	default n
 	help
 	  This driver supports the Alacritech SLIC Technology Non-Accelerated
-- 
GitLab


From 332d2e7834986a9326ec3fa6a91641daef81746c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 20 Oct 2008 15:07:34 +1100
Subject: [PATCH 833/895] Implement %pR to print struct resource content

Add a %pR option to the kernel vsnprintf that prints the range of
addresses inside a struct resource passed by pointer.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/vsprintf.c | 49 +++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 6 deletions(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index cceecb6a963d..a013bbc23717 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
+#include <linux/ioport.h>
 
 #include <asm/page.h>		/* for PAGE_SIZE */
 #include <asm/div64.h>
@@ -550,18 +551,51 @@ static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int
 #endif
 }
 
+static char *resource_string(char *buf, char *end, struct resource *res, int field_width, int precision, int flags)
+{
+#ifndef IO_RSRC_PRINTK_SIZE
+#define IO_RSRC_PRINTK_SIZE	4
+#endif
+
+#ifndef MEM_RSRC_PRINTK_SIZE
+#define MEM_RSRC_PRINTK_SIZE	8
+#endif
+
+	/* room for the actual numbers, the two "0x", -, [, ] and the final zero */
+	char sym[4*sizeof(resource_size_t) + 8];
+	char *p = sym, *pend = sym + sizeof(sym);
+	int size = -1;
+
+	if (res->flags & IORESOURCE_IO)
+		size = IO_RSRC_PRINTK_SIZE;
+	else if (res->flags & IORESOURCE_MEM)
+		size = MEM_RSRC_PRINTK_SIZE;
+
+	*p++ = '[';
+	p = number(p, pend, res->start, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+	*p++ = '-';
+	p = number(p, pend, res->end, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+	*p++ = ']';
+	*p = 0;
+
+	return string(buf, end, sym, field_width, precision, flags);
+}
+
 /*
  * Show a '%p' thing.  A kernel extension is that the '%p' is followed
  * by an extra set of alphanumeric characters that are extended format
  * specifiers.
  *
- * Right now we just handle 'F' (for symbolic Function descriptor pointers)
- * and 'S' (for Symbolic direct pointers), but this can easily be
- * extended in the future (network address types etc).
+ * Right now we handle:
+ *
+ * - 'F' For symbolic function descriptor pointers
+ * - 'S' For symbolic direct pointers
+ * - 'R' For a struct resource pointer, it prints the range of
+ *       addresses (not the name nor the flags)
  *
- * The difference between 'S' and 'F' is that on ia64 and ppc64 function
- * pointers are really function descriptors, which contain a pointer the
- * real address. 
+ * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
+ * function pointers are really function descriptors, which contain a
+ * pointer to the real address.
  */
 static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags)
 {
@@ -571,6 +605,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
 		/* Fallthrough */
 	case 'S':
 		return symbol_string(buf, end, ptr, field_width, precision, flags);
+	case 'R':
+		return resource_string(buf, end, ptr, field_width, precision, flags);
 	}
 	flags |= SMALL;
 	if (field_width == -1) {
@@ -590,6 +626,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
  * This function follows C99 vsnprintf, but has some extensions:
  * %pS output the name of a text symbol
  * %pF output the name of a function pointer
+ * %pR output the address range in a struct resource
  *
  * The return value is the number of characters which would
  * be generated for the given input, excluding the trailing
-- 
GitLab


From 096e6f673dc02a6394dc9a7d8f8735c6978f5b91 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 20 Oct 2008 15:07:37 +1100
Subject: [PATCH 834/895] pci: Use new %pR to print resource ranges

This converts things in drivers/pci to use %pR to printout the
content of a struct resource instead of hand-casted %llx or
other variants.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pci/pci.c       |  5 ++---
 drivers/pci/probe.c     | 28 +++++++++++++---------------
 drivers/pci/setup-bus.c | 13 ++++---------
 drivers/pci/setup-res.c | 40 +++++++++++++---------------------------
 4 files changed, 32 insertions(+), 54 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c9884bba22de..dbe9f39f4436 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1358,11 +1358,10 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
 	return 0;
 
 err_out:
-	dev_warn(&pdev->dev, "BAR %d: can't reserve %s region [%#llx-%#llx]\n",
+	dev_warn(&pdev->dev, "BAR %d: can't reserve %s region %pR\n",
 		 bar,
 		 pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem",
-		 (unsigned long long)pci_resource_start(pdev, bar),
-		 (unsigned long long)pci_resource_end(pdev, bar));
+		 &pdev->resource[bar]);
 	return -EBUSY;
 }
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index dd9161a054e1..d3db8b249729 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -304,9 +304,8 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 		} else {
 			res->start = l64;
 			res->end = l64 + sz64;
-			printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: [%llx, %llx]\n",
-				pci_name(dev), pos, (unsigned long long)res->start,
-				(unsigned long long)res->end);
+			printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: %pR\n",
+				pci_name(dev), pos, res);
 		}
 	} else {
 		sz = pci_size(l, sz, mask);
@@ -316,9 +315,10 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 
 		res->start = l;
 		res->end = l + sz;
-		printk(KERN_DEBUG "PCI: %s reg %x %s: [%llx, %llx]\n", pci_name(dev),
-			pos, (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio",
-			(unsigned long long)res->start, (unsigned long long)res->end);
+		printk(KERN_DEBUG "PCI: %s reg %x %s: %pR\n",
+		       pci_name(dev), pos,
+		       (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio",
+		       res);
 	}
 
  out:
@@ -389,9 +389,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 			res->start = base;
 		if (!res->end)
 			res->end = limit + 0xfff;
-		printk(KERN_DEBUG "PCI: bridge %s io port: [%llx, %llx]\n",
-			pci_name(dev), (unsigned long long) res->start,
-			(unsigned long long) res->end);
+		printk(KERN_DEBUG "PCI: bridge %s io port: %pR\n",
+		       pci_name(dev), res);
 	}
 
 	res = child->resource[1];
@@ -403,9 +402,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 		res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
 		res->start = base;
 		res->end = limit + 0xfffff;
-		printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: [%llx, %llx]\n",
-			pci_name(dev), (unsigned long long) res->start,
-			(unsigned long long) res->end);
+		printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: %pR\n",
+		       pci_name(dev), res);
 	}
 
 	res = child->resource[2];
@@ -441,9 +439,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 		res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
 		res->start = base;
 		res->end = limit + 0xfffff;
-		printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: [%llx, %llx]\n",
-			pci_name(dev), (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32",
-			(unsigned long long) res->start, (unsigned long long) res->end);
+		printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: %pR\n",
+		       pci_name(dev),
+		       (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64":"32", res);
 	}
 }
 
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index d5e2106760f8..471a429d7a20 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -356,10 +356,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 			order = __ffs(align) - 20;
 			if (order > 11) {
 				dev_warn(&dev->dev, "BAR %d bad alignment %llx: "
-				       "%#016llx-%#016llx\n", i,
-				       (unsigned long long)align,
-				       (unsigned long long)r->start,
-				       (unsigned long long)r->end);
+					 "%pR\n", i, (unsigned long long)align, r);
 				r->flags = 0;
 				continue;
 			}
@@ -539,11 +536,9 @@ static void pci_bus_dump_res(struct pci_bus *bus)
                 if (!res)
                         continue;
 
-		printk(KERN_INFO "bus: %02x index %x %s: [%llx, %llx]\n",
-			bus->number, i,
-			(res->flags & IORESOURCE_IO) ? "io port" : "mmio",
-			(unsigned long long) res->start,
-			(unsigned long long) res->end);
+		printk(KERN_INFO "bus: %02x index %x %s: %pR\n",
+		       bus->number, i,
+		       (res->flags & IORESOURCE_IO) ? "io port" : "mmio", res);
         }
 }
 
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 1a5fc83c71b3..d4b5c690eaa7 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -49,10 +49,8 @@ void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
 
 	pcibios_resource_to_bus(dev, &region, res);
 
-	dev_dbg(&dev->dev, "BAR %d: got res [%#llx-%#llx] bus [%#llx-%#llx] "
-		"flags %#lx\n", resno,
-		 (unsigned long long)res->start,
-		 (unsigned long long)res->end,
+	dev_dbg(&dev->dev, "BAR %d: got res %pR bus [%#llx-%#llx] "
+		"flags %#lx\n", resno, res,
 		 (unsigned long long)region.start,
 		 (unsigned long long)region.end,
 		 (unsigned long)res->flags);
@@ -114,13 +112,11 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
 		err = insert_resource(root, res);
 
 	if (err) {
-		dev_err(&dev->dev, "BAR %d: %s of %s [%#llx-%#llx]\n",
+		dev_err(&dev->dev, "BAR %d: %s of %s %pR\n",
 			resource,
 			root ? "address space collision on" :
 				"no parent found for",
-			dtype,
-			(unsigned long long)res->start,
-			(unsigned long long)res->end);
+			dtype, res);
 	}
 
 	return err;
@@ -139,9 +135,8 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	align = resource_alignment(res);
 	if (!align) {
 		dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus "
-			"alignment) [%#llx-%#llx] flags %#lx\n",
-			resno, (unsigned long long)res->start,
-			(unsigned long long)res->end, res->flags);
+			"alignment) %pR flags %#lx\n",
+			resno, res, res->flags);
 		return -EINVAL;
 	}
 
@@ -162,11 +157,8 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	}
 
 	if (ret) {
-		dev_err(&dev->dev, "BAR %d: can't allocate %s resource "
-			"[%#llx-%#llx]\n", resno,
-			res->flags & IORESOURCE_IO ? "I/O" : "mem",
-			(unsigned long long)res->start,
-			(unsigned long long)res->end);
+		dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n",
+			resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res);
 	} else {
 		res->flags &= ~IORESOURCE_STARTALIGN;
 		if (resno < PCI_BRIDGE_RESOURCES)
@@ -202,11 +194,8 @@ int pci_assign_resource_fixed(struct pci_dev *dev, int resno)
 	}
 
 	if (ret) {
-		dev_err(&dev->dev, "BAR %d: can't allocate %s resource "
-			"[%#llx-%#llx\n]", resno,
-			res->flags & IORESOURCE_IO ? "I/O" : "mem",
-			(unsigned long long)res->start,
-			(unsigned long long)res->end);
+		dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n",
+			resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res);
 	} else if (resno < PCI_BRIDGE_RESOURCES) {
 		pci_update_resource(dev, res, resno);
 	}
@@ -237,9 +226,8 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
 		r_align = resource_alignment(r);
 		if (!r_align) {
 			dev_warn(&dev->dev, "BAR %d: bogus alignment "
-				"[%#llx-%#llx] flags %#lx\n",
-				i, (unsigned long long)r->start,
-				(unsigned long long)r->end, r->flags);
+				"%pR flags %#lx\n",
+				i, r, r->flags);
 			continue;
 		}
 		for (list = head; ; list = list->next) {
@@ -287,9 +275,7 @@ int pci_enable_resources(struct pci_dev *dev, int mask)
 
 		if (!r->parent) {
 			dev_err(&dev->dev, "device not available because of "
-				"BAR %d [%#llx-%#llx] collisions\n", i,
-				(unsigned long long) r->start,
-				(unsigned long long) r->end);
+				"BAR %d %pR collisions\n", i, r);
 			return -EINVAL;
 		}
 
-- 
GitLab


From 252883e512c6d8fbc03b6738f1620fda44c4d472 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 17 Oct 2008 20:28:25 +0100
Subject: [PATCH 835/895] epca: Add infinite break support

The EPCA can support indefinte break lengths and with info from digi that
can now be added

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/epca.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index 4998b2761e8f..cf2461d34e5f 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -2477,7 +2477,11 @@ static int pc_send_break(struct tty_struct *tty, int msec)
 	unsigned long flags;
 
 	if (msec == -1)
-		return -EOPNOTSUPP;
+		msec = 0xFFFF;
+	else if (msec > 0xFFFE)
+		msec = 0xFFFE;
+	else if (msec < 1)
+		msec = 1;
 
 	spin_lock_irqsave(&epca_lock, flags);
 	globalwinon(ch);
-- 
GitLab


From edbc25caaa492a82e19baa915f1f6b0a0db6554d Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Thu, 10 Jul 2008 16:29:37 -0500
Subject: [PATCH 836/895] PCI: remove dynids.use_driver_data

The driver flag dynids.use_driver_data is almost consistently not set,
and causes more problems than it solves.  It was initially intended as a
flag to indicate whether a driver's usage of driver_data had been
carefully inspected and was ready for values from userspace.  That audit
was never done, so most drivers just get a 0 for driver_data when new
IDs are added from userspace via sysfs.  So remove the flag, allowing
drivers to see the data directly (a followon patch validates the passed
driver_data value against what the drivers expect).

Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/i2c/busses/i2c-amd756.c | 1 -
 drivers/i2c/busses/i2c-viapro.c | 1 -
 drivers/pci/pci-driver.c        | 3 +--
 drivers/scsi/ipr.c              | 1 -
 include/linux/pci.h             | 1 -
 5 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c
index 1ea39254dac6..a3542b053c8e 100644
--- a/drivers/i2c/busses/i2c-amd756.c
+++ b/drivers/i2c/busses/i2c-amd756.c
@@ -412,7 +412,6 @@ static struct pci_driver amd756_driver = {
 	.id_table	= amd756_ids,
 	.probe		= amd756_probe,
 	.remove		= __devexit_p(amd756_remove),
-	.dynids.use_driver_data = 1,
 };
 
 static int __init amd756_init(void)
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 73dc52e114eb..2324780484c0 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -483,7 +483,6 @@ static struct pci_driver vt596_driver = {
 	.name		= "vt596_smbus",
 	.id_table	= vt596_ids,
 	.probe		= vt596_probe,
-	.dynids.use_driver_data = 1,
 };
 
 static int __init i2c_vt596_init(void)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index a13f53486114..4940a53c56a3 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -65,8 +65,7 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
 	dynid->id.subdevice = subdevice;
 	dynid->id.class = class;
 	dynid->id.class_mask = class_mask;
-	dynid->id.driver_data = pdrv->dynids.use_driver_data ?
-		driver_data : 0UL;
+	dynid->id.driver_data = driver_data;
 
 	spin_lock(&pdrv->dynids.lock);
 	list_add_tail(&dynid->node, &pdrv->dynids.list);
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index d30eb7ba018e..098739deb02e 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -7859,7 +7859,6 @@ static struct pci_driver ipr_driver = {
 	.remove = ipr_remove,
 	.shutdown = ipr_shutdown,
 	.err_handler = &ipr_err_handler,
-	.dynids.use_driver_data = 1
 };
 
 /**
diff --git a/include/linux/pci.h b/include/linux/pci.h
index acf8f24037cd..c989f58d09bf 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -347,7 +347,6 @@ struct pci_bus_region {
 struct pci_dynids {
 	spinlock_t lock;            /* protects list, index */
 	struct list_head list;      /* for IDs added at runtime */
-	unsigned int use_driver_data:1; /* pci_device_id->driver_data is used */
 };
 
 /* ---------------------------------------------------------------- */
-- 
GitLab


From b41d6cf38e27a940d998d989526a9748de1bf028 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 17 Aug 2008 21:06:59 +0200
Subject: [PATCH 837/895] PCI: Check dynids driver_data value for validity

Only accept dynids whose driver_data value matches one of the driver's
pci_driver_id entries. This prevents the user from accidentally passing
values the drivers do not expect.

Cc: Milton Miller <miltonm@bga.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/PCI/pci.txt       |  4 ++++
 drivers/i2c/busses/i2c-amd756.c |  4 ----
 drivers/i2c/busses/i2c-viapro.c |  4 ----
 drivers/pci/pci-driver.c        | 18 ++++++++++++++++--
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
index 8d4dc6250c58..fd4907a2968c 100644
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -163,6 +163,10 @@ need pass only as many optional fields as necessary:
 	o class and classmask fields default to 0
 	o driver_data defaults to 0UL.
 
+Note that driver_data must match the value used by any of the pci_device_id
+entries defined in the driver. This makes the driver_data field mandatory
+if all the pci_device_id entries have a non-zero driver_data value.
+
 Once added, the driver probe routine will be invoked for any unclaimed
 PCI devices listed in its (newly updated) pci_ids list.
 
diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c
index a3542b053c8e..424dad6f18d8 100644
--- a/drivers/i2c/busses/i2c-amd756.c
+++ b/drivers/i2c/busses/i2c-amd756.c
@@ -332,10 +332,6 @@ static int __devinit amd756_probe(struct pci_dev *pdev,
 	int error;
 	u8 temp;
 	
-	/* driver_data might come from user-space, so check it */
-	if (id->driver_data >= ARRAY_SIZE(chipname))
-		return -EINVAL;
-
 	if (amd756_ioport) {
 		dev_err(&pdev->dev, "Only one device supported "
 		       "(you have a strange motherboard, btw)\n");
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 2324780484c0..9f194d9efd91 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -332,10 +332,6 @@ static int __devinit vt596_probe(struct pci_dev *pdev,
 	unsigned char temp;
 	int error = -ENODEV;
 
-	/* driver_data might come from user-space, so check it */
-	if (id->driver_data & 1 || id->driver_data > 0xff)
-		return -EINVAL;
-
 	/* Determine the address of the SMBus areas */
 	if (force_addr) {
 		vt596_smba = force_addr & 0xfff0;
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 4940a53c56a3..b4cdd690ae71 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -43,18 +43,32 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
 {
 	struct pci_dynid *dynid;
 	struct pci_driver *pdrv = to_pci_driver(driver);
+	const struct pci_device_id *ids = pdrv->id_table;
 	__u32 vendor, device, subvendor=PCI_ANY_ID,
 		subdevice=PCI_ANY_ID, class=0, class_mask=0;
 	unsigned long driver_data=0;
 	int fields=0;
-	int retval = 0;
+	int retval;
 
-	fields = sscanf(buf, "%x %x %x %x %x %x %lux",
+	fields = sscanf(buf, "%x %x %x %x %x %x %lx",
 			&vendor, &device, &subvendor, &subdevice,
 			&class, &class_mask, &driver_data);
 	if (fields < 2)
 		return -EINVAL;
 
+	/* Only accept driver_data values that match an existing id_table
+	   entry */
+	retval = -EINVAL;
+	while (ids->vendor || ids->subvendor || ids->class_mask) {
+		if (driver_data == ids->driver_data) {
+			retval = 0;
+			break;
+		}
+		ids++;
+	}
+	if (retval)	/* No match */
+		return retval;
+
 	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
 	if (!dynid)
 		return -ENOMEM;
-- 
GitLab


From 3d137310245e4cdc3e8c8ba1bea2e145a87ae8e3 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@enix.org>
Date: Tue, 19 Aug 2008 10:28:24 +0200
Subject: [PATCH 838/895] PCI: allow quirks to be compiled out

This patch adds the CONFIG_PCI_QUIRKS option which allows to remove all
the PCI quirks, which are not necessarily used on embedded systems when
PCI is working properly. As this is a size-reduction option, it depends
on CONFIG_EMBEDDED. It allows to save almost 12 kilobytes of kernel
code:

   text	   data	    bss	    dec	    hex	filename
1287806	 123596	 212992	1624394	 18c94a	vmlinux.old
1275854	 123596	 212992	1612442	 189a9a	vmlinux
 -11952       0       0  -11952   -2EB0 +/-

This patch has originally been written by Zwane Mwaikambo
<zwane@arm.linux.org.uk> and is part of the Linux Tiny project.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/quirks.c | 176 ++++++++++++++++++++++---------------------
 init/Kconfig         |   8 ++
 2 files changed, 98 insertions(+), 86 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index e872ac925b4b..246918fe9482 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -24,6 +24,14 @@
 #include <linux/kallsyms.h>
 #include "pci.h"
 
+int isa_dma_bridge_buggy;
+EXPORT_SYMBOL(isa_dma_bridge_buggy);
+int pci_pci_problems;
+EXPORT_SYMBOL(pci_pci_problems);
+int pcie_mch_quirk;
+EXPORT_SYMBOL(pcie_mch_quirk);
+
+#ifdef CONFIG_PCI_QUIRKS
 /* The Mellanox Tavor device gives false positive parity errors
  * Mark this device with a broken_parity_status, to allow
  * PCI scanning code to "skip" this now blacklisted device.
@@ -62,8 +70,6 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82441,	quirk_p
     
     This appears to be BIOS not version dependent. So presumably there is a 
     chipset level fix */
-int isa_dma_bridge_buggy;
-EXPORT_SYMBOL(isa_dma_bridge_buggy);
     
 static void __devinit quirk_isa_dma_hangs(struct pci_dev *dev)
 {
@@ -84,9 +90,6 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_1,	quirk_isa_d
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_2,	quirk_isa_dma_hangs);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_3,	quirk_isa_dma_hangs);
 
-int pci_pci_problems;
-EXPORT_SYMBOL(pci_pci_problems);
-
 /*
  *	Chipsets where PCI->PCI transfers vanish or hang
  */
@@ -1362,9 +1365,6 @@ static void __init quirk_alder_ioapic(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_EESSC,	quirk_alder_ioapic);
 #endif
 
-int pcie_mch_quirk;
-EXPORT_SYMBOL(pcie_mch_quirk);
-
 static void __devinit quirk_pcie_mch(struct pci_dev *pdev)
 {
 	pcie_mch_quirk = 1;
@@ -1555,84 +1555,6 @@ static void __devinit fixup_rev1_53c810(struct pci_dev* dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, fixup_rev1_53c810);
 
-static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end)
-{
-	while (f < end) {
-		if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
- 		    (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
-#ifdef DEBUG
-			dev_dbg(&dev->dev, "calling %pF\n", f->hook);
-#endif
-			f->hook(dev);
-		}
-		f++;
-	}
-}
-
-extern struct pci_fixup __start_pci_fixups_early[];
-extern struct pci_fixup __end_pci_fixups_early[];
-extern struct pci_fixup __start_pci_fixups_header[];
-extern struct pci_fixup __end_pci_fixups_header[];
-extern struct pci_fixup __start_pci_fixups_final[];
-extern struct pci_fixup __end_pci_fixups_final[];
-extern struct pci_fixup __start_pci_fixups_enable[];
-extern struct pci_fixup __end_pci_fixups_enable[];
-extern struct pci_fixup __start_pci_fixups_resume[];
-extern struct pci_fixup __end_pci_fixups_resume[];
-extern struct pci_fixup __start_pci_fixups_resume_early[];
-extern struct pci_fixup __end_pci_fixups_resume_early[];
-extern struct pci_fixup __start_pci_fixups_suspend[];
-extern struct pci_fixup __end_pci_fixups_suspend[];
-
-
-void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
-{
-	struct pci_fixup *start, *end;
-
-	switch(pass) {
-	case pci_fixup_early:
-		start = __start_pci_fixups_early;
-		end = __end_pci_fixups_early;
-		break;
-
-	case pci_fixup_header:
-		start = __start_pci_fixups_header;
-		end = __end_pci_fixups_header;
-		break;
-
-	case pci_fixup_final:
-		start = __start_pci_fixups_final;
-		end = __end_pci_fixups_final;
-		break;
-
-	case pci_fixup_enable:
-		start = __start_pci_fixups_enable;
-		end = __end_pci_fixups_enable;
-		break;
-
-	case pci_fixup_resume:
-		start = __start_pci_fixups_resume;
-		end = __end_pci_fixups_resume;
-		break;
-
-	case pci_fixup_resume_early:
-		start = __start_pci_fixups_resume_early;
-		end = __end_pci_fixups_resume_early;
-		break;
-
-	case pci_fixup_suspend:
-		start = __start_pci_fixups_suspend;
-		end = __end_pci_fixups_suspend;
-		break;
-
-	default:
-		/* stupid compiler warning, you would think with an enum... */
-		return;
-	}
-	pci_do_fixups(dev, start, end);
-}
-EXPORT_SYMBOL(pci_fixup_device);
-
 /* Enable 1k I/O space granularity on the Intel P64H2 */
 static void __devinit quirk_p64h2_1k_io(struct pci_dev *dev)
 {
@@ -2006,3 +1928,85 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4375,
 			quirk_msi_intx_disable_bug);
 
 #endif /* CONFIG_PCI_MSI */
+
+static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end)
+{
+	while (f < end) {
+		if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
+ 		    (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
+#ifdef DEBUG
+			dev_dbg(&dev->dev, "calling ");
+			print_fn_descriptor_symbol("%s\n", f->hook);
+#endif
+			f->hook(dev);
+		}
+		f++;
+	}
+}
+
+extern struct pci_fixup __start_pci_fixups_early[];
+extern struct pci_fixup __end_pci_fixups_early[];
+extern struct pci_fixup __start_pci_fixups_header[];
+extern struct pci_fixup __end_pci_fixups_header[];
+extern struct pci_fixup __start_pci_fixups_final[];
+extern struct pci_fixup __end_pci_fixups_final[];
+extern struct pci_fixup __start_pci_fixups_enable[];
+extern struct pci_fixup __end_pci_fixups_enable[];
+extern struct pci_fixup __start_pci_fixups_resume[];
+extern struct pci_fixup __end_pci_fixups_resume[];
+extern struct pci_fixup __start_pci_fixups_resume_early[];
+extern struct pci_fixup __end_pci_fixups_resume_early[];
+extern struct pci_fixup __start_pci_fixups_suspend[];
+extern struct pci_fixup __end_pci_fixups_suspend[];
+
+
+void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
+{
+	struct pci_fixup *start, *end;
+
+	switch(pass) {
+	case pci_fixup_early:
+		start = __start_pci_fixups_early;
+		end = __end_pci_fixups_early;
+		break;
+
+	case pci_fixup_header:
+		start = __start_pci_fixups_header;
+		end = __end_pci_fixups_header;
+		break;
+
+	case pci_fixup_final:
+		start = __start_pci_fixups_final;
+		end = __end_pci_fixups_final;
+		break;
+
+	case pci_fixup_enable:
+		start = __start_pci_fixups_enable;
+		end = __end_pci_fixups_enable;
+		break;
+
+	case pci_fixup_resume:
+		start = __start_pci_fixups_resume;
+		end = __end_pci_fixups_resume;
+		break;
+
+	case pci_fixup_resume_early:
+		start = __start_pci_fixups_resume_early;
+		end = __end_pci_fixups_resume_early;
+		break;
+
+	case pci_fixup_suspend:
+		start = __start_pci_fixups_suspend;
+		end = __end_pci_fixups_suspend;
+		break;
+
+	default:
+		/* stupid compiler warning, you would think with an enum... */
+		return;
+	}
+	pci_do_fixups(dev, start, end);
+}
+#else
+void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) {}
+#endif
+EXPORT_SYMBOL(pci_fixup_device);
diff --git a/init/Kconfig b/init/Kconfig
index 8828ed0b2051..06330a305249 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -737,6 +737,14 @@ config VM_EVENT_COUNTERS
 	  on EMBEDDED systems.  /proc/vmstat will only show page counts
 	  if VM event counters are disabled.
 
+config PCI_QUIRKS
+	default y
+	bool "Enable PCI quirk workarounds" if EMBEDDED && PCI
+	help
+	  This enables workarounds for various PCI chipset
+          bugs/quirks. Disable this only if your target machine is
+          unaffected by PCI quirks.
+
 config SLUB_DEBUG
 	default y
 	bool "Enable SLUB debugging support" if EMBEDDED
-- 
GitLab


From 0235c4fc7fc6f621dc0dd89eba102ad5aa373390 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 18 Aug 2008 21:38:00 +0200
Subject: [PATCH 839/895] PCI PM: Introduce function pci_wake_from_d3

Many device drivers use the following sequence of statements to enable
the device to wake up the system while being in the D3_hot or D3_cold
low power state:

        pci_enable_wake(pdev, PCI_D3hot, 1);
        pci_enable_wake(pdev, PCI_D3cold, 1);

However, the second call is not necessary if the first one succeeds (the
ordering of the statements above doesn't matter here) and it may even be
harmful, because we are not supposed to enable PME# after the wake-up
power has been enabled for the device.

To allow drivers to overcome this problem, introduce function
pci_wake_from_d3() that will enable the device to wake up the system
from any of D3_hot and D3_cold as long as the wake-up from at least one
of them is supported.

Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c   | 22 ++++++++++++++++++++++
 include/linux/pci.h |  1 +
 2 files changed, 23 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index dbe9f39f4436..2797112c9400 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1126,6 +1126,27 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
 	return pme_done ? 0 : error;
 }
 
+/**
+ * pci_wake_from_d3 - enable/disable device to wake up from D3_hot or D3_cold
+ * @dev: PCI device to prepare
+ * @enable: True to enable wake-up event generation; false to disable
+ *
+ * Many drivers want the device to wake up the system from D3_hot or D3_cold
+ * and this function allows them to set that up cleanly - pci_enable_wake()
+ * should not be called twice in a row to enable wake-up due to PCI PM vs ACPI
+ * ordering constraints.
+ *
+ * This function only returns error code if the device is not capable of
+ * generating PME# from both D3_hot and D3_cold, and the platform is unable to
+ * enable wake-up power for it.
+ */
+int pci_wake_from_d3(struct pci_dev *dev, bool enable)
+{
+	return pci_pme_capable(dev, PCI_D3cold) ?
+			pci_enable_wake(dev, PCI_D3cold, enable) :
+			pci_enable_wake(dev, PCI_D3hot, enable);
+}
+
 /**
  * pci_target_state - find an appropriate low power state for a given PCI dev
  * @dev: PCI device
@@ -1942,6 +1963,7 @@ EXPORT_SYMBOL(pci_restore_state);
 EXPORT_SYMBOL(pci_pme_capable);
 EXPORT_SYMBOL(pci_pme_active);
 EXPORT_SYMBOL(pci_enable_wake);
+EXPORT_SYMBOL(pci_wake_from_d3);
 EXPORT_SYMBOL(pci_target_state);
 EXPORT_SYMBOL(pci_prepare_to_sleep);
 EXPORT_SYMBOL(pci_back_from_sleep);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c989f58d09bf..f7e7dbc09194 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -644,6 +644,7 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
 bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
 void pci_pme_active(struct pci_dev *dev, bool enable);
 int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable);
+int pci_wake_from_d3(struct pci_dev *dev, bool enable);
 pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
-- 
GitLab


From 16dbef4a831782466b10d4ae56837c5ba17d1948 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Fri, 15 Aug 2008 19:36:45 -0700
Subject: [PATCH 840/895] PCI: change MSI-x vector to 32bit

We are using 28bit pci (bus/dev/fn + 12 bits) as irq number, so the
cache for irq number should be 32 bit too.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/scsi/qla2xxx/qla_def.h | 2 +-
 include/linux/pci.h            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 83c819216771..f25f41a499e5 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2108,7 +2108,7 @@ struct scsi_qla_host;
 
 struct qla_msix_entry {
 	int have_irq;
-	uint16_t msix_vector;
+	uint32_t msix_vector;
 	uint16_t msix_entry;
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f7e7dbc09194..8a4d0bebc311 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -725,7 +725,7 @@ enum pci_dma_burst_strategy {
 };
 
 struct msix_entry {
-	u16 	vector;	/* kernel uses to write allocated vector */
+	u32	vector;	/* kernel uses to write allocated vector */
 	u16	entry;	/* driver uses to specify entry, OS writes */
 };
 
-- 
GitLab


From d768cb6929773060171eee8397a63883f60ddc07 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 25 Aug 2008 15:44:59 -0600
Subject: [PATCH 841/895] x86/PCI: follow lspci device/vendor style

Use "[%04x:%04x]" for PCI vendor/device IDs to follow the format
used by lspci(8).

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/irq.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 006599db0dc7..52a1de1128c1 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -493,7 +493,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
 	if (pirq <= 4)
 		irq = read_config_nybble(router, 0x56, pirq - 1);
 	dev_info(&dev->dev,
-		 "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n",
+		 "AMD756: dev [%04x:%04x], router PIRQ %d get IRQ %d\n",
 		 dev->vendor, dev->device, pirq, irq);
 	return irq;
 }
@@ -501,7 +501,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
 static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
 	dev_info(&dev->dev,
-		 "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n",
+		 "AMD756: dev [%04x:%04x], router PIRQ %d set IRQ %d\n",
 		 dev->vendor, dev->device, pirq, irq);
 	if (pirq <= 4)
 		write_config_nybble(router, 0x56, pirq - 1, irq);
@@ -823,7 +823,7 @@ static void __init pirq_find_router(struct irq_router *r)
 	r->get = NULL;
 	r->set = NULL;
 
-	DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
+	DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n",
 	    rt->rtr_vendor, rt->rtr_device);
 
 	pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
@@ -843,7 +843,7 @@ static void __init pirq_find_router(struct irq_router *r)
 			h->probe(r, pirq_router_dev, pirq_router_dev->device))
 			break;
 	}
-	dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n",
+	dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
 		 pirq_router.name,
 		 pirq_router_dev->vendor, pirq_router_dev->device);
 
-- 
GitLab


From 34a2e15e95fce6d6f4d30162f53a0ceb25d5bbaf Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 25 Aug 2008 15:45:20 -0600
Subject: [PATCH 842/895] PCI: follow lspci device/vendor style

Use "[%04x:%04x]" for PCI vendor/device IDs to follow the format
used by lspci(8).

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pcie/portdrv_pci.c | 2 +-
 drivers/pci/probe.c            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 367c9c20000d..584422da8d8b 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -91,7 +91,7 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev,
 	
 	pci_set_master(dev);
         if (!dev->irq && dev->pin) {
-		dev_warn(&dev->dev, "device [%04x/%04x] has invalid IRQ; "
+		dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; "
 			 "check vendor BIOS\n", dev->vendor, dev->device);
 	}
 	if (pcie_port_device_register(dev)) {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index d3db8b249729..578d15f49e02 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -762,7 +762,7 @@ static int pci_setup_device(struct pci_dev * dev)
 	dev->class = class;
 	class >>= 8;
 
-	dev_dbg(&dev->dev, "found [%04x/%04x] class %06x header type %02x\n",
+	dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n",
 		 dev->vendor, dev->device, class, dev->hdr_type);
 
 	/* "Unknown power state" */
-- 
GitLab


From a8d2dbd38481e0c35c6bdd8196dd38a42ae45d02 Mon Sep 17 00:00:00 2001
From: "akpm@linux-foundation.org" <akpm@linux-foundation.org>
Date: Fri, 22 Aug 2008 13:28:17 -0700
Subject: [PATCH 843/895] PCI: ibmphp: list_for_each to list_for_each_entry

Make code more readable with list_for_each_entry().

Signed-off-by: Cordelia Sam <cordesam@gmail.com>
Cc: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/ibmphp_ebda.c | 92 ++++++++-----------------------
 1 file changed, 23 insertions(+), 69 deletions(-)

diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 7d27631e6e62..1bc08a5b2c7d 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -123,10 +123,8 @@ static struct ebda_pci_rsrc *alloc_ebda_pci_rsrc (void)
 static void __init print_bus_info (void)
 {
 	struct bus_info *ptr;
-	struct list_head *ptr1;
 	
-	list_for_each (ptr1, &bus_info_head) {
-		ptr = list_entry (ptr1, struct bus_info, bus_info_list);
+	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
 		debug ("%s - slot_min = %x\n", __func__, ptr->slot_min);
 		debug ("%s - slot_max = %x\n", __func__, ptr->slot_max);
 		debug ("%s - slot_count = %x\n", __func__, ptr->slot_count);
@@ -146,10 +144,8 @@ static void __init print_bus_info (void)
 static void print_lo_info (void)
 {
 	struct rio_detail *ptr;
-	struct list_head *ptr1;
 	debug ("print_lo_info ----\n");	
-	list_for_each (ptr1, &rio_lo_head) {
-		ptr = list_entry (ptr1, struct rio_detail, rio_detail_list);
+	list_for_each_entry (ptr, &rio_lo_head, rio_detail_list) {
 		debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
 		debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
 		debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -163,10 +159,8 @@ static void print_lo_info (void)
 static void print_vg_info (void)
 {
 	struct rio_detail *ptr;
-	struct list_head *ptr1;
 	debug ("%s ---\n", __func__);
-	list_for_each (ptr1, &rio_vg_head) {
-		ptr = list_entry (ptr1, struct rio_detail, rio_detail_list);
+	list_for_each_entry (ptr, &rio_vg_head, rio_detail_list) {
 		debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
 		debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
 		debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -180,10 +174,8 @@ static void print_vg_info (void)
 static void __init print_ebda_pci_rsrc (void)
 {
 	struct ebda_pci_rsrc *ptr;
-	struct list_head *ptr1;
 
-	list_for_each (ptr1, &ibmphp_ebda_pci_rsrc_head) {
-		ptr = list_entry (ptr1, struct ebda_pci_rsrc, ebda_pci_rsrc_list);
+	list_for_each_entry (ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) {
 		debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", 
 			__func__, ptr->rsrc_type ,ptr->bus_num, ptr->dev_fun,ptr->start_addr, ptr->end_addr);
 	}
@@ -192,10 +184,8 @@ static void __init print_ebda_pci_rsrc (void)
 static void __init print_ibm_slot (void)
 {
 	struct slot *ptr;
-	struct list_head *ptr1;
 
-	list_for_each (ptr1, &ibmphp_slot_head) {
-		ptr = list_entry (ptr1, struct slot, ibm_slot_list);
+	list_for_each_entry (ptr, &ibmphp_slot_head, ibm_slot_list) {
 		debug ("%s - slot_number: %x\n", __func__, ptr->number);
 	}
 }
@@ -203,10 +193,8 @@ static void __init print_ibm_slot (void)
 static void __init print_opt_vg (void)
 {
 	struct opt_rio *ptr;
-	struct list_head *ptr1;
 	debug ("%s ---\n", __func__);
-	list_for_each (ptr1, &opt_vg_head) {
-		ptr = list_entry (ptr1, struct opt_rio, opt_rio_list);
+	list_for_each_entry (ptr, &opt_vg_head, opt_rio_list) {
 		debug ("%s - rio_type %x\n", __func__, ptr->rio_type);
 		debug ("%s - chassis_num: %x\n", __func__, ptr->chassis_num);
 		debug ("%s - first_slot_num: %x\n", __func__, ptr->first_slot_num);
@@ -217,13 +205,9 @@ static void __init print_opt_vg (void)
 static void __init print_ebda_hpc (void)
 {
 	struct controller *hpc_ptr;
-	struct list_head *ptr1;
 	u16 index;
 
-	list_for_each (ptr1, &ebda_hpc_head) {
-
-		hpc_ptr = list_entry (ptr1, struct controller, ebda_hpc_list); 
-
+	list_for_each_entry (hpc_ptr, &ebda_hpc_head, ebda_hpc_list) {
 		for (index = 0; index < hpc_ptr->slot_count; index++) {
 			debug ("%s - physical slot#: %x\n", __func__, hpc_ptr->slots[index].slot_num);
 			debug ("%s - pci bus# of the slot: %x\n", __func__, hpc_ptr->slots[index].slot_bus_num);
@@ -460,9 +444,7 @@ static int __init ebda_rio_table (void)
 static struct opt_rio *search_opt_vg (u8 chassis_num)
 {
 	struct opt_rio *ptr;
-	struct list_head *ptr1;
-	list_for_each (ptr1, &opt_vg_head) {
-		ptr = list_entry (ptr1, struct opt_rio, opt_rio_list);
+	list_for_each_entry (ptr, &opt_vg_head, opt_rio_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
 	}		
@@ -473,10 +455,8 @@ static int __init combine_wpg_for_chassis (void)
 {
 	struct opt_rio *opt_rio_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
-	struct list_head *list_head_ptr = NULL;
 	
-	list_for_each (list_head_ptr, &rio_vg_head) {
-		rio_detail_ptr = list_entry (list_head_ptr, struct rio_detail, rio_detail_list);
+	list_for_each_entry (rio_detail_ptr, &rio_vg_head, rio_detail_list) {
 		opt_rio_ptr = search_opt_vg (rio_detail_ptr->chassis_num);
 		if (!opt_rio_ptr) {
 			opt_rio_ptr = kzalloc(sizeof(struct opt_rio), GFP_KERNEL);
@@ -497,14 +477,12 @@ static int __init combine_wpg_for_chassis (void)
 }	
 
 /*
- * reorgnizing linked list of expansion box	 
+ * reorganizing linked list of expansion box
  */
 static struct opt_rio_lo *search_opt_lo (u8 chassis_num)
 {
 	struct opt_rio_lo *ptr;
-	struct list_head *ptr1;
-	list_for_each (ptr1, &opt_lo_head) {
-		ptr = list_entry (ptr1, struct opt_rio_lo, opt_rio_lo_list);
+	list_for_each_entry (ptr, &opt_lo_head, opt_rio_lo_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
 	}		
@@ -515,10 +493,8 @@ static int combine_wpg_for_expansion (void)
 {
 	struct opt_rio_lo *opt_rio_lo_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
-	struct list_head *list_head_ptr = NULL;
 	
-	list_for_each (list_head_ptr, &rio_lo_head) {
-		rio_detail_ptr = list_entry (list_head_ptr, struct rio_detail, rio_detail_list);
+	list_for_each_entry (rio_detail_ptr, &rio_lo_head, rio_detail_list) {
 		opt_rio_lo_ptr = search_opt_lo (rio_detail_ptr->chassis_num);
 		if (!opt_rio_lo_ptr) {
 			opt_rio_lo_ptr = kzalloc(sizeof(struct opt_rio_lo), GFP_KERNEL);
@@ -550,20 +526,17 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 {
 	struct opt_rio *opt_vg_ptr = NULL;
 	struct opt_rio_lo *opt_lo_ptr = NULL;
-	struct list_head *ptr = NULL;
 	int rc = 0;
 
 	if (!var) {
-		list_for_each (ptr, &opt_vg_head) {
-			opt_vg_ptr = list_entry (ptr, struct opt_rio, opt_rio_list);
+		list_for_each_entry (opt_vg_ptr, &opt_vg_head, opt_rio_list) {
 			if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { 
 				rc = -ENODEV;
 				break;
 			}
 		}
 	} else {
-		list_for_each (ptr, &opt_lo_head) {
-			opt_lo_ptr = list_entry (ptr, struct opt_rio_lo, opt_rio_lo_list);
+		list_for_each_entry (opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
 			if ((first_slot < opt_lo_ptr->first_slot_num) && (slot_num >= opt_lo_ptr->first_slot_num)) {
 				rc = -ENODEV;
 				break;
@@ -576,10 +549,8 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 static struct opt_rio_lo * find_rxe_num (u8 slot_num)
 {
 	struct opt_rio_lo *opt_lo_ptr;
-	struct list_head *ptr;
 
-	list_for_each (ptr, &opt_lo_head) {
-		opt_lo_ptr = list_entry (ptr, struct opt_rio_lo, opt_rio_lo_list);
+	list_for_each_entry (opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
 		//check to see if this slot_num belongs to expansion box
 		if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) 
 			return opt_lo_ptr;
@@ -590,10 +561,8 @@ static struct opt_rio_lo * find_rxe_num (u8 slot_num)
 static struct opt_rio * find_chassis_num (u8 slot_num)
 {
 	struct opt_rio *opt_vg_ptr;
-	struct list_head *ptr;
 
-	list_for_each (ptr, &opt_vg_head) {
-		opt_vg_ptr = list_entry (ptr, struct opt_rio, opt_rio_list);
+	list_for_each_entry (opt_vg_ptr, &opt_vg_head, opt_rio_list) {
 		//check to see if this slot_num belongs to chassis 
 		if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) 
 			return opt_vg_ptr;
@@ -607,11 +576,9 @@ static struct opt_rio * find_chassis_num (u8 slot_num)
 static u8 calculate_first_slot (u8 slot_num)
 {
 	u8 first_slot = 1;
-	struct list_head * list;
 	struct slot * slot_cur;
 	
-	list_for_each (list, &ibmphp_slot_head) {
-		slot_cur = list_entry (list, struct slot, ibm_slot_list);
+	list_for_each_entry (slot_cur, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot_cur->ctrl) {
 			if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) 
 				first_slot = slot_cur->ctrl->ending_slot_num;
@@ -767,7 +734,6 @@ static int __init ebda_rsrc_controller (void)
 	struct bus_info *bus_info_ptr1, *bus_info_ptr2;
 	int rc;
 	struct slot *tmp_slot;
-	struct list_head *list;
 
 	addr = hpc_list_ptr->phys_addr;
 	for (ctlr = 0; ctlr < hpc_list_ptr->num_ctlrs; ctlr++) {
@@ -997,9 +963,7 @@ static int __init ebda_rsrc_controller (void)
 
 	}			/* each hpc  */
 
-	list_for_each (list, &ibmphp_slot_head) {
-		tmp_slot = list_entry (list, struct slot, ibm_slot_list);
-
+	list_for_each_entry (tmp_slot, &ibmphp_slot_head, ibm_slot_list) {
 		snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot));
 		pci_hp_register(tmp_slot->hotplug_slot,
 			pci_find_bus(0, tmp_slot->bus), tmp_slot->device);
@@ -1101,10 +1065,8 @@ u16 ibmphp_get_total_controllers (void)
 struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
 {
 	struct slot *slot;
-	struct list_head *list;
 
-	list_for_each (list, &ibmphp_slot_head) {
-		slot = list_entry (list, struct slot, ibm_slot_list);
+	list_for_each_entry (slot, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot->number == physical_num)
 			return slot;
 	}
@@ -1120,10 +1082,8 @@ struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
 struct bus_info *ibmphp_find_same_bus_num (u32 num)
 {
 	struct bus_info *ptr;
-	struct list_head  *ptr1;
 
-	list_for_each (ptr1, &bus_info_head) {
-		ptr = list_entry (ptr1, struct bus_info, bus_info_list); 
+	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
 		if (ptr->busno == num) 
 			 return ptr;
 	}
@@ -1136,10 +1096,8 @@ struct bus_info *ibmphp_find_same_bus_num (u32 num)
 int ibmphp_get_bus_index (u8 num)
 {
 	struct bus_info *ptr;
-	struct list_head  *ptr1;
 
-	list_for_each (ptr1, &bus_info_head) {
-		ptr = list_entry (ptr1, struct bus_info, bus_info_list);
+	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
 		if (ptr->busno == num)  
 			return ptr->index;
 	}
@@ -1212,11 +1170,9 @@ static struct pci_driver ibmphp_driver = {
 int ibmphp_register_pci (void)
 {
 	struct controller *ctrl;
-	struct list_head *tmp;
 	int rc = 0;
 
-	list_for_each (tmp, &ebda_hpc_head) {
-		ctrl = list_entry (tmp, struct controller, ebda_hpc_list);
+	list_for_each_entry (ctrl, &ebda_hpc_head, ebda_hpc_list) {
 		if (ctrl->ctlr_type == 1) {
 			rc = pci_register_driver(&ibmphp_driver);
 			break;
@@ -1227,12 +1183,10 @@ int ibmphp_register_pci (void)
 static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids)
 {
 	struct controller *ctrl;
-	struct list_head *tmp;
 
 	debug ("inside ibmphp_probe\n");
 	
-	list_for_each (tmp, &ebda_hpc_head) {
-		ctrl = list_entry (tmp, struct controller, ebda_hpc_list);
+	list_for_each_entry (ctrl, &ebda_hpc_head, ebda_hpc_list) {
 		if (ctrl->ctlr_type == 1) {
 			if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) {
 				ctrl->ctrl_dev = dev;
-- 
GitLab


From 2fd39aa7c2f3d696814abb3e8962c759eee484f3 Mon Sep 17 00:00:00 2001
From: "akpm@linux-foundation.org" <akpm@linux-foundation.org>
Date: Fri, 22 Aug 2008 13:30:14 -0700
Subject: [PATCH 844/895] PCI: ibmphp: list_for_each to
 list_for_each_entry-checkpatch cleanups

Please run checkpatch prior to sending patches, this one fixes several style
issues with the list_for_each conversion patch.

Cc: Cordelia Sam <cordesam@gmail.com>
Cc: Cordelia Sam <cordsam@linux.vnet.ibm.com>
Cc: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/ibmphp_ebda.c | 44 +++++++++++++++----------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 1bc08a5b2c7d..8cfd1c4926c8 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -124,7 +124,7 @@ static void __init print_bus_info (void)
 {
 	struct bus_info *ptr;
 	
-	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
 		debug ("%s - slot_min = %x\n", __func__, ptr->slot_min);
 		debug ("%s - slot_max = %x\n", __func__, ptr->slot_max);
 		debug ("%s - slot_count = %x\n", __func__, ptr->slot_count);
@@ -145,7 +145,7 @@ static void print_lo_info (void)
 {
 	struct rio_detail *ptr;
 	debug ("print_lo_info ----\n");	
-	list_for_each_entry (ptr, &rio_lo_head, rio_detail_list) {
+	list_for_each_entry(ptr, &rio_lo_head, rio_detail_list) {
 		debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
 		debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
 		debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -160,7 +160,7 @@ static void print_vg_info (void)
 {
 	struct rio_detail *ptr;
 	debug ("%s ---\n", __func__);
-	list_for_each_entry (ptr, &rio_vg_head, rio_detail_list) {
+	list_for_each_entry(ptr, &rio_vg_head, rio_detail_list) {
 		debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
 		debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
 		debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -175,7 +175,7 @@ static void __init print_ebda_pci_rsrc (void)
 {
 	struct ebda_pci_rsrc *ptr;
 
-	list_for_each_entry (ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) {
+	list_for_each_entry(ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) {
 		debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", 
 			__func__, ptr->rsrc_type ,ptr->bus_num, ptr->dev_fun,ptr->start_addr, ptr->end_addr);
 	}
@@ -185,7 +185,7 @@ static void __init print_ibm_slot (void)
 {
 	struct slot *ptr;
 
-	list_for_each_entry (ptr, &ibmphp_slot_head, ibm_slot_list) {
+	list_for_each_entry(ptr, &ibmphp_slot_head, ibm_slot_list) {
 		debug ("%s - slot_number: %x\n", __func__, ptr->number);
 	}
 }
@@ -194,7 +194,7 @@ static void __init print_opt_vg (void)
 {
 	struct opt_rio *ptr;
 	debug ("%s ---\n", __func__);
-	list_for_each_entry (ptr, &opt_vg_head, opt_rio_list) {
+	list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
 		debug ("%s - rio_type %x\n", __func__, ptr->rio_type);
 		debug ("%s - chassis_num: %x\n", __func__, ptr->chassis_num);
 		debug ("%s - first_slot_num: %x\n", __func__, ptr->first_slot_num);
@@ -207,7 +207,7 @@ static void __init print_ebda_hpc (void)
 	struct controller *hpc_ptr;
 	u16 index;
 
-	list_for_each_entry (hpc_ptr, &ebda_hpc_head, ebda_hpc_list) {
+	list_for_each_entry(hpc_ptr, &ebda_hpc_head, ebda_hpc_list) {
 		for (index = 0; index < hpc_ptr->slot_count; index++) {
 			debug ("%s - physical slot#: %x\n", __func__, hpc_ptr->slots[index].slot_num);
 			debug ("%s - pci bus# of the slot: %x\n", __func__, hpc_ptr->slots[index].slot_bus_num);
@@ -444,7 +444,7 @@ static int __init ebda_rio_table (void)
 static struct opt_rio *search_opt_vg (u8 chassis_num)
 {
 	struct opt_rio *ptr;
-	list_for_each_entry (ptr, &opt_vg_head, opt_rio_list) {
+	list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
 	}		
@@ -456,7 +456,7 @@ static int __init combine_wpg_for_chassis (void)
 	struct opt_rio *opt_rio_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
 	
-	list_for_each_entry (rio_detail_ptr, &rio_vg_head, rio_detail_list) {
+	list_for_each_entry(rio_detail_ptr, &rio_vg_head, rio_detail_list) {
 		opt_rio_ptr = search_opt_vg (rio_detail_ptr->chassis_num);
 		if (!opt_rio_ptr) {
 			opt_rio_ptr = kzalloc(sizeof(struct opt_rio), GFP_KERNEL);
@@ -482,7 +482,7 @@ static int __init combine_wpg_for_chassis (void)
 static struct opt_rio_lo *search_opt_lo (u8 chassis_num)
 {
 	struct opt_rio_lo *ptr;
-	list_for_each_entry (ptr, &opt_lo_head, opt_rio_lo_list) {
+	list_for_each_entry(ptr, &opt_lo_head, opt_rio_lo_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
 	}		
@@ -494,7 +494,7 @@ static int combine_wpg_for_expansion (void)
 	struct opt_rio_lo *opt_rio_lo_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
 	
-	list_for_each_entry (rio_detail_ptr, &rio_lo_head, rio_detail_list) {
+	list_for_each_entry(rio_detail_ptr, &rio_lo_head, rio_detail_list) {
 		opt_rio_lo_ptr = search_opt_lo (rio_detail_ptr->chassis_num);
 		if (!opt_rio_lo_ptr) {
 			opt_rio_lo_ptr = kzalloc(sizeof(struct opt_rio_lo), GFP_KERNEL);
@@ -529,14 +529,14 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 	int rc = 0;
 
 	if (!var) {
-		list_for_each_entry (opt_vg_ptr, &opt_vg_head, opt_rio_list) {
+		list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
 			if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { 
 				rc = -ENODEV;
 				break;
 			}
 		}
 	} else {
-		list_for_each_entry (opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
+		list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
 			if ((first_slot < opt_lo_ptr->first_slot_num) && (slot_num >= opt_lo_ptr->first_slot_num)) {
 				rc = -ENODEV;
 				break;
@@ -550,7 +550,7 @@ static struct opt_rio_lo * find_rxe_num (u8 slot_num)
 {
 	struct opt_rio_lo *opt_lo_ptr;
 
-	list_for_each_entry (opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
+	list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
 		//check to see if this slot_num belongs to expansion box
 		if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) 
 			return opt_lo_ptr;
@@ -562,7 +562,7 @@ static struct opt_rio * find_chassis_num (u8 slot_num)
 {
 	struct opt_rio *opt_vg_ptr;
 
-	list_for_each_entry (opt_vg_ptr, &opt_vg_head, opt_rio_list) {
+	list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
 		//check to see if this slot_num belongs to chassis 
 		if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) 
 			return opt_vg_ptr;
@@ -578,7 +578,7 @@ static u8 calculate_first_slot (u8 slot_num)
 	u8 first_slot = 1;
 	struct slot * slot_cur;
 	
-	list_for_each_entry (slot_cur, &ibmphp_slot_head, ibm_slot_list) {
+	list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot_cur->ctrl) {
 			if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) 
 				first_slot = slot_cur->ctrl->ending_slot_num;
@@ -963,7 +963,7 @@ static int __init ebda_rsrc_controller (void)
 
 	}			/* each hpc  */
 
-	list_for_each_entry (tmp_slot, &ibmphp_slot_head, ibm_slot_list) {
+	list_for_each_entry(tmp_slot, &ibmphp_slot_head, ibm_slot_list) {
 		snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot));
 		pci_hp_register(tmp_slot->hotplug_slot,
 			pci_find_bus(0, tmp_slot->bus), tmp_slot->device);
@@ -1066,7 +1066,7 @@ struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
 {
 	struct slot *slot;
 
-	list_for_each_entry (slot, &ibmphp_slot_head, ibm_slot_list) {
+	list_for_each_entry(slot, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot->number == physical_num)
 			return slot;
 	}
@@ -1083,7 +1083,7 @@ struct bus_info *ibmphp_find_same_bus_num (u32 num)
 {
 	struct bus_info *ptr;
 
-	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
 		if (ptr->busno == num) 
 			 return ptr;
 	}
@@ -1097,7 +1097,7 @@ int ibmphp_get_bus_index (u8 num)
 {
 	struct bus_info *ptr;
 
-	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
 		if (ptr->busno == num)  
 			return ptr->index;
 	}
@@ -1172,7 +1172,7 @@ int ibmphp_register_pci (void)
 	struct controller *ctrl;
 	int rc = 0;
 
-	list_for_each_entry (ctrl, &ebda_hpc_head, ebda_hpc_list) {
+	list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
 		if (ctrl->ctlr_type == 1) {
 			rc = pci_register_driver(&ibmphp_driver);
 			break;
@@ -1186,7 +1186,7 @@ static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids)
 
 	debug ("inside ibmphp_probe\n");
 	
-	list_for_each_entry (ctrl, &ebda_hpc_head, ebda_hpc_list) {
+	list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
 		if (ctrl->ctlr_type == 1) {
 			if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) {
 				ctrl->ctrl_dev = dev;
-- 
GitLab


From c9ed77eeba8ec2541a40918210bcc676acacd43a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Fri, 22 Aug 2008 09:37:02 -0600
Subject: [PATCH 845/895] PCI: tidy PME support messages

This patch changes these two messages:

    pci 0000:00:03.0: supports D1
    pci 0000:00:03.0: supports D2

to this:

    pci 0000:00:03.0: supports D1 D2

It also trivially converts a "dev_printk(KERN_INFO, ...)" to
"dev_info(...)".

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 2797112c9400..e1a17b85447b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1263,25 +1263,25 @@ void pci_pm_init(struct pci_dev *dev)
 	dev->d1_support = false;
 	dev->d2_support = false;
 	if (!pci_no_d1d2(dev)) {
-		if (pmc & PCI_PM_CAP_D1) {
-			dev_printk(KERN_DEBUG, &dev->dev, "supports D1\n");
+		if (pmc & PCI_PM_CAP_D1)
 			dev->d1_support = true;
-		}
-		if (pmc & PCI_PM_CAP_D2) {
-			dev_printk(KERN_DEBUG, &dev->dev, "supports D2\n");
+		if (pmc & PCI_PM_CAP_D2)
 			dev->d2_support = true;
-		}
+
+		if (dev->d1_support || dev->d2_support)
+			dev_printk(KERN_DEBUG, &dev->dev, "supports%s%s\n",
+				   dev->d1_support ? " D1": "",
+				   dev->d2_support ? " D2": "");
 	}
 
 	pmc &= PCI_PM_CAP_PME_MASK;
 	if (pmc) {
-		dev_printk(KERN_INFO, &dev->dev,
-			"PME# supported from%s%s%s%s%s\n",
-			(pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
-			(pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
-			(pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
-			(pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "",
-			(pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
+		dev_info(&dev->dev, "PME# supported from%s%s%s%s%s\n",
+			 (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
+			 (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
+			 (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
+			 (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "",
+			 (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
 		dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT;
 		/*
 		 * Make device's PM flags reflect the wake-up capability, but
-- 
GitLab


From c01156061bdd5976397dfb173f8c70ae351a6cb6 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 22 Aug 2008 09:53:39 +0200
Subject: [PATCH 846/895] PCI: Document that most pci options are shared
 between i386 and x86-64

Since the code is shared pretty much most of the pci= options are shared,
but kernel-parameters.txt marked most of them as i386 only.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/kernel-parameters.txt | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0f1544f67400..08fbc8372ba4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -101,6 +101,7 @@ parameter is applicable:
 	X86-64	X86-64 architecture is enabled.
 			More X86-64 boot options can be found in
 			Documentation/x86_64/boot-options.txt .
+	X86	Either 32bit or 64bit x86 (same as X86-32+X86-64)
 
 In addition, the following text indicates that the option:
 
@@ -1588,7 +1589,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			See also Documentation/paride.txt.
 
 	pci=option[,option...]	[PCI] various PCI subsystem options:
-		off		[X86-32] don't probe for the PCI bus
+		off		[X86] don't probe for the PCI bus
 		bios		[X86-32] force use of PCI BIOS, don't access
 				the hardware directly. Use this if your machine
 				has a non-standard PCI host bridge.
@@ -1596,9 +1597,9 @@ and is between 256 and 4096 characters. It is defined in the file
 				hardware access methods are allowed. Use this
 				if you experience crashes upon bootup and you
 				suspect they are caused by the BIOS.
-		conf1		[X86-32] Force use of PCI Configuration
+		conf1		[X86] Force use of PCI Configuration
 				Mechanism 1.
-		conf2		[X86-32] Force use of PCI Configuration
+		conf2		[X86] Force use of PCI Configuration
 				Mechanism 2.
 		noaer		[PCIE] If the PCIEAER kernel config parameter is
 				enabled, this kernel boot option can be used to
@@ -1618,37 +1619,37 @@ and is between 256 and 4096 characters. It is defined in the file
 				this option if the kernel is unable to allocate
 				IRQs or discover secondary PCI buses on your
 				motherboard.
-		rom		[X86-32] Assign address space to expansion ROMs.
+		rom		[X86] Assign address space to expansion ROMs.
 				Use with caution as certain devices share
 				address decoders between ROMs and other
 				resources.
-		norom		[X86-32,X86_64] Do not assign address space to
+		norom		[X86] Do not assign address space to
 				expansion ROMs that do not already have
 				BIOS assigned address ranges.
-		irqmask=0xMMMM	[X86-32] Set a bit mask of IRQs allowed to be
+		irqmask=0xMMMM	[X86] Set a bit mask of IRQs allowed to be
 				assigned automatically to PCI devices. You can
 				make the kernel exclude IRQs of your ISA cards
 				this way.
-		pirqaddr=0xAAAAA	[X86-32] Specify the physical address
+		pirqaddr=0xAAAAA	[X86] Specify the physical address
 				of the PIRQ table (normally generated
 				by the BIOS) if it is outside the
 				F0000h-100000h range.
-		lastbus=N	[X86-32] Scan all buses thru bus #N. Can be
+		lastbus=N	[X86] Scan all buses thru bus #N. Can be
 				useful if the kernel is unable to find your
 				secondary buses and you want to tell it
 				explicitly which ones they are.
-		assign-busses	[X86-32] Always assign all PCI bus
+		assign-busses	[X86] Always assign all PCI bus
 				numbers ourselves, overriding
 				whatever the firmware may have done.
-		usepirqmask	[X86-32] Honor the possible IRQ mask stored
+		usepirqmask	[X86] Honor the possible IRQ mask stored
 				in the BIOS $PIR table. This is needed on
 				some systems with broken BIOSes, notably
 				some HP Pavilion N5400 and Omnibook XE3
 				notebooks. This will have no effect if ACPI
 				IRQ routing is enabled.
-		noacpi		[X86-32] Do not use ACPI for IRQ routing
+		noacpi		[X86] Do not use ACPI for IRQ routing
 				or for PCI scanning.
-		use_crs		[X86-32] Use _CRS for PCI resource
+		use_crs		[X86] Use _CRS for PCI resource
 				allocation.
 		routeirq	Do IRQ routing for all PCI devices.
 				This is normally done in pci_enable_device(),
-- 
GitLab


From f7a10e32a1a7ae240fa3925c5727d224eba3e31d Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Fri, 22 Aug 2008 17:16:48 +0900
Subject: [PATCH 847/895] PCI: pciehp: fix irq initialization

Current pciehp driver gets irq number from pci_dev->irq. But because
pciehp driver is a pci express port service driver, it should get irq
number from pcie_device->irq.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pciehp.h     | 1 +
 drivers/pci/hotplug/pciehp_hpc.c | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 9e6cec67e1cc..217427a0ead9 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -87,6 +87,7 @@ struct controller {
 	int num_slots;			/* Number of slots on ctlr */
 	int slot_num_inc;		/* 1 or -1 */
 	struct pci_dev *pci_dev;
+	struct pcie_device *pcie;	/* PCI Express port service */
 	struct list_head slot_list;
 	struct hpc_ops *hpc_ops;
 	wait_queue_head_t queue;	/* sleep & wake process */
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 9d934ddee956..5a5c08f12af2 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -223,7 +223,7 @@ static void start_int_poll_timer(struct controller *ctrl, int sec)
 
 static inline int pciehp_request_irq(struct controller *ctrl)
 {
-	int retval, irq = ctrl->pci_dev->irq;
+	int retval, irq = ctrl->pcie->irq;
 
 	/* Install interrupt polling timer. Start with 10 sec delay */
 	if (pciehp_poll_mode) {
@@ -244,7 +244,7 @@ static inline void pciehp_free_irq(struct controller *ctrl)
 	if (pciehp_poll_mode)
 		del_timer_sync(&ctrl->poll_timer);
 	else
-		free_irq(ctrl->pci_dev->irq, ctrl);
+		free_irq(ctrl->pcie->irq, ctrl);
 }
 
 static int pcie_poll_cmd(struct controller *ctrl)
@@ -1114,6 +1114,7 @@ struct controller *pcie_init(struct pcie_device *dev)
 	}
 	INIT_LIST_HEAD(&ctrl->slot_list);
 
+	ctrl->pcie = dev;
 	ctrl->pci_dev = pdev;
 	ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
 	if (!ctrl->cap_base) {
-- 
GitLab


From 37a84ec668ba251ae02cf2c2c664baf6b247ae1f Mon Sep 17 00:00:00 2001
From: Seth Heasley <seth.heasley@intel.com>
Date: Thu, 28 Aug 2008 15:40:59 -0700
Subject: [PATCH 848/895] x86/PCI: irq and pci_ids patch for Intel Ibex Peak
 DeviceIDs

This patch updates the Intel Ibex Peak (PCH) LPC and SMBus Controller
DeviceIDs.

The LPC Controller ID is set by Firmware within the range of
0x3b00-3b1f.  This range is included in pci_ids.h using min and max
values, and irq.c now has code to handle the range (in lieu of 32
additions to a SWITCH statement).

The SMBus Controller ID is a fixed-value and will not change.

Signed-off-by: Seth Heasley <seth.heasley@intel.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/irq.c      | 11 +++++++++--
 include/linux/pci_ids.h |  6 +++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 52a1de1128c1..bf69dbe08bff 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -590,13 +590,20 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
 	case PCI_DEVICE_ID_INTEL_ICH10_1:
 	case PCI_DEVICE_ID_INTEL_ICH10_2:
 	case PCI_DEVICE_ID_INTEL_ICH10_3:
-	case PCI_DEVICE_ID_INTEL_PCH_0:
-	case PCI_DEVICE_ID_INTEL_PCH_1:
 		r->name = "PIIX/ICH";
 		r->get = pirq_piix_get;
 		r->set = pirq_piix_set;
 		return 1;
 	}
+
+	if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) && 
+		(device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) {
+		r->name = "PIIX/ICH";
+		r->get = pirq_piix_get;
+		r->set = pirq_piix_set;
+		return 1;
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 8edddc240e4f..e5d344bfcb7e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2454,9 +2454,9 @@
 #define PCI_DEVICE_ID_INTEL_ICH10_3	0x3a1a
 #define PCI_DEVICE_ID_INTEL_ICH10_4	0x3a30
 #define PCI_DEVICE_ID_INTEL_ICH10_5	0x3a60
-#define PCI_DEVICE_ID_INTEL_PCH_0	0x3b10
-#define PCI_DEVICE_ID_INTEL_PCH_1	0x3b11
-#define PCI_DEVICE_ID_INTEL_PCH_2	0x3b30
+#define PCI_DEVICE_ID_INTEL_PCH_LPC_MIN	0x3b00
+#define PCI_DEVICE_ID_INTEL_PCH_LPC_MAX	0x3b1f
+#define PCI_DEVICE_ID_INTEL_PCH_SMBUS	0x3b30
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
 #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
-- 
GitLab


From 83e9ad540b9ee23919961f9500ca254220b78d09 Mon Sep 17 00:00:00 2001
From: Taku Izumi <izumi.taku@jp.fujitsu.com>
Date: Fri, 5 Sep 2008 12:09:43 +0900
Subject: [PATCH 849/895] PCI: pciehp: change name tag of "hpdriver_portdrv"
 variable

I think an appropriate name tag of "hpdriver_portdrv" variable
is "pciehp" rather than "hpdriver".

Signed-off-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pciehp_core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 4fd5355bc3b5..3c8581e597c5 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -497,10 +497,9 @@ static struct pcie_port_service_id port_pci_ids[] = { {
 	.driver_data =	0,
 	}, { /* end: all zeroes */ }
 };
-static const char device_name[] = "hpdriver";
 
 static struct pcie_port_service_driver hpdriver_portdrv = {
-	.name		= (char *)device_name,
+	.name		= PCIE_MODULE_NAME,
 	.id_table	= &port_pci_ids[0],
 
 	.probe		= pciehp_probe,
-- 
GitLab


From 7f2feec140f1f1e4f701e013a2bf8284a9ec2a3c Mon Sep 17 00:00:00 2001
From: Taku Izumi <izumi.taku@jp.fujitsu.com>
Date: Fri, 5 Sep 2008 12:11:26 +0900
Subject: [PATCH 850/895] PCI: pciehp: replace printk with dev_printk

This patch replaces printks within pciehp module with dev_printks.

Signed-off-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pciehp.h      |  15 ++-
 drivers/pci/hotplug/pciehp_core.c |  75 +++++++-----
 drivers/pci/hotplug/pciehp_ctrl.c | 136 ++++++++++++---------
 drivers/pci/hotplug/pciehp_hpc.c  | 197 +++++++++++++++++-------------
 drivers/pci/hotplug/pciehp_pci.c  |  26 ++--
 5 files changed, 263 insertions(+), 186 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 217427a0ead9..c367978bd7fe 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -57,6 +57,19 @@ extern struct workqueue_struct *pciehp_wq;
 #define warn(format, arg...)						\
 	printk(KERN_WARNING "%s: " format, MY_NAME , ## arg)
 
+#define ctrl_dbg(ctrl, format, arg...)					\
+	do {								\
+		if (pciehp_debug)					\
+			dev_printk(, &ctrl->pcie->device,		\
+					format, ## arg);		\
+	} while (0)
+#define ctrl_err(ctrl, format, arg...)					\
+	dev_err(&ctrl->pcie->device, format, ## arg)
+#define ctrl_info(ctrl, format, arg...)					\
+	dev_info(&ctrl->pcie->device, format, ## arg)
+#define ctrl_warn(ctrl, format, arg...)					\
+	dev_warn(&ctrl->pcie->device, format, ## arg)
+
 #define SLOT_NAME_SIZE 10
 struct slot {
 	u8 bus;
@@ -171,7 +184,7 @@ static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device)
 			return slot;
 	}
 
-	err("%s: slot (device=0x%x) not found\n", __func__, device);
+	ctrl_err(ctrl, "%s: slot (device=0x%x) not found\n", __func__, device);
 	return NULL;
 }
 
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 3c8581e597c5..c748a19db89d 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -144,9 +144,10 @@ set_lock_exit:
  * sysfs interface which allows the user to toggle the Electro Mechanical
  * Interlock.  Valid values are either 0 or 1.  0 == unlock, 1 == lock
  */
-static ssize_t lock_write_file(struct hotplug_slot *slot, const char *buf,
-		size_t count)
+static ssize_t lock_write_file(struct hotplug_slot *hotplug_slot,
+		const char *buf, size_t count)
 {
+	struct slot *slot = hotplug_slot->private;
 	unsigned long llock;
 	u8 lock;
 	int retval = 0;
@@ -157,10 +158,11 @@ static ssize_t lock_write_file(struct hotplug_slot *slot, const char *buf,
 	switch (lock) {
 		case 0:
 		case 1:
-			retval = set_lock_status(slot, lock);
+			retval = set_lock_status(hotplug_slot, lock);
 			break;
 		default:
-			err ("%d is an invalid lock value\n", lock);
+			ctrl_err(slot->ctrl, "%d is an invalid lock value\n",
+				 lock);
 			retval = -EINVAL;
 	}
 	if (retval)
@@ -180,7 +182,10 @@ static struct hotplug_slot_attribute hotplug_slot_attr_lock = {
  */
 static void release_slot(struct hotplug_slot *hotplug_slot)
 {
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	struct slot *slot = hotplug_slot->private;
+
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	kfree(hotplug_slot->info);
 	kfree(hotplug_slot);
@@ -215,9 +220,9 @@ static int init_slots(struct controller *ctrl)
 		get_adapter_status(hotplug_slot, &info->adapter_status);
 		slot->hotplug_slot = hotplug_slot;
 
-		dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x "
-		    "slot_device_offset=%x\n", slot->bus, slot->device,
-		    slot->hp_slot, slot->number, ctrl->slot_device_offset);
+		ctrl_dbg(ctrl, "Registering bus=%x dev=%x hp_slot=%x sun=%x "
+			 "slot_device_offset=%x\n", slot->bus, slot->device,
+			 slot->hp_slot, slot->number, ctrl->slot_device_offset);
 duplicate_name:
 		retval = pci_hp_register(hotplug_slot,
 					 ctrl->pci_dev->subordinate,
@@ -233,9 +238,11 @@ duplicate_name:
 				if (len < SLOT_NAME_SIZE)
 					goto duplicate_name;
 				else
-					err("duplicate slot name overflow\n");
+					ctrl_err(ctrl, "duplicate slot name "
+						 "overflow\n");
 			}
-			err("pci_hp_register failed with error %d\n", retval);
+			ctrl_err(ctrl, "pci_hp_register failed with error %d\n",
+				 retval);
 			goto error_info;
 		}
 		/* create additional sysfs entries */
@@ -244,7 +251,8 @@ duplicate_name:
 				&hotplug_slot_attr_lock.attr);
 			if (retval) {
 				pci_hp_deregister(hotplug_slot);
-				err("cannot create additional sysfs entries\n");
+				ctrl_err(ctrl, "cannot create additional sysfs "
+					 "entries\n");
 				goto error_info;
 			}
 		}
@@ -278,7 +286,8 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 {
 	struct slot *slot = hotplug_slot->private;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	hotplug_slot->info->attention_status = status;
 
@@ -293,7 +302,8 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
 {
 	struct slot *slot = hotplug_slot->private;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	return pciehp_sysfs_enable_slot(slot);
 }
@@ -303,7 +313,8 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
 {
 	struct slot *slot = hotplug_slot->private;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	return pciehp_sysfs_disable_slot(slot);
 }
@@ -313,7 +324,8 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_power_status(slot, value);
 	if (retval < 0)
@@ -327,7 +339,8 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_attention_status(slot, value);
 	if (retval < 0)
@@ -341,7 +354,8 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_latch_status(slot, value);
 	if (retval < 0)
@@ -355,7 +369,8 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_adapter_status(slot, value);
 	if (retval < 0)
@@ -370,7 +385,8 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_max_bus_speed(slot, value);
 	if (retval < 0)
@@ -384,7 +400,8 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_cur_bus_speed(slot, value);
 	if (retval < 0)
@@ -402,14 +419,15 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
 	struct pci_dev *pdev = dev->port;
 
 	if (pciehp_force)
-		dbg("Bypassing BIOS check for pciehp use on %s\n",
-		    pci_name(pdev));
+		dev_info(&dev->device,
+			 "Bypassing BIOS check for pciehp use on %s\n",
+			 pci_name(pdev));
 	else if (pciehp_get_hp_hw_control_from_firmware(pdev))
 		goto err_out_none;
 
 	ctrl = pcie_init(dev);
 	if (!ctrl) {
-		dbg("%s: controller initialization failed\n", PCIE_MODULE_NAME);
+		dev_err(&dev->device, "controller initialization failed\n");
 		goto err_out_none;
 	}
 	set_service_data(dev, ctrl);
@@ -418,11 +436,10 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
 	rc = init_slots(ctrl);
 	if (rc) {
 		if (rc == -EBUSY)
-			warn("%s: slot already registered by another "
-				"hotplug driver\n", PCIE_MODULE_NAME);
+			ctrl_warn(ctrl, "slot already registered by another "
+				  "hotplug driver\n");
 		else
-			err("%s: slot initialization failed\n",
-				PCIE_MODULE_NAME);
+			ctrl_err(ctrl, "slot initialization failed\n");
 		goto err_out_release_ctlr;
 	}
 
@@ -461,13 +478,13 @@ static void pciehp_remove (struct pcie_device *dev)
 #ifdef CONFIG_PM
 static int pciehp_suspend (struct pcie_device *dev, pm_message_t state)
 {
-	printk("%s ENTRY\n", __func__);
+	dev_info(&dev->device, "%s ENTRY\n", __func__);
 	return 0;
 }
 
 static int pciehp_resume (struct pcie_device *dev)
 {
-	printk("%s ENTRY\n", __func__);
+	dev_info(&dev->device, "%s ENTRY\n", __func__);
 	if (pciehp_force) {
 		struct controller *ctrl = get_service_data(dev);
 		struct slot *t_slot;
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 96a5d55a4983..acb7f9efd182 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -58,14 +58,15 @@ static int queue_interrupt_event(struct slot *p_slot, u32 event_type)
 u8 pciehp_handle_attention_button(struct slot *p_slot)
 {
 	u32 event_type;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Attention Button Change */
-	dbg("pciehp:  Attention button interrupt received.\n");
+	ctrl_dbg(ctrl, "Attention button interrupt received.\n");
 
 	/*
 	 *  Button pressed - See if need to TAKE ACTION!!!
 	 */
-	info("Button pressed on Slot(%s)\n", p_slot->name);
+	ctrl_info(ctrl, "Button pressed on Slot(%s)\n", p_slot->name);
 	event_type = INT_BUTTON_PRESS;
 
 	queue_interrupt_event(p_slot, event_type);
@@ -77,22 +78,23 @@ u8 pciehp_handle_switch_change(struct slot *p_slot)
 {
 	u8 getstatus;
 	u32 event_type;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Switch Change */
-	dbg("pciehp:  Switch interrupt received.\n");
+	ctrl_dbg(ctrl, "Switch interrupt received.\n");
 
 	p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
 	if (getstatus) {
 		/*
 		 * Switch opened
 		 */
-		info("Latch open on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Latch open on Slot(%s)\n", p_slot->name);
 		event_type = INT_SWITCH_OPEN;
 	} else {
 		/*
 		 *  Switch closed
 		 */
-		info("Latch close on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Latch close on Slot(%s)\n", p_slot->name);
 		event_type = INT_SWITCH_CLOSE;
 	}
 
@@ -105,9 +107,10 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
 {
 	u32 event_type;
 	u8 presence_save;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Presence Change */
-	dbg("pciehp:  Presence/Notify input change.\n");
+	ctrl_dbg(ctrl, "Presence/Notify input change.\n");
 
 	/* Switch is open, assume a presence change
 	 * Save the presence state
@@ -117,13 +120,13 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
 		/*
 		 * Card Present
 		 */
-		info("Card present on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Card present on Slot(%s)\n", p_slot->name);
 		event_type = INT_PRESENCE_ON;
 	} else {
 		/*
 		 * Not Present
 		 */
-		info("Card not present on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Card not present on Slot(%s)\n", p_slot->name);
 		event_type = INT_PRESENCE_OFF;
 	}
 
@@ -135,23 +138,25 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
 u8 pciehp_handle_power_fault(struct slot *p_slot)
 {
 	u32 event_type;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* power fault */
-	dbg("pciehp:  Power fault interrupt received.\n");
+	ctrl_dbg(ctrl, "Power fault interrupt received.\n");
 
 	if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) {
 		/*
 		 * power fault Cleared
 		 */
-		info("Power fault cleared on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Power fault cleared on Slot(%s)\n",
+			  p_slot->name);
 		event_type = INT_POWER_FAULT_CLEAR;
 	} else {
 		/*
 		 *   power fault
 		 */
-		info("Power fault on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Power fault on Slot(%s)\n", p_slot->name);
 		event_type = INT_POWER_FAULT;
-		info("power fault bit %x set\n", 0);
+		ctrl_info(ctrl, "power fault bit %x set\n", 0);
 	}
 
 	queue_interrupt_event(p_slot, event_type);
@@ -168,8 +173,9 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
 	/* turn off slot, turn on Amber LED, turn off Green LED if supported*/
 	if (POWER_CTRL(ctrl)) {
 		if (pslot->hpc_ops->power_off_slot(pslot)) {
-			err("%s: Issue of Slot Power Off command failed\n",
-			    __func__);
+			ctrl_err(ctrl,
+				 "%s: Issue of Slot Power Off command failed\n",
+				 __func__);
 			return;
 		}
 	}
@@ -186,8 +192,8 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
 
 	if (ATTN_LED(ctrl)) {
 		if (pslot->hpc_ops->set_attention_status(pslot, 1)) {
-			err("%s: Issue of Set Attention Led command failed\n",
-			    __func__);
+			ctrl_err(ctrl, "%s: Issue of Set Attention "
+				 "Led command failed\n", __func__);
 			return;
 		}
 	}
@@ -205,9 +211,9 @@ static int board_added(struct slot *p_slot)
 	int retval = 0;
 	struct controller *ctrl = p_slot->ctrl;
 
-	dbg("%s: slot device, slot offset, hp slot = %d, %d ,%d\n",
-			__func__, p_slot->device,
-			ctrl->slot_device_offset, p_slot->hp_slot);
+	ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d ,%d\n",
+		 __func__, p_slot->device, ctrl->slot_device_offset,
+		 p_slot->hp_slot);
 
 	if (POWER_CTRL(ctrl)) {
 		/* Power on slot */
@@ -225,22 +231,22 @@ static int board_added(struct slot *p_slot)
 	/* Check link training status */
 	retval = p_slot->hpc_ops->check_lnk_status(ctrl);
 	if (retval) {
-		err("%s: Failed to check link status\n", __func__);
+		ctrl_err(ctrl, "%s: Failed to check link status\n", __func__);
 		set_slot_off(ctrl, p_slot);
 		return retval;
 	}
 
 	/* Check for a power fault */
 	if (p_slot->hpc_ops->query_power_fault(p_slot)) {
-		dbg("%s: power fault detected\n", __func__);
+		ctrl_dbg(ctrl, "%s: power fault detected\n", __func__);
 		retval = POWER_FAILURE;
 		goto err_exit;
 	}
 
 	retval = pciehp_configure_device(p_slot);
 	if (retval) {
-		err("Cannot add device 0x%x:%x\n", p_slot->bus,
-		    p_slot->device);
+		ctrl_err(ctrl, "Cannot add device 0x%x:%x\n",
+			 p_slot->bus, p_slot->device);
 		goto err_exit;
 	}
 
@@ -272,14 +278,14 @@ static int remove_board(struct slot *p_slot)
 	if (retval)
 		return retval;
 
-	dbg("In %s, hp_slot = %d\n", __func__, p_slot->hp_slot);
+	ctrl_dbg(ctrl, "In %s, hp_slot = %d\n", __func__, p_slot->hp_slot);
 
 	if (POWER_CTRL(ctrl)) {
 		/* power off slot */
 		retval = p_slot->hpc_ops->power_off_slot(p_slot);
 		if (retval) {
-			err("%s: Issue of Slot Disable command failed\n",
-			    __func__);
+			ctrl_err(ctrl, "%s: Issue of Slot Disable command "
+				 "failed\n", __func__);
 			return retval;
 		}
 	}
@@ -320,8 +326,8 @@ static void pciehp_power_thread(struct work_struct *work)
 	switch (p_slot->state) {
 	case POWEROFF_STATE:
 		mutex_unlock(&p_slot->lock);
-		dbg("%s: disabling bus:device(%x:%x)\n",
-		    __func__, p_slot->bus, p_slot->device);
+		ctrl_dbg(p_slot->ctrl, "%s: disabling bus:device(%x:%x)\n",
+			 __func__, p_slot->bus, p_slot->device);
 		pciehp_disable_slot(p_slot);
 		mutex_lock(&p_slot->lock);
 		p_slot->state = STATIC_STATE;
@@ -349,7 +355,8 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
 
 	info = kmalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
-		err("%s: Cannot allocate memory\n", __func__);
+		ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n",
+			 __func__);
 		return;
 	}
 	info->p_slot = p_slot;
@@ -403,12 +410,14 @@ static void handle_button_press_event(struct slot *p_slot)
 		p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
 		if (getstatus) {
 			p_slot->state = BLINKINGOFF_STATE;
-			info("PCI slot #%s - powering off due to button "
-			     "press.\n", p_slot->name);
+			ctrl_info(ctrl,
+				  "PCI slot #%s - powering off due to button "
+				  "press.\n", p_slot->name);
 		} else {
 			p_slot->state = BLINKINGON_STATE;
-			info("PCI slot #%s - powering on due to button "
-			     "press.\n", p_slot->name);
+			ctrl_info(ctrl,
+				  "PCI slot #%s - powering on due to button "
+				  "press.\n", p_slot->name);
 		}
 		/* blink green LED and turn off amber */
 		if (PWR_LED(ctrl))
@@ -425,8 +434,8 @@ static void handle_button_press_event(struct slot *p_slot)
 		 * press the attention again before the 5 sec. limit
 		 * expires to cancel hot-add or hot-remove
 		 */
-		info("Button cancel on Slot(%s)\n", p_slot->name);
-		dbg("%s: button cancel\n", __func__);
+		ctrl_info(ctrl, "Button cancel on Slot(%s)\n", p_slot->name);
+		ctrl_dbg(ctrl, "%s: button cancel\n", __func__);
 		cancel_delayed_work(&p_slot->work);
 		if (p_slot->state == BLINKINGOFF_STATE) {
 			if (PWR_LED(ctrl))
@@ -437,8 +446,8 @@ static void handle_button_press_event(struct slot *p_slot)
 		}
 		if (ATTN_LED(ctrl))
 			p_slot->hpc_ops->set_attention_status(p_slot, 0);
-		info("PCI slot #%s - action canceled due to button press\n",
-		     p_slot->name);
+		ctrl_info(ctrl, "PCI slot #%s - action canceled "
+			  "due to button press\n", p_slot->name);
 		p_slot->state = STATIC_STATE;
 		break;
 	case POWEROFF_STATE:
@@ -448,11 +457,11 @@ static void handle_button_press_event(struct slot *p_slot)
 		 * this means that the previous attention button action
 		 * to hot-add or hot-remove is undergoing
 		 */
-		info("Button ignore on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Button ignore on Slot(%s)\n", p_slot->name);
 		update_slot_info(p_slot);
 		break;
 	default:
-		warn("Not a valid state\n");
+		ctrl_warn(ctrl, "Not a valid state\n");
 		break;
 	}
 }
@@ -467,7 +476,8 @@ static void handle_surprise_event(struct slot *p_slot)
 
 	info = kmalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
-		err("%s: Cannot allocate memory\n", __func__);
+		ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n",
+			 __func__);
 		return;
 	}
 	info->p_slot = p_slot;
@@ -505,7 +515,7 @@ static void interrupt_event_handler(struct work_struct *work)
 	case INT_PRESENCE_OFF:
 		if (!HP_SUPR_RM(ctrl))
 			break;
-		dbg("Surprise Removal\n");
+		ctrl_dbg(ctrl, "Surprise Removal\n");
 		update_slot_info(p_slot);
 		handle_surprise_event(p_slot);
 		break;
@@ -522,22 +532,23 @@ int pciehp_enable_slot(struct slot *p_slot)
 {
 	u8 getstatus = 0;
 	int rc;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Check to see if (latch closed, card present, power off) */
 	mutex_lock(&p_slot->ctrl->crit_sect);
 
 	rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
 	if (rc || !getstatus) {
-		info("%s: no adapter on slot(%s)\n", __func__,
-		     p_slot->name);
+		ctrl_info(ctrl, "%s: no adapter on slot(%s)\n",
+			  __func__, p_slot->name);
 		mutex_unlock(&p_slot->ctrl->crit_sect);
 		return -ENODEV;
 	}
 	if (MRL_SENS(p_slot->ctrl)) {
 		rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
 		if (rc || getstatus) {
-			info("%s: latch open on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: latch open on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -ENODEV;
 		}
@@ -546,8 +557,8 @@ int pciehp_enable_slot(struct slot *p_slot)
 	if (POWER_CTRL(p_slot->ctrl)) {
 		rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
 		if (rc || getstatus) {
-			info("%s: already enabled on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: already enabled on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -EINVAL;
 		}
@@ -571,6 +582,7 @@ int pciehp_disable_slot(struct slot *p_slot)
 {
 	u8 getstatus = 0;
 	int ret = 0;
+	struct controller *ctrl = p_slot->ctrl;
 
 	if (!p_slot->ctrl)
 		return 1;
@@ -581,8 +593,8 @@ int pciehp_disable_slot(struct slot *p_slot)
 	if (!HP_SUPR_RM(p_slot->ctrl)) {
 		ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
 		if (ret || !getstatus) {
-			info("%s: no adapter on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: no adapter on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -ENODEV;
 		}
@@ -591,8 +603,8 @@ int pciehp_disable_slot(struct slot *p_slot)
 	if (MRL_SENS(p_slot->ctrl)) {
 		ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
 		if (ret || getstatus) {
-			info("%s: latch open on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: latch open on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -ENODEV;
 		}
@@ -601,8 +613,8 @@ int pciehp_disable_slot(struct slot *p_slot)
 	if (POWER_CTRL(p_slot->ctrl)) {
 		ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
 		if (ret || !getstatus) {
-			info("%s: already disabled slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: already disabled slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -EINVAL;
 		}
@@ -618,6 +630,7 @@ int pciehp_disable_slot(struct slot *p_slot)
 int pciehp_sysfs_enable_slot(struct slot *p_slot)
 {
 	int retval = -ENODEV;
+	struct controller *ctrl = p_slot->ctrl;
 
 	mutex_lock(&p_slot->lock);
 	switch (p_slot->state) {
@@ -631,15 +644,15 @@ int pciehp_sysfs_enable_slot(struct slot *p_slot)
 		p_slot->state = STATIC_STATE;
 		break;
 	case POWERON_STATE:
-		info("Slot %s is already in powering on state\n",
-		     p_slot->name);
+		ctrl_info(ctrl, "Slot %s is already in powering on state\n",
+			  p_slot->name);
 		break;
 	case BLINKINGOFF_STATE:
 	case POWEROFF_STATE:
-		info("Already enabled on slot %s\n", p_slot->name);
+		ctrl_info(ctrl, "Already enabled on slot %s\n", p_slot->name);
 		break;
 	default:
-		err("Not a valid state on slot %s\n", p_slot->name);
+		ctrl_err(ctrl, "Not a valid state on slot %s\n", p_slot->name);
 		break;
 	}
 	mutex_unlock(&p_slot->lock);
@@ -650,6 +663,7 @@ int pciehp_sysfs_enable_slot(struct slot *p_slot)
 int pciehp_sysfs_disable_slot(struct slot *p_slot)
 {
 	int retval = -ENODEV;
+	struct controller *ctrl = p_slot->ctrl;
 
 	mutex_lock(&p_slot->lock);
 	switch (p_slot->state) {
@@ -663,15 +677,15 @@ int pciehp_sysfs_disable_slot(struct slot *p_slot)
 		p_slot->state = STATIC_STATE;
 		break;
 	case POWEROFF_STATE:
-		info("Slot %s is already in powering off state\n",
-		     p_slot->name);
+		ctrl_info(ctrl, "Slot %s is already in powering off state\n",
+			  p_slot->name);
 		break;
 	case BLINKINGON_STATE:
 	case POWERON_STATE:
-		info("Already disabled on slot %s\n", p_slot->name);
+		ctrl_info(ctrl, "Already disabled on slot %s\n", p_slot->name);
 		break;
 	default:
-		err("Not a valid state on slot %s\n", p_slot->name);
+		ctrl_err(ctrl, "Not a valid state on slot %s\n", p_slot->name);
 		break;
 	}
 	mutex_unlock(&p_slot->lock);
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 5a5c08f12af2..8e9530c4c36d 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -235,7 +235,8 @@ static inline int pciehp_request_irq(struct controller *ctrl)
 	/* Installs the interrupt handler */
 	retval = request_irq(irq, pcie_isr, IRQF_SHARED, MY_NAME, ctrl);
 	if (retval)
-		err("Cannot get irq %d for the hotplug controller\n", irq);
+		ctrl_err(ctrl, "Cannot get irq %d for the hotplug controller\n",
+			 irq);
 	return retval;
 }
 
@@ -282,7 +283,7 @@ static void pcie_wait_cmd(struct controller *ctrl, int poll)
 	else
 		rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout);
 	if (!rc)
-		dbg("Command not completed in 1000 msec\n");
+		ctrl_dbg(ctrl, "Command not completed in 1000 msec\n");
 }
 
 /**
@@ -301,7 +302,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		goto out;
 	}
 
@@ -312,26 +314,28 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 			 * proceed forward to issue the next command according
 			 * to spec. Just print out the error message.
 			 */
-			dbg("%s: CMD_COMPLETED not clear after 1 sec.\n",
-			    __func__);
+			ctrl_dbg(ctrl,
+				 "%s: CMD_COMPLETED not clear after 1 sec.\n",
+				 __func__);
 		} else if (!NO_CMD_CMPL(ctrl)) {
 			/*
 			 * This controller semms to notify of command completed
 			 * event even though it supports none of power
 			 * controller, attention led, power led and EMI.
 			 */
-			dbg("%s: Unexpected CMD_COMPLETED. Need to wait for "
-			    "command completed event.\n", __func__);
+			ctrl_dbg(ctrl, "%s: Unexpected CMD_COMPLETED. Need to "
+				 "wait for command completed event.\n",
+				 __func__);
 			ctrl->no_cmd_complete = 0;
 		} else {
-			dbg("%s: Unexpected CMD_COMPLETED. Maybe the "
-			    "controller is broken.\n", __func__);
+			ctrl_dbg(ctrl, "%s: Unexpected CMD_COMPLETED. Maybe "
+				 "the controller is broken.\n", __func__);
 		}
 	}
 
 	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
 	if (retval) {
-		err("%s: Cannot read SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		goto out;
 	}
 
@@ -341,7 +345,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 	smp_mb();
 	retval = pciehp_writew(ctrl, SLOTCTRL, slot_ctrl);
 	if (retval)
-		err("%s: Cannot write to SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot write to SLOTCTRL register\n",
+			 __func__);
 
 	/*
 	 * Wait for command completion.
@@ -370,14 +375,15 @@ static int hpc_check_lnk_status(struct controller *ctrl)
 
 	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
 	if (retval) {
-		err("%s: Cannot read LNKSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
-	dbg("%s: lnk_status = %x\n", __func__, lnk_status);
+	ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
 	if ( (lnk_status & LNK_TRN) || (lnk_status & LNK_TRN_ERR) ||
 		!(lnk_status & NEG_LINK_WD)) {
-		err("%s : Link Training Error occurs \n", __func__);
+		ctrl_err(ctrl, "%s : Link Training Error occurs \n", __func__);
 		retval = -1;
 		return retval;
 	}
@@ -394,12 +400,12 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
 	if (retval) {
-		err("%s: Cannot read SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		return retval;
 	}
 
-	dbg("%s: SLOTCTRL %x, value read %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
 
 	atten_led_state = (slot_ctrl & ATTN_LED_CTRL) >> 6;
 
@@ -433,11 +439,11 @@ static int hpc_get_power_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
 	if (retval) {
-		err("%s: Cannot read SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		return retval;
 	}
-	dbg("%s: SLOTCTRL %x value read %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
 
 	pwr_state = (slot_ctrl & PWR_CTRL) >> 10;
 
@@ -464,7 +470,8 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
@@ -482,7 +489,8 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 	card_state = (u8)((slot_status & PRSN_STATE) >> 6);
@@ -500,7 +508,7 @@ static int hpc_query_power_fault(struct slot *slot)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot check for power fault\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot check for power fault\n", __func__);
 		return retval;
 	}
 	pwr_fault = (u8)((slot_status & PWR_FAULT_DETECTED) >> 1);
@@ -516,7 +524,7 @@ static int hpc_get_emi_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s : Cannot check EMI status\n", __func__);
+		ctrl_err(ctrl, "%s : Cannot check EMI status\n", __func__);
 		return retval;
 	}
 	*status = (slot_status & EMI_STATE) >> EMI_STATUS_BIT;
@@ -560,8 +568,8 @@ static int hpc_set_attention_status(struct slot *slot, u8 value)
 			return -1;
 	}
 	rc = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 
 	return rc;
 }
@@ -575,8 +583,8 @@ static void hpc_set_green_led_on(struct slot *slot)
 	slot_cmd = 0x0100;
 	cmd_mask = PWR_LED_CTRL;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static void hpc_set_green_led_off(struct slot *slot)
@@ -588,8 +596,8 @@ static void hpc_set_green_led_off(struct slot *slot)
 	slot_cmd = 0x0300;
 	cmd_mask = PWR_LED_CTRL;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static void hpc_set_green_led_blink(struct slot *slot)
@@ -601,8 +609,8 @@ static void hpc_set_green_led_blink(struct slot *slot)
 	slot_cmd = 0x0200;
 	cmd_mask = PWR_LED_CTRL;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static int hpc_power_on_slot(struct slot * slot)
@@ -613,20 +621,22 @@ static int hpc_power_on_slot(struct slot * slot)
 	u16 slot_status;
 	int retval = 0;
 
-	dbg("%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
+	ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
 
 	/* Clear sticky power-fault bit from previous power failures */
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 	slot_status &= PWR_FAULT_DETECTED;
 	if (slot_status) {
 		retval = pciehp_writew(ctrl, SLOTSTATUS, slot_status);
 		if (retval) {
-			err("%s: Cannot write to SLOTSTATUS register\n",
-			    __func__);
+			ctrl_err(ctrl,
+				 "%s: Cannot write to SLOTSTATUS register\n",
+				 __func__);
 			return retval;
 		}
 	}
@@ -644,11 +654,12 @@ static int hpc_power_on_slot(struct slot * slot)
 	retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
 
 	if (retval) {
-		err("%s: Write %x command failed!\n", __func__, slot_cmd);
+		ctrl_err(ctrl, "%s: Write %x command failed!\n",
+			 __func__, slot_cmd);
 		return -1;
 	}
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 
 	return retval;
 }
@@ -694,7 +705,7 @@ static int hpc_power_off_slot(struct slot * slot)
 	int retval = 0;
 	int changed;
 
-	dbg("%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
+	ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
 
 	/*
 	 * Set Bad DLLP Mask bit in Correctable Error Mask
@@ -722,12 +733,12 @@ static int hpc_power_off_slot(struct slot * slot)
 
 	retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
 	if (retval) {
-		err("%s: Write command failed!\n", __func__);
+		ctrl_err(ctrl, "%s: Write command failed!\n", __func__);
 		retval = -1;
 		goto out;
 	}
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
  out:
 	if (changed)
 		pcie_unmask_bad_dllp(ctrl);
@@ -749,7 +760,8 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 	intr_loc = 0;
 	do {
 		if (pciehp_readw(ctrl, SLOTSTATUS, &detected)) {
-			err("%s: Cannot read SLOTSTATUS\n", __func__);
+			ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS\n",
+				 __func__);
 			return IRQ_NONE;
 		}
 
@@ -760,12 +772,13 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 		if (!intr_loc)
 			return IRQ_NONE;
 		if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected)) {
-			err("%s: Cannot write to SLOTSTATUS\n", __func__);
+			ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n",
+				 __func__);
 			return IRQ_NONE;
 		}
 	} while (detected);
 
-	dbg("%s: intr_loc %x\n", __FUNCTION__, intr_loc);
+	ctrl_dbg(ctrl, "%s: intr_loc %x\n", __func__, intr_loc);
 
 	/* Check Command Complete Interrupt Pending */
 	if (intr_loc & CMD_COMPLETED) {
@@ -807,7 +820,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 
 	retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
 	if (retval) {
-		err("%s: Cannot read LNKCAP register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
 		return retval;
 	}
 
@@ -821,7 +834,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 	}
 
 	*value = lnk_speed;
-	dbg("Max link speed = %d\n", lnk_speed);
+	ctrl_dbg(ctrl, "Max link speed = %d\n", lnk_speed);
 
 	return retval;
 }
@@ -836,7 +849,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
 
 	retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
 	if (retval) {
-		err("%s: Cannot read LNKCAP register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
 		return retval;
 	}
 
@@ -871,7 +884,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
 	}
 
 	*value = lnk_wdth;
-	dbg("Max link width = %d\n", lnk_wdth);
+	ctrl_dbg(ctrl, "Max link width = %d\n", lnk_wdth);
 
 	return retval;
 }
@@ -885,7 +898,8 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 
 	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
 	if (retval) {
-		err("%s: Cannot read LNKSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
@@ -899,7 +913,7 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 	}
 
 	*value = lnk_speed;
-	dbg("Current link speed = %d\n", lnk_speed);
+	ctrl_dbg(ctrl, "Current link speed = %d\n", lnk_speed);
 
 	return retval;
 }
@@ -914,7 +928,8 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
 
 	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
 	if (retval) {
-		err("%s: Cannot read LNKSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
@@ -949,7 +964,7 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
 	}
 
 	*value = lnk_wdth;
-	dbg("Current link width = %d\n", lnk_wdth);
+	ctrl_dbg(ctrl, "Current link width = %d\n", lnk_wdth);
 
 	return retval;
 }
@@ -998,7 +1013,8 @@ int pcie_enable_notification(struct controller *ctrl)
 	       PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
 
 	if (pcie_write_cmd(ctrl, cmd, mask)) {
-		err("%s: Cannot enable software notification\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot enable software notification\n",
+			 __func__);
 		return -1;
 	}
 	return 0;
@@ -1010,7 +1026,8 @@ static void pcie_disable_notification(struct controller *ctrl)
 	mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE |
 	       PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
 	if (pcie_write_cmd(ctrl, 0, mask))
-		warn("%s: Cannot disable software notification\n", __func__);
+		ctrl_warn(ctrl, "%s: Cannot disable software notification\n",
+			  __func__);
 }
 
 static int pcie_init_notification(struct controller *ctrl)
@@ -1071,34 +1088,45 @@ static inline void dbg_ctrl(struct controller *ctrl)
 	if (!pciehp_debug)
 		return;
 
-	dbg("Hotplug Controller:\n");
-	dbg("  Seg/Bus/Dev/Func/IRQ : %s IRQ %d\n", pci_name(pdev), pdev->irq);
-	dbg("  Vendor ID            : 0x%04x\n", pdev->vendor);
-	dbg("  Device ID            : 0x%04x\n", pdev->device);
-	dbg("  Subsystem ID         : 0x%04x\n", pdev->subsystem_device);
-	dbg("  Subsystem Vendor ID  : 0x%04x\n", pdev->subsystem_vendor);
-	dbg("  PCIe Cap offset      : 0x%02x\n", ctrl->cap_base);
+	ctrl_info(ctrl, "Hotplug Controller:\n");
+	ctrl_info(ctrl, "  Seg/Bus/Dev/Func/IRQ : %s IRQ %d\n",
+		  pci_name(pdev), pdev->irq);
+	ctrl_info(ctrl, "  Vendor ID            : 0x%04x\n", pdev->vendor);
+	ctrl_info(ctrl, "  Device ID            : 0x%04x\n", pdev->device);
+	ctrl_info(ctrl, "  Subsystem ID         : 0x%04x\n",
+		  pdev->subsystem_device);
+	ctrl_info(ctrl, "  Subsystem Vendor ID  : 0x%04x\n",
+		  pdev->subsystem_vendor);
+	ctrl_info(ctrl, "  PCIe Cap offset      : 0x%02x\n", ctrl->cap_base);
 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
 		if (!pci_resource_len(pdev, i))
 			continue;
-		dbg("  PCI resource [%d]     : 0x%llx@0x%llx\n", i,
-		    (unsigned long long)pci_resource_len(pdev, i),
-		    (unsigned long long)pci_resource_start(pdev, i));
+		ctrl_info(ctrl, "  PCI resource [%d]     : 0x%llx@0x%llx\n",
+			  i, (unsigned long long)pci_resource_len(pdev, i),
+			  (unsigned long long)pci_resource_start(pdev, i));
 	}
-	dbg("Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
-	dbg("  Physical Slot Number : %d\n", ctrl->first_slot);
-	dbg("  Attention Button     : %3s\n", ATTN_BUTTN(ctrl) ? "yes" : "no");
-	dbg("  Power Controller     : %3s\n", POWER_CTRL(ctrl) ? "yes" : "no");
-	dbg("  MRL Sensor           : %3s\n", MRL_SENS(ctrl)   ? "yes" : "no");
-	dbg("  Attention Indicator  : %3s\n", ATTN_LED(ctrl)   ? "yes" : "no");
-	dbg("  Power Indicator      : %3s\n", PWR_LED(ctrl)    ? "yes" : "no");
-	dbg("  Hot-Plug Surprise    : %3s\n", HP_SUPR_RM(ctrl) ? "yes" : "no");
-	dbg("  EMI Present          : %3s\n", EMI(ctrl)        ? "yes" : "no");
-	dbg("  Command Completed    : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes");
+	ctrl_info(ctrl, "Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
+	ctrl_info(ctrl, "  Physical Slot Number : %d\n", ctrl->first_slot);
+	ctrl_info(ctrl, "  Attention Button     : %3s\n",
+		  ATTN_BUTTN(ctrl) ? "yes" : "no");
+	ctrl_info(ctrl, "  Power Controller     : %3s\n",
+		  POWER_CTRL(ctrl) ? "yes" : "no");
+	ctrl_info(ctrl, "  MRL Sensor           : %3s\n",
+		  MRL_SENS(ctrl)   ? "yes" : "no");
+	ctrl_info(ctrl, "  Attention Indicator  : %3s\n",
+		  ATTN_LED(ctrl)   ? "yes" : "no");
+	ctrl_info(ctrl, "  Power Indicator      : %3s\n",
+		  PWR_LED(ctrl)    ? "yes" : "no");
+	ctrl_info(ctrl, "  Hot-Plug Surprise    : %3s\n",
+		  HP_SUPR_RM(ctrl) ? "yes" : "no");
+	ctrl_info(ctrl, "  EMI Present          : %3s\n",
+		  EMI(ctrl)        ? "yes" : "no");
+	ctrl_info(ctrl, "  Command Completed    : %3s\n",
+		  NO_CMD_CMPL(ctrl) ? "no" : "yes");
 	pciehp_readw(ctrl, SLOTSTATUS, &reg16);
-	dbg("Slot Status            : 0x%04x\n", reg16);
+	ctrl_info(ctrl, "Slot Status            : 0x%04x\n", reg16);
 	pciehp_readw(ctrl, SLOTCTRL, &reg16);
-	dbg("Slot Control           : 0x%04x\n", reg16);
+	ctrl_info(ctrl, "Slot Control           : 0x%04x\n", reg16);
 }
 
 struct controller *pcie_init(struct pcie_device *dev)
@@ -1109,7 +1137,7 @@ struct controller *pcie_init(struct pcie_device *dev)
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl) {
-		err("%s : out of memory\n", __func__);
+		dev_err(&dev->device, "%s : out of memory\n", __func__);
 		goto abort;
 	}
 	INIT_LIST_HEAD(&ctrl->slot_list);
@@ -1118,11 +1146,12 @@ struct controller *pcie_init(struct pcie_device *dev)
 	ctrl->pci_dev = pdev;
 	ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
 	if (!ctrl->cap_base) {
-		err("%s: Cannot find PCI Express capability\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot find PCI Express capability\n",
+			 __func__);
 		goto abort;
 	}
 	if (pciehp_readl(ctrl, SLOTCAP, &slot_cap)) {
-		err("%s: Cannot read SLOTCAP register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCAP register\n", __func__);
 		goto abort;
 	}
 
@@ -1162,9 +1191,9 @@ struct controller *pcie_init(struct pcie_device *dev)
 			goto abort_ctrl;
 	}
 
-	info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
-	     pdev->vendor, pdev->device,
-	     pdev->subsystem_vendor, pdev->subsystem_device);
+	ctrl_info(ctrl, "HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
+		  pdev->vendor, pdev->device, pdev->subsystem_vendor,
+		  pdev->subsystem_device);
 
 	if (pcie_init_slot(ctrl))
 		goto abort_ctrl;
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index 6040dcceb256..ffd11148fbe2 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -198,18 +198,20 @@ int pciehp_configure_device(struct slot *p_slot)
 	struct pci_dev *dev;
 	struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
 	int num, fn;
+	struct controller *ctrl = p_slot->ctrl;
 
 	dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0));
 	if (dev) {
-		err("Device %s already exists at %x:%x, cannot hot-add\n",
-				pci_name(dev), p_slot->bus, p_slot->device);
+		ctrl_err(ctrl,
+			 "Device %s already exists at %x:%x, cannot hot-add\n",
+			 pci_name(dev), p_slot->bus, p_slot->device);
 		pci_dev_put(dev);
 		return -EINVAL;
 	}
 
 	num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0));
 	if (num == 0) {
-		err("No new device found\n");
+		ctrl_err(ctrl, "No new device found\n");
 		return -ENODEV;
 	}
 
@@ -218,8 +220,8 @@ int pciehp_configure_device(struct slot *p_slot)
 		if (!dev)
 			continue;
 		if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
-			err("Cannot hot-add display device %s\n",
-					pci_name(dev));
+			ctrl_err(ctrl, "Cannot hot-add display device %s\n",
+				 pci_name(dev));
 			pci_dev_put(dev);
 			continue;
 		}
@@ -244,9 +246,10 @@ int pciehp_unconfigure_device(struct slot *p_slot)
 	u8 presence = 0;
 	struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
 	u16 command;
+	struct controller *ctrl = p_slot->ctrl;
 
-	dbg("%s: bus/dev = %x/%x\n", __func__, p_slot->bus,
-				p_slot->device);
+	ctrl_dbg(ctrl, "%s: bus/dev = %x/%x\n", __func__,
+		 p_slot->bus, p_slot->device);
 	ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence);
 	if (ret)
 		presence = 0;
@@ -257,16 +260,17 @@ int pciehp_unconfigure_device(struct slot *p_slot)
 		if (!temp)
 			continue;
 		if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
-			err("Cannot remove display device %s\n",
-					pci_name(temp));
+			ctrl_err(ctrl, "Cannot remove display device %s\n",
+				 pci_name(temp));
 			pci_dev_put(temp);
 			continue;
 		}
 		if (temp->hdr_type == PCI_HEADER_TYPE_BRIDGE && presence) {
 			pci_read_config_byte(temp, PCI_BRIDGE_CONTROL, &bctl);
 			if (bctl & PCI_BRIDGE_CTL_VGA) {
-				err("Cannot remove display device %s\n",
-				    pci_name(temp));
+				ctrl_err(ctrl,
+					 "Cannot remove display device %s\n",
+					 pci_name(temp));
 				pci_dev_put(temp);
 				continue;
 			}
-- 
GitLab


From 5993760f7fc75b77e4701f1e56dc84c0d6cf18d5 Mon Sep 17 00:00:00 2001
From: Jike Song <albcamus@gmail.com>
Date: Tue, 9 Sep 2008 23:42:03 +0800
Subject: [PATCH 851/895] PCI: utilize calculated results when detecting MSI
 features

In msi_capability_init, we can make use of the calculated results
instead of calling is_mask_bit_support and is_64bit_address twice.

Signed-off-by: Jike Song <albcamus@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/msi.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 4a10b5624f72..d2812013fd22 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -378,23 +378,21 @@ static int msi_capability_init(struct pci_dev *dev)
 	entry->msi_attrib.masked = 1;
 	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.pos = pos;
-	if (is_mask_bit_support(control)) {
+	if (entry->msi_attrib.maskbit) {
 		entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
-				is_64bit_address(control));
+				entry->msi_attrib.is_64);
 	}
 	entry->dev = dev;
 	if (entry->msi_attrib.maskbit) {
 		unsigned int maskbits, temp;
 		/* All MSIs are unmasked by default, Mask them all */
 		pci_read_config_dword(dev,
-			msi_mask_bits_reg(pos, is_64bit_address(control)),
+			msi_mask_bits_reg(pos, entry->msi_attrib.is_64),
 			&maskbits);
 		temp = (1 << multi_msi_capable(control));
 		temp = ((temp - 1) & ~temp);
 		maskbits |= temp;
-		pci_write_config_dword(dev,
-			msi_mask_bits_reg(pos, is_64bit_address(control)),
-			maskbits);
+		pci_write_config_dword(dev, entry->msi_attrib.is_64, maskbits);
 		entry->msi_attrib.maskbits_mask = temp;
 	}
 	list_add_tail(&entry->list, &dev->msi_list);
-- 
GitLab


From 93ff68a55aa92180a765d6c51c3303f6200167a6 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sat, 6 Sep 2008 05:46:42 -0700
Subject: [PATCH 852/895] PCI: make CPU list affinity visible

Stephen Hemminger wrote:
> Looks like Mike created cpulistaffinty in sysfs but never completed
> the job.

This patch hooks things up correctly, taking care to remove the new file
when the bus is destroyed.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/bus.c    | 7 +++++++
 drivers/pci/pci.h    | 1 +
 drivers/pci/remove.c | 1 +
 3 files changed, 9 insertions(+)

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 529d9d7727b0..999cc4088b59 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -151,6 +151,13 @@ void pci_bus_add_devices(struct pci_bus *bus)
 			if (retval)
 				dev_err(&dev->dev, "Error creating cpuaffinity"
 					" file, continuing...\n");
+
+			retval = device_create_file(&child_bus->dev,
+						&dev_attr_cpulistaffinity);
+			if (retval)
+				dev_err(&dev->dev,
+					"Error creating cpulistaffinity"
+					" file, continuing...\n");
 		}
 	}
 }
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index d807cd786f20..4723b12fb39a 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -109,6 +109,7 @@ static inline int pci_no_d1d2(struct pci_dev *dev)
 extern int pcie_mch_quirk;
 extern struct device_attribute pci_dev_attrs[];
 extern struct device_attribute dev_attr_cpuaffinity;
+extern struct device_attribute dev_attr_cpulistaffinity;
 
 /**
  * pci_match_one_device - Tell if a PCI device structure has a matching
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index bdc2a44d68e1..f94f6d5ae297 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -73,6 +73,7 @@ void pci_remove_bus(struct pci_bus *pci_bus)
 	up_write(&pci_bus_sem);
 	pci_remove_legacy_files(pci_bus);
 	device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
+	device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
 	device_unregister(&pci_bus->dev);
 }
 EXPORT_SYMBOL(pci_remove_bus);
-- 
GitLab


From cef354db0d7a7207ea78c716753d9216a9c2b7e1 Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Tue, 2 Sep 2008 09:40:51 -0600
Subject: [PATCH 853/895] PCI: connect struct pci_dev to struct pci_slot

The introduction of struct pci_slot (f46753c5e354b857b20ab8e0fe7b25)
added a struct pci_slot pointer to struct pci_dev, but we forgot to
associate the two.

Connect the two structs together; the interesting portions of the object
lifetimes are:

	- when a new pci_slot is created, connect it to the appropriate
	  pci_dev's. A single pci_slot may be associated with multiple
	  pci_dev's, e.g. any multi-function PCI device.

	- when a pci_slot is released, look for all the pci_dev's it was
	  associated with, and set their pci_slot pointers to NULL

	- when a pci_dev is created, look for slots to associate with.

Note -- when a pci_dev is released, we don't need to do any bookkeeping,
since pci_slot's do not have pointers to pci_dev's.

Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c |  5 +++++
 drivers/pci/slot.c  | 10 ++++++++++
 2 files changed, 15 insertions(+)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 578d15f49e02..7aa71636dd3c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -949,6 +949,7 @@ EXPORT_SYMBOL(alloc_pci_dev);
 static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 {
 	struct pci_dev *dev;
+	struct pci_slot *slot;
 	u32 l;
 	u8 hdr_type;
 	int delay = 1;
@@ -997,6 +998,10 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 	dev->error_state = pci_channel_io_normal;
 	set_pcie_port_type(dev);
 
+	list_for_each_entry(slot, &bus->slots, list)
+		if (PCI_SLOT(devfn) == slot->number)
+			dev->slot = slot;
+
 	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
 	   set this higher, assuming the system even supports it.  */
 	dev->dma_mask = 0xffffffff;
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 7e5b85cbd948..0c6db03698ea 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -49,11 +49,16 @@ static ssize_t address_read_file(struct pci_slot *slot, char *buf)
 
 static void pci_slot_release(struct kobject *kobj)
 {
+	struct pci_dev *dev;
 	struct pci_slot *slot = to_pci_slot(kobj);
 
 	pr_debug("%s: releasing pci_slot on %x:%d\n", __func__,
 		 slot->bus->number, slot->number);
 
+	list_for_each_entry(dev, &slot->bus->devices, bus_list)
+		if (PCI_SLOT(dev->devfn) == slot->number)
+			dev->slot = NULL;
+
 	list_del(&slot->list);
 
 	kfree(slot);
@@ -108,6 +113,7 @@ static struct kobj_type pci_slot_ktype = {
 struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
 				 const char *name)
 {
+	struct pci_dev *dev;
 	struct pci_slot *slot;
 	int err;
 
@@ -150,6 +156,10 @@ placeholder:
 	INIT_LIST_HEAD(&slot->list);
 	list_add(&slot->list, &parent->slots);
 
+	list_for_each_entry(dev, &parent->devices, bus_list)
+		if (PCI_SLOT(dev->devfn) == slot_nr)
+			dev->slot = slot;
+
 	/* Don't care if debug printk has a -1 for slot_nr */
 	pr_debug("%s: created pci_slot on %04x:%02x:%02x\n",
 		 __func__, pci_domain_nr(parent), parent->number, slot_nr);
-- 
GitLab


From ec84f1268fcf16c4a852fdb38b3a541748644918 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Tue, 23 Sep 2008 11:43:34 -0700
Subject: [PATCH 854/895] PCI: fix -Wakpm warnings in pci_pm_init debug output

Checkpatch would have complained about this but neither Bjorn nor myself
ran it prior to pushing.  Fixup the issues Andrew pointed out.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e1a17b85447b..09dc893c81db 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1270,8 +1270,8 @@ void pci_pm_init(struct pci_dev *dev)
 
 		if (dev->d1_support || dev->d2_support)
 			dev_printk(KERN_DEBUG, &dev->dev, "supports%s%s\n",
-				   dev->d1_support ? " D1": "",
-				   dev->d2_support ? " D2": "");
+				   dev->d1_support ? " D1" : "",
+				   dev->d2_support ? " D2" : "");
 	}
 
 	pmc &= PCI_PM_CAP_PME_MASK;
-- 
GitLab


From 50cbfa511a21cac1909b6b4c955fa39d1da81457 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@digitalvampire.org>
Date: Mon, 22 Sep 2008 14:55:24 -0700
Subject: [PATCH 855/895] PCI: fix MSI-HOWTO.txt info about MSI-X MMIO space

The current MSI-HOWTO.txt says that device drivers should not request the
memory space that contains MSI-X tables.  This is because the original
MSI-X implementation did a request_mem_region() on this space, but that
code was removed long ago (in the pre-git era, in fact).  Years after the
code was changed, we might as well clean up the documention to avoid a
confusing mention of requesting regions: drivers using MSI-X can just use
pci_request_regions() just like any other driver, and so there's no need
for MSI-HOWTO.txt to talk about this at all.

Signed-off-by: Roland Dreier <roland@digitalvampire.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/MSI-HOWTO.txt | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Documentation/MSI-HOWTO.txt b/Documentation/MSI-HOWTO.txt
index a51f693c1541..256defd7e174 100644
--- a/Documentation/MSI-HOWTO.txt
+++ b/Documentation/MSI-HOWTO.txt
@@ -236,10 +236,8 @@ software system can set different pages for controlling accesses to the
 MSI-X structure. The implementation of MSI support requires the PCI
 subsystem, not a device driver, to maintain full control of the MSI-X
 table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
-table/MSI-X PBA.  A device driver is prohibited from requesting the MMIO
-address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
-will fail enabling MSI-X on its hardware device when it calls the function
-pci_enable_msix().
+table/MSI-X PBA.  A device driver should not access the MMIO address
+space of the MSI-X table/MSI-X PBA.
 
 5.3.2 API pci_enable_msix
 
-- 
GitLab


From 11d587429e9cbb40ac20d7ed8126c66da0d7aba5 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 5 Sep 2008 21:45:36 -0700
Subject: [PATCH 856/895] PCI: fix sparse warning in pci_remove_behind_bridge

Get rid of the second definition of dev which hides the earlier one in
the argument list and causes a warning from sparse.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/remove.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index f94f6d5ae297..042e08924421 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -115,13 +115,9 @@ void pci_remove_behind_bridge(struct pci_dev *dev)
 {
 	struct list_head *l, *n;
 
-	if (dev->subordinate) {
-		list_for_each_safe(l, n, &dev->subordinate->devices) {
-			struct pci_dev *dev = pci_dev_b(l);
-
-			pci_remove_bus_device(dev);
-		}
-	}
+	if (dev->subordinate)
+		list_for_each_safe(l, n, &dev->subordinate->devices)
+			pci_remove_bus_device(pci_dev_b(l));
 }
 
 static void pci_stop_bus_devices(struct pci_bus *bus)
-- 
GitLab


From c8761fe80ed052634153438405c9048611ae7ae1 Mon Sep 17 00:00:00 2001
From: "Zhao, Yu" <yu.zhao@intel.com>
Date: Mon, 22 Sep 2008 14:26:05 +0800
Subject: [PATCH 857/895] PCI: fix hotplug get_##name return value problem

Currently, get_##name in pci_hotplug_core.c will return 0 if module
unload wins the race between unload & reading the hotplug file.  Fix
that case to return -ENODEV like it should.

Reviewed-by: Alex Chiang <achiang@hp.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pci_hotplug_core.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 5f85b1b120e3..27d2b6fe5d53 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -102,13 +102,13 @@ static int get_##name (struct hotplug_slot *slot, type *value)		\
 {									\
 	struct hotplug_slot_ops *ops = slot->ops;			\
 	int retval = 0;							\
-	if (try_module_get(ops->owner)) {				\
-		if (ops->get_##name)					\
-			retval = ops->get_##name(slot, value);		\
-		else							\
-			*value = slot->info->name;			\
-		module_put(ops->owner);					\
-	}								\
+	if (try_module_get(ops->owner))					\
+		return -ENODEV;						\
+	if (ops->get_##name)						\
+		retval = ops->get_##name(slot, value);			\
+	else								\
+		*value = slot->info->name;				\
+	module_put(ops->owner);						\
 	return retval;							\
 }
 
-- 
GitLab


From c9bbb4abb658daf6cc6f92fb4751a7796a5aab75 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 24 Sep 2008 19:04:33 -0700
Subject: [PATCH 858/895] PCI: use %pF instead of print_fn_descriptor_symbol()
 in quirks.c

Use %pF instead of print_fn_descriptor_symbol() in quirks.c to get the name of
the hook we're calling.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/quirks.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 246918fe9482..9575fc8f87b3 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1935,8 +1935,7 @@ static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_f
 		if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
  		    (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
 #ifdef DEBUG
-			dev_dbg(&dev->dev, "calling ");
-			print_fn_descriptor_symbol("%s\n", f->hook);
+			dev_dbg(&dev->dev, "calling %pF\n", f->hook);
 #endif
 			f->hook(dev);
 		}
-- 
GitLab


From 5d9bc1fa47f0c1561f1d7c0bdff5e24860852b42 Mon Sep 17 00:00:00 2001
From: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Date: Mon, 13 Oct 2008 09:59:12 -0700
Subject: [PATCH 859/895] PCI hotplug: rpaphp: make debug var unique

Change debug variable name to one more unique to this driver.

Signed-off-by: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/rpaphp.h      | 4 ++--
 drivers/pci/hotplug/rpaphp_core.c | 4 ++--
 drivers/pci/hotplug/rpaphp_pci.c  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index 7d5921b1ee78..419919a87b0f 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -46,10 +46,10 @@
 #define PRESENT         1	/* Card in slot */
 
 #define MY_NAME "rpaphp"
-extern int debug;
+extern int rpaphp_debug;
 #define dbg(format, arg...)					\
 	do {							\
-		if (debug)					\
+		if (rpaphp_debug)					\
 			printk(KERN_DEBUG "%s: " format,	\
 				MY_NAME , ## arg); 		\
 	} while (0)
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 1f84f402acdb..95d02a08fdc7 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -37,7 +37,7 @@
 				/* and pci_do_scan_bus */
 #include "rpaphp.h"
 
-int debug;
+int rpaphp_debug;
 LIST_HEAD(rpaphp_slot_head);
 
 #define DRIVER_VERSION	"0.1"
@@ -50,7 +50,7 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-module_param(debug, bool, 0644);
+module_param_named(debug, rpaphp_debug, bool, 0644);
 
 /**
  * set_attention_status - set attention LED
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
index 5acfd4f3d4cb..513e1e282391 100644
--- a/drivers/pci/hotplug/rpaphp_pci.c
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -123,7 +123,7 @@ int rpaphp_enable_slot(struct slot *slot)
 			slot->state = CONFIGURED;
 		}
 
-		if (debug) {
+		if (rpaphp_debug) {
 			struct pci_dev *dev;
 			dbg("%s: pci_devs of slot[%s]\n", __func__, slot->dn->full_name);
 			list_for_each_entry (dev, &bus->devices, bus_list)
-- 
GitLab


From c322b28a04c084a467a862766f74c40c917a721c Mon Sep 17 00:00:00 2001
From: "Zhao, Yu" <yu.zhao@intel.com>
Date: Mon, 13 Oct 2008 19:36:05 +0800
Subject: [PATCH 860/895] PCI: use same arg names in PCI_VDEVICE comment

This cleanup makes the argument names in PCI_VDEVICE comment consistent
with those used in its definition.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8a4d0bebc311..008005674b60 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -455,8 +455,8 @@ struct pci_driver {
 
 /**
  * PCI_VDEVICE - macro used to describe a specific pci device in short form
- * @vend: the vendor name
- * @dev: the 16 bit PCI Device ID
+ * @vendor: the vendor name
+ * @device: the 16 bit PCI Device ID
  *
  * This macro is used to create a struct pci_device_id that matches a
  * specific PCI device.  The subvendor, and subdevice fields will be set
-- 
GitLab


From 022edd86d7c864bc8fadc3c8ac4e6a464472ab05 Mon Sep 17 00:00:00 2001
From: "Zhao, Yu" <yu.zhao@intel.com>
Date: Mon, 13 Oct 2008 19:24:28 +0800
Subject: [PATCH 861/895] PCI: use resource_size() everywhere.

This is a cleanup that replaces the resource calculation formula with
resource_size().

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/setup-bus.c | 4 ++--
 drivers/pci/setup-res.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 471a429d7a20..ea979f2bc6db 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -299,7 +299,7 @@ static void pbus_size_io(struct pci_bus *bus)
 
 			if (r->parent || !(r->flags & IORESOURCE_IO))
 				continue;
-			r_size = r->end - r->start + 1;
+			r_size = resource_size(r);
 
 			if (r_size < 0x400)
 				/* Might be re-aligned for ISA */
@@ -350,7 +350,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 
 			if (r->parent || (r->flags & mask) != type)
 				continue;
-			r_size = r->end - r->start + 1;
+			r_size = resource_size(r);
 			/* For bridges size != alignment */
 			align = resource_alignment(r);
 			order = __ffs(align) - 20;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index d4b5c690eaa7..2dbd96cce2d8 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -129,7 +129,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	resource_size_t size, min, align;
 	int ret;
 
-	size = res->end - res->start + 1;
+	size = resource_size(res);
 	min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 
 	align = resource_alignment(res);
-- 
GitLab


From 557848c3c03ad1d1e66cb3b5b06698e3a9ebc33c Mon Sep 17 00:00:00 2001
From: "Zhao, Yu" <yu.zhao@intel.com>
Date: Mon, 13 Oct 2008 19:18:07 +0800
Subject: [PATCH 862/895] PCI: replace cfg space size (256/4096) by macros.

This is a cleanup that changes all PCI configuration space size
representations to the macros (PCI_CFG_SPACE_SIZE and
PCI_CFG_SPACE_EXP_SIZE). And the macros are also moved from
drivers/pci/probe.c to drivers/pci/pci.h.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci-sysfs.c | 10 +++++-----
 drivers/pci/pci.c       | 11 +++++++----
 drivers/pci/pci.h       |  7 +++++++
 drivers/pci/probe.c     |  5 ++---
 4 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 77baff022f71..00a9947cb7cc 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -715,7 +715,7 @@ static struct bin_attribute pci_config_attr = {
 		.name = "config",
 		.mode = S_IRUGO | S_IWUSR,
 	},
-	.size = 256,
+	.size = PCI_CFG_SPACE_SIZE,
 	.read = pci_read_config,
 	.write = pci_write_config,
 };
@@ -725,7 +725,7 @@ static struct bin_attribute pcie_config_attr = {
 		.name = "config",
 		.mode = S_IRUGO | S_IWUSR,
 	},
-	.size = 4096,
+	.size = PCI_CFG_SPACE_EXP_SIZE,
 	.read = pci_read_config,
 	.write = pci_write_config,
 };
@@ -743,7 +743,7 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 	if (!sysfs_initialized)
 		return -EACCES;
 
-	if (pdev->cfg_size < 4096)
+	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
 		retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
@@ -814,7 +814,7 @@ err_vpd:
 		kfree(pdev->vpd->attr);
 	}
 err_config_file:
-	if (pdev->cfg_size < 4096)
+	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
@@ -839,7 +839,7 @@ void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
 		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
 		kfree(pdev->vpd->attr);
 	}
-	if (pdev->cfg_size < 4096)
+	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 09dc893c81db..553ca6657955 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -213,10 +213,13 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap)
 int pci_find_ext_capability(struct pci_dev *dev, int cap)
 {
 	u32 header;
-	int ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */
-	int pos = 0x100;
+	int ttl;
+	int pos = PCI_CFG_SPACE_SIZE;
 
-	if (dev->cfg_size <= 256)
+	/* minimum 8 bytes per capability */
+	ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
+
+	if (dev->cfg_size <= PCI_CFG_SPACE_SIZE)
 		return 0;
 
 	if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
@@ -234,7 +237,7 @@ int pci_find_ext_capability(struct pci_dev *dev, int cap)
 			return pos;
 
 		pos = PCI_EXT_CAP_NEXT(header);
-		if (pos < 0x100)
+		if (pos < PCI_CFG_SPACE_SIZE)
 			break;
 
 		if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4723b12fb39a..601abdc8dd9f 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1,3 +1,9 @@
+#ifndef DRIVERS_PCI_H
+#define DRIVERS_PCI_H
+
+#define PCI_CFG_SPACE_SIZE	256
+#define PCI_CFG_SPACE_EXP_SIZE	4096
+
 /* Functions internal to the PCI core code */
 
 extern int pci_uevent(struct device *dev, struct kobj_uevent_env *env);
@@ -145,3 +151,4 @@ struct pci_slot_attribute {
 };
 #define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr)
 
+#endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 7aa71636dd3c..f6754e87f046 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -14,8 +14,6 @@
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR	3
-#define PCI_CFG_SPACE_SIZE	256
-#define PCI_CFG_SPACE_EXP_SIZE	4096
 
 /* Ugh.  Need to stop exporting this to modules. */
 LIST_HEAD(pci_root_buses);
@@ -887,8 +885,9 @@ static void set_pcie_port_type(struct pci_dev *pdev)
 int pci_cfg_space_size_ext(struct pci_dev *dev)
 {
 	u32 status;
+	int pos = PCI_CFG_SPACE_SIZE;
 
-	if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
+	if (pci_read_config_dword(dev, pos, &status) != PCIBIOS_SUCCESSFUL)
 		goto fail;
 	if (status == 0xffffffff)
 		goto fail;
-- 
GitLab


From e354597cce8d219d135d65e585dc4f30323486b9 Mon Sep 17 00:00:00 2001
From: Peter Chubb <peterc@gelato.unsw.edu.au>
Date: Mon, 13 Oct 2008 11:49:04 +1100
Subject: [PATCH 863/895] PCI: fix 64-vbit prefetchable memory resource BARs

Since patch 6ac665c63dcac8fcec534a1d224ecbb8b867ad59 my infiniband
controller hasn't worked.  This is because it has 64-bit prefetchable
memory, which was mistakenly being  taken to be 32-bit memory.  The
resource flags in this case are PCI_BASE_ADDRESS_MEM_TYPE_64 |
PCI_BASE_ADDRESS_MEM_PREFETCH.

This patch checks only for the PCI_BASE_ADDRESS_MEM_TYPE_64 bit; thus
whether the region is prefetchable or not is ignored.  This fixes my
Infiniband.

Reviewed-by: Matthew Wilcox <matthew@wil.cx>
Signed-off-by: Peter Chubb <peterc@gelato.unsw.edu.au>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index f6754e87f046..49599ac49bda 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -217,7 +217,7 @@ static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar)
 
 	res->flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
 
-	if (res->flags == PCI_BASE_ADDRESS_MEM_TYPE_64)
+	if (res->flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
 		return pci_bar_mem64;
 	return pci_bar_mem32;
 }
-- 
GitLab


From 280c73d3691fb182fa55b0160737c2c0feb79471 Mon Sep 17 00:00:00 2001
From: "Zhao, Yu" <yu.zhao@intel.com>
Date: Mon, 13 Oct 2008 20:01:00 +0800
Subject: [PATCH 864/895] PCI: centralize the capabilities code in pci-sysfs.c

This patch centralizes functions used to add and remove sysfs entries
for various capabilities. With this cleanup, the code is more readable
and easier for adding new capability related functions.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci-sysfs.c | 138 ++++++++++++++++++++++++----------------
 1 file changed, 83 insertions(+), 55 deletions(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 00a9947cb7cc..2cad6da2e4aa 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -735,10 +735,41 @@ int __attribute__ ((weak)) pcibios_add_platform_entries(struct pci_dev *dev)
 	return 0;
 }
 
+static int pci_create_capabilities_sysfs(struct pci_dev *dev)
+{
+	int retval;
+	struct bin_attribute *attr;
+
+	/* If the device has VPD, try to expose it in sysfs. */
+	if (dev->vpd) {
+		attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
+		if (!attr)
+			return -ENOMEM;
+
+		attr->size = dev->vpd->len;
+		attr->attr.name = "vpd";
+		attr->attr.mode = S_IRUSR | S_IWUSR;
+		attr->read = pci_read_vpd;
+		attr->write = pci_write_vpd;
+		retval = sysfs_create_bin_file(&dev->dev.kobj, attr);
+		if (retval) {
+			kfree(dev->vpd->attr);
+			return retval;
+		}
+		dev->vpd->attr = attr;
+	}
+
+	/* Active State Power Management */
+	pcie_aspm_create_sysfs_dev_files(dev);
+
+	return 0;
+}
+
 int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 {
-	struct bin_attribute *attr = NULL;
 	int retval;
+	int rom_size = 0;
+	struct bin_attribute *attr;
 
 	if (!sysfs_initialized)
 		return -EACCES;
@@ -750,69 +781,55 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 	if (retval)
 		goto err;
 
-	/* If the device has VPD, try to expose it in sysfs. */
-	if (pdev->vpd) {
-		attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
-		if (attr) {
-			pdev->vpd->attr = attr;
-			attr->size = pdev->vpd->len;
-			attr->attr.name = "vpd";
-			attr->attr.mode = S_IRUSR | S_IWUSR;
-			attr->read = pci_read_vpd;
-			attr->write = pci_write_vpd;
-			retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
-			if (retval)
-				goto err_vpd;
-		} else {
-			retval = -ENOMEM;
-			goto err_config_file;
-		}
-	}
-
 	retval = pci_create_resource_files(pdev);
 	if (retval)
-		goto err_vpd_file;
+		goto err_config_file;
+
+	if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+		rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+	else if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
+		rom_size = 0x20000;
 
 	/* If the device has a ROM, try to expose it in sysfs. */
-	if (pci_resource_len(pdev, PCI_ROM_RESOURCE) ||
-	    (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)) {
+	if (rom_size) {
 		attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
-		if (attr) {
-			pdev->rom_attr = attr;
-			attr->size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
-			attr->attr.name = "rom";
-			attr->attr.mode = S_IRUSR;
-			attr->read = pci_read_rom;
-			attr->write = pci_write_rom;
-			retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
-			if (retval)
-				goto err_rom;
-		} else {
+		if (!attr) {
 			retval = -ENOMEM;
 			goto err_resource_files;
 		}
+		attr->size = rom_size;
+		attr->attr.name = "rom";
+		attr->attr.mode = S_IRUSR;
+		attr->read = pci_read_rom;
+		attr->write = pci_write_rom;
+		retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
+		if (retval) {
+			kfree(attr);
+			goto err_resource_files;
+		}
+		pdev->rom_attr = attr;
 	}
+
 	/* add platform-specific attributes */
-	if (pcibios_add_platform_entries(pdev))
+	retval = pcibios_add_platform_entries(pdev);
+	if (retval)
 		goto err_rom_file;
 
-	pcie_aspm_create_sysfs_dev_files(pdev);
+	/* add sysfs entries for various capabilities */
+	retval = pci_create_capabilities_sysfs(pdev);
+	if (retval)
+		goto err_rom_file;
 
 	return 0;
 
 err_rom_file:
-	if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+	if (rom_size) {
 		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
-err_rom:
-	kfree(pdev->rom_attr);
+		kfree(pdev->rom_attr);
+		pdev->rom_attr = NULL;
+	}
 err_resource_files:
 	pci_remove_resource_files(pdev);
-err_vpd_file:
-	if (pdev->vpd) {
-		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
-err_vpd:
-		kfree(pdev->vpd->attr);
-	}
 err_config_file:
 	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
@@ -822,6 +839,16 @@ err:
 	return retval;
 }
 
+static void pci_remove_capabilities_sysfs(struct pci_dev *dev)
+{
+	if (dev->vpd && dev->vpd->attr) {
+		sysfs_remove_bin_file(&dev->dev.kobj, dev->vpd->attr);
+		kfree(dev->vpd->attr);
+	}
+
+	pcie_aspm_remove_sysfs_dev_files(dev);
+}
+
 /**
  * pci_remove_sysfs_dev_files - cleanup PCI specific sysfs files
  * @pdev: device whose entries we should free
@@ -830,15 +857,13 @@ err:
  */
 void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
 {
+	int rom_size = 0;
+
 	if (!sysfs_initialized)
 		return;
 
-	pcie_aspm_remove_sysfs_dev_files(pdev);
+	pci_remove_capabilities_sysfs(pdev);
 
-	if (pdev->vpd) {
-		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
-		kfree(pdev->vpd->attr);
-	}
 	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
@@ -846,11 +871,14 @@ void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
 
 	pci_remove_resource_files(pdev);
 
-	if (pci_resource_len(pdev, PCI_ROM_RESOURCE)) {
-		if (pdev->rom_attr) {
-			sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
-			kfree(pdev->rom_attr);
-		}
+	if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+		rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+	else if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
+		rom_size = 0x20000;
+
+	if (rom_size && pdev->rom_attr) {
+		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
+		kfree(pdev->rom_attr);
 	}
 }
 
-- 
GitLab


From 201de56eb22f1ff3f36804bc70cbff220b50f067 Mon Sep 17 00:00:00 2001
From: "Zhao, Yu" <yu.zhao@intel.com>
Date: Mon, 13 Oct 2008 19:49:55 +0800
Subject: [PATCH 865/895] PCI: centralize the capabilities code in probe.c

This patch centralizes the initialization and release functions of
various PCI capabilities in probe.c, which makes the introduction
of new capability support functions cleaner in the future.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 49599ac49bda..8c158b9abd41 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -842,6 +842,11 @@ static int pci_setup_device(struct pci_dev * dev)
 	return 0;
 }
 
+static void pci_release_capabilities(struct pci_dev *dev)
+{
+	pci_vpd_release(dev);
+}
+
 /**
  * pci_release_dev - free a pci device structure when all users of it are finished.
  * @dev: device that's been disconnected
@@ -854,7 +859,7 @@ static void pci_release_dev(struct device *dev)
 	struct pci_dev *pci_dev;
 
 	pci_dev = to_pci_dev(dev);
-	pci_vpd_release(pci_dev);
+	pci_release_capabilities(pci_dev);
 	kfree(pci_dev);
 }
 
@@ -935,8 +940,6 @@ struct pci_dev *alloc_pci_dev(void)
 
 	INIT_LIST_HEAD(&dev->bus_list);
 
-	pci_msi_init_pci_dev(dev);
-
 	return dev;
 }
 EXPORT_SYMBOL(alloc_pci_dev);
@@ -1009,11 +1012,21 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 		return NULL;
 	}
 
-	pci_vpd_pci22_init(dev);
-
 	return dev;
 }
 
+static void pci_init_capabilities(struct pci_dev *dev)
+{
+	/* MSI/MSI-X list */
+	pci_msi_init_pci_dev(dev);
+
+	/* Power Management */
+	pci_pm_init(dev);
+
+	/* Vital Product Data */
+	pci_vpd_pci22_init(dev);
+}
+
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 {
 	device_initialize(&dev->dev);
@@ -1030,8 +1043,8 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	/* Fix up broken headers */
 	pci_fixup_device(pci_fixup_header, dev);
 
-	/* Initialize power management of the device */
-	pci_pm_init(dev);
+	/* Initialize various capabilities */
+	pci_init_capabilities(dev);
 
 	/*
 	 * Add the device to our list of discovered devices
-- 
GitLab


From 58c3a727cb73b75a9104d295f096cca12959a5a5 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Tue, 14 Oct 2008 14:02:53 +0800
Subject: [PATCH 866/895] PCI: support PCIe ARI capability

This patch adds support for PCI Express Alternative Routing-ID
Interpretation (ARI) capability.

The ARI capability extends the Function Number field of the PCI Express
Endpoint by reusing the Device Number which is otherwise hardwired to 0.
With ARI, an Endpoint can have up to 256 functions.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c        | 32 ++++++++++++++++++++++++++++++++
 drivers/pci/pci.h        | 12 ++++++++++++
 drivers/pci/probe.c      |  3 +++
 include/linux/pci.h      |  1 +
 include/linux/pci_regs.h | 14 ++++++++++++++
 5 files changed, 62 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 553ca6657955..4db261e13e69 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1299,6 +1299,38 @@ void pci_pm_init(struct pci_dev *dev)
 	}
 }
 
+/**
+ * pci_enable_ari - enable ARI forwarding if hardware support it
+ * @dev: the PCI device
+ */
+void pci_enable_ari(struct pci_dev *dev)
+{
+	int pos;
+	u32 cap;
+	u16 ctrl;
+
+	if (!dev->is_pcie)
+		return;
+
+	if (dev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
+	    dev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
+		return;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (!pos)
+		return;
+
+	pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap);
+	if (!(cap & PCI_EXP_DEVCAP2_ARI))
+		return;
+
+	pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
+	ctrl |= PCI_EXP_DEVCTL2_ARI;
+	pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
+
+	dev->ari_enabled = 1;
+}
+
 int
 pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
 {
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 601abdc8dd9f..39684c1415c5 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -151,4 +151,16 @@ struct pci_slot_attribute {
 };
 #define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr)
 
+extern void pci_enable_ari(struct pci_dev *dev);
+/**
+ * pci_ari_enabled - query ARI forwarding status
+ * @dev: the PCI device
+ *
+ * Returns 1 if ARI forwarding is enabled, or 0 if not enabled;
+ */
+static inline int pci_ari_enabled(struct pci_dev *dev)
+{
+	return dev->ari_enabled;
+}
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 8c158b9abd41..3141e8deeac4 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1025,6 +1025,9 @@ static void pci_init_capabilities(struct pci_dev *dev)
 
 	/* Vital Product Data */
 	pci_vpd_pci22_init(dev);
+
+	/* Alternative Routing-ID Forwarding */
+	pci_enable_ari(dev);
 }
 
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 008005674b60..7e9a1f0715e6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -214,6 +214,7 @@ struct pci_dev {
 	unsigned int	broken_parity_status:1;	/* Device generates false positive parity */
 	unsigned int 	msi_enabled:1;
 	unsigned int	msix_enabled:1;
+	unsigned int	ari_enabled:1;	/* ARI forwarding */
 	unsigned int	is_managed:1;
 	unsigned int	is_pcie:1;
 	pci_dev_flags_t dev_flags;
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 450684f7eaac..eb6686b88f9a 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -419,6 +419,10 @@
 #define  PCI_EXP_RTCTL_CRSSVE	0x10	/* CRS Software Visibility Enable */
 #define PCI_EXP_RTCAP		30	/* Root Capabilities */
 #define PCI_EXP_RTSTA		32	/* Root Status */
+#define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
+#define  PCI_EXP_DEVCAP2_ARI	0x20	/* Alternative Routing-ID */
+#define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
+#define  PCI_EXP_DEVCTL2_ARI	0x20	/* Alternative Routing-ID */
 
 /* Extended Capabilities (PCI-X 2.0 and Express) */
 #define PCI_EXT_CAP_ID(header)		(header & 0x0000ffff)
@@ -429,6 +433,7 @@
 #define PCI_EXT_CAP_ID_VC	2
 #define PCI_EXT_CAP_ID_DSN	3
 #define PCI_EXT_CAP_ID_PWR	4
+#define PCI_EXT_CAP_ID_ARI	14
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
@@ -536,5 +541,14 @@
 #define HT_CAPTYPE_GEN3		0xD0	/* Generation 3 hypertransport configuration */
 #define HT_CAPTYPE_PM		0xE0	/* Hypertransport powermanagement configuration */
 
+/* Alternative Routing-ID Interpretation */
+#define PCI_ARI_CAP		0x04	/* ARI Capability Register */
+#define  PCI_ARI_CAP_MFVC	0x0001	/* MFVC Function Groups Capability */
+#define  PCI_ARI_CAP_ACS	0x0002	/* ACS Function Groups Capability */
+#define  PCI_ARI_CAP_NFN(x)	(((x) >> 8) & 0xff) /* Next Function Number */
+#define PCI_ARI_CTRL		0x06	/* ARI Control Register */
+#define  PCI_ARI_CTRL_MFVC	0x0001	/* MFVC Function Groups Enable */
+#define  PCI_ARI_CTRL_ACS	0x0002	/* ACS Function Groups Enable */
+#define  PCI_ARI_CTRL_FG(x)	(((x) >> 4) & 7) /* Function Group */
 
 #endif /* LINUX_PCI_REGS_H */
-- 
GitLab


From f393d9b130423a7a47c751b26df07ceaa5dc76a9 Mon Sep 17 00:00:00 2001
From: Vincent Legoll <vincent.legoll@gmail.com>
Date: Sun, 12 Oct 2008 12:26:12 +0200
Subject: [PATCH 867/895] PCI: probing debug message uniformization

This patch uniformizes PCI probing debug boot messages with dev_printk()
intead of manual printk()

It changes adress range output from [%llx, %llx] to [%#llx-%#llx], like
in pci_request_region().

For example, it goes from the mixed-style:

PCI: 0000:00:1b.0 reg 10 64bit mmio: [f4280000, f4283fff]
pci 0000:00:1b.0: PME# supported from D0 D3hot D3cold

to uniform:

pci 0000:00:1b.0: reg 10 64bit mmio: [0xf4280000-0xf4283fff]
pci 0000:00:1b.0: PME# supported from D0 D3hot D3cold

This patch has been runtime tested, boot log messages diffed, everything
looks OK.

Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Vincent Legoll <vincent.legoll@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pcie/aspm.c |  6 +++---
 drivers/pci/probe.c     | 25 ++++++++++++-------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 851f5b83cdbc..8f63f4c6b85f 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -528,9 +528,9 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
 		pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP,
 			&reg32);
 		if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) {
-			printk("Pre-1.1 PCIe device detected, "
-				"disable ASPM for %s. It can be enabled forcedly"
-				" with 'pcie_aspm=force'\n", pci_name(pdev));
+			dev_printk(KERN_INFO, &child_dev->dev, "disabling ASPM"
+				" on pre-1.1 PCIe device.  You can enable it"
+				" with 'pcie_aspm=force'\n");
 			return -EINVAL;
 		}
 	}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 3141e8deeac4..afb9f1c0bc28 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -302,8 +302,8 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 		} else {
 			res->start = l64;
 			res->end = l64 + sz64;
-			printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: %pR\n",
-				pci_name(dev), pos, res);
+			dev_printk(KERN_DEBUG, &dev->dev,
+				"reg %x 64bit mmio: %pR\n", pos, res);
 		}
 	} else {
 		sz = pci_size(l, sz, mask);
@@ -313,10 +313,10 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 
 		res->start = l;
 		res->end = l + sz;
-		printk(KERN_DEBUG "PCI: %s reg %x %s: %pR\n",
-		       pci_name(dev), pos,
-		       (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio",
-		       res);
+
+		dev_printk(KERN_DEBUG, &dev->dev, "reg %x %s: %pR\n", pos,
+			(res->flags & IORESOURCE_IO) ? "io port" : "32bit mmio",
+			res);
 	}
 
  out:
@@ -387,8 +387,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 			res->start = base;
 		if (!res->end)
 			res->end = limit + 0xfff;
-		printk(KERN_DEBUG "PCI: bridge %s io port: %pR\n",
-		       pci_name(dev), res);
+		dev_printk(KERN_DEBUG, &dev->dev, "bridge io port: %pR\n", res);
 	}
 
 	res = child->resource[1];
@@ -400,8 +399,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 		res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
 		res->start = base;
 		res->end = limit + 0xfffff;
-		printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: %pR\n",
-		       pci_name(dev), res);
+		dev_printk(KERN_DEBUG, &dev->dev, "bridge 32bit mmio: %pR\n",
+			res);
 	}
 
 	res = child->resource[2];
@@ -437,9 +436,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 		res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
 		res->start = base;
 		res->end = limit + 0xfffff;
-		printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: %pR\n",
-		       pci_name(dev),
-		       (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64":"32", res);
+		dev_printk(KERN_DEBUG, &dev->dev, "bridge %sbit mmio pref: %pR\n",
+			(res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32",
+			res);
 	}
 }
 
-- 
GitLab


From f19aeb1f3638b7bb4ca21eb361f004fac2bfe259 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 3 Oct 2008 19:49:32 +1000
Subject: [PATCH 868/895] PCI: Add ability to mmap legacy_io on some platforms

This adds the ability to mmap legacy IO space to the legacy_io files
in sysfs on platforms that support it. This will allow to clean up
X to use this instead of /dev/mem for legacy IO accesses such as
those performed by Int10.

While at it I moved pci_create/remove_legacy_files() to pci-sysfs.c
where I think they belong, thus making more things statis in there
and cleaned up some spurrious prototypes in the ia64 pci.h file

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/ia64/include/asm/pci.h | 12 +----
 arch/ia64/pci/pci.c         |  7 ++-
 drivers/pci/pci-sysfs.c     | 93 +++++++++++++++++++++++++++++++++++--
 drivers/pci/pci.h           |  6 +++
 drivers/pci/probe.c         | 66 --------------------------
 5 files changed, 102 insertions(+), 82 deletions(-)

diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 0149097b736d..ce342fb74246 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -95,16 +95,8 @@ extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
 				enum pci_mmap_state mmap_state, int write_combine);
 #define HAVE_PCI_LEGACY
 extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
-				      struct vm_area_struct *vma);
-extern ssize_t pci_read_legacy_io(struct kobject *kobj,
-				  struct bin_attribute *bin_attr,
-				  char *buf, loff_t off, size_t count);
-extern ssize_t pci_write_legacy_io(struct kobject *kobj,
-				   struct bin_attribute *bin_attr,
-				   char *buf, loff_t off, size_t count);
-extern int pci_mmap_legacy_mem(struct kobject *kobj,
-			       struct bin_attribute *attr,
-			       struct vm_area_struct *vma);
+				      struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state);
 
 #define pci_get_legacy_mem platform_pci_get_legacy_mem
 #define pci_legacy_read platform_pci_legacy_read
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7545037a8625..211fcfd115f9 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -614,12 +614,17 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
  * vector to get the base address.
  */
 int
-pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
+pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
+			   enum pci_mmap_state mmap_state)
 {
 	unsigned long size = vma->vm_end - vma->vm_start;
 	pgprot_t prot;
 	char *addr;
 
+	/* We only support mmap'ing of legacy memory space */
+	if (mmap_state != pci_mmap_mem)
+		return -ENOSYS;
+
 	/*
 	 * Avoid attribute aliasing.  See Documentation/ia64/aliasing.txt
 	 * for more details.
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 2cad6da2e4aa..110022d78689 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -423,7 +423,7 @@ pci_write_vpd(struct kobject *kobj, struct bin_attribute *bin_attr,
  * Reads 1, 2, or 4 bytes from legacy I/O port space using an arch specific
  * callback routine (pci_legacy_read).
  */
-ssize_t
+static ssize_t
 pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
 		   char *buf, loff_t off, size_t count)
 {
@@ -448,7 +448,7 @@ pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * Writes 1, 2, or 4 bytes from legacy I/O port space using an arch specific
  * callback routine (pci_legacy_write).
  */
-ssize_t
+static ssize_t
 pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
 		    char *buf, loff_t off, size_t count)
 {
@@ -468,11 +468,11 @@ pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * @attr: struct bin_attribute for this file
  * @vma: struct vm_area_struct passed to mmap
  *
- * Uses an arch specific callback, pci_mmap_legacy_page_range, to mmap
+ * Uses an arch specific callback, pci_mmap_legacy_mem_page_range, to mmap
  * legacy memory space (first meg of bus space) into application virtual
  * memory space.
  */
-int
+static int
 pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
                     struct vm_area_struct *vma)
 {
@@ -480,7 +480,90 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
                                                       struct device,
 						      kobj));
 
-        return pci_mmap_legacy_page_range(bus, vma);
+        return pci_mmap_legacy_page_range(bus, vma, pci_mmap_mem);
+}
+
+/**
+ * pci_mmap_legacy_io - map legacy PCI IO into user memory space
+ * @kobj: kobject corresponding to device to be mapped
+ * @attr: struct bin_attribute for this file
+ * @vma: struct vm_area_struct passed to mmap
+ *
+ * Uses an arch specific callback, pci_mmap_legacy_io_page_range, to mmap
+ * legacy IO space (first meg of bus space) into application virtual
+ * memory space. Returns -ENOSYS if the operation isn't supported
+ */
+static int
+pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr,
+		   struct vm_area_struct *vma)
+{
+        struct pci_bus *bus = to_pci_bus(container_of(kobj,
+                                                      struct device,
+						      kobj));
+
+        return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io);
+}
+
+/**
+ * pci_create_legacy_files - create legacy I/O port and memory files
+ * @b: bus to create files under
+ *
+ * Some platforms allow access to legacy I/O port and ISA memory space on
+ * a per-bus basis.  This routine creates the files and ties them into
+ * their associated read, write and mmap files from pci-sysfs.c
+ *
+ * On error unwind, but don't propogate the error to the caller
+ * as it is ok to set up the PCI bus without these files.
+ */
+void pci_create_legacy_files(struct pci_bus *b)
+{
+	int error;
+
+	b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
+			       GFP_ATOMIC);
+	if (!b->legacy_io)
+		goto kzalloc_err;
+
+	b->legacy_io->attr.name = "legacy_io";
+	b->legacy_io->size = 0xffff;
+	b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
+	b->legacy_io->read = pci_read_legacy_io;
+	b->legacy_io->write = pci_write_legacy_io;
+	b->legacy_io->mmap = pci_mmap_legacy_io;
+	error = device_create_bin_file(&b->dev, b->legacy_io);
+	if (error)
+		goto legacy_io_err;
+
+	/* Allocated above after the legacy_io struct */
+	b->legacy_mem = b->legacy_io + 1;
+	b->legacy_mem->attr.name = "legacy_mem";
+	b->legacy_mem->size = 1024*1024;
+	b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
+	b->legacy_mem->mmap = pci_mmap_legacy_mem;
+	error = device_create_bin_file(&b->dev, b->legacy_mem);
+	if (error)
+		goto legacy_mem_err;
+
+	return;
+
+legacy_mem_err:
+	device_remove_bin_file(&b->dev, b->legacy_io);
+legacy_io_err:
+	kfree(b->legacy_io);
+	b->legacy_io = NULL;
+kzalloc_err:
+	printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
+	       "and ISA memory resources to sysfs\n");
+	return;
+}
+
+void pci_remove_legacy_files(struct pci_bus *b)
+{
+	if (b->legacy_io) {
+		device_remove_bin_file(&b->dev, b->legacy_io);
+		device_remove_bin_file(&b->dev, b->legacy_mem);
+		kfree(b->legacy_io); /* both are allocated here */
+	}
 }
 #endif /* HAVE_PCI_LEGACY */
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 39684c1415c5..b205ab866a1d 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -82,7 +82,13 @@ static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; }
 /* Functions for PCI Hotplug drivers to use */
 extern unsigned int pci_do_scan_bus(struct pci_bus *bus);
 
+#ifdef HAVE_PCI_LEGACY
+extern void pci_create_legacy_files(struct pci_bus *bus);
 extern void pci_remove_legacy_files(struct pci_bus *bus);
+#else
+static inline void pci_create_legacy_files(struct pci_bus *bus) { return; }
+static inline void pci_remove_legacy_files(struct pci_bus *bus) { return; }
+#endif
 
 /* Lock for read/write access to pci device and bus lists */
 extern struct rw_semaphore pci_bus_sem;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index afb9f1c0bc28..aaaf0a1fed22 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -42,72 +42,6 @@ int no_pci_devices(void)
 }
 EXPORT_SYMBOL(no_pci_devices);
 
-#ifdef HAVE_PCI_LEGACY
-/**
- * pci_create_legacy_files - create legacy I/O port and memory files
- * @b: bus to create files under
- *
- * Some platforms allow access to legacy I/O port and ISA memory space on
- * a per-bus basis.  This routine creates the files and ties them into
- * their associated read, write and mmap files from pci-sysfs.c
- *
- * On error unwind, but don't propogate the error to the caller
- * as it is ok to set up the PCI bus without these files.
- */
-static void pci_create_legacy_files(struct pci_bus *b)
-{
-	int error;
-
-	b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
-			       GFP_ATOMIC);
-	if (!b->legacy_io)
-		goto kzalloc_err;
-
-	b->legacy_io->attr.name = "legacy_io";
-	b->legacy_io->size = 0xffff;
-	b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
-	b->legacy_io->read = pci_read_legacy_io;
-	b->legacy_io->write = pci_write_legacy_io;
-	error = device_create_bin_file(&b->dev, b->legacy_io);
-	if (error)
-		goto legacy_io_err;
-
-	/* Allocated above after the legacy_io struct */
-	b->legacy_mem = b->legacy_io + 1;
-	b->legacy_mem->attr.name = "legacy_mem";
-	b->legacy_mem->size = 1024*1024;
-	b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
-	b->legacy_mem->mmap = pci_mmap_legacy_mem;
-	error = device_create_bin_file(&b->dev, b->legacy_mem);
-	if (error)
-		goto legacy_mem_err;
-
-	return;
-
-legacy_mem_err:
-	device_remove_bin_file(&b->dev, b->legacy_io);
-legacy_io_err:
-	kfree(b->legacy_io);
-	b->legacy_io = NULL;
-kzalloc_err:
-	printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
-	       "and ISA memory resources to sysfs\n");
-	return;
-}
-
-void pci_remove_legacy_files(struct pci_bus *b)
-{
-	if (b->legacy_io) {
-		device_remove_bin_file(&b->dev, b->legacy_io);
-		device_remove_bin_file(&b->dev, b->legacy_mem);
-		kfree(b->legacy_io); /* both are allocated here */
-	}
-}
-#else /* !HAVE_PCI_LEGACY */
-static inline void pci_create_legacy_files(struct pci_bus *bus) { return; }
-void pci_remove_legacy_files(struct pci_bus *bus) { return; }
-#endif /* HAVE_PCI_LEGACY */
-
 /*
  * PCI Bus Class Devices
  */
-- 
GitLab


From e9f82cb75096ae30658a72d473bf170bf4d3bb2e Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 14 Oct 2008 11:55:31 +1100
Subject: [PATCH 869/895] powerpc/PCI: Add legacy PCI access via sysfs

This patch adds support for legacy_io and legacy_mem files in
bus class directories in sysfs for powerpc

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/powerpc/include/asm/pci-bridge.h |   7 ++
 arch/powerpc/include/asm/pci.h        |  11 +++
 arch/powerpc/kernel/pci-common.c      | 136 +++++++++++++++++++++++++-
 3 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index ae2ea803a0f2..9047af7baa69 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -74,6 +74,13 @@ struct pci_controller {
 	unsigned long pci_io_size;
 #endif
 
+	/* Some machines have a special region to forward the ISA
+	 * "memory" cycles such as VGA memory regions. Left to 0
+	 * if unsupported
+	 */
+	resource_size_t	isa_mem_phys;
+	resource_size_t	isa_mem_size;
+
 	struct pci_ops *ops;
 	unsigned int __iomem *cfg_addr;
 	void __iomem *cfg_data;
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 0e52c7828ea4..39d547fde956 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -123,6 +123,16 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
 /* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
 #define HAVE_PCI_MMAP	1
 
+extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
+			   size_t count);
+extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
+			   size_t count);
+extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
+				      struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state);
+
+#define HAVE_PCI_LEGACY	1
+
 #if defined(CONFIG_PPC64) || defined(CONFIG_NOT_COHERENT_CACHE)
 /*
  * For 64-bit kernels, pci_unmap_{single,page} is not a nop.
@@ -226,5 +236,6 @@ extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
 extern void pcibios_do_bus_setup(struct pci_bus *bus);
 extern void pcibios_fixup_of_probed_bus(struct pci_bus *bus);
 
+
 #endif	/* __KERNEL__ */
 #endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 01ce8c38bae6..3815d84a1ef4 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -451,7 +451,8 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
 		pci_dev_put(pdev);
 	}
 
-	DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
+	DBG("non-PCI map for %llx, prot: %lx\n",
+	    (unsigned long long)offset, prot);
 
 	return __pgprot(prot);
 }
@@ -490,6 +491,131 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 	return ret;
 }
 
+/* This provides legacy IO read access on a bus */
+int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
+{
+	unsigned long offset;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct resource *rp = &hose->io_resource;
+	void __iomem *addr;
+
+	/* Check if port can be supported by that bus. We only check
+	 * the ranges of the PHB though, not the bus itself as the rules
+	 * for forwarding legacy cycles down bridges are not our problem
+	 * here. So if the host bridge supports it, we do it.
+	 */
+	offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+	offset += port;
+
+	if (!(rp->flags & IORESOURCE_IO))
+		return -ENXIO;
+	if (offset < rp->start || (offset + size) > rp->end)
+		return -ENXIO;
+	addr = hose->io_base_virt + port;
+
+	switch(size) {
+	case 1:
+		*((u8 *)val) = in_8(addr);
+		return 1;
+	case 2:
+		if (port & 1)
+			return -EINVAL;
+		*((u16 *)val) = in_le16(addr);
+		return 2;
+	case 4:
+		if (port & 3)
+			return -EINVAL;
+		*((u32 *)val) = in_le32(addr);
+		return 4;
+	}
+	return -EINVAL;
+}
+
+/* This provides legacy IO write access on a bus */
+int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
+{
+	unsigned long offset;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct resource *rp = &hose->io_resource;
+	void __iomem *addr;
+
+	/* Check if port can be supported by that bus. We only check
+	 * the ranges of the PHB though, not the bus itself as the rules
+	 * for forwarding legacy cycles down bridges are not our problem
+	 * here. So if the host bridge supports it, we do it.
+	 */
+	offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+	offset += port;
+
+	if (!(rp->flags & IORESOURCE_IO))
+		return -ENXIO;
+	if (offset < rp->start || (offset + size) > rp->end)
+		return -ENXIO;
+	addr = hose->io_base_virt + port;
+
+	/* WARNING: The generic code is idiotic. It gets passed a pointer
+	 * to what can be a 1, 2 or 4 byte quantity and always reads that
+	 * as a u32, which means that we have to correct the location of
+	 * the data read within those 32 bits for size 1 and 2
+	 */
+	switch(size) {
+	case 1:
+		out_8(addr, val >> 24);
+		return 1;
+	case 2:
+		if (port & 1)
+			return -EINVAL;
+		out_le16(addr, val >> 16);
+		return 2;
+	case 4:
+		if (port & 3)
+			return -EINVAL;
+		out_le32(addr, val);
+		return 4;
+	}
+	return -EINVAL;
+}
+
+/* This provides legacy IO or memory mmap access on a bus */
+int pci_mmap_legacy_page_range(struct pci_bus *bus,
+			       struct vm_area_struct *vma,
+			       enum pci_mmap_state mmap_state)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	resource_size_t offset =
+		((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
+	resource_size_t size = vma->vm_end - vma->vm_start;
+	struct resource *rp;
+
+	pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n",
+		 pci_domain_nr(bus), bus->number,
+		 mmap_state == pci_mmap_mem ? "MEM" : "IO",
+		 (unsigned long long)offset,
+		 (unsigned long long)(offset + size - 1));
+
+	if (mmap_state == pci_mmap_mem) {
+		if ((offset + size) > hose->isa_mem_size)
+			return -ENXIO;
+		offset += hose->isa_mem_phys;
+	} else {
+		unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+		unsigned long roffset = offset + io_offset;
+		rp = &hose->io_resource;
+		if (!(rp->flags & IORESOURCE_IO))
+			return -ENXIO;
+		if (roffset < rp->start || (roffset + size) > rp->end)
+			return -ENXIO;
+		offset += hose->io_base_phys;
+	}
+	pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset);
+
+	vma->vm_pgoff = offset >> PAGE_SHIFT;
+	vma->vm_page_prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot);
+}
+
 void pci_resource_to_user(const struct pci_dev *dev, int bar,
 			  const struct resource *rsrc,
 			  resource_size_t *start, resource_size_t *end)
@@ -592,6 +718,12 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
 		cpu_addr = of_translate_address(dev, ranges + 3);
 		size = of_read_number(ranges + pna + 3, 2);
 		ranges += np;
+
+		/* If we failed translation or got a zero-sized region
+		 * (some FW try to feed us with non sensical zero sized regions
+		 * such as power3 which look like some kind of attempt at exposing
+		 * the VGA memory hole)
+		 */
 		if (cpu_addr == OF_BAD_ADDR || size == 0)
 			continue;
 
@@ -665,6 +797,8 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
 				isa_hole = memno;
 				if (primary || isa_mem_base == 0)
 					isa_mem_base = cpu_addr;
+				hose->isa_mem_phys = cpu_addr;
+				hose->isa_mem_size = size;
 			}
 
 			/* We get the PCI/Mem offset from the first range or
-- 
GitLab


From aa42d7c6138afdc54f74e971456a0fbfec16b77b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Sun, 28 Sep 2008 16:36:11 -0700
Subject: [PATCH 870/895] PCI: introduce an pci_ioremap(pdev, barnr) function

A common thing in many PCI drivers is to ioremap() an entire bar.  This
is a slightly fragile thing right now, needing both an address and a
size, and many driver writers do.. various things there.

This patch introduces an pci_ioremap() function taking just a PCI device
struct and the bar number as arguments, and figures this all out itself,
in one place.  In addition, we can add various sanity checks to this
function (the patch already checks to make sure that the bar in question
really is a MEM bar; few to no drivers do that sort of thing).

Hopefully with this type of API we get less chance of mistakes in
drivers with ioremap() operations.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7e9a1f0715e6..46ad282ffe4d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1119,5 +1119,18 @@ static inline void pci_mmcfg_early_init(void) { }
 static inline void pci_mmcfg_late_init(void) { }
 #endif
 
+static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar)
+{
+	/*
+	 * Make sure the BAR is actually a memory resource, not an IO resource
+	 */
+	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
+		WARN_ON(1);
+		return NULL;
+	}
+	return ioremap_nocache(pci_resource_start(pdev, bar),
+				     pci_resource_len(pdev, bar));
+}
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
GitLab


From e5665a45fa28d0114f61b5d534a3b2678592219d Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <cebbert@redhat.com>
Date: Wed, 24 Sep 2008 20:40:34 -0400
Subject: [PATCH 871/895] PCI: document the pcie_aspm kernel parameter

It can be handy so make sure people know about it.

Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/kernel-parameters.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 08fbc8372ba4..53ba7c7d82b3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1678,6 +1678,12 @@ and is between 256 and 4096 characters. It is defined in the file
 				reserved for the CardBus bridge's memory
 				window. The default value is 64 megabytes.
 
+	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
+			Management.
+		off	Disable ASPM.
+		force	Enable ASPM even on devices that claim not to support it.
+			WARNING: Forcing ASPM on may cause system lockups.
+
 	pcmv=		[HW,PCMCIA] BadgePAD 4
 
 	pd.		[PARIDE]
-- 
GitLab


From bd1d9855be3ab8a5c2b31053d464b7fe63e6963b Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Mon, 29 Sep 2008 17:37:05 +0900
Subject: [PATCH 872/895] PCI hotplug: fix get_##name return value problem

The commit 356a9d6f3dd283f83861adf1ac909879f0e66411 (PCI: fix hotplug
get_##name return value problem) doesn't seem to be merged properly.
Because of this, PCI hotplug no longer works (Read/Write PCI hotplug
files always returns -ENODEV).

This patch fixes wrong check of try_module_get() return value check in
get_##name().

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pci_hotplug_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 27d2b6fe5d53..2e6c4474644e 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -102,7 +102,7 @@ static int get_##name (struct hotplug_slot *slot, type *value)		\
 {									\
 	struct hotplug_slot_ops *ops = slot->ops;			\
 	int retval = 0;							\
-	if (try_module_get(ops->owner))					\
+	if (!try_module_get(ops->owner))				\
 		return -ENODEV;						\
 	if (ops->get_##name)						\
 		retval = ops->get_##name(slot, value);			\
-- 
GitLab


From 1543c90c39360df333a21bfbbdfe812ae23b8167 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Sat, 18 Oct 2008 17:19:38 -0700
Subject: [PATCH 873/895] PCI: remove #ifdef DEBUG around dev_dbg call

No longer needed since we don't use the function symbol stuff anymore.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/quirks.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 9575fc8f87b3..bbf66ea8fd87 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1934,9 +1934,7 @@ static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_f
 	while (f < end) {
 		if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
  		    (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
-#ifdef DEBUG
 			dev_dbg(&dev->dev, "calling %pF\n", f->hook);
-#endif
 			f->hook(dev);
 		}
 		f++;
-- 
GitLab


From 0927678f55c9a50c296f7e6dae85e87b8236e155 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Sat, 18 Oct 2008 17:33:19 -0700
Subject: [PATCH 874/895] PCI: use pci_find_ext_capability everywhere

Remove some open coded (and buggy) versions of pci_find_ext_capability
in favor of the real routine in the PCI core.

Tested-by: Tomasz Czernecki <czernecki@gmail.com>
Acked-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pcie/aer/aerdrv.c      |  6 ++--
 drivers/pci/pcie/aer/aerdrv_core.c | 47 +++++++-----------------------
 drivers/pci/pcie/portdrv_core.c    | 23 ++++-----------
 drivers/scsi/qla2xxx/qla_os.c      |  5 ++--
 include/linux/aer.h                |  4 ---
 5 files changed, 20 insertions(+), 65 deletions(-)

diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 77036f46acfe..e390707661dd 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -105,7 +105,7 @@ static irqreturn_t aer_irq(int irq, void *context)
 	unsigned long flags;
 	int pos;
 
-	pos = pci_find_aer_capability(pdev->port);
+	pos = pci_find_ext_capability(pdev->port, PCI_EXT_CAP_ID_ERR);
 	/*
 	 * Must lock access to Root Error Status Reg, Root Error ID Reg,
 	 * and Root error producer/consumer index
@@ -252,7 +252,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 	u32 status;
 	int pos;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 
 	/* Disable Root's interrupt in response to error messages */
 	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 0);
@@ -316,7 +316,7 @@ static void aer_error_resume(struct pci_dev *dev)
 	pci_write_config_word(dev, pos + PCI_EXP_DEVSTA, reg16);
 
 	/* Clean AER Root Error Status */
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
 	if (dev->error_state == pci_channel_io_normal)
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index ee5e7b5176d0..1ff21f6045d6 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -28,36 +28,6 @@
 static int forceload;
 module_param(forceload, bool, 0);
 
-#define PCI_CFG_SPACE_SIZE	(0x100)
-int pci_find_aer_capability(struct pci_dev *dev)
-{
-	int pos;
-	u32 reg32 = 0;
-
-	/* Check if it's a pci-express device */
-	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
-	if (!pos)
-		return 0;
-
-	/* Check if it supports pci-express AER */
-	pos = PCI_CFG_SPACE_SIZE;
-	while (pos) {
-		if (pci_read_config_dword(dev, pos, &reg32))
-			return 0;
-
-		/* some broken boards return ~0 */
-		if (reg32 == 0xffffffff)
-			return 0;
-
-		if (PCI_EXT_CAP_ID(reg32) == PCI_EXT_CAP_ID_ERR)
-			break;
-
-		pos = reg32 >> 20;
-	}
-
-	return pos;
-}
-
 int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
 	u16 reg16 = 0;
@@ -67,6 +37,10 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 	if (!pos)
 		return -EIO;
 
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		return -EIO;
+
 	pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
 	reg16 = reg16 |
 		PCI_EXP_DEVCTL_CERE |
@@ -102,7 +76,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 	int pos;
 	u32 status, mask;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
 
@@ -123,7 +97,7 @@ int pci_cleanup_aer_correct_error_status(struct pci_dev *dev)
 	int pos;
 	u32 status;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
 
@@ -502,7 +476,7 @@ static void handle_error_source(struct pcie_device * aerdev,
 		 * Correctable error does not need software intevention.
 		 * No need to go through error recovery process.
 		 */
-		pos = pci_find_aer_capability(dev);
+		pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 		if (pos)
 			pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
 					info.status);
@@ -542,7 +516,7 @@ void aer_enable_rootport(struct aer_rpc *rpc)
 	reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK);
 	pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16);
 
-	aer_pos = pci_find_aer_capability(pdev);
+	aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
 	/* Clear error status */
 	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
 	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
@@ -579,7 +553,7 @@ static void disable_root_aer(struct aer_rpc *rpc)
 	u32 reg32;
 	int pos;
 
-	pos = pci_find_aer_capability(pdev);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
 	/* Disable Root's interrupt in response to error messages */
 	pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0);
 
@@ -618,7 +592,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
 {
 	int pos;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 
 	/* The device might not support AER */
 	if (!pos)
@@ -755,7 +729,6 @@ int aer_init(struct pcie_device *dev)
 	return AER_SUCCESS;
 }
 
-EXPORT_SYMBOL_GPL(pci_find_aer_capability);
 EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
 EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
 EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 890f0d2b370a..2e091e014829 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -195,24 +195,11 @@ static int get_port_device_capability(struct pci_dev *dev)
 	/* PME Capable - root port capability */
 	if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT)
 		services |= PCIE_PORT_SERVICE_PME;
-	
-	pos = PCI_CFG_SPACE_SIZE;
-	while (pos) {
-		pci_read_config_dword(dev, pos, &reg32);
-		switch (reg32 & 0xffff) {
-		case PCI_EXT_CAP_ID_ERR:
-			services |= PCIE_PORT_SERVICE_AER;
-			pos = reg32 >> 20;
-			break;
-		case PCI_EXT_CAP_ID_VC:
-			services |= PCIE_PORT_SERVICE_VC;
-			pos = reg32 >> 20;
-			break;
-		default:
-			pos = 0;
-			break;
-		}
-	}
+
+	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
+		services |= PCIE_PORT_SERVICE_AER;
+	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC))
+		services |= PCIE_PORT_SERVICE_VC;
 
 	return services;
 }
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 2aed4721c0d0..21dd182ad512 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1566,9 +1566,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 			goto probe_out;
 	}
 
-	if (pci_find_aer_capability(pdev))
-		if (pci_enable_pcie_error_reporting(pdev))
-			goto probe_out;
+	/* This may fail but that's ok */
+	pci_enable_pcie_error_reporting(pdev);
 
 	host = scsi_host_alloc(sht, sizeof(scsi_qla_host_t));
 	if (host == NULL) {
diff --git a/include/linux/aer.h b/include/linux/aer.h
index f2518141de88..a2383a72356a 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -18,10 +18,6 @@ static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
 	return -EINVAL;
 }
-static inline int pci_find_aer_capability(struct pci_dev *dev)
-{
-	return 0;
-}
 static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev)
 {
 	return -EINVAL;
-- 
GitLab


From 270c66be9b4a6f2be53ef3aec5dc8e7b07782ec9 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Sun, 19 Oct 2008 20:35:20 +0800
Subject: [PATCH 875/895] PCI: fix AER capability check

The 'use pci_find_ext_capability everywhere' cleanup brought a new bug,
which makes the AER stop working.  Fix it by actually using find_ext_cap
instead of just find_cap.  Drop the unused config space size define while
we're at it.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/PCI/pcieaer-howto.txt | 11 +++--------
 drivers/pci/pcie/aer/aerdrv_core.c  |  4 ++--
 drivers/pci/pcie/portdrv.h          |  1 -
 include/linux/aer.h                 |  1 -
 4 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt
index 16c251230c82..ddeb14beacc8 100644
--- a/Documentation/PCI/pcieaer-howto.txt
+++ b/Documentation/PCI/pcieaer-howto.txt
@@ -203,22 +203,17 @@ to mmio_enabled.
 
 3.3 helper functions
 
-3.3.1 int pci_find_aer_capability(struct pci_dev *dev);
-pci_find_aer_capability locates the PCI Express AER capability
-in the device configuration space. If the device doesn't support
-PCI-Express AER, the function returns 0.
-
-3.3.2 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
+3.3.1 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
 pci_enable_pcie_error_reporting enables the device to send error
 messages to root port when an error is detected. Note that devices
 don't enable the error reporting by default, so device drivers need
 call this function to enable it.
 
-3.3.3 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
+3.3.2 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 pci_disable_pcie_error_reporting disables the device to send error
 messages to root port when an error is detected.
 
-3.3.4 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
+3.3.3 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
 pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
 error status register.
 
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 1ff21f6045d6..dfc63d01f20a 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -33,11 +33,11 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 	u16 reg16 = 0;
 	int pos;
 
-	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
 
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
 	if (!pos)
 		return -EIO;
 
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 3656e0349dd1..2529f3f2ea5a 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -25,7 +25,6 @@
 #define PCIE_CAPABILITIES_REG		0x2
 #define PCIE_SLOT_CAPABILITIES_REG	0x14
 #define PCIE_PORT_DEVICE_MAXSERVICES	4
-#define PCI_CFG_SPACE_SIZE		256
 
 #define get_descriptor_id(type, service) (((type - 4) << 4) | service)
 
diff --git a/include/linux/aer.h b/include/linux/aer.h
index a2383a72356a..f7df1eefc107 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -10,7 +10,6 @@
 #if defined(CONFIG_PCIEAER)
 /* pci-e port driver needs this function to enable aer */
 extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
-extern int pci_find_aer_capability(struct pci_dev *dev);
 extern int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
 #else
-- 
GitLab


From db7a6d8d01b21829d28638258cbbc9553210bac1 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 20 Oct 2008 11:24:31 -0700
Subject: [PATCH 876/895] Update .gitignore files for generated targets

The generated 'capflags.c' file wasn't properly ignored, and the list of
files in scripts/basic/ wasn't up-to-date.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/cpu/.gitignore | 1 +
 scripts/basic/.gitignore       | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/.gitignore

diff --git a/arch/x86/kernel/cpu/.gitignore b/arch/x86/kernel/cpu/.gitignore
new file mode 100644
index 000000000000..667df55a4399
--- /dev/null
+++ b/arch/x86/kernel/cpu/.gitignore
@@ -0,0 +1 @@
+capflags.c
diff --git a/scripts/basic/.gitignore b/scripts/basic/.gitignore
index 7304e19782c7..bf8b199ec598 100644
--- a/scripts/basic/.gitignore
+++ b/scripts/basic/.gitignore
@@ -1,3 +1,3 @@
+hash
 fixdep
-split-include
 docproc
-- 
GitLab


From 96499871f45b9126157b1a5c512d6e30f1635225 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 20 Oct 2008 19:45:43 +0200
Subject: [PATCH 877/895] PCI: fix pci_ioremap_bar() on s390

s390 doesn't have ioremap_*, so protect the definition of the new
pci_ioremap_bar function with CONFIG_HAS_IOMEM to avoid build breakage.

Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 46ad282ffe4d..085187be29c7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1119,6 +1119,7 @@ static inline void pci_mmcfg_early_init(void) { }
 static inline void pci_mmcfg_late_init(void) { }
 #endif
 
+#ifdef CONFIG_HAS_IOMEM
 static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar)
 {
 	/*
@@ -1131,6 +1132,7 @@ static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar)
 	return ioremap_nocache(pci_resource_start(pdev, bar),
 				     pci_resource_len(pdev, bar));
 }
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
GitLab


From 0d468300dc97d6aec084799ffe39253ac366f1e4 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 20 Oct 2008 11:32:09 -0700
Subject: [PATCH 878/895] Remove empty imacfb.c file

Commit 7c08c9ae0c145807c0dae4a55f240fa3d4fd5262 ("efifb/imacfb
consolidation + hardware support") claimed to remove imacfb entirely and
merge its DMI table into the efifb driver.  So far so good, but the diff
actually ended up just generating an empty file instead of removing it.

[ Technical reason: the patch header looked like

  diff -puN drivers/video/imacfb.c~efifb-imacfb-consolidation-hardware-support drivers/video/imacfb.c
  --- a/drivers/video/imacfb.c~efifb-imacfb-consolidation-hardware-support
  +++ a/drivers/video/imacfb.c
  @@ -1,376 +0,0 @@

  which git will think is a truncation, not a delete.  Git wants to see a
  target of /dev/null to consider it a delete. ]

So remove it properly.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/imacfb.c | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 drivers/video/imacfb.c

diff --git a/drivers/video/imacfb.c b/drivers/video/imacfb.c
deleted file mode 100644
index e69de29bb2d1..000000000000
-- 
GitLab


From 6da0b38f4433fb0f24615449d7966471b6e5eae0 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 20 Oct 2008 22:28:45 +0400
Subject: [PATCH 879/895] fs/Kconfig: move ext2, ext3, ext4, JBD, JBD2 out

Use fs/*/Kconfig more, which is good because everything related to one
filesystem is in one place and fs/Kconfig is quite fat.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Kconfig      | 272 +-----------------------------------------------
 fs/ext2/Kconfig |  55 ++++++++++
 fs/ext3/Kconfig |  67 ++++++++++++
 fs/ext4/Kconfig |  79 ++++++++++++++
 fs/jbd/Kconfig  |  30 ++++++
 fs/jbd2/Kconfig |  33 ++++++
 6 files changed, 269 insertions(+), 267 deletions(-)
 create mode 100644 fs/ext2/Kconfig
 create mode 100644 fs/ext3/Kconfig
 create mode 100644 fs/ext4/Kconfig
 create mode 100644 fs/jbd/Kconfig
 create mode 100644 fs/jbd2/Kconfig

diff --git a/fs/Kconfig b/fs/Kconfig
index 4eca61c201f0..e282002b94d2 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,61 +6,9 @@ menu "File systems"
 
 if BLOCK
 
-config EXT2_FS
-	tristate "Second extended fs support"
-	help
-	  Ext2 is a standard Linux file system for hard disks.
-
-	  To compile this file system support as a module, choose M here: the
-	  module will be called ext2.
-
-	  If unsure, say Y.
-
-config EXT2_FS_XATTR
-	bool "Ext2 extended attributes"
-	depends on EXT2_FS
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-config EXT2_FS_POSIX_ACL
-	bool "Ext2 POSIX Access Control Lists"
-	depends on EXT2_FS_XATTR
-	select FS_POSIX_ACL
-	help
-	  Posix Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the Posix ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config EXT2_FS_SECURITY
-	bool "Ext2 Security Labels"
-	depends on EXT2_FS_XATTR
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the ext2 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config EXT2_FS_XIP
-	bool "Ext2 execute in place support"
-	depends on EXT2_FS && MMU
-	help
-	  Execute in place can be used on memory-backed block devices. If you
-	  enable this option, you can select to mount block devices which are
-	  capable of this feature without using the page cache.
-
-	  If you do not use a block device that is capable of using this,
-	  or if unsure, say N.
+source "fs/ext2/Kconfig"
+source "fs/ext3/Kconfig"
+source "fs/ext4/Kconfig"
 
 config FS_XIP
 # execute in place
@@ -68,218 +16,8 @@ config FS_XIP
 	depends on EXT2_FS_XIP
 	default y
 
-config EXT3_FS
-	tristate "Ext3 journalling file system support"
-	select JBD
-	help
-	  This is the journalling version of the Second extended file system
-	  (often called ext3), the de facto standard Linux file system
-	  (method to organize files on a storage device) for hard disks.
-
-	  The journalling code included in this driver means you do not have
-	  to run e2fsck (file system checker) on your file systems after a
-	  crash.  The journal keeps track of any changes that were being made
-	  at the time the system crashed, and can ensure that your file system
-	  is consistent without the need for a lengthy check.
-
-	  Other than adding the journal to the file system, the on-disk format
-	  of ext3 is identical to ext2.  It is possible to freely switch
-	  between using the ext3 driver and the ext2 driver, as long as the
-	  file system has been cleanly unmounted, or e2fsck is run on the file
-	  system.
-
-	  To add a journal on an existing ext2 file system or change the
-	  behavior of ext3 file systems, you can use the tune2fs utility ("man
-	  tune2fs").  To modify attributes of files and directories on ext3
-	  file systems, use chattr ("man chattr").  You need to be using
-	  e2fsprogs version 1.20 or later in order to create ext3 journals
-	  (available at <http://sourceforge.net/projects/e2fsprogs/>).
-
-	  To compile this file system support as a module, choose M here: the
-	  module will be called ext3.
-
-config EXT3_FS_XATTR
-	bool "Ext3 extended attributes"
-	depends on EXT3_FS
-	default y
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-	  You need this for POSIX ACL support on ext3.
-
-config EXT3_FS_POSIX_ACL
-	bool "Ext3 POSIX Access Control Lists"
-	depends on EXT3_FS_XATTR
-	select FS_POSIX_ACL
-	help
-	  Posix Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the Posix ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config EXT3_FS_SECURITY
-	bool "Ext3 Security Labels"
-	depends on EXT3_FS_XATTR
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the ext3 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config EXT4_FS
-	tristate "The Extended 4 (ext4) filesystem"
-	select JBD2
-	select CRC16
-	help
-	  This is the next generation of the ext3 filesystem.
-
-	  Unlike the change from ext2 filesystem to ext3 filesystem,
-	  the on-disk format of ext4 is not forwards compatible with
-	  ext3; it is based on extent maps and it supports 48-bit
-	  physical block numbers.  The ext4 filesystem also supports delayed
-	  allocation, persistent preallocation, high resolution time stamps,
-	  and a number of other features to improve performance and speed
-	  up fsck time.  For more information, please see the web pages at
-	  http://ext4.wiki.kernel.org.
-
-	  The ext4 filesystem will support mounting an ext3
-	  filesystem; while there will be some performance gains from
-	  the delayed allocation and inode table readahead, the best
-	  performance gains will require enabling ext4 features in the
-	  filesystem, or formating a new filesystem as an ext4
-	  filesystem initially.
-
-	  To compile this file system support as a module, choose M here. The
-	  module will be called ext4.
-
-	  If unsure, say N.
-
-config EXT4DEV_COMPAT
-	bool "Enable ext4dev compatibility"
-	depends on EXT4_FS
-	help
-	  Starting with 2.6.28, the name of the ext4 filesystem was
-	  renamed from ext4dev to ext4.  Unfortunately there are some
-	  legacy userspace programs (such as klibc's fstype) have
-	  "ext4dev" hardcoded.
-
-	  To enable backwards compatibility so that systems that are
-	  still expecting to mount ext4 filesystems using ext4dev,
-	  chose Y here.   This feature will go away by 2.6.31, so
-	  please arrange to get your userspace programs fixed!
-
-config EXT4_FS_XATTR
-	bool "Ext4 extended attributes"
-	depends on EXT4_FS
-	default y
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-	  You need this for POSIX ACL support on ext4.
-
-config EXT4_FS_POSIX_ACL
-	bool "Ext4 POSIX Access Control Lists"
-	depends on EXT4_FS_XATTR
-	select FS_POSIX_ACL
-	help
-	  POSIX Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the POSIX ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config EXT4_FS_SECURITY
-	bool "Ext4 Security Labels"
-	depends on EXT4_FS_XATTR
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the ext4 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config JBD
-	tristate
-	help
-	  This is a generic journalling layer for block devices.  It is
-	  currently used by the ext3 file system, but it could also be
-	  used to add journal support to other file systems or block
-	  devices such as RAID or LVM.
-
-	  If you are using the ext3 file system, you need to say Y here.
-	  If you are not using ext3 then you will probably want to say N.
-
-	  To compile this device as a module, choose M here: the module will be
-	  called jbd.  If you are compiling ext3 into the kernel, you
-	  cannot compile this code as a module.
-
-config JBD_DEBUG
-	bool "JBD (ext3) debugging support"
-	depends on JBD && DEBUG_FS
-	help
-	  If you are using the ext3 journaled file system (or potentially any
-	  other file system/device using JBD), this option allows you to
-	  enable debugging output while the system is running, in order to
-	  help track down any problems you are having.  By default the
-	  debugging output will be turned off.
-
-	  If you select Y here, then you will be able to turn on debugging
-	  with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
-	  number between 1 and 5, the higher the number, the more debugging
-	  output is generated.  To turn debugging off again, do
-	  "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
-
-config JBD2
-	tristate
-	select CRC32
-	help
-	  This is a generic journaling layer for block devices that support
-	  both 32-bit and 64-bit block numbers.  It is currently used by
-	  the ext4 and OCFS2 filesystems, but it could also be used to add
-	  journal support to other file systems or block devices such
-	  as RAID or LVM.
-
-	  If you are using ext4 or OCFS2, you need to say Y here.
-	  If you are not using ext4 or OCFS2 then you will
-	  probably want to say N.
-
-	  To compile this device as a module, choose M here. The module will be
-	  called jbd2.  If you are compiling ext4 or OCFS2 into the kernel,
-	  you cannot compile this code as a module.
-
-config JBD2_DEBUG
-	bool "JBD2 (ext4) debugging support"
-	depends on JBD2 && DEBUG_FS
-	help
-	  If you are using the ext4 journaled file system (or
-	  potentially any other filesystem/device using JBD2), this option
-	  allows you to enable debugging output while the system is running,
-	  in order to help track down any problems you are having.
-	  By default, the debugging output will be turned off.
-
-	  If you select Y here, then you will be able to turn on debugging
-	  with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
-	  number between 1 and 5. The higher the number, the more debugging
-	  output is generated.  To turn debugging off again, do
-	  "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
+source "fs/jbd/Kconfig"
+source "fs/jbd2/Kconfig"
 
 config FS_MBCACHE
 # Meta block cache for Extended Attributes (ext2/ext3/ext4)
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
new file mode 100644
index 000000000000..14a6780fd034
--- /dev/null
+++ b/fs/ext2/Kconfig
@@ -0,0 +1,55 @@
+config EXT2_FS
+	tristate "Second extended fs support"
+	help
+	  Ext2 is a standard Linux file system for hard disks.
+
+	  To compile this file system support as a module, choose M here: the
+	  module will be called ext2.
+
+	  If unsure, say Y.
+
+config EXT2_FS_XATTR
+	bool "Ext2 extended attributes"
+	depends on EXT2_FS
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+config EXT2_FS_POSIX_ACL
+	bool "Ext2 POSIX Access Control Lists"
+	depends on EXT2_FS_XATTR
+	select FS_POSIX_ACL
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the Posix ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N
+
+config EXT2_FS_SECURITY
+	bool "Ext2 Security Labels"
+	depends on EXT2_FS_XATTR
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux.  This option
+	  enables an extended attribute handler for file security
+	  labels in the ext2 filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
+
+config EXT2_FS_XIP
+	bool "Ext2 execute in place support"
+	depends on EXT2_FS && MMU
+	help
+	  Execute in place can be used on memory-backed block devices. If you
+	  enable this option, you can select to mount block devices which are
+	  capable of this feature without using the page cache.
+
+	  If you do not use a block device that is capable of using this,
+	  or if unsure, say N.
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig
new file mode 100644
index 000000000000..8e0cfe44b0fc
--- /dev/null
+++ b/fs/ext3/Kconfig
@@ -0,0 +1,67 @@
+config EXT3_FS
+	tristate "Ext3 journalling file system support"
+	select JBD
+	help
+	  This is the journalling version of the Second extended file system
+	  (often called ext3), the de facto standard Linux file system
+	  (method to organize files on a storage device) for hard disks.
+
+	  The journalling code included in this driver means you do not have
+	  to run e2fsck (file system checker) on your file systems after a
+	  crash.  The journal keeps track of any changes that were being made
+	  at the time the system crashed, and can ensure that your file system
+	  is consistent without the need for a lengthy check.
+
+	  Other than adding the journal to the file system, the on-disk format
+	  of ext3 is identical to ext2.  It is possible to freely switch
+	  between using the ext3 driver and the ext2 driver, as long as the
+	  file system has been cleanly unmounted, or e2fsck is run on the file
+	  system.
+
+	  To add a journal on an existing ext2 file system or change the
+	  behavior of ext3 file systems, you can use the tune2fs utility ("man
+	  tune2fs").  To modify attributes of files and directories on ext3
+	  file systems, use chattr ("man chattr").  You need to be using
+	  e2fsprogs version 1.20 or later in order to create ext3 journals
+	  (available at <http://sourceforge.net/projects/e2fsprogs/>).
+
+	  To compile this file system support as a module, choose M here: the
+	  module will be called ext3.
+
+config EXT3_FS_XATTR
+	bool "Ext3 extended attributes"
+	depends on EXT3_FS
+	default y
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+	  You need this for POSIX ACL support on ext3.
+
+config EXT3_FS_POSIX_ACL
+	bool "Ext3 POSIX Access Control Lists"
+	depends on EXT3_FS_XATTR
+	select FS_POSIX_ACL
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the Posix ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N
+
+config EXT3_FS_SECURITY
+	bool "Ext3 Security Labels"
+	depends on EXT3_FS_XATTR
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux.  This option
+	  enables an extended attribute handler for file security
+	  labels in the ext3 filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
new file mode 100644
index 000000000000..7505482a08fa
--- /dev/null
+++ b/fs/ext4/Kconfig
@@ -0,0 +1,79 @@
+config EXT4_FS
+	tristate "The Extended 4 (ext4) filesystem"
+	select JBD2
+	select CRC16
+	help
+	  This is the next generation of the ext3 filesystem.
+
+	  Unlike the change from ext2 filesystem to ext3 filesystem,
+	  the on-disk format of ext4 is not forwards compatible with
+	  ext3; it is based on extent maps and it supports 48-bit
+	  physical block numbers.  The ext4 filesystem also supports delayed
+	  allocation, persistent preallocation, high resolution time stamps,
+	  and a number of other features to improve performance and speed
+	  up fsck time.  For more information, please see the web pages at
+	  http://ext4.wiki.kernel.org.
+
+	  The ext4 filesystem will support mounting an ext3
+	  filesystem; while there will be some performance gains from
+	  the delayed allocation and inode table readahead, the best
+	  performance gains will require enabling ext4 features in the
+	  filesystem, or formating a new filesystem as an ext4
+	  filesystem initially.
+
+	  To compile this file system support as a module, choose M here. The
+	  module will be called ext4.
+
+	  If unsure, say N.
+
+config EXT4DEV_COMPAT
+	bool "Enable ext4dev compatibility"
+	depends on EXT4_FS
+	help
+	  Starting with 2.6.28, the name of the ext4 filesystem was
+	  renamed from ext4dev to ext4.  Unfortunately there are some
+	  legacy userspace programs (such as klibc's fstype) have
+	  "ext4dev" hardcoded.
+
+	  To enable backwards compatibility so that systems that are
+	  still expecting to mount ext4 filesystems using ext4dev,
+	  chose Y here.   This feature will go away by 2.6.31, so
+	  please arrange to get your userspace programs fixed!
+
+config EXT4_FS_XATTR
+	bool "Ext4 extended attributes"
+	depends on EXT4_FS
+	default y
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+	  You need this for POSIX ACL support on ext4.
+
+config EXT4_FS_POSIX_ACL
+	bool "Ext4 POSIX Access Control Lists"
+	depends on EXT4_FS_XATTR
+	select FS_POSIX_ACL
+	help
+	  POSIX Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the POSIX ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N
+
+config EXT4_FS_SECURITY
+	bool "Ext4 Security Labels"
+	depends on EXT4_FS_XATTR
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux.  This option
+	  enables an extended attribute handler for file security
+	  labels in the ext4 filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
new file mode 100644
index 000000000000..4e28beeed157
--- /dev/null
+++ b/fs/jbd/Kconfig
@@ -0,0 +1,30 @@
+config JBD
+	tristate
+	help
+	  This is a generic journalling layer for block devices.  It is
+	  currently used by the ext3 file system, but it could also be
+	  used to add journal support to other file systems or block
+	  devices such as RAID or LVM.
+
+	  If you are using the ext3 file system, you need to say Y here.
+	  If you are not using ext3 then you will probably want to say N.
+
+	  To compile this device as a module, choose M here: the module will be
+	  called jbd.  If you are compiling ext3 into the kernel, you
+	  cannot compile this code as a module.
+
+config JBD_DEBUG
+	bool "JBD (ext3) debugging support"
+	depends on JBD && DEBUG_FS
+	help
+	  If you are using the ext3 journaled file system (or potentially any
+	  other file system/device using JBD), this option allows you to
+	  enable debugging output while the system is running, in order to
+	  help track down any problems you are having.  By default the
+	  debugging output will be turned off.
+
+	  If you select Y here, then you will be able to turn on debugging
+	  with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
+	  number between 1 and 5, the higher the number, the more debugging
+	  output is generated.  To turn debugging off again, do
+	  "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
new file mode 100644
index 000000000000..f32f346f4b0a
--- /dev/null
+++ b/fs/jbd2/Kconfig
@@ -0,0 +1,33 @@
+config JBD2
+	tristate
+	select CRC32
+	help
+	  This is a generic journaling layer for block devices that support
+	  both 32-bit and 64-bit block numbers.  It is currently used by
+	  the ext4 and OCFS2 filesystems, but it could also be used to add
+	  journal support to other file systems or block devices such
+	  as RAID or LVM.
+
+	  If you are using ext4 or OCFS2, you need to say Y here.
+	  If you are not using ext4 or OCFS2 then you will
+	  probably want to say N.
+
+	  To compile this device as a module, choose M here. The module will be
+	  called jbd2.  If you are compiling ext4 or OCFS2 into the kernel,
+	  you cannot compile this code as a module.
+
+config JBD2_DEBUG
+	bool "JBD2 (ext4) debugging support"
+	depends on JBD2 && DEBUG_FS
+	help
+	  If you are using the ext4 journaled file system (or
+	  potentially any other filesystem/device using JBD2), this option
+	  allows you to enable debugging output while the system is running,
+	  in order to help track down any problems you are having.
+	  By default, the debugging output will be turned off.
+
+	  If you select Y here, then you will be able to turn on debugging
+	  with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
+	  number between 1 and 5. The higher the number, the more debugging
+	  output is generated.  To turn debugging off again, do
+	  "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
-- 
GitLab


From f4432c5caec5fa95ea7eefd00f8e6cee17e2e023 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Mon, 20 Oct 2008 13:31:45 -0400
Subject: [PATCH 880/895] Update email addresses.

Update assorted email addresses and related info to point
to a single current, valid address.

additionally
- trivial CREDITS entry updates. (Not that this file means much any more)
- remove arjans dead redhat.com address from powernow driver

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 CREDITS                                     | 12 ++++++------
 MAINTAINERS                                 |  2 +-
 arch/x86/kernel/cpu/cpufreq/longhaul.c      |  4 ++--
 arch/x86/kernel/cpu/cpufreq/powernow-k6.c   |  2 +-
 arch/x86/kernel/cpu/cpufreq/powernow-k7.c   |  4 ++--
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c   |  2 +-
 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c |  2 +-
 arch/x86/kernel/cpu/mcheck/k7.c             |  4 ++--
 arch/x86/kernel/cpu/mcheck/mce_32.c         |  2 +-
 arch/x86/kernel/cpu/mcheck/non-fatal.c      |  2 +-
 drivers/char/agp/ali-agp.c                  |  2 +-
 drivers/char/agp/amd64-agp.c                |  2 +-
 drivers/char/agp/ati-agp.c                  |  2 +-
 drivers/char/agp/backend.c                  |  2 +-
 drivers/char/agp/intel-agp.c                |  2 +-
 drivers/char/agp/nvidia-agp.c               |  2 +-
 drivers/char/agp/via-agp.c                  |  2 +-
 scripts/checkpatch.pl                       |  2 +-
 18 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/CREDITS b/CREDITS
index c62dcb3b7e26..2358846f06be 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1653,14 +1653,14 @@ S: Chapel Hill, North Carolina 27514-4818
 S: USA
 
 N: Dave Jones
-E: davej@codemonkey.org.uk
+E: davej@redhat.com
 W: http://www.codemonkey.org.uk
-D: x86 errata/setup maintenance.
-D: AGPGART driver.
+D: Assorted VIA x86 support.
+D: 2.5 AGPGART overhaul.
 D: CPUFREQ maintenance.
-D: Backport/Forwardport merge monkey.
-D: Various Janitor work.
-S: United Kingdom
+D: Fedora kernel maintainence.
+D: Misc/Other.
+S: 314 Littleton Rd, Westford, MA 01886, USA
 
 N: Martin Josfsson
 E: gandalf@wlug.westbo.se
diff --git a/MAINTAINERS b/MAINTAINERS
index 355c192d6997..5c3f79c26384 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1198,7 +1198,7 @@ S:	Maintained
 
 CPU FREQUENCY DRIVERS
 P:	Dave Jones
-M:	davej@codemonkey.org.uk
+M:	davej@redhat.com
 L:	cpufreq@vger.kernel.org
 W:	http://www.codemonkey.org.uk/projects/cpufreq/
 T:	git kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 06fcce516d51..b0461856acfb 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -1,5 +1,5 @@
 /*
- *  (C) 2001-2004  Dave Jones. <davej@codemonkey.org.uk>
+ *  (C) 2001-2004  Dave Jones. <davej@redhat.com>
  *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
 module_param(revid_errata, int, 0644);
 MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
 MODULE_LICENSE ("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index b5ced806a316..c1ac5790c63e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -246,7 +246,7 @@ static void __exit powernow_k6_exit(void)
 }
 
 
-MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
 MODULE_LICENSE("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 0a61159d7b71..7c7d56b43136 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -1,6 +1,6 @@
 /*
  *  AMD K7 Powernow driver.
- *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs.
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs.
  *  (C) 2003-2004 Dave Jones <davej@redhat.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -692,7 +692,7 @@ static void __exit powernow_exit (void)
 module_param(acpi_force,  int, 0444);
 MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
 MODULE_LICENSE ("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 84bb395038d8..008d23ba491b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -7,7 +7,7 @@
  *  Support : mark.langsdorf@amd.com
  *
  *  Based on the powernow-k7.c module written by Dave Jones.
- *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs
  *  (C) 2004 Dominik Brodowski <linux@brodo.de>
  *  (C) 2004 Pavel Machek <pavel@suse.cz>
  *  Licensed under the terms of the GNU GPL License version 2.
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 191f7263c61d..04d0376b64b0 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -431,7 +431,7 @@ static void __exit speedstep_exit(void)
 }
 
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
 MODULE_LICENSE ("GPL");
 
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index f390c9f66351..dd3af6e7b39a 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -1,6 +1,6 @@
 /*
- * Athlon/Hammer specific Machine Check Exception Reporting
- * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk>
+ * Athlon specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Dave Jones <davej@redhat.com>
  */
 
 #include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 774d87cfd8cd..0ebf3fc6a610 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -1,6 +1,6 @@
 /*
  * mce.c - x86 Machine Check Exception Reporting
- * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
+ * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com>
  */
 
 #include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index cc1fccdd31e0..a74af128efc9 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -1,7 +1,7 @@
 /*
  * Non Fatal Machine Check Exception Reporting
  *
- * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
+ * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
  *
  * This file contains routines to check for non-fatal MCEs every 15s
  *
diff --git a/drivers/char/agp/ali-agp.c b/drivers/char/agp/ali-agp.c
index 31dcd9142d54..dc8d1a90971f 100644
--- a/drivers/char/agp/ali-agp.c
+++ b/drivers/char/agp/ali-agp.c
@@ -417,6 +417,6 @@ static void __exit agp_ali_cleanup(void)
 module_init(agp_ali_init);
 module_exit(agp_ali_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
 
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 2812ee2b165a..52f4361eb6e4 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -772,6 +772,6 @@ module_init(agp_amd64_init);
 module_exit(agp_amd64_cleanup);
 #endif
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>, Andi Kleen");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>, Andi Kleen");
 module_param(agp_try_unsupported, bool, 0);
 MODULE_LICENSE("GPL");
diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c
index ae2791b926b9..f1537eece07f 100644
--- a/drivers/char/agp/ati-agp.c
+++ b/drivers/char/agp/ati-agp.c
@@ -561,6 +561,6 @@ static void __exit agp_ati_cleanup(void)
 module_init(agp_ati_init);
 module_exit(agp_ati_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
 
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 3a3cc03d401c..8c617ad7497f 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -349,7 +349,7 @@ static __init int agp_setup(char *s)
 __setup("agp=", agp_setup);
 #endif
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION("AGP GART driver");
 MODULE_LICENSE("GPL and additional rights");
 MODULE_ALIAS_MISCDEV(AGPGART_MINOR);
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 1108665913e2..9cf6e9bb017e 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -2390,5 +2390,5 @@ static void __exit agp_intel_cleanup(void)
 module_init(agp_intel_init);
 module_exit(agp_intel_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c
index 5bbed3d79db9..16acee2de117 100644
--- a/drivers/char/agp/nvidia-agp.c
+++ b/drivers/char/agp/nvidia-agp.c
@@ -1,7 +1,7 @@
 /*
  * Nvidia AGPGART routines.
  * Based upon a 2.4 agpgart diff by the folks from NVIDIA, and hacked up
- * to work in 2.5 by Dave Jones <davej@codemonkey.org.uk>
+ * to work in 2.5 by Dave Jones <davej@redhat.com>
  */
 
 #include <linux/module.h>
diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index 9f4d49e1b59a..d3bd243867fc 100644
--- a/drivers/char/agp/via-agp.c
+++ b/drivers/char/agp/via-agp.c
@@ -595,4 +595,4 @@ module_init(agp_via_init);
 module_exit(agp_via_cleanup);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e30bac141b21..f88bb3e21cda 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# (c) 2001, Dave Jones. <davej@codemonkey.org.uk> (the file handling bit)
+# (c) 2001, Dave Jones. <davej@redhat.com> (the file handling bit)
 # (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
 # (c) 2007, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite, etc)
 # Licensed under the terms of the GNU GPL License version 2
-- 
GitLab


From f07767fd0f95c385108fa4c456a9cb216a424fec Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 20 Oct 2008 10:23:38 -0700
Subject: [PATCH 881/895] byteorder: remove direct includes of
 linux/byteorder/swab[b].h

A consolidated implementation will provide this generically through
asm/byteorder, remove direct includes to avoid breakage when the
changeover to the new implementation occurs.

This hunk was lost from commit 1d8cca44b6a244b7e378546d719041819049a0f9
("byteorder: provide swabb.h generically in asm/byteorder.h")

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/rcutorture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 90b5b123f7a1..85cb90588a55 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -42,10 +42,10 @@
 #include <linux/freezer.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
-#include <linux/byteorder/swabb.h>
 #include <linux/stat.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <asm/byteorder.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
-- 
GitLab


From 1ae87786800b5e0411847974b211797b6ada63c4 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 20 Oct 2008 14:14:25 -0700
Subject: [PATCH 882/895] Fix sprintf format warnings in drm_proc.c

Use "%zd" for size_t, and make sure to have a space between the numbers
instead of depending on the field width.

I don't like warnings in my default targeted build.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_proc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
index d490db4c0de0..ae73b7f7249a 100644
--- a/drivers/gpu/drm/drm_proc.c
+++ b/drivers/gpu/drm/drm_proc.c
@@ -522,12 +522,12 @@ static int drm_gem_one_name_info(int id, void *ptr, void *data)
 	struct drm_gem_object *obj = ptr;
 	struct drm_gem_name_info_data   *nid = data;
 
-	DRM_INFO("name %d size %d\n", obj->name, obj->size);
+	DRM_INFO("name %d size %zd\n", obj->name, obj->size);
 	if (nid->eof)
 		return 0;
 
 	nid->len += sprintf(&nid->buf[nid->len],
-			    "%6d%9d%8d%9d\n",
+			    "%6d %8zd %7d %8d\n",
 			    obj->name, obj->size,
 			    atomic_read(&obj->handlecount.refcount),
 			    atomic_read(&obj->refcount.refcount));
-- 
GitLab


From 9b7530cc329eb036cfa589930c270e85031f554c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 20 Oct 2008 14:16:43 -0700
Subject: [PATCH 883/895] i915: cleanup coding horrors in i915_gem_gtt_pwrite()

Yes, this will probably be switched over to a cleaner model anyway, but
in the meantime I don't want to see the 'unused variable' warnings that
come from the disgusting #ifdef code.  Make the special case be a nice
inlien function of its own, clean up the code, and make the warning go
away.

I wish people didn't write code that gets (valid) warnings from the
compiler, but I'll limit my fixes to code that I actually care about (in
this case just because I see the warning and it annoys me).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/i915_gem.c | 59 ++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9ac73dd1b422..49c5a1798ac4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -171,6 +171,36 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+/*
+ * Try to write quickly with an atomic kmap. Return true on success.
+ *
+ * If this fails (which includes a partial write), we'll redo the whole
+ * thing with the slow version.
+ *
+ * This is a workaround for the low performance of iounmap (approximate
+ * 10% cpu cost on normal 3D workloads).  kmap_atomic on HIGHMEM kernels
+ * happens to let us map card memory without taking IPIs.  When the vmap
+ * rework lands we should be able to dump this hack.
+ */
+static inline int fast_user_write(unsigned long pfn, char __user *user_data, int l)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long unwritten;
+	char *vaddr_atomic;
+
+	vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
+#if WATCH_PWRITE
+	DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
+		 i, o, l, pfn, vaddr_atomic);
+#endif
+	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l);
+	kunmap_atomic(vaddr_atomic, KM_USER0);
+	return !unwritten;
+#else
+	return 0;
+#endif
+}
+
 static int
 i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 		    struct drm_i915_gem_pwrite *args,
@@ -180,12 +210,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	ssize_t remain;
 	loff_t offset;
 	char __user *user_data;
-	char __iomem *vaddr;
-	char *vaddr_atomic;
-	int i, o, l;
 	int ret = 0;
-	unsigned long pfn;
-	unsigned long unwritten;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -209,6 +234,9 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	obj_priv->dirty = 1;
 
 	while (remain > 0) {
+		unsigned long pfn;
+		int i, o, l;
+
 		/* Operation in this page
 		 *
 		 * i = page number
@@ -223,25 +251,10 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 
 		pfn = (dev->agp->base >> PAGE_SHIFT) + i;
 
-#ifdef CONFIG_HIGHMEM
-		/* This is a workaround for the low performance of iounmap
-		 * (approximate 10% cpu cost on normal 3D workloads).
-		 * kmap_atomic on HIGHMEM kernels happens to let us map card
-		 * memory without taking IPIs.  When the vmap rework lands
-		 * we should be able to dump this hack.
-		 */
-		vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
-#if WATCH_PWRITE
-		DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
-			 i, o, l, pfn, vaddr_atomic);
-#endif
-		unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o,
-							      user_data, l);
-		kunmap_atomic(vaddr_atomic, KM_USER0);
+		if (!fast_user_write(pfn, user_data, l)) {
+			unsigned long unwritten;
+			char __iomem *vaddr;
 
-		if (unwritten)
-#endif /* CONFIG_HIGHMEM */
-		{
 			vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
 #if WATCH_PWRITE
 			DRM_INFO("pwrite slow i %d o %d l %d "
-- 
GitLab


From a9b6148d25f15ddfe9d7a7f3e526fdb64e7cf7da Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 20 Oct 2008 14:23:29 -0700
Subject: [PATCH 884/895] USB: Fix unused label warnings in
 drivers/usb/host/ehci-hcd.c

This gets rid of an annoying warning in ehci-hcd.c when DEBUG isn't
enabled:

    warning: label 'err_debug' defined but not used

by moving it inside the already-existing #ifdef DEBUG, so that it
matches the goto.  And now my regular build is warning-free again.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/usb/host/ehci-hcd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index d343afacb0b0..15a803b206b8 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -1111,8 +1111,8 @@ clean0:
 #ifdef DEBUG
 	debugfs_remove(ehci_debug_root);
 	ehci_debug_root = NULL;
-#endif
 err_debug:
+#endif
 	clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded);
 	return retval;
 }
-- 
GitLab


From 5f41b8cdc6ef33b3432cee36264d628a80398362 Mon Sep 17 00:00:00 2001
From: "Luck, Tony" <tony.luck@intel.com>
Date: Mon, 20 Oct 2008 15:23:40 -0700
Subject: [PATCH 885/895] kexec: fix crash_save_vmcoreinfo_init build problem

This fixes

  kernel/kexec.c: In function 'crash_save_vmcoreinfo_init':
  kernel/kexec.c:1374: error: 'vmlist' undeclared (first use in this function)
  kernel/kexec.c:1374: error: (Each undeclared identifier is reported only once
  kernel/kexec.c:1374: error: for each function it appears in.)
  kernel/kexec.c:1410: error: invalid use of undefined type 'struct vm_struct'
  make[1]: *** [kernel/kexec.o] Error 1

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kexec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/kexec.c b/kernel/kexec.c
index 777ac458ac99..ac0fde7b54d0 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -30,6 +30,7 @@
 #include <linux/pm.h>
 #include <linux/cpu.h>
 #include <linux/console.h>
+#include <linux/vmalloc.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
-- 
GitLab


From f6f286f33e843862c559bfea9281318c4cdec6b0 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 20 Oct 2008 14:41:03 -0700
Subject: [PATCH 886/895] fix WARN() for PPC

powerpc doesn't use the generic WARN_ON infrastructure.  The newly
introduced WARN() as a result didn't print the message, this patch adds
the printk for this specific case.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 0f6dabd4b517..12c07c1866b2 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -41,7 +41,7 @@ extern void warn_slowpath(const char *file, const int line,
 #define __WARN() warn_on_slowpath(__FILE__, __LINE__)
 #define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
 #else
-#define __WARN_printf(arg...) __WARN()
+#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0)
 #endif
 
 #ifndef WARN_ON
-- 
GitLab


From e8848a170fd432bdda176a2d568919d4bba90467 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 21 Oct 2008 00:47:45 +0200
Subject: [PATCH 887/895] fix CONFIG_HIGHMEM compile error in
 drivers/gpu/drm/i915/i915_gem.c

commit 9b7530cc329eb036cfa589930c270e85031f554c ("i915: cleanup coding
horrors in i915_gem_gtt_pwrite()")

broke the i386 build for CONFIG_HIGHMEM=y.

Caught by automatic testing http://www.tglx.de/autoqa-logs/000137-0006-0001.log

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[ My bad. It's the same patch I sent out earlier, nobody noticed then either.. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/i915_gem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 49c5a1798ac4..dc2e6fdb6ca3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -182,7 +182,8 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
  * happens to let us map card memory without taking IPIs.  When the vmap
  * rework lands we should be able to dump this hack.
  */
-static inline int fast_user_write(unsigned long pfn, char __user *user_data, int l)
+static inline int fast_user_write(unsigned long pfn, char __user *user_data,
+				  int l, int o)
 {
 #ifdef CONFIG_HIGHMEM
 	unsigned long unwritten;
@@ -251,7 +252,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 
 		pfn = (dev->agp->base >> PAGE_SHIFT) + i;
 
-		if (!fast_user_write(pfn, user_data, l)) {
+		if (!fast_user_write(pfn, user_data, l, o)) {
 			unsigned long unwritten;
 			char __iomem *vaddr;
 
-- 
GitLab


From 653c03168348ac7aebb969931f87ba281749d7dd Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 20 Oct 2008 16:00:08 -0700
Subject: [PATCH 888/895] misc: replace remaining __FUNCTION__ with __func__

__FUNCTION__ is gcc-specific, use __func__

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/DocBook/kernel-hacking.tmpl | 2 +-
 arch/arm/mach-iop13xx/include/mach/time.h | 4 ++--
 arch/arm/mach-pxa/include/mach/zylonite.h | 4 ++--
 arch/powerpc/include/asm/ptrace.h         | 2 +-
 drivers/net/usb/pegasus.c                 | 4 ++--
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index 4c63e5864160..ae15d55350ec 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -1105,7 +1105,7 @@ static struct block_device_operations opt_fops = {
     </listitem>
     <listitem>
      <para>
-      Function names as strings (__FUNCTION__).
+      Function names as strings (__func__).
      </para>
     </listitem>
     <listitem>
diff --git a/arch/arm/mach-iop13xx/include/mach/time.h b/arch/arm/mach-iop13xx/include/mach/time.h
index 49213d9d7cad..d6d52527589d 100644
--- a/arch/arm/mach-iop13xx/include/mach/time.h
+++ b/arch/arm/mach-iop13xx/include/mach/time.h
@@ -41,7 +41,7 @@ static inline unsigned long iop13xx_core_freq(void)
 		return 1200000000;
 	default:
 		printk("%s: warning unknown frequency, defaulting to 800Mhz\n",
-			__FUNCTION__);
+			__func__);
 	}
 
 	return 800000000;
@@ -60,7 +60,7 @@ static inline unsigned long iop13xx_xsi_bus_ratio(void)
 		return 4;
 	default:
 		printk("%s: warning unknown ratio, defaulting to 2\n",
-			__FUNCTION__);
+			__func__);
 	}
 
 	return 2;
diff --git a/arch/arm/mach-pxa/include/mach/zylonite.h b/arch/arm/mach-pxa/include/mach/zylonite.h
index 0d35ca04731e..bf6785adccf4 100644
--- a/arch/arm/mach-pxa/include/mach/zylonite.h
+++ b/arch/arm/mach-pxa/include/mach/zylonite.h
@@ -30,7 +30,7 @@ extern void zylonite_pxa300_init(void);
 static inline void zylonite_pxa300_init(void)
 {
 	if (cpu_is_pxa300() || cpu_is_pxa310())
-		panic("%s: PXA300/PXA310 not supported\n", __FUNCTION__);
+		panic("%s: PXA300/PXA310 not supported\n", __func__);
 }
 #endif
 
@@ -40,7 +40,7 @@ extern void zylonite_pxa320_init(void);
 static inline void zylonite_pxa320_init(void)
 {
 	if (cpu_is_pxa320())
-		panic("%s: PXA320 not supported\n", __FUNCTION__);
+		panic("%s: PXA320 not supported\n", __func__);
 }
 #endif
 
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 734e0754fb9b..280a90cc9894 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -129,7 +129,7 @@ extern int ptrace_put_reg(struct task_struct *task, int regno,
 #define CHECK_FULL_REGS(regs)						      \
 do {									      \
 	if ((regs)->trap & 1)						      \
-		printk(KERN_CRIT "%s: partial register set\n", __FUNCTION__); \
+		printk(KERN_CRIT "%s: partial register set\n", __func__); \
 } while (0)
 #endif /* __powerpc64__ */
 
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 38b90e7a7ed3..7914867110ed 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -168,7 +168,7 @@ static int get_registers(pegasus_t * pegasus, __u16 indx, __u16 size,
 			netif_device_detach(pegasus->net);
 		if (netif_msg_drv(pegasus) && printk_ratelimit())
 			dev_err(&pegasus->intf->dev, "%s, status %d\n",
-					__FUNCTION__, ret);
+					__func__, ret);
 		goto out;
 	}
 
@@ -192,7 +192,7 @@ static int set_registers(pegasus_t * pegasus, __u16 indx, __u16 size,
 	if (!buffer) {
 		if (netif_msg_drv(pegasus))
 			dev_warn(&pegasus->intf->dev, "out of memory in %s\n",
-					__FUNCTION__);
+					__func__);
 		return -ENOMEM;
 	}
 	memcpy(buffer, data, size);
-- 
GitLab


From e798ba57e9f423dddbf1bdeb20a62bdd0593890f Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 21 Oct 2008 00:04:04 +0100
Subject: [PATCH 889/895] Export tiny shmem_file_setup for DRM-GEM

We're trying to keep the !CONFIG_SHMEM tiny-shmem.c (using ramfs without
swap) in synch with CONFIG_SHMEM shmem.c (and mpm is preparing patches
to combine them).  I was glad to see EXPORT_SYMBOL_GPL(shmem_file_setup)
go into shmem.c, but why not support DRM-GEM when !CONFIG_SHMEM too?
But caution says still depend on MMU, since !CONFIG_MMU is.. different.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Acked-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/Kconfig | 2 +-
 mm/tiny-shmem.c         | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 9097500de5f4..a8b33c2ec8d2 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -6,7 +6,7 @@
 #
 menuconfig DRM
 	tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
-	depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG && SHMEM
+	depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG && MMU
 	help
 	  Kernel-level support for the Direct Rendering Infrastructure (DRI)
 	  introduced in XFree86 4.0. If you say Y here, you need to select
diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c
index 8d7a27a6335c..3e67d575ee6e 100644
--- a/mm/tiny-shmem.c
+++ b/mm/tiny-shmem.c
@@ -95,6 +95,7 @@ put_dentry:
 put_memory:
 	return ERR_PTR(error);
 }
+EXPORT_SYMBOL_GPL(shmem_file_setup);
 
 /**
  * shmem_zero_setup - setup a shared anonymous mapping
-- 
GitLab


From a50c22eed593f474e75f693381e4d42e81762de8 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Tue, 21 Oct 2008 06:43:33 +0800
Subject: [PATCH 890/895] mm: remove duplicated #include's

Removed duplicated #include <linux/vmalloc.h> in mm/vmalloc.c and
"internal.h" in mm/memory.c.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c  | 2 --
 mm/vmalloc.c | 1 -
 2 files changed, 3 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 3a6c4a658325..164951c47305 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -64,8 +64,6 @@
 
 #include "internal.h"
 
-#include "internal.h"
-
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0797589d51f8..65ae576030da 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -17,7 +17,6 @@
 #include <linux/interrupt.h>
 #include <linux/seq_file.h>
 #include <linux/debugobjects.h>
-#include <linux/vmalloc.h>
 #include <linux/kallsyms.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
-- 
GitLab


From 2515ddc6db8eb49a79f0fe5e67ff09ac7c81eab4 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 21 Oct 2008 12:07:40 +0900
Subject: [PATCH 891/895] binfmt_elf_fdpic: Update for cputime changes.

Commit f06febc96ba8e0af80bcc3eaec0a109e88275fac ("timers: fix itimer/
many thread hang") introduced a new task_cputime interface and
subsequently only converted binfmt_elf over to it.  This results in the
build for binfmt_elf_fdpic blowing up given that p->signal->{u,s}time
have disappeared from underneath us.

Apply the same trivial fix from binfmt_elf to binfmt_elf_fdpic.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf_fdpic.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 0e8367c54624..5b5424cb3391 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1390,20 +1390,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
+		struct task_cputime cputime;
+
 		/*
-		 * This is the record for the group leader.  Add in the
-		 * cumulative times of previous dead threads.  This total
-		 * won't include the time of each live thread whose state
-		 * is included in the core dump.  The final total reported
-		 * to our parent process when it calls wait4 will include
-		 * those sums as well as the little bit more time it takes
-		 * this and each other thread to finish dying after the
-		 * core dump synchronization phase.
+		 * This is the record for the group leader.  It shows the
+		 * group-wide total, not its individual thread total.
 		 */
-		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
-				   &prstatus->pr_utime);
-		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
-				   &prstatus->pr_stime);
+		thread_group_cputime(p, &cputime);
+		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
+		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
-- 
GitLab


From 2f5ad54ea6e2e38156bfb889964deee991f3087a Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 28 Sep 2008 16:20:09 -0700
Subject: [PATCH 892/895] pci: use pci_ioremap_bar() in sound/

Use the newly introduced pci_ioremap_bar() function in sound/.
pci_ioremap_bar() just takes a pci device and a bar number, with the goal
of making it really hard to get wrong, while also having a central place
to stick sanity checks.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/ad1889.c        | 2 +-
 sound/pci/atiixp.c        | 2 +-
 sound/pci/atiixp_modem.c  | 2 +-
 sound/pci/au88x0/au88x0.c | 3 +--
 sound/pci/bt87x.c         | 3 +--
 sound/pci/cs4281.c        | 4 ++--
 sound/pci/cs5530.c        | 2 +-
 sound/pci/hda/hda_intel.c | 2 +-
 sound/pci/mixart/mixart.c | 3 +--
 9 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/sound/pci/ad1889.c b/sound/pci/ad1889.c
index 92f3a976ef2e..a7f38e63303f 100644
--- a/sound/pci/ad1889.c
+++ b/sound/pci/ad1889.c
@@ -932,7 +932,7 @@ snd_ad1889_create(struct snd_card *card,
 		goto free_and_ret;
 
 	chip->bar = pci_resource_start(pci, 0);
-	chip->iobase = ioremap_nocache(chip->bar, pci_resource_len(pci, 0));
+	chip->iobase = pci_ioremap_bar(pci, 0);
 	if (chip->iobase == NULL) {
 		printk(KERN_ERR PFX "unable to reserve region.\n");
 		err = -EBUSY;
diff --git a/sound/pci/atiixp.c b/sound/pci/atiixp.c
index 085a52b8c807..226fe8237d31 100644
--- a/sound/pci/atiixp.c
+++ b/sound/pci/atiixp.c
@@ -1609,7 +1609,7 @@ static int __devinit snd_atiixp_create(struct snd_card *card,
 		return err;
 	}
 	chip->addr = pci_resource_start(pci, 0);
-	chip->remap_addr = ioremap_nocache(chip->addr, pci_resource_len(pci, 0));
+	chip->remap_addr = pci_ioremap_bar(pci, 0);
 	if (chip->remap_addr == NULL) {
 		snd_printk(KERN_ERR "AC'97 space ioremap problem\n");
 		snd_atiixp_free(chip);
diff --git a/sound/pci/atiixp_modem.c b/sound/pci/atiixp_modem.c
index 2f106306c7fe..0e6e5cc1c501 100644
--- a/sound/pci/atiixp_modem.c
+++ b/sound/pci/atiixp_modem.c
@@ -1252,7 +1252,7 @@ static int __devinit snd_atiixp_create(struct snd_card *card,
 		return err;
 	}
 	chip->addr = pci_resource_start(pci, 0);
-	chip->remap_addr = ioremap_nocache(chip->addr, pci_resource_len(pci, 0));
+	chip->remap_addr = pci_ioremap_bar(pci, 0);
 	if (chip->remap_addr == NULL) {
 		snd_printk(KERN_ERR "AC'97 space ioremap problem\n");
 		snd_atiixp_free(chip);
diff --git a/sound/pci/au88x0/au88x0.c b/sound/pci/au88x0/au88x0.c
index 68368e490074..a36d4d1fd419 100644
--- a/sound/pci/au88x0/au88x0.c
+++ b/sound/pci/au88x0/au88x0.c
@@ -180,8 +180,7 @@ snd_vortex_create(struct snd_card *card, struct pci_dev *pci, vortex_t ** rchip)
 	if ((err = pci_request_regions(pci, CARD_NAME_SHORT)) != 0)
 		goto regions_out;
 
-	chip->mmio = ioremap_nocache(pci_resource_start(pci, 0),
-	                             pci_resource_len(pci, 0));
+	chip->mmio = pci_ioremap_bar(pci, 0);
 	if (!chip->mmio) {
 		printk(KERN_ERR "MMIO area remap failed.\n");
 		err = -ENOMEM;
diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c
index 3aa8d973540a..1aa1c0402540 100644
--- a/sound/pci/bt87x.c
+++ b/sound/pci/bt87x.c
@@ -749,8 +749,7 @@ static int __devinit snd_bt87x_create(struct snd_card *card,
 		pci_disable_device(pci);
 		return err;
 	}
-	chip->mmio = ioremap_nocache(pci_resource_start(pci, 0),
-				     pci_resource_len(pci, 0));
+	chip->mmio = pci_ioremap_bar(pci, 0);
 	if (!chip->mmio) {
 		snd_printk(KERN_ERR "cannot remap io memory\n");
 		err = -ENOMEM;
diff --git a/sound/pci/cs4281.c b/sound/pci/cs4281.c
index ef9308f7c45b..192e7842e181 100644
--- a/sound/pci/cs4281.c
+++ b/sound/pci/cs4281.c
@@ -1382,8 +1382,8 @@ static int __devinit snd_cs4281_create(struct snd_card *card,
 	chip->ba0_addr = pci_resource_start(pci, 0);
 	chip->ba1_addr = pci_resource_start(pci, 1);
 
-	chip->ba0 = ioremap_nocache(chip->ba0_addr, pci_resource_len(pci, 0));
-	chip->ba1 = ioremap_nocache(chip->ba1_addr, pci_resource_len(pci, 1));
+	chip->ba0 = pci_ioremap_bar(pci, 0);
+	chip->ba1 = pci_ioremap_bar(pci, 1);
 	if (!chip->ba0 || !chip->ba1) {
 		snd_cs4281_free(chip);
 		return -ENOMEM;
diff --git a/sound/pci/cs5530.c b/sound/pci/cs5530.c
index 7ff8b68e997e..4d9378d81200 100644
--- a/sound/pci/cs5530.c
+++ b/sound/pci/cs5530.c
@@ -132,7 +132,7 @@ static int __devinit snd_cs5530_create(struct snd_card *card,
 	}
 	chip->pci_base = pci_resource_start(pci, 0);
 
-	mem = ioremap_nocache(chip->pci_base, pci_resource_len(pci, 0));
+	mem = pci_ioremap_bar(pci, 0);
 	if (mem == NULL) {
 		kfree(chip);
 		pci_disable_device(pci);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 9f316c1b2790..f080f8ce0ecb 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2158,7 +2158,7 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 	}
 
 	chip->addr = pci_resource_start(pci, 0);
-	chip->remap_addr = ioremap_nocache(chip->addr, pci_resource_len(pci,0));
+	chip->remap_addr = pci_ioremap_bar(pci, 0);
 	if (chip->remap_addr == NULL) {
 		snd_printk(KERN_ERR SFX "ioremap error\n");
 		err = -ENXIO;
diff --git a/sound/pci/mixart/mixart.c b/sound/pci/mixart/mixart.c
index 2d0dce649a64..ae7601f353a7 100644
--- a/sound/pci/mixart/mixart.c
+++ b/sound/pci/mixart/mixart.c
@@ -1314,8 +1314,7 @@ static int __devinit snd_mixart_probe(struct pci_dev *pci,
 	}
 	for (i = 0; i < 2; i++) {
 		mgr->mem[i].phys = pci_resource_start(pci, i);
-		mgr->mem[i].virt = ioremap_nocache(mgr->mem[i].phys,
-						   pci_resource_len(pci, i));
+		mgr->mem[i].virt = pci_ioremap_bar(pci, i);
 		if (!mgr->mem[i].virt) {
 		        printk(KERN_ERR "unable to remap resource 0x%lx\n",
 			       mgr->mem[i].phys);
-- 
GitLab


From 2f1e593d4209d0194f9639c5d11aa91171435963 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Mon, 27 Oct 2008 15:21:19 +0000
Subject: [PATCH 893/895] sound: use a common working email address

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/oss/kahlua.c | 2 +-
 sound/pci/cs5530.c | 2 +-
 sound/sound_core.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/oss/kahlua.c b/sound/oss/kahlua.c
index eb9bc365530d..c180598f1710 100644
--- a/sound/oss/kahlua.c
+++ b/sound/oss/kahlua.c
@@ -1,7 +1,7 @@
 /*
  *	Initialisation code for Cyrix/NatSemi VSA1 softaudio
  *
- *	(C) Copyright 2003 Red Hat Inc <alan@redhat.com>
+ *	(C) Copyright 2003 Red Hat Inc <alan@lxorguk.ukuu.org.uk>
  *
  * XpressAudio(tm) is used on the Cyrix MediaGX (now NatSemi Geode) systems.
  * The older version (VSA1) provides fairly good soundblaster emulation
diff --git a/sound/pci/cs5530.c b/sound/pci/cs5530.c
index 7ff8b68e997e..4ba2db2f4250 100644
--- a/sound/pci/cs5530.c
+++ b/sound/pci/cs5530.c
@@ -2,7 +2,7 @@
  * cs5530.c - Initialisation code for Cyrix/NatSemi VSA1 softaudio
  *
  * 	(C) Copyright 2007 Ash Willis <ashwillis@programmer.net>
- *	(C) Copyright 2003 Red Hat Inc <alan@redhat.com>
+ *	(C) Copyright 2003 Red Hat Inc <alan@lxorguk.ukuu.org.uk>
  *
  * This driver was ported (shamelessly ripped ;) from oss/kahlua.c but I did
  * mess with it a bit. The chip seems to have to have trouble with full duplex
diff --git a/sound/sound_core.c b/sound/sound_core.c
index 4ae07e236b36..ee66d43f848c 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -57,7 +57,7 @@ module_exit(cleanup_soundcore);
 /*
  *	OSS sound core handling. Breaks out sound functions to submodules
  *	
- *	Author:		Alan Cox <alan.cox@linux.org>
+ *	Author:		Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
  *	Fixes:
  *
-- 
GitLab


From e044c39ae258678d6ebb09fccb2a0fdf7ec51847 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 27 Oct 2008 16:56:24 +0100
Subject: [PATCH 894/895] ALSA: hda - Restore default pin configs for realtek
 codecs

Some machines have broken BIOS resume that doesn't restore the default
pin configuration properly, which results in a wrong detection of HP
pin.  This causes a silent speaker output due to missing HP detection.
Related bug: Novell bug#406101
	https://bugzilla.novell.com/show_bug.cgi?id=406101

This patch fixes the issue by saving/restoring the default pin configs
by the driver itself.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 77 +++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index ef4955c73c88..4eceab9bd109 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -307,6 +307,13 @@ struct alc_spec {
 	/* for PLL fix */
 	hda_nid_t pll_nid;
 	unsigned int pll_coef_idx, pll_coef_bit;
+
+#ifdef SND_HDA_NEEDS_RESUME
+#define ALC_MAX_PINS	16
+	unsigned int num_pins;
+	hda_nid_t pin_nids[ALC_MAX_PINS];
+	unsigned int pin_cfgs[ALC_MAX_PINS];
+#endif
 };
 
 /*
@@ -2778,6 +2785,64 @@ static void alc_free(struct hda_codec *codec)
 	codec->spec = NULL; /* to be sure */
 }
 
+#ifdef SND_HDA_NEEDS_RESUME
+static void store_pin_configs(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+	hda_nid_t nid, end_nid;
+
+	end_nid = codec->start_nid + codec->num_nodes;
+	for (nid = codec->start_nid; nid < end_nid; nid++) {
+		unsigned int wid_caps = get_wcaps(codec, nid);
+		unsigned int wid_type =
+			(wid_caps & AC_WCAP_TYPE) >> AC_WCAP_TYPE_SHIFT;
+		if (wid_type != AC_WID_PIN)
+			continue;
+		if (spec->num_pins >= ARRAY_SIZE(spec->pin_nids))
+			break;
+		spec->pin_nids[spec->num_pins] = nid;
+		spec->pin_cfgs[spec->num_pins] =
+			snd_hda_codec_read(codec, nid, 0,
+					   AC_VERB_GET_CONFIG_DEFAULT, 0);
+		spec->num_pins++;
+	}
+}
+
+static void resume_pin_configs(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+	int i;
+
+	for (i = 0; i < spec->num_pins; i++) {
+		hda_nid_t pin_nid = spec->pin_nids[i];
+		unsigned int pin_config = spec->pin_cfgs[i];
+		snd_hda_codec_write(codec, pin_nid, 0,
+				    AC_VERB_SET_CONFIG_DEFAULT_BYTES_0,
+				    pin_config & 0x000000ff);
+		snd_hda_codec_write(codec, pin_nid, 0,
+				    AC_VERB_SET_CONFIG_DEFAULT_BYTES_1,
+				    (pin_config & 0x0000ff00) >> 8);
+		snd_hda_codec_write(codec, pin_nid, 0,
+				    AC_VERB_SET_CONFIG_DEFAULT_BYTES_2,
+				    (pin_config & 0x00ff0000) >> 16);
+		snd_hda_codec_write(codec, pin_nid, 0,
+				    AC_VERB_SET_CONFIG_DEFAULT_BYTES_3,
+				    pin_config >> 24);
+	}
+}
+
+static int alc_resume(struct hda_codec *codec)
+{
+	resume_pin_configs(codec);
+	codec->patch_ops.init(codec);
+	snd_hda_codec_resume_amp(codec);
+	snd_hda_codec_resume_cache(codec);
+	return 0;
+}
+#else
+#define store_pin_configs(codec)
+#endif
+
 /*
  */
 static struct hda_codec_ops alc_patch_ops = {
@@ -2786,6 +2851,9 @@ static struct hda_codec_ops alc_patch_ops = {
 	.init = alc_init,
 	.free = alc_free,
 	.unsol_event = alc_unsol_event,
+#ifdef SND_HDA_NEEDS_RESUME
+	.resume = alc_resume,
+#endif
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 	.check_power_status = alc_check_power_status,
 #endif
@@ -3832,6 +3900,7 @@ static int alc880_parse_auto_config(struct hda_codec *codec)
 	spec->num_mux_defs = 1;
 	spec->input_mux = &spec->private_imux;
 
+	store_pin_configs(codec);
 	return 1;
 }
 
@@ -5250,6 +5319,7 @@ static int alc260_parse_auto_config(struct hda_codec *codec)
 	}
 	spec->num_mixers++;
 
+	store_pin_configs(codec);
 	return 1;
 }
 
@@ -10313,6 +10383,7 @@ static int alc262_parse_auto_config(struct hda_codec *codec)
 	if (err < 0)
 		return err;
 
+	store_pin_configs(codec);
 	return 1;
 }
 
@@ -11447,6 +11518,7 @@ static int alc268_parse_auto_config(struct hda_codec *codec)
 	if (err < 0)
 		return err;
 
+	store_pin_configs(codec);
 	return 1;
 }
 
@@ -12230,6 +12302,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
 	spec->mixers[spec->num_mixers] = alc269_capture_mixer;
 	spec->num_mixers++;
 
+	store_pin_configs(codec);
 	return 1;
 }
 
@@ -13316,6 +13389,7 @@ static int alc861_parse_auto_config(struct hda_codec *codec)
 	spec->mixers[spec->num_mixers] = alc861_capture_mixer;
 	spec->num_mixers++;
 
+	store_pin_configs(codec);
 	return 1;
 }
 
@@ -14427,6 +14501,7 @@ static int alc861vd_parse_auto_config(struct hda_codec *codec)
 	if (err < 0)
 		return err;
 
+	store_pin_configs(codec);
 	return 1;
 }
 
@@ -16258,6 +16333,8 @@ static int alc662_parse_auto_config(struct hda_codec *codec)
 
 	spec->mixers[spec->num_mixers] = alc662_capture_mixer;
 	spec->num_mixers++;
+
+	store_pin_configs(codec);
 	return 1;
 }
 
-- 
GitLab


From c3e5203bed1999df716e3c7119f6749523eb952f Mon Sep 17 00:00:00 2001
From: Cliff Cai <cliff.cai@analog.com>
Date: Mon, 27 Oct 2008 17:09:25 +0800
Subject: [PATCH 895/895] ALSA: ASoC: Blackfin: update SPORT0 port selector
 (v2)

- Setting the TFS pin selector for SPORT 0 based on whether the selected
  port id F or G. If the port is F then no conflict should exist for the
  TFS. When Port G is selected and EMAC then there is a conflict between
  the PHY interrupt line and TFS.  Current settings prevent the conflict
  by ignoring the TFS pin when Port G is selected. This allows both
  ssm2602 using Port G and EMAC concurrently.

 - some code cleanup

Signed-off-by: Cliff Cai <cliff.cai@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/soc/blackfin/bf5xx-i2s.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/sound/soc/blackfin/bf5xx-i2s.c b/sound/soc/blackfin/bf5xx-i2s.c
index 827587f08180..e020c160ee44 100644
--- a/sound/soc/blackfin/bf5xx-i2s.c
+++ b/sound/soc/blackfin/bf5xx-i2s.c
@@ -70,12 +70,24 @@ static struct sport_param sport_params[2] = {
 	}
 };
 
-static u16 sport_req[][7] = {
-		{ P_SPORT0_DTPRI, P_SPORT0_TSCLK, P_SPORT0_RFS,
-		  P_SPORT0_DRPRI, P_SPORT0_RSCLK, 0},
-		{ P_SPORT1_DTPRI, P_SPORT1_TSCLK, P_SPORT1_RFS,
-		  P_SPORT1_DRPRI, P_SPORT1_RSCLK, 0},
-};
+/*
+ * Setting the TFS pin selector for SPORT 0 based on whether the selected
+ * port id F or G. If the port is F then no conflict should exist for the
+ * TFS. When Port G is selected and EMAC then there is a conflict between
+ * the PHY interrupt line and TFS.  Current settings prevent the conflict
+ * by ignoring the TFS pin when Port G is selected. This allows both
+ * ssm2602 using Port G and EMAC concurrently.
+ */
+#ifdef CONFIG_BF527_SPORT0_PORTF
+#define LOCAL_SPORT0_TFS (P_SPORT0_TFS)
+#else
+#define LOCAL_SPORT0_TFS (0)
+#endif
+
+static u16 sport_req[][7] = { {P_SPORT0_DTPRI, P_SPORT0_TSCLK, P_SPORT0_RFS,
+		P_SPORT0_DRPRI, P_SPORT0_RSCLK, LOCAL_SPORT0_TFS, 0},
+		{P_SPORT1_DTPRI, P_SPORT1_TSCLK, P_SPORT1_RFS, P_SPORT1_DRPRI,
+		P_SPORT1_RSCLK, P_SPORT1_TFS, 0} };
 
 static int bf5xx_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 		unsigned int fmt)
@@ -98,23 +110,21 @@ static int bf5xx_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 		ret = -EINVAL;
 		break;
 	default:
+		printk(KERN_ERR "%s: Unknown DAI format type\n", __func__);
 		ret = -EINVAL;
 		break;
 	}
 
 	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
-	case SND_SOC_DAIFMT_CBS_CFS:
-		ret = -EINVAL;
-		break;
-	case SND_SOC_DAIFMT_CBM_CFS:
-		ret = -EINVAL;
-		break;
 	case SND_SOC_DAIFMT_CBM_CFM:
 		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+	case SND_SOC_DAIFMT_CBM_CFS:
 	case SND_SOC_DAIFMT_CBS_CFM:
 		ret = -EINVAL;
 		break;
 	default:
+		printk(KERN_ERR "%s: Unknown DAI master type\n", __func__);
 		ret = -EINVAL;
 		break;
 	}
-- 
GitLab