diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index c0fca2c1c858372493675695f9a5341200c139ae..d60e7195b7dda04dccf95d4f2284e120f5ea3c52 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -720,9 +720,6 @@ int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
 	return ret;
 }
 
-/* It's defined in drivers/pci/pci.c */
-extern u8 pci_cache_line_size;
-
 /**
- * set_pci_cacheline_size - determine cacheline size for PCI devices
+ * set_pci_dfl_cacheline_size - determine default cacheline size for PCI devices
  *
@@ -731,7 +728,7 @@ extern u8 pci_cache_line_size;
  *
  * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info().
  */
-static void __init set_pci_cacheline_size(void)
+static void __init set_pci_dfl_cacheline_size(void)
 {
 	unsigned long levels, unique_caches;
 	long status;
@@ -751,7 +748,7 @@ static void __init set_pci_cacheline_size(void)
 			"(status=%ld)\n", __func__, status);
 		return;
 	}
-	pci_cache_line_size = (1 << cci.pcci_line_size) / 4;
+	pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4;
 }
 
 u64 ia64_dma_get_required_mask(struct device *dev)
@@ -782,7 +779,7 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask);
 
 static int __init pcibios_init(void)
 {
-	set_pci_cacheline_size();
+	set_pci_dfl_cacheline_size();
 	return 0;
 }
 
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 1331fcf261433b72c9d0a4620326a3898f1761fa..fbeec31316cf84d13a6c60ecd4a65c802244b93e 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -410,8 +410,6 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
 	return bus;
 }
 
-extern u8 pci_cache_line_size;
-
 int __init pcibios_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -426,11 +424,11 @@ int __init pcibios_init(void)
 	 * and P4. It's also good for 386/486s (which actually have 16)
 	 * as quite a few PCI devices do not support smaller values.
 	 */
-	pci_cache_line_size = 32 >> 2;
+	pci_dfl_cache_line_size = 32 >> 2;
 	if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD)
-		pci_cache_line_size = 64 >> 2;	/* K7 & K8 */
+		pci_dfl_cache_line_size = 64 >> 2;	/* K7 & K8 */
 	else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL)
-		pci_cache_line_size = 128 >> 2;	/* P4 */
+		pci_dfl_cache_line_size = 128 >> 2;	/* P4 */
 
 	pcibios_resource_survey();
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 4e4c295a049f7bfba524f24efe8379530e3dc906..1f9a7a03847b9f21e19db7c44f62bcc12fd64731 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -47,6 +47,19 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;
 unsigned long pci_hotplug_io_size  = DEFAULT_HOTPLUG_IO_SIZE;
 unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
 
+#ifndef PCI_CACHE_LINE_BYTES
+#define PCI_CACHE_LINE_BYTES L1_CACHE_BYTES
+#endif
+
+/*
+ * The default cache line size (CLS) is used if the arch didn't set
+ * the CLS explicitly and the PCI devices do not all agree on one
+ * value.  Arch code can override either the default or the actual
+ * value as it sees fit.  Both are in 32-bit words, not bytes.
+ */
+u8 pci_dfl_cache_line_size __initdata = PCI_CACHE_LINE_BYTES >> 2;
+u8 pci_cache_line_size;
+
 /**
  * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
  * @bus: pointer to PCI bus structure to search
@@ -1883,14 +1896,6 @@ void pci_clear_mwi(struct pci_dev *dev)
 
 #else
 
-#ifndef PCI_CACHE_LINE_BYTES
-#define PCI_CACHE_LINE_BYTES L1_CACHE_BYTES
-#endif
-
-/* This can be overridden by arch code. */
-/* Don't forget this is measured in 32-bit words, not bytes */
-u8 pci_cache_line_size = PCI_CACHE_LINE_BYTES / 4;
-
 /**
  * pci_set_cacheline_size - ensure the CACHE_LINE_SIZE register is programmed
  * @dev: the PCI device for which MWI is to be enabled
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 245d2cdb47651d4d095edd847417b5251565fc69..1812ae7698de5f6d5f65a1a676d4ffcbf8de9dd4 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2595,9 +2595,41 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
 static int __init pci_apply_final_quirks(void)
 {
 	struct pci_dev *dev = NULL;
+	u8 cls = 0;	/* first non-zero CLS seen on any device */
+	u8 tmp;		/* CLS register of the current device */
+
+	if (pci_cache_line_size)
+		printk(KERN_DEBUG "PCI: CLS %u bytes\n",
+		       pci_cache_line_size << 2);
 
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		pci_fixup_device(pci_fixup_final, dev);
+		/*
+		 * If arch hasn't set it explicitly yet, use the CLS
+		 * value shared by all PCI devices.  If there's a
+		 * mismatch, fall back to the default value.
+		 */
+		if (!pci_cache_line_size) {
+			pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &tmp);
+			if (!cls)
+				cls = tmp;
+			if (!tmp || cls == tmp)
+				continue;
+
+			printk(KERN_DEBUG "PCI: CLS mismatch (%u != %u), "
+			       "using %u bytes\n", cls << 2, tmp << 2,
+			       pci_dfl_cache_line_size << 2);
+			pci_cache_line_size = pci_dfl_cache_line_size;
+		}
+	}
+	if (!pci_cache_line_size) {
+		printk(KERN_DEBUG "PCI: CLS %u bytes, default %u\n",
+		       cls << 2, pci_dfl_cache_line_size << 2);
+		/*
+		 * cls is still zero when no device had a CLS programmed;
+		 * use the default then instead of leaving the CLS at zero.
+		 */
+		pci_cache_line_size = cls ? cls : pci_dfl_cache_line_size;
 	}
 
 	return 0;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f5c7cd343e56ae85dafe8a660a280054a53bd433..b849861d78e65b7acd73cb9a71b1f33c9e60750c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1246,6 +1246,8 @@ extern int pci_pci_problems;
 
 extern unsigned long pci_cardbus_io_size;
 extern unsigned long pci_cardbus_mem_size;
+extern u8 pci_dfl_cache_line_size;
+extern u8 pci_cache_line_size;
 
 extern unsigned long pci_hotplug_io_size;
 extern unsigned long pci_hotplug_mem_size;