Commit 42cbd8ef authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-amd-nb-for-linus' of...

Merge branch 'x86-amd-nb-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-amd-nb-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, cacheinfo: Cleanup L3 cache index disable support
  x86, amd-nb: Cleanup AMD northbridge caching code
  x86, amd-nb: Complete the rename of AMD NB and related code
parents dda5f0a3 f658bcfb
......@@ -1141,16 +1141,16 @@ config NUMA
comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
config K8_NUMA
config AMD_NUMA
def_bool y
prompt "Old style AMD Opteron NUMA detection"
depends on X86_64 && NUMA && PCI
---help---
Enable K8 NUMA node topology detection. You should say Y here if
you have a multi processor AMD K8 system. This uses an old
method to read the NUMA configuration directly from the builtin
Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
instead, which also takes priority if both are compiled in.
Enable AMD NUMA node topology detection. You should say Y here if
you have a multi processor AMD system. This uses an old method to
read the NUMA configuration directly from the builtin Northbridge
of Opteron. It is recommended to use X86_64_ACPI_NUMA instead,
which also takes priority if both are compiled in.
config X86_64_ACPI_NUMA
def_bool y
......
......@@ -3,36 +3,53 @@
#include <linux/pci.h>
extern struct pci_device_id k8_nb_ids[];
extern struct pci_device_id amd_nb_misc_ids[];
struct bootnode;
extern int early_is_k8_nb(u32 value);
extern int cache_k8_northbridges(void);
extern void k8_flush_garts(void);
extern int k8_get_nodes(struct bootnode *nodes);
extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int k8_scan_nodes(void);
extern int early_is_amd_nb(u32 value);
extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
extern int amd_get_nodes(struct bootnode *nodes);
extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int amd_scan_nodes(void);
struct k8_northbridge_info {
struct amd_northbridge {
struct pci_dev *misc;
};
struct amd_northbridge_info {
u16 num;
u8 gart_supported;
struct pci_dev **nb_misc;
u64 flags;
struct amd_northbridge *nb;
};
extern struct k8_northbridge_info k8_northbridges;
extern struct amd_northbridge_info amd_northbridges;
#define AMD_NB_GART 0x1
#define AMD_NB_L3_INDEX_DISABLE 0x2
#ifdef CONFIG_AMD_NB
static inline struct pci_dev *node_to_k8_nb_misc(int node)
static inline int amd_nb_num(void)
{
return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL;
return amd_northbridges.num;
}
#else
static inline int amd_nb_has_feature(int feature)
{
return ((amd_northbridges.flags & feature) == feature);
}
static inline struct pci_dev *node_to_k8_nb_misc(int node)
static inline struct amd_northbridge *node_to_amd_nb(int node)
{
return NULL;
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
}
#else
#define amd_nb_num(x) 0
#define amd_nb_has_feature(x) false
#define node_to_amd_nb(x) NULL
#endif
......
......@@ -12,95 +12,116 @@
static u32 *flush_words;
struct pci_device_id k8_nb_ids[] = {
struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
{}
};
EXPORT_SYMBOL(k8_nb_ids);
EXPORT_SYMBOL(amd_nb_misc_ids);
struct k8_northbridge_info k8_northbridges;
EXPORT_SYMBOL(k8_northbridges);
struct amd_northbridge_info amd_northbridges;
EXPORT_SYMBOL(amd_northbridges);
static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
static struct pci_dev *next_northbridge(struct pci_dev *dev,
struct pci_device_id *ids)
{
do {
dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
if (!dev)
break;
} while (!pci_match_id(&k8_nb_ids[0], dev));
} while (!pci_match_id(ids, dev));
return dev;
}
int cache_k8_northbridges(void)
int amd_cache_northbridges(void)
{
int i;
struct pci_dev *dev;
int i = 0;
struct amd_northbridge *nb;
struct pci_dev *misc;
if (k8_northbridges.num)
if (amd_nb_num())
return 0;
dev = NULL;
while ((dev = next_k8_northbridge(dev)) != NULL)
k8_northbridges.num++;
misc = NULL;
while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
i++;
/* some CPU families (e.g. family 0x11) do not support GART */
if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
boot_cpu_data.x86 == 0x15)
k8_northbridges.gart_supported = 1;
if (i == 0)
return 0;
k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
sizeof(void *), GFP_KERNEL);
if (!k8_northbridges.nb_misc)
nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
if (!nb)
return -ENOMEM;
if (!k8_northbridges.num) {
k8_northbridges.nb_misc[0] = NULL;
return 0;
}
amd_northbridges.nb = nb;
amd_northbridges.num = i;
if (k8_northbridges.gart_supported) {
flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
GFP_KERNEL);
if (!flush_words) {
kfree(k8_northbridges.nb_misc);
return -ENOMEM;
}
}
misc = NULL;
for (i = 0; i != amd_nb_num(); i++) {
node_to_amd_nb(i)->misc = misc =
next_northbridge(misc, amd_nb_misc_ids);
}
/* some CPU families (e.g. family 0x11) do not support GART */
if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
boot_cpu_data.x86 == 0x15)
amd_northbridges.flags |= AMD_NB_GART;
/*
* Some CPU families support L3 Cache Index Disable. There are some
* limitations because of E382 and E388 on family 0x10.
*/
if (boot_cpu_data.x86 == 0x10 &&
boot_cpu_data.x86_model >= 0x8 &&
(boot_cpu_data.x86_model > 0x9 ||
boot_cpu_data.x86_mask >= 0x1))
amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
dev = NULL;
i = 0;
while ((dev = next_k8_northbridge(dev)) != NULL) {
k8_northbridges.nb_misc[i] = dev;
if (k8_northbridges.gart_supported)
pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
}
k8_northbridges.nb_misc[i] = NULL;
return 0;
}
EXPORT_SYMBOL_GPL(cache_k8_northbridges);
EXPORT_SYMBOL_GPL(amd_cache_northbridges);
/* Ignores subdevice/subvendor but as far as I can figure out
they're useless anyways */
int __init early_is_k8_nb(u32 device)
int __init early_is_amd_nb(u32 device)
{
struct pci_device_id *id;
u32 vendor = device & 0xffff;
device >>= 16;
for (id = k8_nb_ids; id->vendor; id++)
for (id = amd_nb_misc_ids; id->vendor; id++)
if (vendor == id->vendor && device == id->device)
return 1;
return 0;
}
void k8_flush_garts(void)
int amd_cache_gart(void)
{
int i;
if (!amd_nb_has_feature(AMD_NB_GART))
return 0;
flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
if (!flush_words) {
amd_northbridges.flags &= ~AMD_NB_GART;
return -ENOMEM;
}
for (i = 0; i != amd_nb_num(); i++)
pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
&flush_words[i]);
return 0;
}
void amd_flush_garts(void)
{
int flushed, i;
unsigned long flags;
static DEFINE_SPINLOCK(gart_lock);
if (!k8_northbridges.gart_supported)
if (!amd_nb_has_feature(AMD_NB_GART))
return;
/* Avoid races between AGP and IOMMU. In theory it's not needed
......@@ -109,16 +130,16 @@ void k8_flush_garts(void)
that it doesn't matter to serialize more. -AK */
spin_lock_irqsave(&gart_lock, flags);
flushed = 0;
for (i = 0; i < k8_northbridges.num; i++) {
pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
flush_words[i]|1);
for (i = 0; i < amd_nb_num(); i++) {
pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
flush_words[i] | 1);
flushed++;
}
for (i = 0; i < k8_northbridges.num; i++) {
for (i = 0; i < amd_nb_num(); i++) {
u32 w;
/* Make sure the hardware actually executed the flush*/
for (;;) {
pci_read_config_dword(k8_northbridges.nb_misc[i],
pci_read_config_dword(node_to_amd_nb(i)->misc,
0x9c, &w);
if (!(w & 1))
break;
......@@ -129,19 +150,23 @@ void k8_flush_garts(void)
if (!flushed)
printk("nothing to flush?\n");
}
EXPORT_SYMBOL_GPL(k8_flush_garts);
EXPORT_SYMBOL_GPL(amd_flush_garts);
static __init int init_k8_nbs(void)
static __init int init_amd_nbs(void)
{
int err = 0;
err = cache_k8_northbridges();
err = amd_cache_northbridges();
if (err < 0)
printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n");
printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
if (amd_cache_gart() < 0)
printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
"GART support disabled.\n");
return err;
}
/* This has to go after the PCI subsystem */
fs_initcall(init_k8_nbs);
fs_initcall(init_amd_nbs);
......@@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
* Do an PCI bus scan by hand because we're running before the PCI
* subsystem.
*
* All K8 AGP bridges are AGPv3 compliant, so we can do this scan
* All AMD AGP bridges are AGPv3 compliant, so we can do this scan
* generically. It's probably overkill to always scan all slots because
* the AGP bridges should be always an own bus on the HT hierarchy,
* but do it here for future safety.
......@@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void)
dev_limit = bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
......@@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void)
dev_limit = bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
......@@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void)
dev_limit = bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
iommu_detected = 1;
......@@ -518,7 +518,7 @@ out:
dev_base = bus_dev_ranges[i].dev_base;
dev_limit = bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
......
......@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
};
struct amd_l3_cache {
struct pci_dev *dev;
bool can_disable;
struct amd_northbridge *nb;
unsigned indices;
u8 subcaches[4];
};
......@@ -311,14 +310,12 @@ struct _cache_attr {
/*
* L3 cache descriptors
*/
static struct amd_l3_cache **__cpuinitdata l3_caches;
static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
{
unsigned int sc0, sc1, sc2, sc3;
u32 val = 0;
pci_read_config_dword(l3->dev, 0x1C4, &val);
pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
/* calculate subcache sizes */
l3->subcaches[0] = sc0 = !(val & BIT(0));
......@@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
{
struct amd_l3_cache *l3;
struct pci_dev *dev = node_to_k8_nb_misc(node);
l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
if (!l3) {
printk(KERN_WARNING "Error allocating L3 struct\n");
return NULL;
}
l3->dev = dev;
amd_calc_l3_indices(l3);
return l3;
}
static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
int index)
static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
int index)
{
static struct amd_l3_cache *__cpuinitdata l3_caches;
int node;
if (boot_cpu_data.x86 != 0x10)
return;
if (index < 3)
return;
/* see errata #382 and #388 */
if (boot_cpu_data.x86_model < 0x8)
return;
if ((boot_cpu_data.x86_model == 0x8 ||
boot_cpu_data.x86_model == 0x9)
&&
boot_cpu_data.x86_mask < 0x1)
return;
/* not in virtualized environments */
if (k8_northbridges.num == 0)
/* only for L3, and not in virtualized environments */
if (index < 3 || amd_nb_num() == 0)
return;
/*
......@@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
* never freed but this is done only on shutdown so it doesn't matter.
*/
if (!l3_caches) {
int size = k8_northbridges.num * sizeof(struct amd_l3_cache *);
int size = amd_nb_num() * sizeof(struct amd_l3_cache);
l3_caches = kzalloc(size, GFP_ATOMIC);
if (!l3_caches)
......@@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
node = amd_get_nb_id(smp_processor_id());
if (!l3_caches[node]) {
l3_caches[node] = amd_init_l3_cache(node);
l3_caches[node]->can_disable = true;
if (!l3_caches[node].nb) {
l3_caches[node].nb = node_to_amd_nb(node);
amd_calc_l3_indices(&l3_caches[node]);
}
WARN_ON(!l3_caches[node]);
this_leaf->l3 = l3_caches[node];
this_leaf->l3 = &l3_caches[node];
}
/*
......@@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
{
unsigned int reg = 0;
pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg);
pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
/* check whether this slot is activated already */
if (reg & (3UL << 30))
......@@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
{
int index;
if (!this_leaf->l3 || !this_leaf->l3->can_disable)
if (!this_leaf->l3 ||
!amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
return -EINVAL;
index = amd_get_l3_disable_slot(this_leaf->l3, slot);
......@@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
if (!l3->subcaches[i])
continue;
pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
/*
* We need to WBINVD on a core on the node containing the L3
......@@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
wbinvd_on_cpu(cpu);
reg |= BIT(31);
pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
}
}
......@@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!this_leaf->l3 || !this_leaf->l3->can_disable)
if (!this_leaf->l3 ||
!amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
return -EINVAL;
cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
......@@ -545,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
#define STORE_CACHE_DISABLE(slot) \
static ssize_t \
store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
const char *buf, size_t count) \
const char *buf, size_t count) \
{ \
return store_cache_disable(this_leaf, buf, count, slot); \
}
......@@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
show_cache_disable_1, store_cache_disable_1);
#else /* CONFIG_AMD_NB */
static void __cpuinit
amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
{
};
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */
static int
......@@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
amd_check_l3_disable(this_leaf, index);
amd_init_l3_cache(this_leaf, index);
} else {
cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
}
......@@ -983,30 +944,48 @@ define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);
#define DEFAULT_SYSFS_CACHE_ATTRS \
&type.attr, \
&level.attr, \
&coherency_line_size.attr, \
&physical_line_partition.attr, \
&ways_of_associativity.attr, \
&number_of_sets.attr, \
&size.attr, \
&shared_cpu_map.attr, \
&shared_cpu_list.attr
static struct attribute *default_attrs[] = {
DEFAULT_SYSFS_CACHE_ATTRS,
&type.attr,
&level.attr,
&coherency_line_size.attr,
&physical_line_partition.attr,
&ways_of_associativity.attr,
&number_of_sets.attr,
&size.attr,
&shared_cpu_map.attr,
&shared_cpu_list.attr,
NULL
};
static struct attribute *default_l3_attrs[] = {
DEFAULT_SYSFS_CACHE_ATTRS,
#ifdef CONFIG_AMD_NB
&cache_disable_0.attr,
&cache_disable_1.attr,
static struct attribute ** __cpuinit amd_l3_attrs(void)
{
static struct attribute **attrs;
int n;
if (attrs)
return attrs;
n = sizeof (default_attrs) / sizeof (struct attribute *);
if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
n += 2;
attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
if (attrs == NULL)
return attrs = default_attrs;
for (n = 0; default_attrs[n]; n++)
attrs[n] = default_attrs[n];
if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
attrs[n++] = &cache_disable_0.attr;
attrs[n++] = &cache_disable_1.attr;
}
return attrs;
}
#endif
NULL
};
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
......@@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
this_leaf = CPUID4_INFO_IDX(cpu, i);
if (this_leaf->l3 && this_leaf->l3->can_disable)
ktype_cache.default_attrs = default_l3_attrs;
else
ktype_cache.default_attrs = default_attrs;
ktype_cache.default_attrs = default_attrs;
#ifdef CONFIG_AMD_NB
if (this_leaf->l3)
ktype_cache.default_attrs = amd_l3_attrs();
#endif
retval = kobject_init_and_add(&(this_object->kobj),
&ktype_cache,
per_cpu(ici_cache_kobject, cpu),
......
......@@ -143,7 +143,7 @@ static void flush_gart(void)