Commit 79ba0db6 authored by Len Brown's avatar Len Brown
Browse files

Merge branches 'einj', 'intel_idle', 'misc', 'srat' and 'turbostat-ivb' into release

......@@ -47,20 +47,53 @@ directory apei/einj. The following files are provided.
- param1
This file is used to set the first error parameter value. Effect of
parameter depends on error_type specified. For memory error, this is
physical memory address. Only available if param_extension module
parameter is specified.
parameter depends on error_type specified.
- param2
This file is used to set the second error parameter value. Effect of
parameter depends on error_type specified. For memory error, this is
physical memory address mask. Only available if param_extension
module parameter is specified.
parameter depends on error_type specified.
BIOS versions based in the ACPI 4.0 specification have limited options
to control where the errors are injected. Your BIOS may support an
extension (enabled with the param_extension=1 module parameter, or
boot command line einj.param_extension=1). This allows the address
and mask for memory injections to be specified by the param1 and
param2 files in apei/einj.
BIOS versions using the ACPI 5.0 specification have more control over
the target of the injection. For processor related errors (type 0x1,
0x2 and 0x4) the APICID of the target should be provided using the
param1 file in apei/einj. For memory errors (type 0x8, 0x10 and 0x20)
the address is set using param1 with a mask in param2 (0x0 is equivalent
to all ones). For PCI express errors (type 0x40, 0x80 and 0x100) the
segment, bus, device and function are specified using param1:
31 24 23 16 15 11 10 8 7 0
+-------------------------------------------------+
| segment | bus | device | function | reserved |
+-------------------------------------------------+
An ACPI 5.0 BIOS may also allow vendor specific errors to be injected.
In this case a file named vendor will contain identifying information
from the BIOS that hopefully will allow an application wishing to use
the vendor specific extension to tell that they are running on a BIOS
that supports it. All vendor extensions have the 0x80000000 bit set in
error_type. A file vendor_flags controls the interpretation of param1
and param2 (1 = PROCESSOR, 2 = MEMORY, 4 = PCI). See your BIOS vendor
documentation for details (and expect changes to this API if vendors
creativity in using this feature expands beyond our expectations).
Example:
# cd /sys/kernel/debug/apei/einj
# cat available_error_type # See which errors can be injected
0x00000002 Processor Uncorrectable non-fatal
0x00000008 Memory Correctable
0x00000010 Memory Uncorrectable non-fatal
# echo 0x12345000 > param1 # Set memory address for injection
# echo 0xfffffffffffff000 > param2 # Mask - anywhere in this page
# echo 0x8 > error_type # Choose correctable memory error
# echo 1 > error_inject # Inject now
Injecting parameter support is a BIOS version specific extension, that
is, it only works on some BIOS version. If you want to use it, please
make sure your BIOS version has the proper support and specify
"param_extension=y" in module parameter.
For more information about EINJ, please refer to ACPI specification
version 4.0, section 17.5.
version 4.0, section 17.5 and ACPI 5.0, section 18.6.
......@@ -1035,6 +1035,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
By default, super page will be supported if Intel IOMMU
has the capability. With this option, super page will
not be supported.
intel_idle.max_cstate= [KNL,HW,ACPI,X86]
0 disables intel_idle and fall back on acpi_idle.
1 to 6 specify maximum depth of C-state.
intremap= [X86-64, Intel-IOMMU]
on enable Interrupt Remapping (default)
off disable Interrupt Remapping
......
......@@ -429,22 +429,24 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
static struct acpi_table_slit __initdata *slit_table;
cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
static int __init
get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
{
int pxm;
pxm = pa->proximity_domain_lo;
if (ia64_platform_is("sn2"))
if (ia64_platform_is("sn2") || acpi_srat_revision >= 2)
pxm += pa->proximity_domain_hi[0] << 8;
return pxm;
}
static int get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
static int __init
get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
{
int pxm;
pxm = ma->proximity_domain;
if (!ia64_platform_is("sn2"))
if (!ia64_platform_is("sn2") && acpi_srat_revision <= 1)
pxm &= 0xff;
return pxm;
......
......@@ -104,6 +104,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
return;
pxm = pa->proximity_domain_lo;
if (acpi_srat_revision >= 2)
pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
......@@ -155,6 +157,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
start = ma->base_address;
end = start + ma->length;
pxm = ma->proximity_domain;
if (acpi_srat_revision <= 1)
pxm &= 0xff;
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains.\n");
......
......@@ -42,6 +42,42 @@
/* Firmware should respond within 1 milliseconds */
#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC)
/*
* ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
*/
static int acpi5;
struct set_error_type_with_address {
u32 type;
u32 vendor_extension;
u32 flags;
u32 apicid;
u64 memory_address;
u64 memory_address_range;
u32 pcie_sbdf;
};
enum {
SETWA_FLAGS_APICID = 1,
SETWA_FLAGS_MEM = 2,
SETWA_FLAGS_PCIE_SBDF = 4,
};
/*
* Vendor extensions for platform specific operations
*/
struct vendor_error_type_extension {
u32 length;
u32 pcie_sbdf;
u16 vendor_id;
u16 device_id;
u8 rev_id;
u8 reserved[3];
};
static u32 vendor_flags;
static struct debugfs_blob_wrapper vendor_blob;
static char vendor_dev[64];
/*
* Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
* EINJ table through an unpublished extension. Use with caution as
......@@ -103,7 +139,14 @@ static struct apei_exec_ins_type einj_ins_type[] = {
*/
static DEFINE_MUTEX(einj_mutex);
static struct einj_parameter *einj_param;
static void *einj_param;
#ifndef readq
static inline __u64 readq(volatile void __iomem *addr)
{
return ((__u64)readl(addr+4) << 32) + readl(addr);
}
#endif
#ifndef writeq
static inline void writeq(__u64 val, volatile void __iomem *addr)
......@@ -158,10 +201,31 @@ static int einj_timedout(u64 *t)
return 0;
}
static u64 einj_get_parameter_address(void)
static void check_vendor_extension(u64 paddr,
struct set_error_type_with_address *v5param)
{
int offset = readl(&v5param->vendor_extension);
struct vendor_error_type_extension *v;
u32 sbdf;
if (!offset)
return;
v = ioremap(paddr + offset, sizeof(*v));
if (!v)
return;
sbdf = readl(&v->pcie_sbdf);
sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n",
sbdf >> 24, (sbdf >> 16) & 0xff,
(sbdf >> 11) & 0x1f, (sbdf >> 8) & 0x7,
readw(&v->vendor_id), readw(&v->device_id),
readb(&v->rev_id));
iounmap(v);
}
static void *einj_get_parameter_address(void)
{
int i;
u64 paddr = 0;
u64 paddrv4 = 0, paddrv5 = 0;
struct acpi_whea_header *entry;
entry = EINJ_TAB_ENTRY(einj_tab);
......@@ -170,12 +234,40 @@ static u64 einj_get_parameter_address(void)
entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
entry->register_region.space_id ==
ACPI_ADR_SPACE_SYSTEM_MEMORY)
memcpy(&paddr, &entry->register_region.address,
sizeof(paddr));
memcpy(&paddrv4, &entry->register_region.address,
sizeof(paddrv4));
if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
entry->register_region.space_id ==
ACPI_ADR_SPACE_SYSTEM_MEMORY)
memcpy(&paddrv5, &entry->register_region.address,
sizeof(paddrv5));
entry++;
}
if (paddrv5) {
struct set_error_type_with_address *v5param;
v5param = ioremap(paddrv5, sizeof(*v5param));
if (v5param) {
acpi5 = 1;
check_vendor_extension(paddrv5, v5param);
return v5param;
}
}
if (paddrv4) {
struct einj_parameter *v4param;
v4param = ioremap(paddrv4, sizeof(*v4param));
if (!v4param)
return 0;
if (readq(&v4param->reserved1) || readq(&v4param->reserved2)) {
iounmap(v4param);
return 0;
}
return v4param;
}
return paddr;
return 0;
}
/* do sanity check to trigger table */
......@@ -340,12 +432,56 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, type);
rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
if (rc)
return rc;
if (einj_param) {
writeq(param1, &einj_param->param1);
writeq(param2, &einj_param->param2);
if (acpi5) {
struct set_error_type_with_address *v5param = einj_param;
writel(type, &v5param->type);
if (type & 0x80000000) {
switch (vendor_flags) {
case SETWA_FLAGS_APICID:
writel(param1, &v5param->apicid);
break;
case SETWA_FLAGS_MEM:
writeq(param1, &v5param->memory_address);
writeq(param2, &v5param->memory_address_range);
break;
case SETWA_FLAGS_PCIE_SBDF:
writel(param1, &v5param->pcie_sbdf);
break;
}
writel(vendor_flags, &v5param->flags);
} else {
switch (type) {
case ACPI_EINJ_PROCESSOR_CORRECTABLE:
case ACPI_EINJ_PROCESSOR_UNCORRECTABLE:
case ACPI_EINJ_PROCESSOR_FATAL:
writel(param1, &v5param->apicid);
writel(SETWA_FLAGS_APICID, &v5param->flags);
break;
case ACPI_EINJ_MEMORY_CORRECTABLE:
case ACPI_EINJ_MEMORY_UNCORRECTABLE:
case ACPI_EINJ_MEMORY_FATAL:
writeq(param1, &v5param->memory_address);
writeq(param2, &v5param->memory_address_range);
writel(SETWA_FLAGS_MEM, &v5param->flags);
break;
case ACPI_EINJ_PCIX_CORRECTABLE:
case ACPI_EINJ_PCIX_UNCORRECTABLE:
case ACPI_EINJ_PCIX_FATAL:
writel(param1, &v5param->pcie_sbdf);
writel(SETWA_FLAGS_PCIE_SBDF, &v5param->flags);
break;
}
}
} else {
rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
if (rc)
return rc;
if (einj_param) {
struct einj_parameter *v4param = einj_param;
writeq(param1, &v4param->param1);
writeq(param2, &v4param->param2);
}
}
rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION);
if (rc)
......@@ -455,15 +591,25 @@ static int error_type_set(void *data, u64 val)
{
int rc;
u32 available_error_type = 0;
u32 tval, vendor;
/*
* Vendor defined types have 0x80000000 bit set, and
* are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
*/
vendor = val & 0x80000000;
tval = val & 0x7fffffff;
/* Only one error type can be specified */
if (val & (val - 1))
return -EINVAL;
rc = einj_get_available_error_type(&available_error_type);
if (rc)
return rc;
if (!(val & available_error_type))
if (tval & (tval - 1))
return -EINVAL;
if (!vendor) {
rc = einj_get_available_error_type(&available_error_type);
if (rc)
return rc;
if (!(val & available_error_type))
return -EINVAL;
}
error_type = val;
return 0;
......@@ -502,7 +648,6 @@ static int einj_check_table(struct acpi_table_einj *einj_tab)
static int __init einj_init(void)
{
int rc;
u64 param_paddr;
acpi_status status;
struct dentry *fentry;
struct apei_exec_context ctx;
......@@ -555,23 +700,30 @@ static int __init einj_init(void)
rc = apei_exec_pre_map_gars(&ctx);
if (rc)
goto err_release;
if (param_extension) {
param_paddr = einj_get_parameter_address();
if (param_paddr) {
einj_param = ioremap(param_paddr, sizeof(*einj_param));
rc = -ENOMEM;
if (!einj_param)
goto err_unmap;
fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
einj_debug_dir, &error_param1);
if (!fentry)
goto err_unmap;
fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
einj_debug_dir, &error_param2);
if (!fentry)
goto err_unmap;
} else
pr_warn(EINJ_PFX "Parameter extension is not supported.\n");
einj_param = einj_get_parameter_address();
if ((param_extension || acpi5) && einj_param) {
fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
einj_debug_dir, &error_param1);
if (!fentry)
goto err_unmap;
fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
einj_debug_dir, &error_param2);
if (!fentry)
goto err_unmap;
}
if (vendor_dev[0]) {
vendor_blob.data = vendor_dev;
vendor_blob.size = strlen(vendor_dev);
fentry = debugfs_create_blob("vendor", S_IRUSR,
einj_debug_dir, &vendor_blob);
if (!fentry)
goto err_unmap;
fentry = debugfs_create_x32("vendor_flags", S_IRUSR | S_IWUSR,
einj_debug_dir, &vendor_flags);
if (!fentry)
goto err_unmap;
}
pr_info(EINJ_PFX "Error INJection is initialized.\n");
......
......@@ -45,6 +45,8 @@ static int pxm_to_node_map[MAX_PXM_DOMAINS]
static int node_to_pxm_map[MAX_NUMNODES]
= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
unsigned char acpi_srat_revision __initdata;
int pxm_to_node(int pxm)
{
if (pxm < 0)
......@@ -255,9 +257,13 @@ acpi_parse_memory_affinity(struct acpi_subtable_header * header,
static int __init acpi_parse_srat(struct acpi_table_header *table)
{
struct acpi_table_srat *srat;
if (!table)
return -EINVAL;
srat = (struct acpi_table_srat *)table;
acpi_srat_revision = srat->header.revision;
/* Real work done in acpi_table_parse_srat below. */
return 0;
......
......@@ -173,8 +173,30 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
apic_id = map_mat_entry(handle, type, acpi_id);
if (apic_id == -1)
apic_id = map_madt_entry(type, acpi_id);
if (apic_id == -1)
return apic_id;
if (apic_id == -1) {
/*
* On UP processor, there is no _MAT or MADT table.
* So above apic_id is always set to -1.
*
* BIOS may define multiple CPU handles even for UP processor.
* For example,
*
* Scope (_PR)
* {
* Processor (CPU0, 0x00, 0x00000410, 0x06) {}
* Processor (CPU1, 0x01, 0x00000410, 0x06) {}
* Processor (CPU2, 0x02, 0x00000410, 0x06) {}
* Processor (CPU3, 0x03, 0x00000410, 0x06) {}
* }
*
* Ignores apic_id and always return 0 for CPU0's handle.
* Return -1 for other CPU's handle.
*/
if (acpi_id == 0)
return acpi_id;
else
return apic_id;
}
#ifdef CONFIG_SMP
for_each_possible_cpu(i) {
......
......@@ -82,7 +82,7 @@ MODULE_LICENSE("GPL");
static int acpi_processor_add(struct acpi_device *device);
static int acpi_processor_remove(struct acpi_device *device, int type);
static void acpi_processor_notify(struct acpi_device *device, u32 event);
static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu);
static acpi_status acpi_processor_hotadd_init(struct acpi_processor *pr);
static int acpi_processor_handle_eject(struct acpi_processor *pr);
......@@ -324,10 +324,8 @@ static int acpi_processor_get_info(struct acpi_device *device)
* they are physically not present.
*/
if (pr->id == -1) {
if (ACPI_FAILURE
(acpi_processor_hotadd_init(pr->handle, &pr->id))) {
if (ACPI_FAILURE(acpi_processor_hotadd_init(pr)))
return -ENODEV;
}
}
/*
* On some boxes several processors use the same processor bus id.
......@@ -539,6 +537,7 @@ err_thermal_unregister:
thermal_cooling_device_unregister(pr->cdev);
err_power_exit:
acpi_processor_power_exit(pr, device);
sysfs_remove_link(&device->dev.kobj, "sysdev");
err_free_cpumask:
free_cpumask_var(pr->throttling.shared_cpu_map);
......@@ -720,18 +719,19 @@ processor_walk_namespace_cb(acpi_handle handle,
return (AE_OK);
}
static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu)
static acpi_status acpi_processor_hotadd_init(struct acpi_processor *pr)
{
acpi_handle handle = pr->handle;
if (!is_processor_present(handle)) {
return AE_ERROR;
}
if (acpi_map_lsapic(handle, p_cpu))
if (acpi_map_lsapic(handle, &pr->id))
return AE_ERROR;
if (arch_register_cpu(*p_cpu)) {
acpi_unmap_lsapic(*p_cpu);
if (arch_register_cpu(pr->id)) {
acpi_unmap_lsapic(pr->id);
return AE_ERROR;
}
......@@ -748,7 +748,7 @@ static int acpi_processor_handle_eject(struct acpi_processor *pr)
return (0);
}
#else
static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu)
static acpi_status acpi_processor_hotadd_init(struct acpi_processor *pr)
{
return AE_ERROR;
}
......@@ -827,8 +827,6 @@ static void __exit acpi_processor_exit(void)
acpi_bus_unregister_driver(&acpi_processor_driver);
cpuidle_unregister_driver(&acpi_idle_driver);
return;
}
......
......@@ -197,7 +197,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
.enter = &intel_idle },
};
static int get_driver_data(int cstate)
static long get_driver_data(int cstate)
{
int driver_data;
switch (cstate) {
......@@ -232,6 +232,7 @@ static int get_driver_data(int cstate)
* @drv: cpuidle driver
* @index: index of cpuidle state
*
* Must be called under local_irq_disable().
*/
static int intel_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
......@@ -247,8 +248,6 @@ static int intel_idle(struct cpuidle_device *dev,
cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
local_irq_disable();
/*
* leave_mm() to avoid costly and often unnecessary wakeups
* for flushing the user TLB's associated with the active mm.
......@@ -348,7 +347,8 @@ static int intel_idle_probe(void)
cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
!(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
!mwait_substates)
return -ENODEV;
pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates);
......@@ -394,7 +394,7 @@ static int intel_idle_probe(void)
if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */
lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
else {
smp_call_function(__setup_broadcast_timer, (void *)true, 1);
on_each_cpu(__setup_broadcast_timer, (void *)true, 1);
register_cpu_notifier(&setup_broadcast_notifier);
}
......@@ -471,71 +471,67 @@ static int intel_idle_cpuidle_driver_init(void)
}
if (auto_demotion_disable_flags)
smp_call_function(auto_demotion_disable, NULL, 1);
on_each_cpu(auto_demotion_disable, NULL, 1);
return 0;
}
/*
* intel_idle_cpuidle_devices_init()
* intel_idle_cpu_init()
* allocate, initialize, register cpuidle_devices
* @cpu: cpu/core to initialize
*/
static int intel_idle_cpuidle_devices_init(void)
int intel_idle_cpu_init(int cpu)
{
int i, cstate;
int cstate;
struct cpuidle_device *dev;
intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
if (intel_idle_cpuidle_devices == NULL)
return -ENOMEM;
for_each_online_cpu(i) {
dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
dev->state_count = 1;
dev->state_count = 1;
for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
int num_substates;
for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
int num_substates;
if (cstate > max_cstate) {
printk(PREFIX "max_cstate %d reached\n",
max_cstate);
break;