Commit 6ee7e78e authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'release' of master.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6

* 'release' of master.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6:
  [IA64] replace kmalloc+memset with kzalloc
  [IA64] resolve name clash by renaming is_available_memory()
  [IA64] Need export for csum_ipv6_magic
  [IA64] Fix DISCONTIGMEM without VIRTUAL_MEM_MAP
  [PATCH] Add support for type argument in PAL_GET_PSTATE
  [IA64] tidy up return value of ip_fast_csum
  [IA64] implement csum_ipv6_magic for ia64.
  [IA64] More Itanium PAL spec updates
  [IA64] Update processor_info features
  [IA64] Add se bit to Processor State Parameter structure
  [IA64] Add dp bit to cache and bus check structs
  [IA64] SN: Correctly update smp_affinty mask
  [IA64] sparse cleanups
  [IA64] IA64 Kexec/kdump
parents 7f3af60e 52fd9108
......@@ -434,6 +434,29 @@ config IA64_ESI
source "drivers/sn/Kconfig"
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
depends on EXPERIMENTAL && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
but it is indepedent of the system firmware. And like a reboot
you can start any kernel with it, not just Linux.
The name comes from the similiarity to the exec system call.
It is an ongoing process to be certain the hardware in a machine
is properly shutdown, so do not be surprised if this code does not
initially work for you. It may help to enable device hotplugging
support. As of this writing the exact hardware interface is
strongly in flux, so no good recommendation can be made.
config CRASH_DUMP
bool "kernel crash dumps (EXPERIMENTAL)"
depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
help
Generate crash dump after being started by kexec.
source "drivers/firmware/Kconfig"
source "fs/Kconfig.binfmt"
......
......@@ -1672,15 +1672,13 @@ ioc_sac_init(struct ioc *ioc)
* SAC (single address cycle) addressable, so allocate a
* pseudo-device to enforce that.
*/
sac = kmalloc(sizeof(*sac), GFP_KERNEL);
sac = kzalloc(sizeof(*sac), GFP_KERNEL);
if (!sac)
panic(PFX "Couldn't allocate struct pci_dev");
memset(sac, 0, sizeof(*sac));
controller = kmalloc(sizeof(*controller), GFP_KERNEL);
controller = kzalloc(sizeof(*controller), GFP_KERNEL);
if (!controller)
panic(PFX "Couldn't allocate struct pci_controller");
memset(controller, 0, sizeof(*controller));
controller->iommu = ioc;
sac->sysdata = controller;
......@@ -1737,12 +1735,10 @@ ioc_init(u64 hpa, void *handle)
struct ioc *ioc;
struct ioc_iommu *info;
ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
if (!ioc)
return NULL;
memset(ioc, 0, sizeof(*ioc));
ioc->next = ioc_list;
ioc_list = ioc;
......
......@@ -684,12 +684,11 @@ static int get_async_struct(int line, struct async_struct **ret_info)
*ret_info = sstate->info;
return 0;
}
info = kmalloc(sizeof(struct async_struct), GFP_KERNEL);
info = kzalloc(sizeof(struct async_struct), GFP_KERNEL);
if (!info) {
sstate->count--;
return -ENOMEM;
}
memset(info, 0, sizeof(struct async_struct));
init_waitqueue_head(&info->open_wait);
init_waitqueue_head(&info->close_wait);
init_waitqueue_head(&info->delta_msr_wait);
......
......@@ -28,6 +28,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_PCI_MSI) += msi_ia64.o
......
......@@ -68,7 +68,8 @@ processor_get_pstate (
dprintk("processor_get_pstate\n");
retval = ia64_pal_get_pstate(&pstate_index);
retval = ia64_pal_get_pstate(&pstate_index,
PAL_GET_PSTATE_TYPE_INSTANT);
*value = (u32) pstate_index;
if (retval)
......@@ -91,7 +92,7 @@ extract_clock (
dprintk("extract_clock\n");
for (i = 0; i < data->acpi_data.state_count; i++) {
if (value >= data->acpi_data.states[i].control)
if (value == data->acpi_data.states[i].status)
return data->acpi_data.states[i].core_frequency;
}
return data->acpi_data.states[i-1].core_frequency;
......@@ -117,11 +118,7 @@ processor_get_freq (
goto migrate_end;
}
/*
* processor_get_pstate gets the average frequency since the
* last get. So, do two PAL_get_freq()...
*/
ret = processor_get_pstate(&value);
/* processor_get_pstate gets the instantaneous frequency */
ret = processor_get_pstate(&value);
if (ret) {
......
/*
* arch/ia64/kernel/crash.c
*
* Architecture specific (ia64) functions for kexec based crash dumps.
*
* Created by: Khalid Aziz <khalid.aziz@hp.com>
* Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
* Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com>
*
*/
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/bootmem.h>
#include <linux/kexec.h>
#include <linux/elfcore.h>
#include <linux/sysctl.h>
#include <linux/init.h>
#include <asm/kdebug.h>
#include <asm/mca.h>
#include <asm/uaccess.h>
int kdump_status[NR_CPUS];
atomic_t kdump_cpu_freezed;
atomic_t kdump_in_progress;
int kdump_on_init = 1;
ssize_t
copy_oldmem_page(unsigned long pfn, char *buf,
size_t csize, unsigned long offset, int userbuf)
{
void *vaddr;
if (!csize)
return 0;
vaddr = __va(pfn<<PAGE_SHIFT);
if (userbuf) {
if (copy_to_user(buf, (vaddr + offset), csize)) {
return -EFAULT;
}
} else
memcpy(buf, (vaddr + offset), csize);
return csize;
}
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
size_t data_len)
{
struct elf_note *note = (struct elf_note *)buf;
note->n_namesz = strlen(name) + 1;
note->n_descsz = data_len;
note->n_type = type;
buf += (sizeof(*note) + 3)/4;
memcpy(buf, name, note->n_namesz);
buf += (note->n_namesz + 3)/4;
memcpy(buf, data, data_len);
buf += (data_len + 3)/4;
return buf;
}
static void
final_note(void *buf)
{
memset(buf, 0, sizeof(struct elf_note));
}
extern void ia64_dump_cpu_regs(void *);
static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
void
crash_save_this_cpu()
{
void *buf;
unsigned long cfm, sof, sol;
int cpu = smp_processor_id();
struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
memset(prstatus, 0, sizeof(*prstatus));
prstatus->pr_pid = current->pid;
ia64_dump_cpu_regs(dst);
cfm = dst[43];
sol = (cfm >> 7) & 0x7f;
sof = cfm & 0x7f;
dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
sof - sol);
buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
if (!buf)
return;
buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus,
sizeof(*prstatus));
final_note(buf);
}
static int
kdump_wait_cpu_freeze(void)
{
int cpu_num = num_online_cpus() - 1;
int timeout = 1000;
while(timeout-- > 0) {
if (atomic_read(&kdump_cpu_freezed) == cpu_num)
return 0;
udelay(1000);
}
return 1;
}
void
machine_crash_shutdown(struct pt_regs *pt)
{
/* This function is only called after the system
* has paniced or is otherwise in a critical state.
* The minimum amount of code to allow a kexec'd kernel
* to run successfully needs to happen here.
*
* In practice this means shooting down the other cpus in
* an SMP system.
*/
kexec_disable_iosapic();
#ifdef CONFIG_SMP
kdump_smp_send_stop();
if (kdump_wait_cpu_freeze() && kdump_on_init) {
//not all cpu response to IPI, send INIT to freeze them
kdump_smp_send_init();
}
#endif
}
static void
machine_kdump_on_init(void)
{
local_irq_disable();
kexec_disable_iosapic();
machine_kexec(ia64_kimage);
}
void
kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
{
int cpuid;
local_irq_disable();
cpuid = smp_processor_id();
crash_save_this_cpu();
current->thread.ksp = (__u64)info->sw - 16;
atomic_inc(&kdump_cpu_freezed);
kdump_status[cpuid] = 1;
mb();
if (cpuid == 0) {
for (;;)
cpu_relax();
} else
ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]);
}
static int
kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
{
struct ia64_mca_notify_die *nd;
struct die_args *args = data;
if (!kdump_on_init)
return NOTIFY_DONE;
if (val != DIE_INIT_MONARCH_ENTER &&
val != DIE_INIT_SLAVE_ENTER &&
val != DIE_MCA_RENDZVOUS_LEAVE &&
val != DIE_MCA_MONARCH_LEAVE)
return NOTIFY_DONE;
nd = (struct ia64_mca_notify_die *)args->err;
/* Reason code 1 means machine check rendezous*/
if ((val == DIE_INIT_MONARCH_ENTER || DIE_INIT_SLAVE_ENTER) &&
nd->sos->rv_rc == 1)
return NOTIFY_DONE;
switch (val) {
case DIE_INIT_MONARCH_ENTER:
machine_kdump_on_init();
break;
case DIE_INIT_SLAVE_ENTER:
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_RENDZVOUS_LEAVE:
if (atomic_read(&kdump_in_progress))
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_MONARCH_LEAVE:
/* die_register->signr indicate if MCA is recoverable */
if (!args->signr)
machine_kdump_on_init();
break;
}
return NOTIFY_DONE;
}
#ifdef CONFIG_SYSCTL
static ctl_table kdump_on_init_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_init",
.data = &kdump_on_init,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 }
};
static ctl_table sys_table[] = {
{
.ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = kdump_on_init_table,
},
{ .ctl_name = 0 }
};
#endif
static int
machine_crash_setup(void)
{
char *from = strstr(saved_command_line, "elfcorehdr=");
static struct notifier_block kdump_init_notifier_nb = {
.notifier_call = kdump_init_notifier,
};
int ret;
if (from)
elfcorehdr_addr = memparse(from+11, &from);
saved_max_pfn = (unsigned long)-1;
if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
return ret;
#ifdef CONFIG_SYSCTL
register_sysctl_table(sys_table, 0);
#endif
return 0;
}
__initcall(machine_crash_setup);
......@@ -26,6 +26,7 @@
#include <linux/types.h>
#include <linux/time.h>
#include <linux/efi.h>
#include <linux/kexec.h>
#include <asm/io.h>
#include <asm/kregs.h>
......@@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void *, ...);
struct efi efi;
EXPORT_SYMBOL(efi);
static efi_runtime_services_t *runtime;
static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
#define efi_call_virt(f, args...) (*(f))(args)
......@@ -224,7 +225,7 @@ efi_gettimeofday (struct timespec *ts)
}
static int
is_available_memory (efi_memory_desc_t *md)
is_memory_available (efi_memory_desc_t *md)
{
if (!(md->attribute & EFI_MEMORY_WB))
return 0;
......@@ -421,6 +422,8 @@ efi_init (void)
mem_limit = memparse(cp + 4, &cp);
} else if (memcmp(cp, "max_addr=", 9) == 0) {
max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
} else if (memcmp(cp, "min_addr=", 9) == 0) {
min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
} else {
while (*cp != ' ' && *cp)
++cp;
......@@ -428,6 +431,8 @@ efi_init (void)
++cp;
}
}
if (min_addr != 0UL)
printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
if (max_addr != ~0UL)
printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
......@@ -887,14 +892,15 @@ find_memmap_space (void)
}
contig_high = GRANULEROUNDDOWN(contig_high);
}
if (!is_available_memory(md) || md->type == EFI_LOADER_DATA)
if (!is_memory_available(md) || md->type == EFI_LOADER_DATA)
continue;
/* Round ends inward to granule boundaries */
as = max(contig_low, md->phys_addr);
ae = min(contig_high, efi_md_end(md));
/* keep within max_addr= command line arg */
/* keep within max_addr= and min_addr= command line arg */
as = max(as, min_addr);
ae = min(ae, max_addr);
if (ae <= as)
continue;
......@@ -962,7 +968,7 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
}
contig_high = GRANULEROUNDDOWN(contig_high);
}
if (!is_available_memory(md))
if (!is_memory_available(md))
continue;
/*
......@@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
} else
ae = efi_md_end(md);
/* keep within max_addr= command line arg */
/* keep within max_addr= and min_addr= command line arg */
as = max(as, min_addr);
ae = min(ae, max_addr);
if (ae <= as)
continue;
......@@ -1116,6 +1123,58 @@ efi_initialize_iomem_resources(struct resource *code_resource,
*/
insert_resource(res, code_resource);
insert_resource(res, data_resource);
#ifdef CONFIG_KEXEC
insert_resource(res, &efi_memmap_res);
insert_resource(res, &boot_param_res);
if (crashk_res.end > crashk_res.start)
insert_resource(res, &crashk_res);
#endif
}
}
}
#ifdef CONFIG_KEXEC
/* find a block of memory aligned to 64M exclude reserved regions
rsvd_regions are sorted
*/
unsigned long
kdump_find_rsvd_region (unsigned long size,
struct rsvd_region *r, int n)
{
int i;
u64 start, end;
u64 alignment = 1UL << _PAGE_SIZE_64M;
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
u64 efi_desc_size;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
efi_desc_size = ia64_boot_param->efi_memdesc_size;
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
if (!efi_wb(md))
continue;
start = ALIGN(md->phys_addr, alignment);
end = efi_md_end(md);
for (i = 0; i < n; i++) {
if (__pa(r[i].start) >= start && __pa(r[i].end) < end) {
if (__pa(r[i].start) > start + size)
return start;
start = ALIGN(__pa(r[i].end), alignment);
if (i < n-1 && __pa(r[i+1].start) < start + size)
continue;
else
break;
}
}
if (end > start + size)
return start;
}
printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n",
size);
return ~0UL;
}
#endif
......@@ -1575,7 +1575,7 @@ sys_call_table:
data8 sys_mq_timedreceive // 1265
data8 sys_mq_notify
data8 sys_mq_getsetattr
data8 sys_ni_syscall // reserved for kexec_load
data8 sys_kexec_load
data8 sys_ni_syscall // reserved for vserver
data8 sys_waitid // 1270
data8 sys_add_key
......
......@@ -14,6 +14,7 @@ EXPORT_SYMBOL(strlen);
#include <asm/checksum.h>
EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */
EXPORT_SYMBOL(csum_ipv6_magic);
#include <asm/semaphore.h>
EXPORT_SYMBOL(__down);
......
......@@ -288,6 +288,27 @@ nop (unsigned int irq)
/* do nothing... */
}
#ifdef CONFIG_KEXEC
void
kexec_disable_iosapic(void)
{
struct iosapic_intr_info *info;
struct iosapic_rte_info *rte;
u8 vec = 0;
for (info = iosapic_intr_info; info <
iosapic_intr_info + IA64_NUM_VECTORS; ++info, ++vec) {
list_for_each_entry(rte, &info->rtes,
rte_list) {
iosapic_write(rte->addr,
IOSAPIC_RTE_LOW(rte->rte_index),
IOSAPIC_MASK|vec);
iosapic_eoi(rte->addr, vec);
}
}
}
#endif
static void
mask_irq (unsigned int irq)
{
......
......@@ -851,7 +851,7 @@ static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg)
return;
}
} while (unw_unwind(info) >= 0);
lp->bsp = 0;
lp->bsp = NULL;
lp->cfm = 0;
return;
}
......
/*
* arch/ia64/kernel/machine_kexec.c
*
* Handle transition of Linux booting another kernel
* Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P.
* Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
* Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
*
* This source code is licensed under the GNU General Public License,
* Version 2. See the file COPYING for more details.
*/
#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/cpu.h>
#include <linux/irq.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/delay.h>
#include <asm/meminit.h>
typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long,
struct ia64_boot_param *, unsigned long);
struct kimage *ia64_kimage;
struct resource efi_memmap_res = {
.name = "EFI Memory Map",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
struct resource boot_param_res = {
.name = "Boot parameter",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
/*
* Do what every setup is needed on image and the
* reboot code buffer to allow us to avoid allocations
* later.
*/
int machine_kexec_prepare(struct kimage *image)
{
void *control_code_buffer;
const unsigned long *func;
func = (unsigned long *)&relocate_new_kernel;
/* Pre-load control code buffer to minimize work in kexec path */
control_code_buffer = page_address(image->control_code_page);
memcpy((void *)control_code_buffer, (const void *)func[0],
relocate_new_kernel_size);
flush_icache_range((unsigned long)control_code_buffer,
(unsigned long)control_code_buffer + relocate_new_kernel_size);
ia64_kimage = image;
return 0;