Commit e360adbe authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar

irq_work: Add generic hardirq context callbacks

Provide a mechanism that allows running code in IRQ context. It is
most useful for NMI code that needs to interact with the rest of the
system -- like wakeup a task to drain buffers.

Perf currently has such a mechanism, so extract that and provide it as
a generic feature, independent of perf so that others may also
benefit.

The IRQ context callback is generated through self-IPIs where
possible, or on architectures like powerpc the decrementer (the
built-in timer facility) is set to generate an interrupt immediately.

Architectures that don't have anything like this get to do with a
callback from the timer tick. These architectures can call
irq_work_run() at the tail of any IRQ handlers that might enqueue such
work (like the perf IRQ handler) to avoid undue latencies in
processing the work.
Signed-off-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: default avatarKyle McMartin <kyle@mcmartin.ca>
Acked-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
[ various fixes ]
Signed-off-by: default avatarHuang Ying <ying.huang@intel.com>
LKML-Reference: <1287036094.7768.291.camel@yhuang-dev>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 8e5fc1a7
......@@ -9,6 +9,7 @@ config ALPHA
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_SYSCALL_WRAPPERS
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select HAVE_DMA_ATTRS
help
......
#ifndef __ASM_ALPHA_PERF_EVENT_H
#define __ASM_ALPHA_PERF_EVENT_H
/* Alpha only supports software events through this interface. */
extern void set_perf_event_pending(void);
#define PERF_EVENT_INDEX_OFFSET 0
#ifdef CONFIG_PERF_EVENTS
extern void init_hw_perf_events(void);
#else
......
......@@ -41,7 +41,7 @@
#include <linux/init.h>
#include <linux/bcd.h>
#include <linux/profile.h>
#include <linux/perf_event.h>
#include <linux/irq_work.h>
#include <asm/uaccess.h>
#include <asm/io.h>
......@@ -83,25 +83,25 @@ static struct {
unsigned long est_cycle_freq;
#ifdef CONFIG_PERF_EVENTS
#ifdef CONFIG_IRQ_WORK
DEFINE_PER_CPU(u8, perf_event_pending);
DEFINE_PER_CPU(u8, irq_work_pending);
#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
#define test_perf_event_pending() __get_cpu_var(perf_event_pending)
#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0
#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
void set_perf_event_pending(void)
void set_irq_work_pending(void)
{
set_perf_event_pending_flag();
set_irq_work_pending_flag();
}
#else /* CONFIG_PERF_EVENTS */
#else /* CONFIG_IRQ_WORK */
#define test_perf_event_pending() 0
#define clear_perf_event_pending()
#define test_irq_work_pending() 0
#define clear_irq_work_pending()
#endif /* CONFIG_PERF_EVENTS */
#endif /* CONFIG_IRQ_WORK */
static inline __u32 rpcc(void)
......@@ -191,9 +191,9 @@ irqreturn_t timer_interrupt(int irq, void *dev)
write_sequnlock(&xtime_lock);
if (test_perf_event_pending()) {
clear_perf_event_pending();
perf_event_do_pending();
if (test_irq_work_pending()) {
clear_irq_work_pending();
irq_work_run();
}
#ifndef CONFIG_SMP
......
......@@ -23,6 +23,7 @@ config ARM
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZO
select HAVE_KERNEL_LZMA
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select HAVE_REGS_AND_STACK_ACCESS_API
......
......@@ -12,18 +12,6 @@
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
/*
* NOP: on *most* (read: all supported) ARM platforms, the performance
* counter interrupts are regular interrupts and not an NMI. This
* means that when we receive the interrupt we can call
* perf_event_do_pending() that handles all of the work with
* interrupts disabled.
*/
static inline void
set_perf_event_pending(void)
{
}
/* ARM performance counters start from 1 (in the cp15 accesses) so use the
* same indexes here for consistency. */
#define PERF_EVENT_INDEX_OFFSET 1
......
......@@ -1092,7 +1092,7 @@ armv6pmu_handle_irq(int irq_num,
* platforms that can have the PMU interrupts raised as an NMI, this
* will not work.
*/
perf_event_do_pending();
irq_work_run();
return IRQ_HANDLED;
}
......@@ -2068,7 +2068,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
* platforms that can have the PMU interrupts raised as an NMI, this
* will not work.
*/
perf_event_do_pending();
irq_work_run();
return IRQ_HANDLED;
}
......@@ -2436,7 +2436,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
armpmu->disable(hwc, idx);
}
perf_event_do_pending();
irq_work_run();
/*
* Re-enable the PMU.
......@@ -2763,7 +2763,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
armpmu->disable(hwc, idx);
}
perf_event_do_pending();
irq_work_run();
/*
* Re-enable the PMU.
......
......@@ -7,6 +7,7 @@ config FRV
default y
select HAVE_IDE
select HAVE_ARCH_TRACEHOOK
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
config ZONE_DMA
......
......@@ -5,4 +5,4 @@
lib-y := \
__ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o
outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o
/* Performance event handling
*
* Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#include <linux/perf_event.h>
/*
* mark the performance event as pending
*/
void set_perf_event_pending(void)
{
}
......@@ -16,6 +16,7 @@ config PARISC
select RTC_DRV_GENERIC
select INIT_ALL_POSSIBLE
select BUG
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select GENERIC_ATOMIC64 if !64BIT
help
......
#ifndef __ASM_PARISC_PERF_EVENT_H
#define __ASM_PARISC_PERF_EVENT_H
/* parisc only supports software events through this interface. */
static inline void set_perf_event_pending(void) { }
/* Empty, just to avoid compiling error */
#endif /* __ASM_PARISC_PERF_EVENT_H */
......@@ -138,6 +138,7 @@ config PPC
select HAVE_OPROFILE
select HAVE_SYSCALL_WRAPPERS if PPC64
select GENERIC_ATOMIC64 if PPC32
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
......
......@@ -129,7 +129,7 @@ struct paca_struct {
u8 soft_enabled; /* irq soft-enable flag */
u8 hard_enabled; /* set if irqs are enabled in MSR */
u8 io_sync; /* writel() needs spin_unlock sync */
u8 perf_event_pending; /* PM interrupt while soft-disabled */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
/* Stuff for accurate time accounting */
u64 user_time; /* accumulated usermode TB ticks */
......
......@@ -53,7 +53,7 @@
#include <linux/posix-timers.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/perf_event.h>
#include <linux/irq_work.h>
#include <asm/trace.h>
#include <asm/io.h>
......@@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void)
}
#endif /* CONFIG_PPC_ISERIES */
#ifdef CONFIG_PERF_EVENTS
#ifdef CONFIG_IRQ_WORK
/*
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
static inline unsigned long test_perf_event_pending(void)
static inline unsigned long test_irq_work_pending(void)
{
unsigned long x;
asm volatile("lbz %0,%1(13)"
: "=r" (x)
: "i" (offsetof(struct paca_struct, perf_event_pending)));
: "i" (offsetof(struct paca_struct, irq_work_pending)));
return x;
}
static inline void set_perf_event_pending_flag(void)
static inline void set_irq_work_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (1),
"i" (offsetof(struct paca_struct, perf_event_pending)));
"i" (offsetof(struct paca_struct, irq_work_pending)));
}
static inline void clear_perf_event_pending(void)
static inline void clear_irq_work_pending(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (0),
"i" (offsetof(struct paca_struct, perf_event_pending)));
"i" (offsetof(struct paca_struct, irq_work_pending)));
}
#else /* 32-bit */
DEFINE_PER_CPU(u8, perf_event_pending);
DEFINE_PER_CPU(u8, irq_work_pending);
#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
#define test_perf_event_pending() __get_cpu_var(perf_event_pending)
#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0
#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
#endif /* 32 vs 64 bit */
void set_perf_event_pending(void)
void set_irq_work_pending(void)
{
preempt_disable();
set_perf_event_pending_flag();
set_irq_work_pending_flag();
set_dec(1);
preempt_enable();
}
#else /* CONFIG_PERF_EVENTS */
#else /* CONFIG_IRQ_WORK */
#define test_perf_event_pending() 0
#define clear_perf_event_pending()
#define test_irq_work_pending() 0
#define clear_irq_work_pending()
#endif /* CONFIG_PERF_EVENTS */
#endif /* CONFIG_IRQ_WORK */
/*
* For iSeries shared processors, we have to let the hypervisor
......@@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs)
calculate_steal_time();
if (test_perf_event_pending()) {
clear_perf_event_pending();
perf_event_do_pending();
if (test_irq_work_pending()) {
clear_irq_work_pending();
irq_work_run();
}
#ifdef CONFIG_PPC_ISERIES
......
......@@ -95,6 +95,7 @@ config S390
select HAVE_KVM if 64BIT
select HAVE_ARCH_TRACEHOOK
select INIT_ALL_POSSIBLE
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
......
......@@ -4,7 +4,6 @@
* Copyright 2009 Martin Schwidefsky, IBM Corporation.
*/
static inline void set_perf_event_pending(void) {}
static inline void clear_perf_event_pending(void) {}
/* Empty, just to avoid compiling error */
#define PERF_EVENT_INDEX_OFFSET 0
......@@ -16,6 +16,7 @@ config SUPERH
select HAVE_ARCH_TRACEHOOK
select HAVE_DMA_API_DEBUG
select HAVE_DMA_ATTRS
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select HAVE_KERNEL_GZIP
......
......@@ -26,11 +26,4 @@ extern int register_sh_pmu(struct sh_pmu *);
extern int reserve_pmc_hardware(void);
extern void release_pmc_hardware(void);
static inline void set_perf_event_pending(void)
{
/* Nothing to see here, move along. */
}
#define PERF_EVENT_INDEX_OFFSET 0
#endif /* __ASM_SH_PERF_EVENT_H */
......@@ -26,6 +26,7 @@ config SPARC
select ARCH_WANT_OPTIONAL_GPIOLIB
select RTC_CLASS
select RTC_DRV_M48T59
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select HAVE_DMA_ATTRS
......@@ -54,6 +55,7 @@ config SPARC64
select RTC_DRV_BQ4802
select RTC_DRV_SUN4V
select RTC_DRV_STARFIRE
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
......
#ifndef __ASM_SPARC_PERF_EVENT_H
#define __ASM_SPARC_PERF_EVENT_H
extern void set_perf_event_pending(void);
#define PERF_EVENT_INDEX_OFFSET 0
#ifdef CONFIG_PERF_EVENTS
#include <asm/ptrace.h>
......
......@@ -7,7 +7,7 @@
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/perf_event.h>
#include <linux/irq_work.h>
#include <linux/ftrace.h>
#include <asm/pil.h>
......@@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs)
old_regs = set_irq_regs(regs);
irq_enter();
#ifdef CONFIG_PERF_EVENTS
perf_event_do_pending();
#ifdef CONFIG_IRQ_WORK
irq_work_run();
#endif
irq_exit();
set_irq_regs(old_regs);
}
void set_perf_event_pending(void)
void arch_irq_work_raise(void)
{
set_softint(1 << PIL_DEFERRED_PCR_WORK);
}
......
......@@ -25,6 +25,7 @@ config X86
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_PERF_EVENTS if (!M386 && !M486)
select HAVE_IRQ_WORK
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
......
......@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
#ifdef CONFIG_PERF_EVENTS
BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
#ifdef CONFIG_IRQ_WORK
BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
......
......@@ -14,7 +14,7 @@ typedef struct {
#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
unsigned int apic_pending_irqs;
unsigned int apic_irq_work_irqs;
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
......
......@@ -29,7 +29,7 @@
extern void apic_timer_interrupt(void);
extern void x86_platform_ipi(void);
extern void error_interrupt(void);
extern void perf_pending_interrupt(void);
extern void irq_work_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
......
......@@ -114,9 +114,9 @@
#define X86_PLATFORM_IPI_VECTOR 0xed
/*
* Performance monitoring pending work vector:
* IRQ work vector:
*/
#define LOCAL_PENDING_VECTOR 0xec
#define IRQ_WORK_VECTOR 0xec
#define UV_BAU_MESSAGE 0xea
......
......@@ -35,6 +35,7 @@ obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
......
......@@ -1196,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
return handled;
}
void smp_perf_pending_interrupt(struct pt_regs *regs)
{
irq_enter();
ack_APIC_irq();
inc_irq_stat(apic_pending_irqs);
perf_event_do_pending();
irq_exit();
}
void set_perf_event_pending(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
if (!x86_pmu.apic || !x86_pmu_initialized())
return;
apic->send_IPI_self(LOCAL_PENDING_VECTOR);
#endif
}
void perf_events_lapic_init(void)
{
if (!x86_pmu.apic || !x86_pmu_initialized())
......
......@@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
#ifdef CONFIG_PERF_EVENTS
apicinterrupt LOCAL_PENDING_VECTOR \
perf_pending_interrupt smp_perf_pending_interrupt
#ifdef CONFIG_IRQ_WORK
apicinterrupt IRQ_WORK_VECTOR \
irq_work_interrupt smp_irq_work_interrupt
#endif
/*
......
......@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
seq_printf(p, " Performance monitoring interrupts\n");
seq_printf(p, "%*s: ", prec, "PND");
seq_printf(p, "%*s: ", prec, "IWI");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
seq_printf(p, " Performance pending work\n");
seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
seq_printf(p, " IRQ work interrupts\n");
#endif
if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT");
......@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_timer_irqs;
sum += irq_stats(cpu)->irq_spurious_count;
sum += irq_stats(cpu)->apic_perf_irqs;
sum += irq_stats(cpu)->apic_pending_irqs;
sum += irq_stats(cpu)->apic_irq_work_irqs;
#endif
if (x86_platform_ipi_callback)
sum += irq_stats(cpu)->x86_platform_ipis;
......
/*
* x86 specific code for irq_work
*
* Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
*/
#include <linux/kernel.h>
#include <linux/irq_work.h>
#include <linux/hardirq.h>
#include <asm/apic.h>
void smp_irq_work_interrupt(struct pt_regs *regs)
{
irq_enter();
ack_APIC_irq();
inc_irq_stat(apic_irq_work_irqs);
irq_work_run();
irq_exit();
}
void arch_irq_work_raise(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
if (!cpu_has_apic)
return;
apic->send_IPI_self(IRQ_WORK_VECTOR);
apic_wait_icr_idle();
#endif
}
......@@ -224,9 +224,9 @@ static void __init apic_intr_init(void)
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
/* Performance monitoring interrupts: */
# ifdef CONFIG_PERF_EVENTS
alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
/* IRQ work interrupts: */
# ifdef CONFIG_IRQ_WORK
alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
# endif
#endif
......
#ifndef _LINUX_IRQ_WORK_H
#define _LINUX_IRQ_WORK_H
struct irq_work {
struct irq_work *next;
void (*func)(struct irq_work *);
};
static inline
void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *))
{
entry->next = NULL;
entry->func = func;
}
bool irq_work_queue(struct irq_work *entry);
void irq_work_run(void);
void irq_work_sync(struct irq_work *entry);
#endif /* _LINUX_IRQ_WORK_H */
......@@ -486,6 +486,7 @@ struct perf_guest_info_callbacks {
#include <linux/workqueue.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/irq_work.h>
#include <asm/atomic.h>
#include <asm/local.h>
......@@ -672,11 +673,6 @@ struct perf_buffer {
void *data_pages[0];
};
struct perf_pending_entry {
struct perf_pending_entry *next;
void (*func)(struct perf_pending_entry *);
};