Commit 4d7b4ac2 authored by Linus Torvalds

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (311 commits)
  perf tools: Add mode to build without newt support
  perf symbols: symbol inconsistency message should be done only at verbose=1
  perf tui: Add explicit -lslang option
  perf options: Type check all the remaining OPT_ variants
  perf options: Type check OPT_BOOLEAN and fix the offenders
  perf options: Check v type in OPT_U?INTEGER
  perf options: Introduce OPT_UINTEGER
  perf tui: Add workaround for slang < 2.1.4
  perf record: Fix bug mismatch with -c option definition
  perf options: Introduce OPT_U64
  perf tui: Add help window to show key associations
  perf tui: Make <- exit menus too
  perf newt: Add single key shortcuts for zoom into DSO and threads
  perf newt: Exit browser unconditionally when CTRL+C, q or Q is pressed
  perf newt: Fix the 'A'/'a' shortcut for annotate
  perf newt: Make <- exit the ui_browser
  x86, perf: P4 PMU - fix counters management logic
  perf newt: Make <- zoom out filters
  perf report: Report number of events, not samples
  perf hist: Clarify events_stats fields usage
  ...

Fix up trivial conflicts in kernel/fork.c and tools/perf/builtin-record.c
parents 3aaf51ac 94f3ca95
......@@ -165,8 +165,8 @@ the user entry_handler invocation is also skipped.
1.4 How Does Jump Optimization Work?
-If you configured your kernel with CONFIG_OPTPROBES=y (currently
-this option is supported on x86/x86-64, non-preemptive kernel) and
+If your kernel is built with CONFIG_OPTPROBES=y (currently this flag
+is automatically set 'y' on x86/x86-64, non-preemptive kernel) and
the "debug.kprobes_optimization" kernel parameter is set to 1 (see
sysctl(8)), Kprobes tries to reduce probe-hit overhead by using a jump
instruction instead of a breakpoint instruction at each probepoint.
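For orientation, a minimal sketch (not part of this patch) of a kprobe module that is a candidate for jump optimization because it registers only a pre_handler; the probed symbol do_fork and all other identifiers here are illustrative:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* Runs just before the probed instruction; keep it short. */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	return 0;
}

static struct kprobe kp = {
	.symbol_name = "do_fork",	/* hypothetical probe target */
	.pre_handler = handler_pre,
	/* no post_handler/break_handler, so the probe stays optimizable */
};

static int __init optkp_init(void)
{
	return register_kprobe(&kp);
}

static void __exit optkp_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(optkp_init);
module_exit(optkp_exit);
MODULE_LICENSE("GPL");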
......@@ -271,8 +271,6 @@ tweak the kernel's execution path, you need to suppress optimization,
using one of the following techniques (a short sketch of the first one follows the list):
- Specify an empty function for the kprobe's post_handler or break_handler.
or
- Config CONFIG_OPTPROBES=n.
or
- Execute 'sysctl -w debug.kprobes_optimization=n'
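A hedged sketch of the first technique above (names are again hypothetical): the mere presence of an empty post_handler keeps the probe un-optimized.

#include <linux/kprobes.h>

static void nop_post_handler(struct kprobe *p, struct pt_regs *regs,
			     unsigned long flags)
{
	/* intentionally empty: having a post_handler suppresses optimization */
}

static struct kprobe kp_unoptimized = {
	.symbol_name  = "do_fork",	/* hypothetical probe target */
	.post_handler = nop_post_handler,
};

/* register with register_kprobe(&kp_unoptimized) as usual */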
2. Architectures Supported
......@@ -307,10 +305,6 @@ it useful to "Compile the kernel with debug info" (CONFIG_DEBUG_INFO),
so you can use "objdump -d -l vmlinux" to see the source-to-object
code mapping.
If you want to reduce probing overhead, set "Kprobes jump optimization
support" (CONFIG_OPTPROBES) to "y". You can find this option under the
"Kprobes" line.
4. API Reference
The Kprobes API includes a "register" function and an "unregister"
......
......@@ -40,7 +40,9 @@ Synopsis of kprobe_events
$stack : Fetch stack address.
$retval : Fetch return value.(*)
+|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
-NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
+NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
+FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
+                (u8/u16/u32/u64/s8/s16/s32/s64) are supported.
(*) only for return probe.
(**) this is useful for fetching a field of data structures.
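Purely as an illustration of the syntax above (event and argument names are made up, and only the fetch-args and types documented here are used), two definitions that could be echoed into the kprobe_events file this document describes:

  p:myprobe do_sys_open arg1=+4($stack):u32
  r:myretprobe do_sys_open rv=$retval:s32

The first names a dereferenced stack slot and gives it a u32 type; the second uses $retval, which is only valid on a return probe.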
......
......@@ -4354,13 +4354,13 @@ M: Paul Mackerras <paulus@samba.org>
M: Ingo Molnar <mingo@elte.hu>
M: Arnaldo Carvalho de Melo <acme@redhat.com>
S: Supported
-F:	kernel/perf_event.c
+F:	kernel/perf_event*.c
 F:	include/linux/perf_event.h
-F:	arch/*/kernel/perf_event.c
-F:	arch/*/kernel/*/perf_event.c
-F:	arch/*/kernel/*/*/perf_event.c
+F:	arch/*/kernel/perf_event*.c
+F:	arch/*/kernel/*/perf_event*.c
+F:	arch/*/kernel/*/*/perf_event*.c
 F:	arch/*/include/asm/perf_event.h
-F:	arch/*/lib/perf_event.c
+F:	arch/*/lib/perf_event*.c
F: arch/*/kernel/perf_callchain.c
F: tools/perf/
......
......@@ -42,15 +42,10 @@ config KPROBES
If in doubt, say "N".
config OPTPROBES
bool "Kprobes jump optimization support (EXPERIMENTAL)"
default y
depends on KPROBES
def_bool y
depends on KPROBES && HAVE_OPTPROBES
depends on !PREEMPT
depends on HAVE_OPTPROBES
select KALLSYMS_ALL
help
This option will allow kprobes to optimize breakpoint to
a jump for reducing its overhead.
config HAVE_EFFICIENT_UNALIGNED_ACCESS
bool
......@@ -142,6 +137,17 @@ config HAVE_HW_BREAKPOINT
bool
depends on PERF_EVENTS
config HAVE_MIXED_BREAKPOINTS_REGS
bool
depends on HAVE_HW_BREAKPOINT
help
Depending on the arch implementation of hardware breakpoints,
some of them have separate registers for data and instruction
breakpoint addresses, while others store both in mixed registers
and define the access type in a control register.
Select this option if your arch implements breakpoints in the
latter fashion.
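(For concrete users, note that both the sh and x86 Kconfig hunks later in this same series select HAVE_MIXED_BREAKPOINTS_REGS alongside HAVE_HW_BREAKPOINT.)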
config HAVE_USER_RETURN_NOTIFIER
bool
......
......@@ -35,6 +35,9 @@ struct cpu_hw_events {
u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
unsigned int group_flag;
int n_txn_start;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
......@@ -718,66 +721,6 @@ static int collect_events(struct perf_event *group, int max_count,
return n;
}
static void event_sched_in(struct perf_event *event)
{
event->state = PERF_EVENT_STATE_ACTIVE;
event->oncpu = smp_processor_id();
event->tstamp_running += event->ctx->time - event->tstamp_stopped;
if (is_software_event(event))
event->pmu->enable(event);
}
/*
* Called to enable a whole group of events.
* Returns 1 if the group was enabled, or -EAGAIN if it could not be.
* Assumes the caller has disabled interrupts and has
* frozen the PMU with hw_perf_save_disable.
*/
int hw_perf_group_sched_in(struct perf_event *group_leader,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
struct cpu_hw_events *cpuhw;
long i, n, n0;
struct perf_event *sub;
if (!ppmu)
return 0;
cpuhw = &__get_cpu_var(cpu_hw_events);
n0 = cpuhw->n_events;
n = collect_events(group_leader, ppmu->n_counter - n0,
&cpuhw->event[n0], &cpuhw->events[n0],
&cpuhw->flags[n0]);
if (n < 0)
return -EAGAIN;
if (check_excludes(cpuhw->event, cpuhw->flags, n0, n))
return -EAGAIN;
i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0);
if (i < 0)
return -EAGAIN;
cpuhw->n_events = n0 + n;
cpuhw->n_added += n;
/*
* OK, this group can go on; update event states etc.,
* and enable any software events
*/
for (i = n0; i < n0 + n; ++i)
cpuhw->event[i]->hw.config = cpuhw->events[i];
cpuctx->active_oncpu += n;
n = 1;
event_sched_in(group_leader);
list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
if (sub->state != PERF_EVENT_STATE_OFF) {
event_sched_in(sub);
++n;
}
}
ctx->nr_active += n;
return 1;
}
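(This PowerPC-specific group scheduling path can go away because, with the start_txn/cancel_txn/commit_txn hooks added further down in this file, the generic perf core is able to schedule the whole group itself.)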
/*
* Add an event to the PMU.
* If all events are not already frozen, then we disable and
......@@ -805,12 +748,22 @@ static int power_pmu_enable(struct perf_event *event)
cpuhw->event[n0] = event;
cpuhw->events[n0] = event->hw.config;
cpuhw->flags[n0] = event->hw.event_base;
/*
* If group events scheduling transaction was started,
* skip the schedulability test here; it will be performed
* at commit time (->commit_txn) as a whole
*/
if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED)
goto nocheck;
if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
goto out;
if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
goto out;
event->hw.config = cpuhw->events[n0];
nocheck:
++cpuhw->n_events;
++cpuhw->n_added;
......@@ -896,11 +849,65 @@ static void power_pmu_unthrottle(struct perf_event *event)
local_irq_restore(flags);
}
/*
* Start group events scheduling transaction
* Set the flag to make pmu::enable() not perform the
* schedulability test; it will be performed at commit time
*/
void power_pmu_start_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
cpuhw->n_txn_start = cpuhw->n_events;
}
/*
* Stop group events scheduling transaction
* Clear the flag and pmu::enable() will perform the
* schedulability test.
*/
void power_pmu_cancel_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
}
/*
* Commit group events scheduling transaction
* Perform the group schedulability test as a whole
* Returns 0 on success
*/
int power_pmu_commit_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
long i, n;
if (!ppmu)
return -EAGAIN;
cpuhw = &__get_cpu_var(cpu_hw_events);
n = cpuhw->n_events;
if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
return -EAGAIN;
i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
if (i < 0)
return -EAGAIN;
for (i = cpuhw->n_txn_start; i < n; ++i)
cpuhw->event[i]->hw.config = cpuhw->events[i];
return 0;
}
struct pmu power_pmu = {
.enable = power_pmu_enable,
.disable = power_pmu_disable,
.read = power_pmu_read,
.unthrottle = power_pmu_unthrottle,
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
};
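To make the new hooks concrete, here is a hedged caller-side sketch; it is an assumption about how the generic perf core drives start_txn/enable/commit_txn, not code from this patch, and the helper name is invented:

#include <linux/perf_event.h>

static int group_sched_in_sketch(struct pmu *pmu, struct perf_event *leader,
				 struct perf_event **members, int nr_members)
{
	int i;

	pmu->start_txn(pmu);		/* defer per-event schedulability checks */

	if (pmu->enable(leader))
		goto cancel;
	for (i = 0; i < nr_members; i++)
		if (pmu->enable(members[i]))
			goto cancel;

	if (!pmu->commit_txn(pmu))	/* test the whole group at once */
		return 0;
cancel:
	/* the real core also undoes any partial enables before cancelling */
	pmu->cancel_txn(pmu);
	return -EAGAIN;
}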
/*
......
......@@ -44,6 +44,7 @@ config SUPERH32
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_ARCH_KGDB
select HAVE_HW_BREAKPOINT
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS if HAVE_HW_BREAKPOINT
select ARCH_HIBERNATION_POSSIBLE if MMU
......
......@@ -46,10 +46,14 @@ struct pmu;
/* Maximum number of UBC channels */
#define HBP_NUM 2
+static inline int hw_breakpoint_slots(int type)
+{
+	return HBP_NUM;
+}
 /* arch/sh/kernel/hw_breakpoint.c */
-extern int arch_check_va_in_userspace(unsigned long va, u16 hbp_len);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
-					 struct task_struct *tsk);
+extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
......
......@@ -119,26 +119,17 @@ static int get_hbp_len(u16 hbp_len)
return len_in_bytes;
}
/*
* Check for virtual address in user space.
*/
int arch_check_va_in_userspace(unsigned long va, u16 hbp_len)
{
unsigned int len;
len = get_hbp_len(hbp_len);
return (va <= TASK_SIZE - len);
}
/*
* Check for virtual address in kernel space.
*/
static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
int arch_check_bp_in_kernelspace(struct perf_event *bp)
{
unsigned int len;
unsigned long va;
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
len = get_hbp_len(hbp_len);
va = info->address;
len = get_hbp_len(info->len);
return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
}
......@@ -226,8 +217,7 @@ static int arch_build_bp_info(struct perf_event *bp)
/*
* Validate the arch-specific HW Breakpoint register settings
*/
-int arch_validate_hwbkpt_settings(struct perf_event *bp,
-				  struct task_struct *tsk)
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
unsigned int align;
......@@ -270,15 +260,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
if (info->address & align)
return -EINVAL;
/* Check that the virtual address is in the proper range */
if (tsk) {
if (!arch_check_va_in_userspace(info->address, info->len))
return -EFAULT;
} else {
if (!arch_check_va_in_kernelspace(info->address, info->len))
return -EFAULT;
}
return 0;
}
......@@ -363,8 +344,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
perf_bp_event(bp, args->regs);
/* Deliver the signal to userspace */
-	if (arch_check_va_in_userspace(bp->attr.bp_addr,
-				       bp->attr.bp_len)) {
+	if (!arch_check_bp_in_kernelspace(bp)) {
siginfo_t info;
info.si_signo = args->signr;
......
......@@ -85,7 +85,7 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)
bp = thread->ptrace_bps[0];
if (!bp) {
-		hw_breakpoint_init(&attr);
+		ptrace_breakpoint_init(&attr);
attr.bp_addr = addr;
attr.bp_len = HW_BREAKPOINT_LEN_2;
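	/*
	 * Editor's sketch, not part of this hunk: one plausible way the attr
	 * set up above is completed and registered (the handler name
	 * ptrace_triggered is assumed):
	 *
	 *	attr.bp_type = HW_BREAKPOINT_R;
	 *	bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
	 *	if (IS_ERR(bp))
	 *		return PTR_ERR(bp);
	 */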
......
......@@ -53,11 +53,15 @@ config X86
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_LZO
select HAVE_HW_BREAKPOINT
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS
select ANON_INODES
select HAVE_ARCH_KMEMCHECK
select HAVE_USER_RETURN_NOTIFIER
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS)
config OUTPUT_FORMAT
string
default "elf32-i386" if X86_32
......
......@@ -502,23 +502,3 @@ config CPU_SUP_UMC_32
CPU might render the kernel unbootable.
If unsure, say N.
config X86_DS
def_bool X86_PTRACE_BTS
depends on X86_DEBUGCTLMSR
select HAVE_HW_BRANCH_TRACER
config X86_PTRACE_BTS
bool "Branch Trace Store"
default y
depends on X86_DEBUGCTLMSR
depends on BROKEN
---help---
This adds a ptrace interface to the hardware's branch trace store.
Debuggers may use it to collect an execution trace of the debugged
application in order to answer the question 'how did I get here?'.
Debuggers may trace user mode as well as kernel mode.
Say Y unless there is no application development on this machine
and you want to save a small amount of code size.
......@@ -174,15 +174,6 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
config X86_DS_SELFTEST
bool "DS selftest"
default y
depends on DEBUG_KERNEL
depends on X86_DS
---help---
Perform Debug Store selftests at boot time.
If in doubt, say "N".
config HAVE_MMIOTRACE_SUPPORT
def_bool y
......
......@@ -373,6 +373,7 @@ extern atomic_t init_deasserted;
extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
static inline u32 apic_read(u32 reg)
{
return apic->read(reg);
......@@ -403,10 +404,19 @@ static inline u32 safe_apic_wait_icr_idle(void)
return apic->safe_wait_icr_idle();
}
#else /* CONFIG_X86_LOCAL_APIC */
static inline u32 apic_read(u32 reg) { return 0; }
static inline void apic_write(u32 reg, u32 val) { }
static inline u64 apic_icr_read(void) { return 0; }
static inline void apic_icr_write(u32 low, u32 high) { }
static inline void apic_wait_icr_idle(void) { }
static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
#endif /* CONFIG_X86_LOCAL_APIC */
static inline void ack_APIC_irq(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
/*
* ack_APIC_irq() actually gets compiled as a single instruction
* ... yummie.
......@@ -414,7 +424,6 @@ static inline void ack_APIC_irq(void)
/* Docs say use 0 for future compatibility */
apic_write(APIC_EOI, 0);
#endif
}
static inline unsigned default_get_apic_id(unsigned long x)
......
/*
* Debug Store (DS) support
*
* This provides a low-level interface to the hardware's Debug Store
* feature that is used for branch trace store (BTS) and
* precise-event based sampling (PEBS).
*
* It manages:
* - DS and BTS hardware configuration
* - buffer overflow handling (to be done)
* - buffer access
*
* It does not do:
* - security checking (is the caller allowed to trace the task)
* - buffer allocation (memory accounting)
*
*
* Copyright (C) 2007-2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
*/
#ifndef _ASM_X86_DS_H
#define _ASM_X86_DS_H
#include <linux/types.h>
#include <linux/init.h>
#include <linux/err.h>
#ifdef CONFIG_X86_DS
struct task_struct;
struct ds_context;
struct ds_tracer;
struct bts_tracer;
struct pebs_tracer;
typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
/*
* A list of features plus corresponding macros to talk about them in
* the ds_request function's flags parameter.
*
* We use the enum to index an array of corresponding control bits;
* we use the macro to index a flags bit-vector.
*/
enum ds_feature {
dsf_bts = 0,
dsf_bts_kernel,
#define BTS_KERNEL (1 << dsf_bts_kernel)
/* trace kernel-mode branches */
dsf_bts_user,
#define BTS_USER (1 << dsf_bts_user)
/* trace user-mode branches */
dsf_bts_overflow,
dsf_bts_max,
dsf_pebs = dsf_bts_max,
dsf_pebs_max,
dsf_ctl_max = dsf_pebs_max,
dsf_bts_timestamps = dsf_ctl_max,
#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps)
/* add timestamps into BTS trace */
#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS)
};
/*
* Request BTS or PEBS
*
* Due to alignment constraints, the actual buffer may be slightly
* smaller than the requested or provided buffer.
*
* Returns a pointer to a tracer structure on success, or
* ERR_PTR(errcode) on failure.
*
* The interrupt threshold is independent from the overflow callback
* to allow users to use their own overflow interrupt handling mechanism.
*
* The function might sleep.
*
* task: the task to request recording for
* cpu: the cpu to request recording for
* base: the base pointer for the (non-pageable) buffer;
* size: the size of the provided buffer in bytes
* ovfl: pointer to a function to be called on buffer overflow;
* NULL if cyclic buffer requested
* th: the interrupt threshold in records from the end of the buffer;
* -1 if no interrupt threshold is requested.
* flags: a bit-mask of the above flags
*/
extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
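As a hedged usage sketch based only on the prototypes and flag macros above (the helper name is invented, and this whole interface is being removed by this series), a cyclic BTS request for a task could look like:

/* NULL ovfl callback => cyclic buffer; th == (size_t)-1 => no threshold */
static struct bts_tracer *request_bts_sketch(struct task_struct *task,
					     void *buf, size_t size)
{
	return ds_request_bts_task(task, buf, size, NULL,
				   (size_t)-1, BTS_USER_FLAGS);
}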
/*
* Release BTS or PEBS resources
* Suspend and resume BTS or PEBS tracing
*
* Must be called with irq's enabled.
*
* tracer: the tracer handle returned from ds_request_~()
*/
extern void ds_release_bts(struct bts_tracer *tracer);
extern void ds_suspend_bts(struct bts_tracer *tracer);
extern void ds_resume_bts(struct bts_tracer *tracer);
extern void ds_release_pebs(struct pebs_tracer *tracer);
extern void ds_suspend_pebs(struct pebs_tracer *tracer);
extern void ds_resume_pebs(struct pebs_tracer *tracer);
/*
* Release BTS or PEBS resources
* Suspend and resume BTS or PEBS tracing
*
* Cpu tracers must call this on the traced cpu.
* Task tracers must call ds_release_~_noirq() for themselves.
*
* May be called with irq's disabled.
*