Commit 752f114f authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'tracing-core-for-linus' of...

Merge branch 'tracing-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'tracing-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  tracing: Fix "integer as NULL pointer" warning.
  tracing: Fix tracepoint.h DECLARE_TRACE() to allow more than one header
  tracing: Make the documentation clear on trace_event boot option
  ring-buffer: Wrap open-coded WARN_ONCE
  tracing: Convert nop macros to static inlines
  tracing: Fix sleep time function profiling
  tracing: Show sample std dev in function profiling
  tracing: Add documentation for trace commands mod, traceon/traceoff
  ring-buffer: Make benchmark handle missed events
  ring-buffer: Make non-consuming read less expensive with lots of cpus.
  tracing: Add graph output support for irqsoff tracer
  tracing: Have graph flags passed in to ouput functions
  tracing: Add ftrace events for graph tracer
  tracing: Dump either the oops's cpu source or all cpus buffers
  tracing: Fix uninitialized variable of tracing/trace output
parents b8ae30ee ad56b079
......@@ -786,8 +786,12 @@ and is between 256 and 4096 characters. It is defined in the file
as early as possible in order to facilitate early
boot debugging.
ftrace_dump_on_oops
ftrace_dump_on_oops[=orig_cpu]
[FTRACE] will dump the trace buffers on oops.
If no parameter is passed, ftrace will dump
buffers of all CPUs, but if you pass orig_cpu, it will
dump only the buffer of the CPU that triggered the
oops.
ftrace_filter=[function-list]
[FTRACE] Limit the functions traced by the function
......
......@@ -90,7 +90,8 @@ In order to facilitate early boot debugging, use boot option:
trace_event=[event-list]
The format of this boot option is the same as described in section 2.1.
event-list is a comma separated list of events. See section 2.1 for event
format.
3. Defining an event-enabled tracepoint
=======================================
......
......@@ -155,6 +155,9 @@ of ftrace. Here is a list of some of the key files:
to be traced. Echoing names of functions into this file
will limit the trace to only those functions.
This interface also allows for commands to be used. See the
"Filter commands" section for more details.
set_ftrace_notrace:
This has an effect opposite to that of
......@@ -1337,12 +1340,14 @@ ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one
can either use the sysctl function or set it via the proc system
interface.
sysctl kernel.ftrace_dump_on_oops=1
sysctl kernel.ftrace_dump_on_oops=n
or
echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
echo n > /proc/sys/kernel/ftrace_dump_on_oops
If n = 1, ftrace will dump buffers of all CPUs, if n = 2 ftrace will
only dump the buffer of the CPU that triggered the oops.
Here's an example of such a dump after a null pointer
dereference in a kernel module:
......@@ -1822,6 +1827,47 @@ this special filter via:
echo > set_graph_function
Filter commands
---------------
A few commands are supported by the set_ftrace_filter interface.
Trace commands have the following format:
<function>:<command>:<parameter>
The following commands are supported:
- mod
This command enables function filtering per module. The
parameter defines the module. For example, if only the write*
functions in the ext3 module are desired, run:
echo 'write*:mod:ext3' > set_ftrace_filter
This command interacts with the filter in the same way as
filtering based on function names. Thus, adding more functions
in a different module is accomplished by appending (>>) to the
filter file. Remove specific module functions by prepending
'!':
echo '!writeback*:mod:ext3' >> set_ftrace_filter
- traceon/traceoff
These commands turn tracing on and off when the specified
functions are hit. The parameter determines how many times the
tracing system is turned on and off. If unspecified, there is
no limit. For example, to disable tracing when a schedule bug
is hit the first 5 times, run:
echo '__schedule_bug:traceoff:5' > set_ftrace_filter
These commands are cumulative whether or not they are appended
to set_ftrace_filter. To remove a command, prepend it by '!'
and drop the parameter:
echo '!__schedule_bug:traceoff' > set_ftrace_filter
trace_pipe
----------
......
......@@ -289,7 +289,7 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = {
static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
{
ftrace_dump();
ftrace_dump(DUMP_ALL);
}
static struct sysrq_key_op sysrq_ftrace_dump_op = {
.handler = sysrq_ftrace_dump,
......
......@@ -82,9 +82,13 @@ void clear_ftrace_function(void);
extern void ftrace_stub(unsigned long a0, unsigned long a1);
#else /* !CONFIG_FUNCTION_TRACER */
# define register_ftrace_function(ops) do { } while (0)
# define unregister_ftrace_function(ops) do { } while (0)
# define clear_ftrace_function(ops) do { } while (0)
/*
* (un)register_ftrace_function must be a macro since the ops parameter
* must not be evaluated.
*/
#define register_ftrace_function(ops) ({ 0; })
#define unregister_ftrace_function(ops) ({ 0; })
static inline void clear_ftrace_function(void) { }
static inline void ftrace_kill(void) { }
static inline void ftrace_stop(void) { }
static inline void ftrace_start(void) { }
......@@ -237,11 +241,13 @@ extern int skip_trace(unsigned long ip);
extern void ftrace_disable_daemon(void);
extern void ftrace_enable_daemon(void);
#else
# define skip_trace(ip) ({ 0; })
# define ftrace_force_update() ({ 0; })
# define ftrace_set_filter(buf, len, reset) do { } while (0)
# define ftrace_disable_daemon() do { } while (0)
# define ftrace_enable_daemon() do { } while (0)
static inline int skip_trace(unsigned long ip) { return 0; }
static inline int ftrace_force_update(void) { return 0; }
static inline void ftrace_set_filter(unsigned char *buf, int len, int reset)
{
}
static inline void ftrace_disable_daemon(void) { }
static inline void ftrace_enable_daemon(void) { }
static inline void ftrace_release_mod(struct module *mod) {}
static inline int register_ftrace_command(struct ftrace_func_command *cmd)
{
......@@ -314,16 +320,16 @@ static inline void __ftrace_enabled_restore(int enabled)
extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
#else
# define time_hardirqs_on(a0, a1) do { } while (0)
# define time_hardirqs_off(a0, a1) do { } while (0)
static inline void time_hardirqs_on(unsigned long a0, unsigned long a1) { }
static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { }
#endif
#ifdef CONFIG_PREEMPT_TRACER
extern void trace_preempt_on(unsigned long a0, unsigned long a1);
extern void trace_preempt_off(unsigned long a0, unsigned long a1);
#else
# define trace_preempt_on(a0, a1) do { } while (0)
# define trace_preempt_off(a0, a1) do { } while (0)
static inline void trace_preempt_on(unsigned long a0, unsigned long a1) { }
static inline void trace_preempt_off(unsigned long a0, unsigned long a1) { }
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
......@@ -352,6 +358,10 @@ struct ftrace_graph_ret {
int depth;
};
/* Type of the callback handlers for tracing function graph*/
typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* for init task */
......@@ -400,10 +410,6 @@ extern char __irqentry_text_end[];
#define FTRACE_RETFUNC_DEPTH 50
#define FTRACE_RETSTACK_ALLOC_SIZE 32
/* Type of the callback handlers for tracing function graph*/
typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
trace_func_graph_ent_t entryfunc);
......@@ -441,6 +447,13 @@ static inline void unpause_graph_tracing(void)
static inline void ftrace_graph_init_task(struct task_struct *t) { }
static inline void ftrace_graph_exit_task(struct task_struct *t) { }
static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc,
trace_func_graph_ent_t entryfunc)
{
return -1;
}
static inline void unregister_ftrace_graph(void) { }
static inline int task_curr_ret_stack(struct task_struct *tsk)
{
return -1;
......@@ -492,7 +505,9 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk)
return tsk->trace & TSK_TRACE_FL_GRAPH;
}
extern int ftrace_dump_on_oops;
enum ftrace_dump_mode;
extern enum ftrace_dump_mode ftrace_dump_on_oops;
#ifdef CONFIG_PREEMPT
#define INIT_TRACE_RECURSION .trace_recursion = 0,
......
......@@ -490,6 +490,13 @@ static inline void tracing_off(void) { }
static inline void tracing_off_permanent(void) { }
static inline int tracing_is_on(void) { return 0; }
#endif
enum ftrace_dump_mode {
DUMP_NONE,
DUMP_ALL,
DUMP_ORIG,
};
#ifdef CONFIG_TRACING
extern void tracing_start(void);
extern void tracing_stop(void);
......@@ -571,7 +578,7 @@ __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap);
extern int
__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
extern void ftrace_dump(void);
extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
#else
static inline void
ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
......@@ -592,7 +599,7 @@ ftrace_vprintk(const char *fmt, va_list ap)
{
return 0;
}
static inline void ftrace_dump(void) { }
static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
#endif /* CONFIG_TRACING */
/*
......
......@@ -127,7 +127,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
unsigned long *lost_events);
struct ring_buffer_iter *
ring_buffer_read_start(struct ring_buffer *buffer, int cpu);
ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu);
void ring_buffer_read_prepare_sync(void);
void ring_buffer_read_start(struct ring_buffer_iter *iter);
void ring_buffer_read_finish(struct ring_buffer_iter *iter);
struct ring_buffer_event *
......
......@@ -33,6 +33,65 @@ struct tracepoint {
* Keep in sync with vmlinux.lds.h.
*/
/*
* Connect a probe to a tracepoint.
* Internal API, should not be used directly.
*/
extern int tracepoint_probe_register(const char *name, void *probe);
/*
* Disconnect a probe from a tracepoint.
* Internal API, should not be used directly.
*/
extern int tracepoint_probe_unregister(const char *name, void *probe);
extern int tracepoint_probe_register_noupdate(const char *name, void *probe);
extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe);
extern void tracepoint_probe_update_all(void);
struct tracepoint_iter {
struct module *module;
struct tracepoint *tracepoint;
};
extern void tracepoint_iter_start(struct tracepoint_iter *iter);
extern void tracepoint_iter_next(struct tracepoint_iter *iter);
extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
struct tracepoint *begin, struct tracepoint *end);
/*
* tracepoint_synchronize_unregister must be called between the last tracepoint
* probe unregistration and the end of module exit to make sure there is no
* caller executing a probe when it is freed.
*/
static inline void tracepoint_synchronize_unregister(void)
{
synchronize_sched();
}
#define PARAMS(args...) args
#ifdef CONFIG_TRACEPOINTS
extern void tracepoint_update_probe_range(struct tracepoint *begin,
struct tracepoint *end);
#else
static inline void tracepoint_update_probe_range(struct tracepoint *begin,
struct tracepoint *end)
{ }
#endif /* CONFIG_TRACEPOINTS */
#endif /* _LINUX_TRACEPOINT_H */
/*
* Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include
* file ifdef protection.
* This is due to the way trace events work. If a file includes two
* trace event headers under one "CREATE_TRACE_POINTS" the first include
* will override the TRACE_EVENT and break the second include.
*/
#ifndef DECLARE_TRACE
#define TP_PROTO(args...) args
......@@ -96,9 +155,6 @@ struct tracepoint {
#define EXPORT_TRACEPOINT_SYMBOL(name) \
EXPORT_SYMBOL(__tracepoint_##name)
extern void tracepoint_update_probe_range(struct tracepoint *begin,
struct tracepoint *end);
#else /* !CONFIG_TRACEPOINTS */
#define DECLARE_TRACE(name, proto, args) \
static inline void _do_trace_##name(struct tracepoint *tp, proto) \
......@@ -119,61 +175,9 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin,
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
#define EXPORT_TRACEPOINT_SYMBOL(name)
static inline void tracepoint_update_probe_range(struct tracepoint *begin,
struct tracepoint *end)
{ }
#endif /* CONFIG_TRACEPOINTS */
#endif /* DECLARE_TRACE */
/*
* Connect a probe to a tracepoint.
* Internal API, should not be used directly.
*/
extern int tracepoint_probe_register(const char *name, void *probe);
/*
* Disconnect a probe from a tracepoint.
* Internal API, should not be used directly.
*/
extern int tracepoint_probe_unregister(const char *name, void *probe);
extern int tracepoint_probe_register_noupdate(const char *name, void *probe);
extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe);
extern void tracepoint_probe_update_all(void);
struct tracepoint_iter {
struct module *module;
struct tracepoint *tracepoint;
};
extern void tracepoint_iter_start(struct tracepoint_iter *iter);
extern void tracepoint_iter_next(struct tracepoint_iter *iter);
extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
struct tracepoint *begin, struct tracepoint *end);
/*
* tracepoint_synchronize_unregister must be called between the last tracepoint
* probe unregistration and the end of module exit to make sure there is no
* caller executing a probe when it is freed.
*/
static inline void tracepoint_synchronize_unregister(void)
{
synchronize_sched();
}
#define PARAMS(args...) args
#endif /* _LINUX_TRACEPOINT_H */
/*
* Note: we keep the TRACE_EVENT outside the include file ifdef protection.
* This is due to the way trace events work. If a file includes two
* trace event headers under one "CREATE_TRACE_POINTS" the first include
* will override the TRACE_EVENT and break the second include.
*/
#ifndef TRACE_EVENT
/*
* For use with the TRACE_EVENT macro:
......
......@@ -65,6 +65,10 @@
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
/* Make all open coded DECLARE_TRACE nops */
#undef DECLARE_TRACE
#define DECLARE_TRACE(name, proto, args)
#ifdef CONFIG_EVENT_TRACING
#include <trace/ftrace.h>
#endif
......@@ -75,6 +79,7 @@
#undef DEFINE_EVENT
#undef DEFINE_EVENT_PRINT
#undef TRACE_HEADER_MULTI_READ
#undef DECLARE_TRACE
/* Only undef what we defined in this file */
#ifdef UNDEF_TRACE_INCLUDE_FILE
......
#ifndef _TRACE_NAPI_H_
#undef TRACE_SYSTEM
#define TRACE_SYSTEM napi
#if !defined(_TRACE_NAPI_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_NAPI_H_
#include <linux/netdevice.h>
......@@ -8,4 +11,7 @@ DECLARE_TRACE(napi_poll,
TP_PROTO(struct napi_struct *napi),
TP_ARGS(napi));
#endif
#endif /* _TRACE_NAPI_H_ */
/* This part must be outside protection */
#include <trace/define_trace.h>
......@@ -264,6 +264,7 @@ struct ftrace_profile {
unsigned long counter;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
unsigned long long time;
unsigned long long time_squared;
#endif
};
......@@ -366,9 +367,9 @@ static int function_stat_headers(struct seq_file *m)
{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
seq_printf(m, " Function "
"Hit Time Avg\n"
"Hit Time Avg s^2\n"
" -------- "
"--- ---- ---\n");
"--- ---- --- ---\n");
#else
seq_printf(m, " Function Hit\n"
" -------- ---\n");
......@@ -384,6 +385,7 @@ static int function_stat_show(struct seq_file *m, void *v)
static DEFINE_MUTEX(mutex);
static struct trace_seq s;
unsigned long long avg;
unsigned long long stddev;
#endif
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
......@@ -394,11 +396,25 @@ static int function_stat_show(struct seq_file *m, void *v)
avg = rec->time;
do_div(avg, rec->counter);
/* Sample standard deviation (s^2) */
if (rec->counter <= 1)
stddev = 0;
else {
stddev = rec->time_squared - rec->counter * avg * avg;
/*
* Divide only 1000 for ns^2 -> us^2 conversion.
* trace_print_graph_duration will divide 1000 again.
*/
do_div(stddev, (rec->counter - 1) * 1000);
}
mutex_lock(&mutex);
trace_seq_init(&s);
trace_print_graph_duration(rec->time, &s);
trace_seq_puts(&s, " ");
trace_print_graph_duration(avg, &s);
trace_seq_puts(&s, " ");
trace_print_graph_duration(stddev, &s);
trace_print_seq(m, &s);
mutex_unlock(&mutex);
#endif
......@@ -650,6 +666,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
if (!stat->hash || !ftrace_profile_enabled)
goto out;
/* If the calltime was zero'd ignore it */
if (!trace->calltime)
goto out;
calltime = trace->rettime - trace->calltime;
if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
......@@ -668,8 +688,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
}
rec = ftrace_find_profiled_func(stat, trace->func);
if (rec)
if (rec) {
rec->time += calltime;
rec->time_squared += calltime * calltime;
}
out:
local_irq_restore(flags);
......@@ -3338,11 +3360,11 @@ void unregister_ftrace_graph(void)
goto out;
ftrace_graph_active--;
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
ftrace_shutdown(FTRACE_STOP_FUNC_RET);
unregister_pm_notifier(&ftrace_suspend_notifier);
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
out:
mutex_unlock(&ftrace_lock);
......
......@@ -2000,17 +2000,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
u64 *ts, u64 *delta)
{
struct ring_buffer_event *event;
static int once;
int ret;
if (unlikely(*delta > (1ULL << 59) && !once++)) {
printk(KERN_WARNING "Delta way too big! %llu"
" ts=%llu write stamp = %llu\n",
(unsigned long long)*delta,
(unsigned long long)*ts,
(unsigned long long)cpu_buffer->write_stamp);
WARN_ON(1);
}
WARN_ONCE(*delta > (1ULL << 59),
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
(unsigned long long)*delta,
(unsigned long long)*ts,
(unsigned long long)cpu_buffer->write_stamp);
/*
* The delta is too big, we to add a
......@@ -3332,23 +3328,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
EXPORT_SYMBOL_GPL(ring_buffer_consume);
/**
* ring_buffer_read_start - start a non consuming read of the buffer
* ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
* @buffer: The ring buffer to read from
* @cpu: The cpu buffer to iterate over
*
* This starts up an iteration through the buffer. It also disables
* the recording to the buffer until the reading is finished.
* This prevents the reading from being corrupted. This is not
* a consuming read, so a producer is not expected.
* This performs the initial preparations necessary to iterate
* through the buffer. Memory is allocated, buffer recording
* is disabled, and the iterator pointer is returned to the caller.
*
* Must be paired with ring_buffer_finish.
* Disabling buffer recordng prevents the reading from being
* corrupted. This is not a consuming read, so a producer is not
* expected.
*
* After a sequence of ring_buffer_read_prepare calls, the user is
* expected to make at least one call to ring_buffer_prepare_sync.
* Afterwards, ring_buffer_read_start is invoked to get things going
* for real.
*
* This overall must be paired with ring_buffer_finish.
*/
struct ring_buffer_iter *
ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_iter *iter;
unsigned long flags;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return NULL;
......@@ -3362,15 +3365,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
iter->cpu_buffer = cpu_buffer;
atomic_inc(&cpu_buffer->record_disabled);
return iter;
}
EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
/**
* ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
*
* All previously invoked ring_buffer_read_prepare calls to prepare
* iterators will be synchronized. Afterwards, read_buffer_read_start
* calls on those iterators are allowed.
*/
void
ring_buffer_read_prepare_sync(void)
{
synchronize_sched();
}
EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
/**
* ring_buffer_read_start - start a non consuming read of the buffer
* @iter: The iterator returned by ring_buffer_read_prepare
*
* This finalizes the startup of an iteration through the buffer.
* The iterator comes from a call to ring_buffer_read_prepare and
* an intervening ring_buffer_read_prepare_sync must have been
* performed.
*
* Must be paired with ring_buffer_finish.
*/
void
ring_buffer_read_start(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long flags;
if (!iter)
return;
cpu_buffer = iter->cpu_buffer;
spin_lock_irqsave(&cpu_buffer->reader_lock, flags);