Commit 903b20ad authored by James Hogan's avatar James Hogan
Browse files

metag: Perf

Add Perf support for metag.
Signed-off-by: James Hogan <>
Cc: Peter Zijlstra <>
Cc: Paul Mackerras <>
Cc: Ingo Molnar <>
Cc: Arnaldo Carvalho de Melo <>
parent 5633004c
......@@ -22,6 +22,7 @@ config METAG
#endif /* __ASM_METAG_PERF_EVENT_H */
......@@ -25,6 +25,8 @@ obj-y += topology.o
obj-y += traps.o
obj-y += user_gateway.o
obj-$(CONFIG_PERF_EVENTS) += perf/
obj-$(CONFIG_METAG_COREMEM) += coremem.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace_stub.o
# Makefile for performance event core
obj-y += perf_event.o
This diff is collapsed.
* Meta performance counter support.
* Copyright (C) 2012 Imagination Technologies Ltd
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
/* For performance counter definitions */
#include <asm/metag_mem.h>
* The Meta core has two performance counters, with 24-bit resolution. Newer
* cores generate an overflow interrupt on transition from 0xffffff to 0.
* Each counter consists of the counter id, hardware thread id, and the count
* itself; each counter can be assigned to multiple hardware threads at any
* one time, with the returned count being an aggregate of events. A small
* number of events are thread global, i.e. they count the aggregate of all
* threads' events, regardless of the thread selected.
* Newer cores can store an arbitrary 24-bit number in the counter, whereas
* older cores will clear the counter bits on write.
* We also have a pseudo-counter in the form of the thread active cycles
* counter (which, incidentally, is also bound to
#define MAX_HWEVENTS 3
#define MAX_PERIOD ((1UL << 24) - 1)
/**
 * struct cpu_hw_events - a processor core's performance events
 * @events:	an array of perf_events active for a given index.
 * @used_mask:	a bitmap of in-use counters.
 * @pmu_lock:	a perf counter lock
 *
 * This is a per-cpu/core structure that maintains a record of its
 * performance counters' state.
 */
struct cpu_hw_events {
struct perf_event *events[MAX_HWEVENTS];
unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
raw_spinlock_t pmu_lock;
/* NOTE(review): the closing "};" of this struct appears to have been lost
 * in extraction — confirm against the original commit. */
/**
 * struct metag_pmu - the Meta PMU structure
 * @pmu:		core pmu structure
 * @name:		pmu name
 * @version:		core version
 * @handle_irq:		overflow interrupt handler
 * @enable:		enable a counter
 * @disable:		disable a counter
 * @read:		read the value of a counter
 * @write:		write a value to a counter
 * @event_map:		kernel event to counter event id map
 * @cache_events:	kernel cache counter to core cache counter map
 * @max_period:		maximum value of the counter before overflow
 * @max_events:		maximum number of counters available at any one time
 * @active_events:	number of active counters
 * @reserve_mutex:	counter reservation mutex
 *
 * This describes the main functionality and data used by the performance
 * event core.
 */
struct metag_pmu {
struct pmu pmu;
const char *name;
u32 version;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct hw_perf_event *evt, int idx);
void (*disable)(struct hw_perf_event *evt, int idx);
u64 (*read)(int idx);
void (*write)(int idx, u32 val);
int (*event_map)(int idx);
/* NOTE(review): the cache_events declaration below looks truncated — the
 * trailing array dimensions (typically [PERF_COUNT_HW_CACHE_OP_MAX]
 * [PERF_COUNT_HW_CACHE_RESULT_MAX];) and the struct's closing "};" appear
 * lost in extraction — confirm against the original commit. */
const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
u32 max_period;
int max_events;
atomic_t active_events;
struct mutex reserve_mutex;
/* Convenience macros for accessing the perf counters */
/* Define some convenience accessors */
#define PERF_COUNT(x) (PERF_COUNT0 + (sizeof(u64) * (x)))
#define PERF_ICORE(x) (PERF_ICORE0 + (sizeof(u64) * (x)))
#define PERF_CHAN(x) (PERF_CHAN0 + (sizeof(u64) * (x)))
/* Cache index macros */
#define C(x) PERF_COUNT_HW_CACHE_##x
#define CACHE_OP_NONSENSE 0xffff
* Perf callchain handling code.
* Based on the ARM perf implementation.
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
/*
 * is_valid_call() - check whether @calladdr plausibly follows a call site.
 *
 * Returns true if @calladdr is word-aligned and the user-space word at that
 * address decodes as a CALLR instruction or a SWAP PC,D1RtP instruction;
 * false otherwise (including when the word cannot be read from user space).
 *
 * NOTE(review): the braces delimiting the if-bodies appear to have been
 * stripped in extraction — confirm block structure against the original.
 */
static bool is_valid_call(unsigned long calladdr)
unsigned int callinsn;
/* Check the possible return address is aligned. */
if (!(calladdr & 0x3)) {
if (!get_user(callinsn, (unsigned int *)calladdr)) {
/* Check for CALLR or SWAP PC,D1RtP. */
if ((callinsn & 0xff000000) == 0xab000000 ||
callinsn == 0xa3200aa0)
return true;
return false;
/*
 * user_backtrace() - unwind one user-space stack frame.
 *
 * Copies one struct metag_frame from user space, and if the derived call
 * address passes is_valid_call() it is stored into the callchain @entry.
 * Returns the next frame pointer to continue the walk, or 0 (NULL) to stop
 * (inaccessible frame, failed copy, or invalid call address).
 */
static struct metag_frame __user *
user_backtrace(struct metag_frame __user *user_frame,
struct perf_callchain_entry *entry)
struct metag_frame frame;
unsigned long calladdr;
/* We cannot rely on having frame pointers in user code. */
while (1) {
/* Also check accessibility of one struct frame beyond */
if (!access_ok(VERIFY_READ, user_frame, sizeof(frame)))
return 0;
/* NOTE(review): this call looks truncated in extraction — the third
 * argument (presumably sizeof(frame)) and closing parens seem lost. */
if (__copy_from_user_inatomic(&frame, user_frame,
return 0;
/* NOTE(review): the source operand of this assignment (likely a field of
 * "frame" holding the saved return address) appears lost in extraction. */
calladdr = - 4;
if (is_valid_call(calladdr)) {
perf_callchain_store(entry, calladdr);
return user_frame;
return 0;
/*
 * perf_callchain_user() - record a user-space callchain for a perf sample.
 *
 * Starts from the user stack pointer (AX[0].U0 in the saved context) and
 * repeatedly calls user_backtrace() until the chain is full
 * (PERF_MAX_STACK_DEPTH) or the walk terminates (NULL frame).
 *
 * NOTE(review): the return-type line (presumably "void") appears to have
 * been lost in extraction.
 */
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
unsigned long sp = regs->ctx.AX[0].U0;
struct metag_frame __user *frame;
frame = (struct metag_frame __user *)sp;
while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
frame = user_backtrace(frame, entry);
/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whilst unwinding the stackframe and is like a subroutine return so we use
 * the PC.
 *
 * @data is the struct perf_callchain_entry being filled; returning 0 tells
 * walk_stackframe() to continue the walk.
 */
static int
callchain_trace(struct stackframe *fr,
void *data)
struct perf_callchain_entry *entry = data;
perf_callchain_store(entry, fr->pc);
return 0;
/*
 * perf_callchain_kernel() - record a kernel callchain for a perf sample.
 *
 * Seeds a struct stackframe from the saved register context (frame pointer
 * AX[1].U0, stack pointer AX[0].U0, current PC) and walks it, storing each
 * frame's PC via callchain_trace().
 *
 * NOTE(review): the return-type line (presumably "void") appears lost in
 * extraction.
 */
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
struct stackframe fr;
fr.fp = regs->ctx.AX[1].U0;
/* NOTE(review): two assignments appear fused on the next line — the second
 * (" = regs->ctx.DX[4].U1;") has lost its left-hand side (likely,
 * the link register) in extraction — confirm against the original commit. */
fr.sp = regs->ctx.AX[0].U0; = regs->ctx.DX[4].U1;
fr.pc = regs->ctx.CurrPC;
walk_stackframe(&fr, callchain_trace, entry);
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment