#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code. Callchain code is based on the ARM OProfile backtrace
 * code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>

#include <asm/cputype.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/pmu.h>
#include <asm/stacktrace.h>

static struct platform_device *pmu_device;

/*
 * Hardware lock to serialize accesses to PMU registers. Needed for the
 * read/modify/write sequences.
 */
static DEFINE_RAW_SPINLOCK(pmu_lock);

/*
 * ARMv6 supports a maximum of 3 events, starting from index 0. If we add
 * another platform that supports more, we need to increase this to be the
 * largest of all platforms.
 *
 * ARMv7 supports up to 32 events:
 *  cycle counter CCNT + 31 events counters CNT0..30.
 *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
 */
#define ARMPMU_MAX_HWEVENTS		32

/* The events for a given CPU. */
struct cpu_hw_events {
	/*
	 * The events that are active on the CPU for the given index.
	 */
	struct perf_event	*events[ARMPMU_MAX_HWEVENTS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long		used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

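/*
 * Per-CPU-type PMU operations. Each backend (ARMv6, ARMv7, XScale) fills
 * in one of these with its interrupt handler, counter accessors and event
 * maps; init_hw_perf_events() selects the matching instance at boot.
 */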
struct arm_pmu {
	enum arm_perf_pmu_ids id;
	cpumask_t	active_irqs;
	const char	*name;
	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
	void		(*enable)(struct hw_perf_event *evt, int idx);
	void		(*disable)(struct hw_perf_event *evt, int idx);
	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
					 struct hw_perf_event *hwc);
	int		(*set_event_filter)(struct hw_perf_event *evt,
					    struct perf_event_attr *attr);
	u32		(*read_counter)(int idx);
	void		(*write_counter)(int idx, u32 val);
	void		(*start)(void);
	void		(*stop)(void);
	void		(*reset)(void *);
	const unsigned	(*cache_map)[PERF_COUNT_HW_CACHE_MAX]
				    [PERF_COUNT_HW_CACHE_OP_MAX]
				    [PERF_COUNT_HW_CACHE_RESULT_MAX];
	const unsigned	(*event_map)[PERF_COUNT_HW_MAX];
	u32		raw_event_mask;
	int		num_events;
	u64		max_period;
};

/* Set at runtime when we know what CPU type we are. */
static struct arm_pmu *armpmu;

enum arm_perf_pmu_ids
armpmu_get_pmu_id(void)
{
	int id = -ENODEV;

	if (armpmu != NULL)
		id = armpmu->id;

	return id;
}
EXPORT_SYMBOL_GPL(armpmu_get_pmu_id);

int
armpmu_get_max_events(void)
{
	int max_events = 0;

	if (armpmu != NULL)
		max_events = armpmu->num_events;

	return max_events;
}
EXPORT_SYMBOL_GPL(armpmu_get_max_events);

int perf_num_counters(void)
{
	return armpmu_get_max_events();
}
EXPORT_SYMBOL_GPL(perf_num_counters);

#define HW_OP_UNSUPPORTED		0xFFFF

#define C(_x) \
	PERF_COUNT_HW_CACHE_##_x

#define CACHE_OP_UNSUPPORTED		0xFFFF

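/*
 * Map a PERF_TYPE_HW_CACHE config into a hardware event number: the cache
 * type, op and result are packed into the low three bytes of the config
 * and index the per-PMU cache_map table.
 */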
static int
armpmu_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result, ret;

	cache_type = (config >>  0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >>  8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];

	if (ret == CACHE_OP_UNSUPPORTED)
		return -ENOENT;

	return ret;
}

static int
armpmu_map_event(u64 config)
{
	int mapping = (*armpmu->event_map)[config];
	return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
}

static int
armpmu_map_raw_event(u64 config)
{
	return (int)(config & armpmu->raw_event_mask);
}

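/*
 * Program the counter with the negated number of events remaining in the
 * sample period, so that it overflows (and raises its interrupt) once the
 * period has elapsed. prev_count records the biased start value for
 * armpmu_event_update() to compute deltas against.
 */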
static int
armpmu_event_set_period(struct perf_event *event,
			struct hw_perf_event *hwc,
			int idx)
{
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)armpmu->max_period)
		left = armpmu->max_period;

	local64_set(&hwc->prev_count, (u64)-left);

	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}

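/*
 * Fold the events counted since the last read into event->count. The
 * cmpxchg on prev_count guards against a racing update from the overflow
 * interrupt; when 'overflow' is set the delta wraps around max_period.
 */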
static u64
armpmu_event_update(struct perf_event *event,
		    struct hw_perf_event *hwc,
		    int idx, int overflow)
{
	u64 delta, prev_raw_count, new_raw_count;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = armpmu->read_counter(idx);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			     new_raw_count) != prev_raw_count)
		goto again;

	new_raw_count &= armpmu->max_period;
	prev_raw_count &= armpmu->max_period;

	if (overflow)
		delta = armpmu->max_period - prev_raw_count + new_raw_count + 1;
	else
		delta = new_raw_count - prev_raw_count;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

static void
armpmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* Don't read disabled counters! */
	if (hwc->idx < 0)
		return;

	armpmu_event_update(event, hwc, hwc->idx, 0);
}

static void
armpmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * ARM pmu always has to update the counter, so ignore
	 * PERF_EF_UPDATE, see comments in armpmu_start().
	 */
	if (!(hwc->state & PERF_HES_STOPPED)) {
		armpmu->disable(hwc, hwc->idx);
		barrier(); /* why? */
		armpmu_event_update(event, hwc, hwc->idx, 0);
		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

static void
armpmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * ARM pmu always has to reprogram the period, so ignore
	 * PERF_EF_RELOAD, see the comment below.
	 */
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;
	/*
	 * Set the period again. Some counters can't be stopped, so when we
	 * were stopped we simply disabled the IRQ source and the counter
	 * may have been left counting. If we don't do this step then we may
	 * get an interrupt too soon or *way* too late if the overflow has
	 * happened since disabling.
	 */
	armpmu_event_set_period(event, hwc, hwc->idx);
	armpmu->enable(hwc, hwc->idx);
}

static void
armpmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	WARN_ON(idx < 0);

	armpmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

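/*
 * pmu::add callback: claim a free counter index for the event and, when
 * PERF_EF_START is set, program its period and enable it immediately.
 */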
static int
armpmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;
	int err = 0;

	perf_pmu_disable(event->pmu);

	/* If we don't have a space for the counter then finish early. */
	idx = armpmu->get_event_idx(cpuc, hwc);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	/*
	 * If there is an event in the counter we are going to use then make
	 * sure it is disabled.
	 */
	event->hw.idx = idx;
	armpmu->disable(hwc, idx);
	cpuc->events[idx] = event;

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		armpmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	perf_pmu_enable(event->pmu);
	return err;
}

static struct pmu pmu;

static int
validate_event(struct cpu_hw_events *cpuc,
	       struct perf_event *event)
{
	struct hw_perf_event fake_event = event->hw;
	struct pmu *leader_pmu = event->group_leader->pmu;

	if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF)
		return 1;

	return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
}

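/*
 * Check that a group leader and all of its siblings can be scheduled on
 * the PMU at the same time, using a scratch cpu_hw_events to dry-run the
 * index allocation.
 */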
static int
validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct cpu_hw_events fake_pmu;

	memset(&fake_pmu, 0, sizeof(fake_pmu));

	if (!validate_event(&fake_pmu, leader))
		return -ENOSPC;

	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
		if (!validate_event(&fake_pmu, sibling))
			return -ENOSPC;
	}

	if (!validate_event(&fake_pmu, event))
		return -ENOSPC;

	return 0;
}

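/*
 * Route PMU interrupts through the board's handle_irq hook when one is
 * supplied via arm_pmu_platdata, wrapping the CPU PMU's own handler.
 */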
static irqreturn_t armpmu_platform_irq(int irq, void *dev)
{
	struct arm_pmu_platdata *plat = dev_get_platdata(&pmu_device->dev);

	return plat->handle_irq(irq, dev, armpmu->handle_irq);
}

static void
armpmu_release_hardware(void)
{
	int i, irq, irqs;

	irqs = min(pmu_device->num_resources, num_possible_cpus());

	for (i = 0; i < irqs; ++i) {
		if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
			continue;
		irq = platform_get_irq(pmu_device, i);
		if (irq >= 0)
			free_irq(irq, NULL);
	}

	armpmu->stop();
	release_pmu(ARM_PMU_DEVICE_CPU);
}

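/*
 * Claim the CPU PMU and request one interrupt per possible CPU, binding
 * each IRQ's affinity to its CPU. Called when the first event is created;
 * armpmu_release_hardware() undoes this when the last event is destroyed.
 */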
static int
armpmu_reserve_hardware(void)
{
	struct arm_pmu_platdata *plat;
	irq_handler_t handle_irq;
	int i, err, irq, irqs;

	err = reserve_pmu(ARM_PMU_DEVICE_CPU);
	if (err) {
		pr_warning("unable to reserve pmu\n");
		return err;
	}

	plat = dev_get_platdata(&pmu_device->dev);
	if (plat && plat->handle_irq)
		handle_irq = armpmu_platform_irq;
	else
		handle_irq = armpmu->handle_irq;

	irqs = min(pmu_device->num_resources, num_possible_cpus());
	if (irqs < 1) {
		pr_err("no irqs for PMUs defined\n");
		return -ENODEV;
	}

	for (i = 0; i < irqs; ++i) {
		err = 0;
		irq = platform_get_irq(pmu_device, i);
		if (irq < 0)
			continue;

		/*
		 * If we have a single PMU interrupt that we can't shift,
		 * assume that we're running on a uniprocessor machine and
		 * continue. Otherwise, continue without this interrupt.
		 */
		if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
			pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
				    irq, i);
			continue;
		}

		err = request_irq(irq, handle_irq,
				  IRQF_DISABLED | IRQF_NOBALANCING,
				  "arm-pmu", NULL);
		if (err) {
			pr_err("unable to request IRQ%d for ARM PMU counters\n",
				irq);
			armpmu_release_hardware();
			return err;
		}

		cpumask_set_cpu(i, &armpmu->active_irqs);
	}

	return 0;
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmu_reserve_mutex);

static void
hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
		armpmu_release_hardware();
		mutex_unlock(&pmu_reserve_mutex);
	}
}

static int
event_requires_mode_exclusion(struct perf_event_attr *attr)
{
	return attr->exclude_idle || attr->exclude_user ||
	       attr->exclude_kernel || attr->exclude_hv;
}

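/*
 * Map the generic perf attributes onto an ARM event encoding, apply the
 * default sample period and validate the event's group before it is ever
 * placed on the hardware.
 */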
static int
__hw_perf_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int mapping, err;

	/* Decode the generic type into an ARM event identifier. */
	if (PERF_TYPE_HARDWARE == event->attr.type) {
		mapping = armpmu_map_event(event->attr.config);
	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
		mapping = armpmu_map_cache_event(event->attr.config);
	} else if (PERF_TYPE_RAW == event->attr.type) {
		mapping = armpmu_map_raw_event(event->attr.config);
	} else {
		pr_debug("event type %x not supported\n", event->attr.type);
		return -EOPNOTSUPP;
	}

	if (mapping < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapping;
	}

	/*
	 * We don't assign an index until we actually place the event onto
	 * hardware. Use -1 to signify that we haven't decided where to put it
	 * yet. For SMP systems, each core has its own PMU so we can't do any
	 * clever allocation or constraints checking at this point.
	 */
	hwc->idx		= -1;
	hwc->config_base	= 0;
	hwc->config		= 0;
	hwc->event_base		= 0;

	/*
	 * Check whether we need to exclude the counter from certain modes.
	 */
	if ((!armpmu->set_event_filter ||
	     armpmu->set_event_filter(hwc, &event->attr)) &&
	     event_requires_mode_exclusion(&event->attr)) {
		pr_debug("ARM performance counters do not support "
			 "mode exclusion\n");
		return -EPERM;
	}

	/*
	 * Store the event encoding into the config_base field.
	 */
	hwc->config_base	    |= (unsigned long)mapping;

	if (!hwc->sample_period) {
		hwc->sample_period  = armpmu->max_period;
		hwc->last_period    = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	err = 0;
	if (event->group_leader != event) {
		err = validate_group(event);
		if (err)
			return -EINVAL;
	}

	return err;
}

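/*
 * pmu::event_init callback: reject event types we cannot handle, reserve
 * the PMU hardware for the first active event (released again via
 * hw_perf_event_destroy) and perform the ARM-specific setup.
 */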
static int armpmu_event_init(struct perf_event *event)
{
	int err = 0;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
	case PERF_TYPE_HARDWARE:
	case PERF_TYPE_HW_CACHE:
		break;

	default:
		return -ENOENT;
	}

	event->destroy = hw_perf_event_destroy;

	if (!atomic_inc_not_zero(&active_events)) {
		mutex_lock(&pmu_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			err = armpmu_reserve_hardware();
		}

		if (!err)
			atomic_inc(&active_events);
		mutex_unlock(&pmu_reserve_mutex);
	}

	if (err)
		return err;

	err = __hw_perf_event_init(event);
	if (err)
		hw_perf_event_destroy(event);

	return err;
}

static void armpmu_enable(struct pmu *pmu)
{
	/* Enable all of the perf events on hardware. */
	int idx, enabled = 0;
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	for (idx = 0; idx < armpmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];

		if (!event)
			continue;

		armpmu->enable(&event->hw, idx);
		enabled = 1;
	}

	if (enabled)
		armpmu->start();
}

static void armpmu_disable(struct pmu *pmu)
{
	armpmu->stop();
}

static struct pmu pmu = {
	.pmu_enable	= armpmu_enable,
	.pmu_disable	= armpmu_disable,
	.event_init	= armpmu_event_init,
	.add		= armpmu_add,
	.del		= armpmu_del,
	.start		= armpmu_start,
	.stop		= armpmu_stop,
	.read		= armpmu_read,
};

/* Include the PMU-specific implementations. */
#include "perf_event_xscale.c"
#include "perf_event_v6.c"
#include "perf_event_v7.c"

/*
 * Ensure the PMU has sane values out of reset.
 * This requires SMP to be available, so exists as a separate initcall.
 */
static int __init
armpmu_reset(void)
{
	if (armpmu && armpmu->reset)
		return on_each_cpu(armpmu->reset, NULL, 1);
	return 0;
}
arch_initcall(armpmu_reset);

/*
 * PMU platform driver and devicetree bindings.
 */
static struct of_device_id armpmu_of_device_ids[] = {
	{.compatible = "arm,cortex-a9-pmu"},
	{.compatible = "arm,cortex-a8-pmu"},
	{.compatible = "arm,arm1136-pmu"},
	{.compatible = "arm,arm1176-pmu"},
	{},
};

static struct platform_device_id armpmu_plat_device_ids[] = {
	{.name = "arm-pmu"},
	{},
};

static int __devinit armpmu_device_probe(struct platform_device *pdev)
{
	pmu_device = pdev;
	return 0;
}

static struct platform_driver armpmu_driver = {
	.driver		= {
		.name	= "arm-pmu",
		.of_match_table = armpmu_of_device_ids,
	},
	.probe		= armpmu_device_probe,
	.id_table	= armpmu_plat_device_ids,
};

static int __init register_pmu_driver(void)
{
	return platform_driver_register(&armpmu_driver);
}
device_initcall(register_pmu_driver);

/*
 * CPU PMU identification and registration.
 */
static int __init
init_hw_perf_events(void)
{
	unsigned long cpuid = read_cpuid_id();
	unsigned long implementor = (cpuid & 0xFF000000) >> 24;
	unsigned long part_number = (cpuid & 0xFFF0);

	/* ARM Ltd CPUs. */
	if (0x41 == implementor) {
		switch (part_number) {
		case 0xB360:	/* ARM1136 */
		case 0xB560:	/* ARM1156 */
		case 0xB760:	/* ARM1176 */
			armpmu = armv6pmu_init();
			break;
		case 0xB020:	/* ARM11mpcore */
			armpmu = armv6mpcore_pmu_init();
			break;
		case 0xC080:	/* Cortex-A8 */
			armpmu = armv7_a8_pmu_init();
			break;
		case 0xC090:	/* Cortex-A9 */
			armpmu = armv7_a9_pmu_init();
			break;
		case 0xC050:	/* Cortex-A5 */
			armpmu = armv7_a5_pmu_init();
			break;
		case 0xC0F0:	/* Cortex-A15 */
			armpmu = armv7_a15_pmu_init();
			break;
		}
	/* Intel CPUs [xscale]. */
	} else if (0x69 == implementor) {
		part_number = (cpuid >> 13) & 0x7;
		switch (part_number) {
		case 1:
			armpmu = xscale1pmu_init();
			break;
		case 2:
			armpmu = xscale2pmu_init();
			break;
		}
	}

	if (armpmu) {
		pr_info("enabled with %s PMU driver, %d counters available\n",
			armpmu->name, armpmu->num_events);
		perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
	} else {
		pr_info("no hardware support available\n");
	}

	return 0;
}
early_initcall(init_hw_perf_events);

/*
 * Callchain handling code.
 */

/*
 * The registers we're interested in are at the end of the variable
 * length saved register structure. The fp points at the end of this
 * structure so the address of this struct is:
 * (struct frame_tail *)(xxx->fp)-1
 *
 * This code has been adapted from the ARM OProfile support.
 */
struct frame_tail {
	struct frame_tail __user *fp;
	unsigned long sp;
	unsigned long lr;
} __attribute__((packed));

/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static struct frame_tail __user *
user_backtrace(struct frame_tail __user *tail,
	       struct perf_callchain_entry *entry)
{
	struct frame_tail buftail;

	/* Also check accessibility of one struct frame_tail beyond */
	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
		return NULL;
	if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
		return NULL;

	perf_callchain_store(entry, buftail.lr);

	/*
	 * Frame pointers should strictly progress back up the stack
	 * (towards higher addresses).
	 */
	if (tail + 1 >= buftail.fp)
		return NULL;

	return buftail.fp - 1;
}

void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	struct frame_tail __user *tail;

	tail = (struct frame_tail __user *)regs->ARM_fp - 1;

	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
	       tail && !((unsigned long)tail & 0x3))
		tail = user_backtrace(tail, entry);
}

/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whilst unwinding the stackframe and is like a subroutine return so we use
 * the PC.
 */
static int
callchain_trace(struct stackframe *fr,
		void *data)
{
	struct perf_callchain_entry *entry = data;
	perf_callchain_store(entry, fr->pc);
	return 0;
}

void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	struct stackframe fr;

	fr.fp = regs->ARM_fp;
	fr.sp = regs->ARM_sp;
	fr.lr = regs->ARM_lr;
	fr.pc = regs->ARM_pc;
	walk_stackframe(&fr, callchain_trace, entry);
}