Commit 6bc4c3ad authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "These are the left over fixes from the v4.1 cycle"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf tools: Fix build breakage if prefix= is specified
  perf/x86: Honor the architectural performance monitoring version
  perf/x86/intel: Fix PMI handling for Intel PT
  perf/x86/intel/bts: Fix DS area sharing with x86_pmu events
  perf/x86: Add more Broadwell model numbers
  perf: Fix ring_buffer_attach() RCU sync, again
parents c58267e9 0f02adaa
......@@ -135,6 +135,7 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
}
static atomic_t active_events;
static atomic_t pmc_refcount;
static DEFINE_MUTEX(pmc_reserve_mutex);
#ifdef CONFIG_X86_LOCAL_APIC
......@@ -270,11 +271,8 @@ msr_fail:
static void hw_perf_event_destroy(struct perf_event *event)
{
if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
release_pmc_hardware();
release_ds_buffers();
mutex_unlock(&pmc_reserve_mutex);
}
x86_release_hardware();
atomic_dec(&active_events);
}
void hw_perf_lbr_event_destroy(struct perf_event *event)
......@@ -324,6 +322,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
return x86_pmu_extra_regs(val, event);
}
int x86_reserve_hardware(void)
{
int err = 0;
if (!atomic_inc_not_zero(&pmc_refcount)) {
mutex_lock(&pmc_reserve_mutex);
if (atomic_read(&pmc_refcount) == 0) {
if (!reserve_pmc_hardware())
err = -EBUSY;
else
reserve_ds_buffers();
}
if (!err)
atomic_inc(&pmc_refcount);
mutex_unlock(&pmc_reserve_mutex);
}
return err;
}
void x86_release_hardware(void)
{
if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
release_pmc_hardware();
release_ds_buffers();
mutex_unlock(&pmc_reserve_mutex);
}
}
/*
* Check if we can create event of a certain type (that no conflicting events
* are present).
......@@ -336,21 +363,34 @@ int x86_add_exclusive(unsigned int what)
return 0;
mutex_lock(&pmc_reserve_mutex);
for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++)
for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
goto out;
}
atomic_inc(&x86_pmu.lbr_exclusive[what]);
ret = 0;
out:
mutex_unlock(&pmc_reserve_mutex);
/*
* Assuming that all exclusive events will share the PMI handler
* (which checks active_events for whether there is work to do),
* we can bump active_events counter right here, except for
* x86_lbr_exclusive_lbr events that go through x86_pmu_event_init()
* path, which already bumps active_events for them.
*/
if (!ret && what != x86_lbr_exclusive_lbr)
atomic_inc(&active_events);
return ret;
}
void x86_del_exclusive(unsigned int what)
{
atomic_dec(&x86_pmu.lbr_exclusive[what]);
atomic_dec(&active_events);
}
int x86_setup_perfctr(struct perf_event *event)
......@@ -527,22 +567,11 @@ static int __x86_pmu_event_init(struct perf_event *event)
if (!x86_pmu_initialized())
return -ENODEV;
err = 0;
if (!atomic_inc_not_zero(&active_events)) {
mutex_lock(&pmc_reserve_mutex);
if (atomic_read(&active_events) == 0) {
if (!reserve_pmc_hardware())
err = -EBUSY;
else
reserve_ds_buffers();
}
if (!err)
atomic_inc(&active_events);
mutex_unlock(&pmc_reserve_mutex);
}
err = x86_reserve_hardware();
if (err)
return err;
atomic_inc(&active_events);
event->destroy = hw_perf_event_destroy;
event->hw.idx = -1;
......@@ -1415,6 +1444,10 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
u64 finish_clock;
int ret;
/*
* All PMUs/events that share this PMI handler should make sure to
* increment active_events for their events.
*/
if (!atomic_read(&active_events))
return NMI_DONE;
......
......@@ -716,6 +716,10 @@ int x86_add_exclusive(unsigned int what);
void x86_del_exclusive(unsigned int what);
int x86_reserve_hardware(void);
void x86_release_hardware(void);
void hw_perf_lbr_event_destroy(struct perf_event *event);
int x86_setup_perfctr(struct perf_event *event);
......
......@@ -3227,6 +3227,8 @@ __init int intel_pmu_init(void)
case 61: /* 14nm Broadwell Core-M */
case 86: /* 14nm Broadwell Xeon D */
case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
case 79: /* 14nm Broadwell Server */
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
......@@ -3296,13 +3298,13 @@ __init int intel_pmu_init(void)
* counter, so do not extend mask to generic counters
*/
for_each_event_constraint(c, x86_pmu.event_constraints) {
if (c->cmask != FIXED_EVENT_FLAGS
|| c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
continue;
if (c->cmask == FIXED_EVENT_FLAGS
&& c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
}
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
c->weight += x86_pmu.num_counters;
c->idxmsk64 &=
~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
c->weight = hweight64(c->idxmsk64);
}
}
......
......@@ -483,17 +483,26 @@ static int bts_event_add(struct perf_event *event, int mode)
static void bts_event_destroy(struct perf_event *event)
{
x86_release_hardware();
x86_del_exclusive(x86_lbr_exclusive_bts);
}
static int bts_event_init(struct perf_event *event)
{
int ret;
if (event->attr.type != bts_pmu.type)
return -ENOENT;
if (x86_add_exclusive(x86_lbr_exclusive_bts))
return -EBUSY;
ret = x86_reserve_hardware();
if (ret) {
x86_del_exclusive(x86_lbr_exclusive_bts);
return ret;
}
event->destroy = bts_event_destroy;
return 0;
......
......@@ -4331,20 +4331,20 @@ static void ring_buffer_attach(struct perf_event *event,
WARN_ON_ONCE(event->rcu_pending);
old_rb = event->rb;
event->rcu_batches = get_state_synchronize_rcu();
event->rcu_pending = 1;
spin_lock_irqsave(&old_rb->event_lock, flags);
list_del_rcu(&event->rb_entry);
spin_unlock_irqrestore(&old_rb->event_lock, flags);
}
if (event->rcu_pending && rb) {
cond_synchronize_rcu(event->rcu_batches);
event->rcu_pending = 0;
event->rcu_batches = get_state_synchronize_rcu();
event->rcu_pending = 1;
}
if (rb) {
if (event->rcu_pending) {
cond_synchronize_rcu(event->rcu_batches);
event->rcu_pending = 0;
}
spin_lock_irqsave(&rb->event_lock, flags);
list_add_rcu(&event->rb_entry, &rb->event_list);
spin_unlock_irqrestore(&rb->event_lock, flags);
......
......@@ -94,12 +94,12 @@ obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y))
subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y))
# '$(OUTPUT)/dir' prefix to all objects
prefix := $(subst ./,,$(OUTPUT)$(dir)/)
obj-y := $(addprefix $(prefix),$(obj-y))
subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y))
objprefix := $(subst ./,,$(OUTPUT)$(dir)/)
obj-y := $(addprefix $(objprefix),$(obj-y))
subdir-obj-y := $(addprefix $(objprefix),$(subdir-obj-y))
# Final '$(obj)-in.o' object
in-target := $(prefix)$(obj)-in.o
in-target := $(objprefix)$(obj)-in.o
PHONY += $(subdir-y)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment