nmi.c 27.9 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 *  Pavel Machek and
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */

#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
22
#include <linux/percpu.h>
23
#include <linux/dmi.h>
24
#include <linux/kprobes.h>
Andrew Morton's avatar
Andrew Morton committed
25
#include <linux/cpumask.h>
26
#include <linux/kernel_stat.h>
Linus Torvalds's avatar
Linus Torvalds committed
27
28
29

#include <asm/smp.h>
#include <asm/nmi.h>
30
#include <asm/kdebug.h>
31
#include <asm/intel_arch_perfmon.h>
Linus Torvalds's avatar
Linus Torvalds committed
32
33
34

#include "mach_traps.h"

35
36
37
/*
 * Global NMI tunables.  Neither is written in the part of the file visible
 * here.  NOTE(review): presumably both are set from outside (sysctl / boot
 * code) — confirm against the users of these symbols.
 */
int unknown_nmi_panic;
int nmi_watchdog_enabled;

38
39
40
41
42
43
44
45
46
/*
 * perfctr_nmi_owner tracks the ownership of the perfctr registers;
 * evntsel_nmi_owner tracks the ownership of the event selection registers.
 *
 * Different performance counters / event selections may be reserved by
 * different subsystems; this reservation system just tries to coordinate
 * things a little.  Both are per-CPU bitmaps indexed by the bit numbers
 * returned by nmi_perfctr_msr_to_bit() and nmi_evntsel_msr_to_bit().
 */
static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);

Andrew Morton's avatar
Andrew Morton committed
47
48
/*
 * CPUs that should print a stack backtrace from their next watchdog NMI;
 * nmi_watchdog_tick() dumps the stack and clears the CPU's bit.
 */
static cpumask_t backtrace_mask = CPU_MASK_NONE;

49
50
51
52
53
/*
 * This number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for
 * now).  The reservation helpers BUG() on any bit index above it.
 */
#define NMI_MAX_COUNTER_BITS 66

Linus Torvalds's avatar
Linus Torvalds committed
54
/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
60
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
Linus Torvalds's avatar
Linus Torvalds committed
61

62
63
/* Selected watchdog mode (NMI_NONE, NMI_IO_APIC, NMI_LOCAL_APIC); NMI_DEFAULT until one is chosen. */
unsigned int nmi_watchdog = NMI_DEFAULT;
/*
 * Watchdog NMI rate in Hz.  Starts at HZ; for the local-APIC watchdog,
 * check_nmi_watchdog() lowers it to 1 (or to the smallest rate a 32-bit
 * counter allows) once the boot-time self-test has passed.
 */
static unsigned int nmi_hz = HZ;
Linus Torvalds's avatar
Linus Torvalds committed
64

65
66
67
68
69
70
71
72
/*
 * Per-CPU NMI watchdog state: whether the watchdog is running on this CPU
 * and which MSRs the matching setup_*_watchdog() routine programmed.
 */
struct nmi_watchdog_ctlblk {
	/* set to 1 by setup_apic_nmi_watchdog(), back to 0 by stop_apic_nmi_watchdog() */
	int enabled;
	/*
	 * single counter bit chosen per CPU type (bit 63 on K7, bit 39 on
	 * P6/P4, bit_width-1 on arch perfmon).
	 * NOTE(review): its consumer is outside the visible part of the file.
	 */
	u64 check_bit;
	/* CCCR MSR on P4; written as 0 ("unused") by the other setup routines */
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
Linus Torvalds's avatar
Linus Torvalds committed
73

74
75
76
77
78
/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

/* Not defined in the visible part of this file. */
extern void show_registers(struct pt_regs *regs);
/* Redundant with the definition of unknown_nmi_panic earlier in this file. */
extern int unknown_nmi_panic;
Linus Torvalds's avatar
Linus Torvalds committed
79

80
81
82
83
84
85
86
87
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
88
89
90
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_PERFCTR0);
		case 15:
			return (msr - MSR_P4_BPU_PERFCTR0);
		}
	}
	return 0;
}

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
109
110
111
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_EVNTSEL0);
		case 15:
			return (msr - MSR_P4_BSU_ESCR0);
		}
	}
	return 0;
}

/*
 * Check whether a perfctr reservation bit is free on this CPU (hack for
 * oprofile).  Returns 1 when the bit is not owned, 0 when it is taken.
 */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	int taken;

	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	taken = test_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
	return !taken;
}

/*
 * Check whether the given perfctr MSR is free for reservation on this CPU.
 * Returns 1 when available, 0 when already reserved.
 */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int bit = nmi_perfctr_msr_to_bit(msr);
	int taken;

	BUG_ON(bit > NMI_MAX_COUNTER_BITS);

	taken = test_bit(bit, &__get_cpu_var(perfctr_nmi_owner));
	return !taken;
}

/*
 * Try to reserve the given perfctr MSR on this CPU.
 * Returns 1 if the reservation was obtained, 0 if it was already owned.
 */
int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int bit = nmi_perfctr_msr_to_bit(msr);

	BUG_ON(bit > NMI_MAX_COUNTER_BITS);

	/* the old bit value tells us whether somebody else got there first */
	return test_and_set_bit(bit, &__get_cpu_var(perfctr_nmi_owner)) ? 0 : 1;
}

/* Drop this CPU's reservation of the given perfctr MSR. */
void release_perfctr_nmi(unsigned int msr)
{
	unsigned int bit = nmi_perfctr_msr_to_bit(msr);

	BUG_ON(bit > NMI_MAX_COUNTER_BITS);

	clear_bit(bit, &__get_cpu_var(perfctr_nmi_owner));
}

/*
 * Try to reserve the given event-select MSR on this CPU.
 * Returns 1 if the reservation was obtained, 0 if it was already owned.
 */
int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int bit = nmi_evntsel_msr_to_bit(msr);

	BUG_ON(bit > NMI_MAX_COUNTER_BITS);

	/* the old bit value tells us whether somebody else got there first */
	return test_and_set_bit(bit, &__get_cpu_var(evntsel_nmi_owner)[0]) ? 0 : 1;
}

/* Drop this CPU's reservation of the given event-select MSR. */
void release_evntsel_nmi(unsigned int msr)
{
	unsigned int bit = nmi_evntsel_msr_to_bit(msr);

	BUG_ON(bit > NMI_MAX_COUNTER_BITS);

	clear_bit(bit, &__get_cpu_var(evntsel_nmi_owner)[0]);
}

185
186
187
188
static __cpuinit inline int nmi_known_cpu(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
189
190
		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)
			|| (boot_cpu_data.x86 == 16));
191
	case X86_VENDOR_INTEL:
192
193
194
195
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return 1;
		else
			return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
196
197
198
199
	}
	return 0;
}

200
201
/*
 * Set to 1 by check_nmi_watchdog() to release the CPUs spinning in
 * nmi_cpu_busy() during the boot-time watchdog test.
 */
static int endflag __initdata = 0;

202
203
204
205
206
207
208
#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
209
	local_irq_enable_in_hardirq();
210
211
212
213
214
215
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat less cycles. */
216
217
	while (endflag == 0)
		mb();
218
219
220
}
#endif

221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So, we can only program the counter with 31 bit values and
	 * 32nd bit should be 1, for 33.. to be 1.
	 * Find the appropriate nmi_hz
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
 	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;
		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}

243
/*
 * Boot-time self-test of the NMI watchdog.
 *
 * Picks the local-APIC watchdog by default on newer, non-ThinkPad systems,
 * then verifies that every CPU with an enabled watchdog receives more than
 * 5 NMIs within roughly 10 watchdog ticks.  CPUs that fail have their
 * watchdog marked disabled.  If no CPU is left, nmi_active is set to -1.
 *
 * Returns 0 when the watchdog is off or working, -1 on allocation failure
 * or when the watchdog is stuck on every CPU.
 */
static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu;

	/* Enable NMI watchdog for newer systems.
	   Probably safe on most older systems too, but let's be careful.
	   IBM ThinkPads use INT10 inside SMM and that allows early NMI inside SMM
	   which hangs the system. Disable watchdog for all thinkpads */
	if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004 &&
		!dmi_name_in_vendors("ThinkPad"))
		nmi_watchdog = NMI_LOCAL_APIC;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(*prev_nmi_count), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

	/* the perfctr events don't tick on idle CPUs, so keep the others busy */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
	local_irq_enable();
	mdelay((10*1000)/nmi_hz); // wait 10 ticks

	for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
#endif
		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
			continue;
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
				cpu,
				prev_nmi_count[cpu],
				nmi_count(cpu));
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}
	}

	/*
	 * Release the CPUs spinning in nmi_cpu_busy() before any return.
	 * Setting this only on the success path would leave them looping
	 * forever (in __init code) whenever the test fails.
	 */
	endflag = 1;

	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		return -1;
	}
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

		nmi_hz = 1;

		/* these counters only take 31-bit periods; 1 Hz may not fit */
		if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
		    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
			nmi_hz = adjust_for_32bit_ctr(nmi_hz);
		}
	}

	kfree(prev_nmi_count);
	return 0;
}
318
319
/* This needs to happen later in boot so counters are working */
late_initcall(check_nmi_watchdog);
Linus Torvalds's avatar
Linus Torvalds committed
320
321
322
323
324
325
326

/*
 * Handler for the "nmi_watchdog=" boot parameter.
 *
 * Accepts an integer in the range [NMI_NONE, NMI_INVALID) and stores it in
 * nmi_watchdog.  Returns 1 when the value was accepted, 0 otherwise.
 */
static int __init setup_nmi_watchdog(char *str)
{
	/*
	 * get_option() leaves its output untouched when given an empty
	 * string.  Start from a value the range check below rejects, so an
	 * empty "nmi_watchdog=" is refused rather than acted on as an
	 * uninitialised stack value.
	 */
	int nmi = NMI_INVALID;

	get_option(&str, &nmi);

	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
		return 0;

	nmi_watchdog = nmi;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);

static void disable_lapic_nmi_watchdog(void)
{
338
339
340
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
Linus Torvalds's avatar
Linus Torvalds committed
341
342
		return;

343
	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
Linus Torvalds's avatar
Linus Torvalds committed
344

345
	BUG_ON(atomic_read(&nmi_active) != 0);
Linus Torvalds's avatar
Linus Torvalds committed
346
347
348
349
}

static void enable_lapic_nmi_watchdog(void)
{
350
351
352
353
354
355
356
357
358
359
360
361
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (nmi_known_cpu() <= 0)
		return;

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
Linus Torvalds's avatar
Linus Torvalds committed
362
363
364
365
}

void disable_timer_nmi_watchdog(void)
{
366
367
368
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) <= 0)
Linus Torvalds's avatar
Linus Torvalds committed
369
370
		return;

371
372
373
374
	disable_irq(0);
	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
Linus Torvalds's avatar
Linus Torvalds committed
375
376
377
378
}

void enable_timer_nmi_watchdog(void)
{
379
380
381
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) == 0) {
Linus Torvalds's avatar
Linus Torvalds committed
382
		touch_nmi_watchdog();
383
384
		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
		enable_irq(0);
Linus Torvalds's avatar
Linus Torvalds committed
385
386
387
	}
}

388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
/* Per-CPU helper: write LVT0 as NMI delivery mode with the mask bit set. */
static void __acpi_nmi_disable(void *__unused)
{
	const unsigned long lvt0 = APIC_DM_NMI | APIC_LVT_MASKED;

	apic_write_around(APIC_LVT0, lvt0);
}

/*
 * Disable timer based NMIs on all CPUs.
 * Only acts when the watchdog is active and running in NMI_IO_APIC mode.
 */
void acpi_nmi_disable(void)
{
	if (!atomic_read(&nmi_active))
		return;
	if (nmi_watchdog != NMI_IO_APIC)
		return;

	on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
}

/* Per-CPU helper: write LVT0 as NMI delivery mode with the mask bit clear. */
static void __acpi_nmi_enable(void *__unused)
{
	const unsigned long lvt0 = APIC_DM_NMI;

	apic_write_around(APIC_LVT0, lvt0);
}

/*
 * Enable timer based NMIs on all CPUs.
 * Only acts when the watchdog is active and running in NMI_IO_APIC mode.
 */
void acpi_nmi_enable(void)
{
	if (!atomic_read(&nmi_active))
		return;
	if (nmi_watchdog != NMI_IO_APIC)
		return;

	on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
}

Linus Torvalds's avatar
Linus Torvalds committed
416
417
418
419
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

420
/*
 * sysdev suspend hook: remember whether the watchdog was active, then stop
 * it on this CPU.  Always returns 0.
 */
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */

	/* saved so that lapic_nmi_resume() knows whether to restart */
	nmi_pm_active = atomic_read(&nmi_active);

	stop_apic_nmi_watchdog(NULL);

	/* with the other CPUs offline, nothing may be left running */
	BUG_ON(atomic_read(&nmi_active) != 0);

	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
431
432
433
434
435
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
Linus Torvalds's avatar
Linus Torvalds committed
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
	return 0;
}


/* sysdev class "lapic_nmi": hooks the watchdog into suspend/resume. */
static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

/* The single device of that class, registered by init_lapic_nmi_sysfs(). */
static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};

/*
 * Register the lapic_nmi sysdev class and device so the watchdog takes part
 * in suspend/resume.  Skipped (returning 0) unless the local-APIC watchdog
 * is selected and nmi_active is not negative.  Otherwise returns the
 * registration result.
 */
static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but b/c this is an
	 * init call, it just doesn't work.  -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (error)
		return error;

	return sysdev_register(&device_lapic_nmi);
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

479
static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
480
481
482
483
484
485
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if(descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
486
	wrmsrl(perfctr_msr, 0 - count);
487
488
}

489
490
491
492
493
494
495
496
497
498
499
static void write_watchdog_counter32(unsigned int perfctr_msr,
		const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if(descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}

500
501
502
503
504
505
506
507
508
509
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

/*
 * Event-select flag bits and the event code that setup_k7_watchdog() ORs
 * together to program the watchdog counter.
 */
#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

510
static int setup_k7_watchdog(void)
Linus Torvalds's avatar
Linus Torvalds committed
511
{
512
	unsigned int perfctr_msr, evntsel_msr;
Linus Torvalds's avatar
Linus Torvalds committed
513
	unsigned int evntsel;
514
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Linus Torvalds's avatar
Linus Torvalds committed
515

516
517
518
	perfctr_msr = MSR_K7_PERFCTR0;
	evntsel_msr = MSR_K7_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
519
520
		goto fail;

521
	if (!reserve_evntsel_nmi(evntsel_msr))
522
523
		goto fail1;

524
	wrmsrl(perfctr_msr, 0UL);
Linus Torvalds's avatar
Linus Torvalds committed
525
526
527
528
529
530

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

531
532
533
	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
Linus Torvalds's avatar
Linus Torvalds committed
534
535
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
536
537
538
539
540
541
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL<<63;
542
543
	return 1;
fail1:
544
	release_perfctr_nmi(perfctr_msr);
545
546
fail:
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
547
548
}

549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
/*
 * Stop the K7 watchdog on this CPU: clear the event-select MSR and give
 * back both MSR reservations.
 */
static void stop_k7_watchdog(void)
{
	struct nmi_watchdog_ctlblk *ctl = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(ctl->evntsel_msr, 0, 0);

	release_evntsel_nmi(ctl->evntsel_msr);
	release_perfctr_nmi(ctl->perfctr_msr);
}

/*
 * Event-select flag bits and the event code that setup_p6_watchdog() ORs
 * together to program the watchdog counter.
 */
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

566
static int setup_p6_watchdog(void)
Linus Torvalds's avatar
Linus Torvalds committed
567
{
568
	unsigned int perfctr_msr, evntsel_msr;
Linus Torvalds's avatar
Linus Torvalds committed
569
	unsigned int evntsel;
570
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Linus Torvalds's avatar
Linus Torvalds committed
571

572
573
574
	perfctr_msr = MSR_P6_PERFCTR0;
	evntsel_msr = MSR_P6_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
575
576
		goto fail;

577
	if (!reserve_evntsel_nmi(evntsel_msr))
578
		goto fail1;
Linus Torvalds's avatar
Linus Torvalds committed
579

580
581
	wrmsrl(perfctr_msr, 0UL);

Linus Torvalds's avatar
Linus Torvalds committed
582
583
584
585
586
	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

587
588
	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
589
590
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
Linus Torvalds's avatar
Linus Torvalds committed
591
592
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
593
594
595
596
597
598
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL<<39;
599
600
	return 1;
fail1:
601
	release_perfctr_nmi(perfctr_msr);
602
603
fail:
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
604
605
}

606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
/*
 * Stop the P6 watchdog on this CPU: clear the event-select MSR and give
 * back both MSR reservations.
 */
static void stop_p6_watchdog(void)
{
	struct nmi_watchdog_ctlblk *ctl = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(ctl->evntsel_msr, 0, 0);

	release_evntsel_nmi(ctl->evntsel_msr);
	release_perfctr_nmi(ctl->perfctr_msr);
}

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

/* tested against MSR_IA32_MISC_ENABLE by setup_p4_watchdog() before any setup */
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
/* ESCR fields: setup_p4_watchdog() ORs these into the event-select value */
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
/* CCCR fields: setup_p4_watchdog() ORs these into the CCCR value */
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
/* NOTE(review): not referenced in the visible part of this file */
#define P4_CCCR_OVF 		(1<<31)
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */

Linus Torvalds's avatar
Linus Torvalds committed
636
637
static int setup_p4_watchdog(void)
{
638
639
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
Linus Torvalds's avatar
Linus Torvalds committed
640
	unsigned int misc_enable, dummy;
641
642
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
Linus Torvalds's avatar
Linus Torvalds committed
643

644
	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
Linus Torvalds's avatar
Linus Torvalds committed
645
646
647
648
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
649
650
651
652
653
654
655
656
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

        	ebx = cpuid_ebx(1);
	        apicid = (ebx >> 24) & 0xff;
        	ht_num = apicid & 1;
	} else
Linus Torvalds's avatar
Linus Torvalds committed
657
#endif
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
		ht_num = 0;

	/* performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}
Linus Torvalds's avatar
Linus Torvalds committed
678

679
	if (!reserve_perfctr_nmi(perfctr_msr))
680
681
		goto fail;

682
	if (!reserve_evntsel_nmi(evntsel_msr))
683
		goto fail1;
Linus Torvalds's avatar
Linus Torvalds committed
684

685
686
687
688
689
690
691
692
693
694
695
696
	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
	 	| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
Linus Torvalds's avatar
Linus Torvalds committed
697
	apic_write(APIC_LVTPC, APIC_DM_NMI);
698
699
700
701
702
703
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	wd->check_bit = 1ULL<<39;
Linus Torvalds's avatar
Linus Torvalds committed
704
	return 1;
705
fail1:
706
	release_perfctr_nmi(perfctr_msr);
707
708
fail:
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
709
710
}

711
static void stop_p4_watchdog(void)
Linus Torvalds's avatar
Linus Torvalds committed
712
{
713
714
715
716
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);
Linus Torvalds's avatar
Linus Torvalds committed
717

718
719
720
721
	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}

722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
/* Event select and unit mask that setup_intel_arch_watchdog() programs. */
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static int setup_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		goto fail;

	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
763
764
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}

/*
 * Stop the architectural-perfmon watchdog on this CPU: clear the
 * event-select MSR and give back both MSR reservations.  Does nothing when
 * CPUID leaf 10 reports the Unhalted Core Cycles event as unavailable.
 */
static void stop_intel_arch_watchdog(void)
{
	struct nmi_watchdog_ctlblk *ctl = &__get_cpu_var(nmi_watchdog_ctlblk);
	union cpuid10_eax eax;
	unsigned int ebx;
	unsigned int unused;

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if (eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1))
		return;
	if (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)
		return;

	wrmsr(ctl->evntsel_msr, 0, 0);
	release_evntsel_nmi(ctl->evntsel_msr);
	release_perfctr_nmi(ctl->perfctr_msr);
}

802
803
void setup_apic_nmi_watchdog (void *unused)
{
804
805
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

806
807
808
809
810
	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
	    	return;

811
812
813
814
815
816
817
818
	if (wd->enabled == 1)
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active neither should the other cpus */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

819
820
821
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
822
823
			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
				boot_cpu_data.x86 != 16)
824
				return;
825
			if (!setup_k7_watchdog())
Linus Torvalds's avatar
Linus Torvalds committed
826
				return;
827
828
			break;
		case X86_VENDOR_INTEL:
829
830
831
832
833
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				if (!setup_intel_arch_watchdog())
					return;
				break;
			}
834
835
836
837
838
839
840
841
842
843
844
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					return;

				if (!setup_p6_watchdog())
					return;
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					return;
Linus Torvalds's avatar
Linus Torvalds committed
845

846
847
848
849
				if (!setup_p4_watchdog())
					return;
				break;
			default:
Linus Torvalds's avatar
Linus Torvalds committed
850
				return;
851
852
853
854
855
856
			}
			break;
		default:
			return;
		}
	}
857
	wd->enabled = 1;
858
859
860
	atomic_inc(&nmi_active);
}

861
void stop_apic_nmi_watchdog(void *unused)
862
{
863
864
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

865
866
867
868
869
	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
	    	return;

870
871
872
	if (wd->enabled == 0)
		return;

873
874
875
876
877
878
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			stop_k7_watchdog();
			break;
		case X86_VENDOR_INTEL:
879
880
881
882
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				stop_intel_arch_watchdog();
				break;
			}
883
884
885
886
887
888
889
890
891
892
893
894
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					break;
				stop_p6_watchdog();
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					break;
				stop_p4_watchdog();
				break;
			}
Linus Torvalds's avatar
Linus Torvalds committed
895
896
897
898
899
			break;
		default:
			return;
		}
	}
900
	wd->enabled = 0;
901
	atomic_dec(&nmi_active);
Linus Torvalds's avatar
Linus Torvalds committed
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
}

/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

/*
 * Per-CPU bookkeeping for nmi_watchdog_tick(): the timer-interrupt sum seen
 * at the previous NMI, and the number of consecutive NMIs during which that
 * sum did not change.
 */
static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];

void touch_nmi_watchdog (void)
{
925
926
	if (nmi_watchdog > 0) {
		unsigned cpu;
Linus Torvalds's avatar
Linus Torvalds committed
927

928
929
930
931
932
933
934
		/*
		 * Just reset the alert counters, (other CPUs might be
		 * spinning on locks we hold):
		 */
		for_each_present_cpu (cpu)
			alert_counter[cpu] = 0;
	}
Ingo Molnar's avatar
Ingo Molnar committed
935
936
937
938
939

	/*
	 * Tickle the softlockup detector too:
	 */
	touch_softlockup_watchdog();
Linus Torvalds's avatar
Linus Torvalds committed
940
}
941
EXPORT_SYMBOL(touch_nmi_watchdog);
Linus Torvalds's avatar
Linus Torvalds committed
942
943
944

extern void die_nmi(struct pt_regs *, const char *msg);

945
__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
Linus Torvalds's avatar
Linus Torvalds committed
946
947
948
949
950
951
952
{

	/*
	 * Since current_thread_info()-> is always on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
953
	unsigned int sum;
954
	int touched = 0;
955
	int cpu = smp_processor_id();
956
957
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
958
	int rc=0;
959
960
961
962

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
963
		rc = 1;
964
965
		touched = 1;
	}
Linus Torvalds's avatar
Linus Torvalds committed
966

Andrew Morton's avatar
Andrew Morton committed
967
968
969
970
971
972
973
974
975
976
	if (cpu_isset(cpu, backtrace_mask)) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		spin_lock(&lock);
		printk("NMI backtrace for cpu %d\n", cpu);
		dump_stack();
		spin_unlock(&lock);
		cpu_clear(cpu, backtrace_mask);
	}

977
978
979
980
981
	/*
	 * Take the local apic timer and PIT/HPET into account. We don't
	 * know which one is active, when we have highres/dyntick on
	 */
	sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);
Linus Torvalds's avatar
Linus Torvalds committed
982

983
	/* if the none of the timers isn't firing, this cpu isn't doing much */
984
	if (!touched && last_irq_sums[cpu] == sum) {
Linus Torvalds's avatar
Linus Torvalds committed
985
986
987
988
989
990
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz)
991
992
993
			/*
			 * die_nmi will return ONLY if NOTIFY_STOP happens..
			 */
994
			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
995
	} else {
Linus Torvalds's avatar
Linus Torvalds committed
996
997
998
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
999
1000
	/* see if the nmi watchdog went off */
	if (wd->enabled) {