smpboot.c 32 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/*
 *	x86 SMP booting functions
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *
 *	Much of the core SMP work is based on previous work by Thomas Radke, to
 *	whom a great many thanks are extended.
 *
 *	Thanks to Intel for making available several different Pentium,
 *	Pentium Pro and Pentium-II/Xeon MP machines.
 *	Original development of Linux SMP code supported by Caldera.
 *
 *	This code is released under the GNU General Public License version 2 or
 *	later.
 *
 *	Fixes
 *		Felix Koop	:	NR_CPUS used properly
 *		Jose Renau	:	Handle single CPU case.
 *		Alan Cox	:	By repeated request 8) - Total BogoMIPS report.
 *		Greg Wright	:	Fix for kernel stacks panic.
 *		Erich Boleyn	:	MP v1.4 and additional changes.
 *	Matthias Sattler	:	Changes for 2.1 kernel map.
 *	Michel Lespinasse	:	Changes for 2.1 kernel map.
 *	Michael Chastain	:	Change trampoline.S to gnu as.
 *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *		Ingo Molnar	:	Added APIC timers, based on code
 *					from Jose Renau
 *		Ingo Molnar	:	various cleanups and rewrites
 *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
 *		Martin J. Bligh	: 	Added support for multi-quad systems
 *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
*		Rusty Russell	:	Hacked into shape for new "hotplug" boot process. */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/bootmem.h>
Zwane Mwaikambo's avatar
Zwane Mwaikambo committed
44
45
46
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
47
#include <linux/nmi.h>
Linus Torvalds's avatar
Linus Torvalds committed
48
49
50
51
52
53

#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
54
#include <asm/nmi.h>
Linus Torvalds's avatar
Linus Torvalds committed
55
56
57
58

#include <mach_apic.h>
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
59
#include <asm/vmi.h>
60
#include <asm/mtrr.h>
Linus Torvalds's avatar
Linus Torvalds committed
61
62

/* Set if we find a B stepping CPU */
Li Shaohua's avatar
Li Shaohua committed
63
static int __devinitdata smp_b_stepping;
Linus Torvalds's avatar
Linus Torvalds committed
64
65
66

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
67
EXPORT_SYMBOL(smp_num_siblings);
68

69
70
71
/* Last level cache ID of each logical CPU */
int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};

72
/* representing HT siblings of each logical CPU */
73
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
74
75
EXPORT_SYMBOL(cpu_sibling_map);

76
/* representing HT and core siblings of each logical CPU */
77
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
78
79
EXPORT_SYMBOL(cpu_core_map);

Linus Torvalds's avatar
Linus Torvalds committed
80
/* bitmap of online cpus */
81
cpumask_t cpu_online_map __read_mostly;
82
EXPORT_SYMBOL(cpu_online_map);
Linus Torvalds's avatar
Linus Torvalds committed
83
84
85

cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
86
EXPORT_SYMBOL(cpu_callout_map);
87
88
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
Linus Torvalds's avatar
Linus Torvalds committed
89
90
91
92
static cpumask_t smp_commenced_mask;

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
93
EXPORT_SYMBOL(cpu_data);
Linus Torvalds's avatar
Linus Torvalds committed
94

95
u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
Linus Torvalds's avatar
Linus Torvalds committed
96
97
98
			{ [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);

99
100
u8 apicid_2_node[MAX_APICID];

Linus Torvalds's avatar
Linus Torvalds committed
101
102
103
104
105
106
107
108
109
110
111
/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data [];
extern unsigned char trampoline_end  [];
static unsigned char *trampoline_base;
static int trampoline_exec;

static void map_cpu_to_logical_apicid(void);

Zwane Mwaikambo's avatar
Zwane Mwaikambo committed
112
113
114
/* State of each CPU. */
DEFINE_PER_CPU(int, cpu_state) = { 0 };

Linus Torvalds's avatar
Linus Torvalds committed
115
116
117
118
119
120
/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

Li Shaohua's avatar
Li Shaohua committed
121
static unsigned long __devinit setup_trampoline(void)
Linus Torvalds's avatar
Linus Torvalds committed
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
{
	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(trampoline_base);
}

/*
 * Reserve the page that will hold the SMP bootup trampoline.
 *
 * Called very early during boot.  The page is taken from the low
 * bootmem allocator and remembered in trampoline_base; the value
 * returned by set_kernel_exec() for that page is kept in
 * trampoline_exec.
 */
void __init smp_alloc_memory(void)
{
	void *page = (void *) alloc_bootmem_low_pages(PAGE_SIZE);

	trampoline_base = page;

	/*
	 * The APs execute this page as real-mode code, so it has to
	 * live in very low memory.
	 */
	if (__pa(trampoline_base) >= 0x9F000)
		BUG();

	/* Make the SMP trampoline executable. */
	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

151
void __cpuinit smp_store_cpu_info(int id)
Linus Torvalds's avatar
Linus Torvalds committed
152
153
154
155
156
{
	struct cpuinfo_x86 *c = cpu_data + id;

	*c = boot_cpu_data;
	if (id!=0)
157
		identify_secondary_cpu(c);
Linus Torvalds's avatar
Linus Torvalds committed
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
	/*
	 * Mask B, Pentium, but not Pentium MMX
	 */
	if (c->x86_vendor == X86_VENDOR_INTEL &&
	    c->x86 == 5 &&
	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
	    c->x86_model <= 3)
		/*
		 * Remember we have B step Pentia with bugs
		 */
		smp_b_stepping = 1;

	/*
	 * Certain Athlons might work (for various values of 'work') in SMP
	 * but they are not certified as MP capable.
	 */
	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {

176
177
178
		if (num_possible_cpus() == 1)
			goto valid_k7;

Linus Torvalds's avatar
Linus Torvalds committed
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
		/* Athlon 660/661 is valid. */	
		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
			goto valid_k7;

		/* Duron 670 is valid */
		if ((c->x86_model==7) && (c->x86_mask==0))
			goto valid_k7;

		/*
		 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
		 * It's worth noting that the A5 stepping (662) of some Athlon XP's
		 * have the MP bit set.
		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
		 */
		if (((c->x86_model==6) && (c->x86_mask>=2)) ||
		    ((c->x86_model==7) && (c->x86_mask>=1)) ||
		     (c->x86_model> 7))
			if (cpu_has_mp)
				goto valid_k7;

		/* If we get here, it's not a certified SMP capable AMD system. */
200
		add_taint(TAINT_UNSAFE_SMP);
Linus Torvalds's avatar
Linus Torvalds committed
201
202
203
204
205
206
207
208
209
210
	}

valid_k7:
	;
}

extern void calibrate_delay(void);

static atomic_t init_deasserted;

211
static void __cpuinit smp_callin(void)
Linus Torvalds's avatar
Linus Torvalds committed
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
{
	int cpuid, phys_id;
	unsigned long timeout;

	/*
	 * If woken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
	 * our local APIC.  We have to wait for the IPI or we'll
	 * lock up on an APIC access.
	 */
	wait_for_init_deassert(&init_deasserted);

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	phys_id = GET_APIC_ID(apic_read(APIC_ID));
	cpuid = smp_processor_id();
	if (cpu_isset(cpuid, cpu_callin_map)) {
		printk("huh, phys CPU#%d, CPU#%d already present??\n",
					phys_id, cpuid);
		BUG();
	}
	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic. Complete APIC bus
	 * silence for 1 second, this overestimates the time the
	 * boot CPU is spending to send the up to 2 STARTUP IPIs
	 * by a factor of two. This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (udelay is not yet working)
	 */
	timeout = jiffies + 2*HZ;
	while (time_before(jiffies, timeout)) {
		/*
		 * Has the boot CPU finished its STARTUP sequence?
		 */
		if (cpu_isset(cpuid, cpu_callout_map))
			break;
		rep_nop();
	}

	if (!time_before(jiffies, timeout)) {
		printk("BUG: CPU%d started up but did not get a callout!\n",
			cpuid);
		BUG();
	}

	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */

	Dprintk("CALLIN, before setup_local_APIC().\n");
	smp_callin_clear_local_apic();
	setup_local_APIC();
	map_cpu_to_logical_apicid();

	/*
	 * Get our bogomips.
	 */
	calibrate_delay();
	Dprintk("Stack at about %p\n",&cpuid);

	/*
	 * Save our processor parameters
	 */
284
	smp_store_cpu_info(cpuid);
Linus Torvalds's avatar
Linus Torvalds committed
285
286
287
288
289
290
291
292
293

	/*
	 * Allow the master to continue.
	 */
	cpu_set(cpuid, cpu_callin_map);
}

static int cpucount;

294
295
296
297
298
299
/* maps the cpu to the sched domain representing multi-core */
cpumask_t cpu_coregroup_map(int cpu)
{
	struct cpuinfo_x86 *c = cpu_data + cpu;
	/*
	 * For perf, we return last level cache shared map.
300
	 * And for power savings, we return cpu_core_map
301
	 */
302
303
304
305
	if (sched_mc_power_savings || sched_smt_power_savings)
		return cpu_core_map[cpu];
	else
		return c->llc_shared_map;
306
307
}

308
309
310
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;

311
312
313
314
static inline void
set_cpu_sibling_map(int cpu)
{
	int i;
315
316
317
	struct cpuinfo_x86 *c = cpu_data;

	cpu_set(cpu, cpu_sibling_setup_map);
318
319

	if (smp_num_siblings > 1) {
320
		for_each_cpu_mask(i, cpu_sibling_setup_map) {
321
322
			if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
			    c[cpu].cpu_core_id == c[i].cpu_core_id) {
323
324
				cpu_set(i, cpu_sibling_map[cpu]);
				cpu_set(cpu, cpu_sibling_map[i]);
325
326
				cpu_set(i, cpu_core_map[cpu]);
				cpu_set(cpu, cpu_core_map[i]);
327
328
				cpu_set(i, c[cpu].llc_shared_map);
				cpu_set(cpu, c[i].llc_shared_map);
329
330
331
332
333
334
			}
		}
	} else {
		cpu_set(cpu, cpu_sibling_map[cpu]);
	}

335
336
	cpu_set(cpu, c[cpu].llc_shared_map);

337
	if (current_cpu_data.x86_max_cores == 1) {
338
		cpu_core_map[cpu] = cpu_sibling_map[cpu];
339
340
341
342
343
		c[cpu].booted_cores = 1;
		return;
	}

	for_each_cpu_mask(i, cpu_sibling_setup_map) {
344
345
346
347
348
		if (cpu_llc_id[cpu] != BAD_APICID &&
		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
			cpu_set(i, c[cpu].llc_shared_map);
			cpu_set(cpu, c[i].llc_shared_map);
		}
349
		if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
			cpu_set(i, cpu_core_map[cpu]);
			cpu_set(cpu, cpu_core_map[i]);
			/*
			 *  Does this new cpu bring up a new core?
			 */
			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
				/*
				 * for each core in package, increment
				 * the booted_cores for this new cpu
				 */
				if (first_cpu(cpu_sibling_map[i]) == i)
					c[cpu].booted_cores++;
				/*
				 * increment the core count for all
				 * the other cpus in this package
				 */
				if (i != cpu)
					c[i].booted_cores++;
			} else if (i != cpu && !c[cpu].booted_cores)
				c[cpu].booted_cores = c[i].booted_cores;
		}
371
372
373
	}
}

Linus Torvalds's avatar
Linus Torvalds committed
374
375
376
/*
 * Activate a secondary processor.
 */
377
static void __cpuinit start_secondary(void *unused)
Linus Torvalds's avatar
Linus Torvalds committed
378
379
{
	/*
380
381
382
	 * Don't put *anything* before cpu_init(), SMP booting is so
	 * fragile that we want to limit the things done here to the
	 * most necessary things.
Linus Torvalds's avatar
Linus Torvalds committed
383
	 */
384
385
386
#ifdef CONFIG_VMI
	vmi_bringup();
#endif
387
	cpu_init();
388
	preempt_disable();
Linus Torvalds's avatar
Linus Torvalds committed
389
390
391
	smp_callin();
	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
		rep_nop();
392
393
394
395
396
	/*
	 * Check TSC synchronization with the BP:
	 */
	check_tsc_sync_target();

397
	setup_secondary_clock();
Linus Torvalds's avatar
Linus Torvalds committed
398
399
400
401
402
403
404
405
406
407
	if (nmi_watchdog == NMI_IO_APIC) {
		disable_8259A_irq(0);
		enable_NMI_through_LVT0(NULL);
		enable_8259A_irq(0);
	}
	/*
	 * low-memory mappings have been cleared, flush them from
	 * the local TLBs too.
	 */
	local_flush_tlb();
Li Shaohua's avatar
Li Shaohua committed
408

409
410
411
412
	/* This must be done before setting cpu_online_map */
	set_cpu_sibling_map(raw_smp_processor_id());
	wmb();

Li Shaohua's avatar
Li Shaohua committed
413
414
415
416
417
418
419
420
421
	/*
	 * We need to hold call_lock, so there is no inconsistency
	 * between the time smp_call_function() determines number of
	 * IPI recipients, and the time when the determination is made
	 * for which cpus receive the IPI. Holding this
	 * lock helps us to not include this cpu in a currently in progress
	 * smp_call_function().
	 */
	lock_ipi_call_lock();
Linus Torvalds's avatar
Linus Torvalds committed
422
	cpu_set(smp_processor_id(), cpu_online_map);
Li Shaohua's avatar
Li Shaohua committed
423
	unlock_ipi_call_lock();
424
	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
Linus Torvalds's avatar
Linus Torvalds committed
425
426
427
428
429
430
431
432
433
434
435
436
437
438

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();

	wmb();
	cpu_idle();
}

/*
 * Everything has been set up for the secondary
 * CPUs - they just need to reload everything
 * from the task structure
 * This function must not return.
 */
Li Shaohua's avatar
Li Shaohua committed
439
void __devinit initialize_secondary(void)
Linus Torvalds's avatar
Linus Torvalds committed
440
441
442
443
444
445
446
447
448
449
{
	/*
	 * We don't actually need to load the full TSS,
	 * basically just the stack pointer and the eip.
	 */

	asm volatile(
		"movl %0,%%esp\n\t"
		"jmp *%1"
		:
450
		:"m" (current->thread.esp),"m" (current->thread.eip));
Linus Torvalds's avatar
Linus Torvalds committed
451
452
}

453
/* Static state in head.S used to set up a CPU */
Linus Torvalds's avatar
Linus Torvalds committed
454
455
456
457
458
459
460
461
extern struct {
	void * esp;
	unsigned short ss;
} stack_start;

#ifdef CONFIG_NUMA

/* which logical CPUs are on which nodes */
462
cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
Linus Torvalds's avatar
Linus Torvalds committed
463
				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
464
EXPORT_SYMBOL(node_2_cpu_mask);
Linus Torvalds's avatar
Linus Torvalds committed
465
/* which node each logical CPU is on */
466
int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
Linus Torvalds's avatar
Linus Torvalds committed
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
EXPORT_SYMBOL(cpu_2_node);

/* set up a mapping between cpu and node. */
static inline void map_cpu_to_node(int cpu, int node)
{
	printk("Mapping cpu %d to node %d\n", cpu, node);
	cpu_set(cpu, node_2_cpu_mask[node]);
	cpu_2_node[cpu] = node;
}

/*
 * Forget which node @cpu belongs to: clear the cpu from every node's
 * cpumask and reset its cpu_2_node[] entry to 0.
 */
static inline void unmap_cpu_to_node(int cpu)
{
	int nid = 0;

	printk("Unmapping cpu %d from all nodes\n", cpu);

	while (nid < MAX_NUMNODES) {
		cpu_clear(cpu, node_2_cpu_mask[nid]);
		nid++;
	}

	cpu_2_node[cpu] = 0;
}
#else /* !CONFIG_NUMA */

#define map_cpu_to_node(cpu, node)	({})
#define unmap_cpu_to_node(cpu)	({})

#endif /* CONFIG_NUMA */

494
u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
Linus Torvalds's avatar
Linus Torvalds committed
495
496
497
498
499

static void map_cpu_to_logical_apicid(void)
{
	int cpu = smp_processor_id();
	int apicid = logical_smp_processor_id();
500
	int node = apicid_to_node(apicid);
501
502
503

	if (!node_online(node))
		node = first_online_node;
Linus Torvalds's avatar
Linus Torvalds committed
504
505

	cpu_2_logical_apicid[cpu] = apicid;
506
	map_cpu_to_node(cpu, node);
Linus Torvalds's avatar
Linus Torvalds committed
507
508
509
510
511
512
513
514
515
516
517
518
}

/*
 * Undo map_cpu_to_logical_apicid() for @cpu: mark its logical APIC id
 * as BAD_APICID and drop its cpu-to-node mapping.
 */
static void unmap_cpu_to_logical_apicid(int cpu)
{
	cpu_2_logical_apicid[cpu] = BAD_APICID;
	/* expands to an empty statement expression when CONFIG_NUMA is off */
	unmap_cpu_to_node(cpu);
}

static inline void __inquire_remote_apic(int apicid)
{
	int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
519
520
	int timeout;
	unsigned long status;
Linus Torvalds's avatar
Linus Torvalds committed
521
522
523

	printk("Inquiring remote APIC #%d...\n", apicid);

524
	for (i = 0; i < ARRAY_SIZE(regs); i++) {
Linus Torvalds's avatar
Linus Torvalds committed
525
526
527
528
529
		printk("... APIC #%d %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
530
531
532
		status = safe_apic_wait_icr_idle();
		if (status)
			printk("a previous APIC delivery may have failed\n");
Linus Torvalds's avatar
Linus Torvalds committed
533
534
535
536
537
538
539
540
541
542
543
544
545

		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);

		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
546
			printk("%lx\n", status);
Linus Torvalds's avatar
Linus Torvalds committed
547
548
549
550
551
552
553
554
555
556
557
558
559
			break;
		default:
			printk("failed\n");
		}
	}
}

#ifdef WAKE_SECONDARY_VIA_NMI
/* 
 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
 * won't ... remember to clear down the APIC, etc later.
 */
Li Shaohua's avatar
Li Shaohua committed
560
static int __devinit
Linus Torvalds's avatar
Linus Torvalds committed
561
562
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
{
563
564
	unsigned long send_status, accept_status = 0;
	int maxlvt;
Linus Torvalds's avatar
Linus Torvalds committed
565
566
567
568
569
570
571
572
573

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));

	/* Boot on the stack */
	/* Kick the second */
	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);

	Dprintk("Waiting for send to finish...\n");
574
	send_status = safe_apic_wait_icr_idle();
Linus Torvalds's avatar
Linus Torvalds committed
575
576
577
578
579
580
581
582

	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);
	/*
	 * Due to the Pentium erratum 3AP.
	 */
583
	maxlvt = lapic_get_maxlvt();
Linus Torvalds's avatar
Linus Torvalds committed
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
	if (maxlvt > 3) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
	}
	accept_status = (apic_read(APIC_ESR) & 0xEF);
	Dprintk("NMI sent.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_NMI */

#ifdef WAKE_SECONDARY_VIA_INIT
Li Shaohua's avatar
Li Shaohua committed
601
static int __devinit
Linus Torvalds's avatar
Linus Torvalds committed
602
603
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
604
605
	unsigned long send_status, accept_status = 0;
	int maxlvt, num_starts, j;
Linus Torvalds's avatar
Linus Torvalds committed
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	Dprintk("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
				| APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
630
	send_status = safe_apic_wait_icr_idle();
Linus Torvalds's avatar
Linus Torvalds committed
631
632
633
634
635
636
637
638
639
640
641
642

	mdelay(10);

	Dprintk("Deasserting INIT.\n");

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
643
	send_status = safe_apic_wait_icr_idle();
Linus Torvalds's avatar
Linus Torvalds committed
644
645
646
647
648
649
650
651
652
653
654
655
656
657

	atomic_set(&init_deasserted, 1);

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid]))
		num_starts = 2;
	else
		num_starts = 0;

658
659
660
661
662
663
664
	/*
	 * Paravirt / VMI wants a startup IPI hook here to set up the
	 * target processor state.
	 */
	startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
		         (unsigned long) stack_start.esp);

Linus Torvalds's avatar
Linus Torvalds committed
665
666
667
668
669
	/*
	 * Run STARTUP IPI loop.
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

670
	maxlvt = lapic_get_maxlvt();
Linus Torvalds's avatar
Linus Torvalds committed
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698

	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n",j);
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second */
		apic_write_around(APIC_ICR, APIC_DM_STARTUP
					| (start_eip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		Dprintk("Startup point 1.\n");

		Dprintk("Waiting for send to finish...\n");
699
		send_status = safe_apic_wait_icr_idle();
Linus Torvalds's avatar
Linus Torvalds committed
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		/*
		 * Due to the Pentium erratum 3AP.
		 */
		if (maxlvt > 3) {
			apic_read_around(APIC_SPIV);
			apic_write(APIC_ESR, 0);
		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	Dprintk("After Startup.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_INIT */

extern cpumask_t cpu_initialized;
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
/*
 * Find a CPU id that is not set in cpu_present_map.
 *
 * Returns the first such id as reported by first_cpu() on the
 * complemented map, or -ENODEV when that id is not below NR_CPUS.
 */
static inline int alloc_cpu_id(void)
{
	cpumask_t absent;
	int id;

	cpus_complement(absent, cpu_present_map);
	id = first_cpu(absent);

	return (id < NR_CPUS) ? id : -ENODEV;
}

#ifdef CONFIG_HOTPLUG_CPU
static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
static inline struct task_struct * alloc_idle_task(int cpu)
{
	struct task_struct *idle;

	if ((idle = cpu_idle_tasks[cpu]) != NULL) {
		/* initialize thread_struct.  we really want to avoid destroying
		 * the idle thread
		 */
749
		idle->thread.esp = (unsigned long)task_pt_regs(idle);
750
751
752
753
754
755
756
757
758
759
760
761
		init_idle(idle, cpu);
		return idle;
	}
	idle = fork_idle(cpu);

	if (!IS_ERR(idle))
		cpu_idle_tasks[cpu] = idle;
	return idle;
}
#else
#define alloc_idle_task(cpu) fork_idle(cpu)
#endif
Linus Torvalds's avatar
Linus Torvalds committed
762

763
static int __cpuinit do_boot_cpu(int apicid, int cpu)
Linus Torvalds's avatar
Linus Torvalds committed
764
765
766
767
768
769
770
771
/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
 */
{
	struct task_struct *idle;
	unsigned long boot_error;
772
	int timeout;
Linus Torvalds's avatar
Linus Torvalds committed
773
774
775
	unsigned long start_eip;
	unsigned short nmi_high = 0, nmi_low = 0;

776
777
778
779
780
781
	/*
	 * Save current MTRR state in case it was changed since early boot
	 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
	 */
	mtrr_save_state();

Linus Torvalds's avatar
Linus Torvalds committed
782
783
784
785
	/*
	 * We can't use kernel_thread since we must avoid
	 * rescheduling the child.
	 */
786
	idle = alloc_idle_task(cpu);
Linus Torvalds's avatar
Linus Torvalds committed
787
788
	if (IS_ERR(idle))
		panic("failed fork for CPU %d", cpu);
789

790
791
	init_gdt(cpu);
 	per_cpu(current_task, cpu) = idle;
792
	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
793

Linus Torvalds's avatar
Linus Torvalds committed
794
795
796
797
	idle->thread.eip = (unsigned long) start_secondary;
	/* start_eip had better be page-aligned! */
	start_eip = setup_trampoline();

798
799
800
	++cpucount;
	alternatives_smp_switch(1);

Linus Torvalds's avatar
Linus Torvalds committed
801
802
803
804
805
806
807
	/* So we see what's up   */
	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
	/* Stack for startup_32 can be just as for start_secondary onwards */
	stack_start.esp = (void *) idle->thread.esp;

	irq_ctx_init(cpu);

808
	x86_cpu_to_apicid[cpu] = apicid;
Linus Torvalds's avatar
Linus Torvalds committed
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	atomic_set(&init_deasserted, 0);

	Dprintk("Setting warm reset code and vector.\n");

	store_NMI_vector(&nmi_high, &nmi_low);

	smpboot_setup_warm_reset_vector(start_eip);

	/*
	 * Starting actual IPI sequence...
	 */
	boot_error = wakeup_secondary_cpu(apicid, start_eip);

	if (!boot_error) {
		/*
		 * allow APs to start initializing.
		 */
		Dprintk("Before Callout %d.\n", cpu);
		cpu_set(cpu, cpu_callout_map);
		Dprintk("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response
		 */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_isset(cpu, cpu_callin_map))
				break;	/* It has booted */
			udelay(100);
		}

		if (cpu_isset(cpu, cpu_callin_map)) {
			/* number CPUs logically, starting from 1 (BSP is 0) */
			Dprintk("OK.\n");
			printk("CPU%d: ", cpu);
			print_cpu_info(&cpu_data[cpu]);
			Dprintk("CPU has booted.\n");
		} else {
			boot_error= 1;
			if (*((volatile unsigned char *)trampoline_base)
					== 0xA5)
				/* trampoline started but...? */
				printk("Stuck ??\n");
			else
				/* trampoline code not run */
				printk("Not responding.\n");
			inquire_remote_apic(apicid);
		}
	}
862

Linus Torvalds's avatar
Linus Torvalds committed
863
864
865
866
867
868
	if (boot_error) {
		/* Try to put things back the way they were before ... */
		unmap_cpu_to_logical_apicid(cpu);
		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
		cpucount--;
869
870
871
	} else {
		x86_cpu_to_apicid[cpu] = apicid;
		cpu_set(cpu, cpu_present_map);
Linus Torvalds's avatar
Linus Torvalds committed
872
873
874
875
876
877
878
879
	}

	/* mark "stuck" area as not stuck */
	*((volatile unsigned long *)trampoline_base) = 0;

	return boot_error;
}

880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Tear down the boot-time bookkeeping of the CPU this is running on.
 *
 * Drops the CPU from cpucount, from the callout/callin maps and from
 * smp_commenced_mask, and removes its logical APIC id mapping, so the
 * state set up while the CPU was booted no longer refers to it.
 */
void cpu_exit_clear(void)
{
	/* raw variant: id of the CPU executing this function */
	int cpu = raw_smp_processor_id();

	idle_task_exit();

	/* balances the ++cpucount done in do_boot_cpu() */
	cpucount --;
	cpu_uninit();
	/* NOTE(review): presumably the counterpart of irq_ctx_init() - confirm */
	irq_ctx_exit(cpu);

	/* callout is set by do_boot_cpu(), callin by smp_callin() */
	cpu_clear(cpu, cpu_callout_map);
	cpu_clear(cpu, cpu_callin_map);

	/* start_secondary() spins on this mask before going online */
	cpu_clear(cpu, smp_commenced_mask);
	unmap_cpu_to_logical_apicid(cpu);
}

struct warm_boot_cpu_info {
	struct completion *complete;
David Howells's avatar
David Howells committed
900
	struct work_struct task;
901
902
903
904
	int apicid;
	int cpu;
};

David Howells's avatar
David Howells committed
905
static void __cpuinit do_warm_boot_cpu(struct work_struct *work)
906
{
David Howells's avatar
David Howells committed
907
908
	struct warm_boot_cpu_info *info =
		container_of(work, struct warm_boot_cpu_info, task);
909
910
911
912
	do_boot_cpu(info->apicid, info->cpu);
	complete(info->complete);
}

913
static int __cpuinit __smp_prepare_cpu(int cpu)
914
{
915
	DECLARE_COMPLETION_ONSTACK(done);
916
917
918
919
920
921
922
923
924
925
926
927
	struct warm_boot_cpu_info info;
	int	apicid, ret;

	apicid = x86_cpu_to_apicid[cpu];
	if (apicid == BAD_APICID) {
		ret = -ENODEV;
		goto exit;
	}

	info.complete = &done;
	info.apicid = apicid;
	info.cpu = cpu;
David Howells's avatar
David Howells committed
928
	INIT_WORK(&info.task, do_warm_boot_cpu);
929
930

	/* init low mem mapping */
931
	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
932
			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
933
	flush_tlb_all();
David Howells's avatar
David Howells committed
934
	schedule_work(&info.task);
935
936
937
938
939
940
941
942
943
	wait_for_completion(&done);

	zap_low_mappings();
	ret = 0;
exit:
	return ret;
}
#endif

Linus Torvalds's avatar
Linus Torvalds committed
944
945
946
947
948
949
950
/*
 * Cycle through the processors sending APIC IPIs to boot each.
 */

static int boot_cpu_logical_apicid;
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;
951
952
953
#ifdef CONFIG_X86_NUMAQ
EXPORT_SYMBOL(xquad_portio);
#endif
Linus Torvalds's avatar
Linus Torvalds committed
954
955
956
957
958
959
960
961
962
963
964
965
966

static void __init smp_boot_cpus(unsigned int max_cpus)
{
	int apicid, cpu, bit, kicked;
	unsigned long bogosum = 0;

	/*
	 * Setup boot CPU information
	 */
	smp_store_cpu_info(0); /* Final full version of the data */
	printk("CPU%d: ", 0);
	print_cpu_info(&cpu_data[0]);

967
	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
Linus Torvalds's avatar
Linus Torvalds committed
968
969
970
971
972
	boot_cpu_logical_apicid = logical_smp_processor_id();
	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;

	current_thread_info()->cpu = 0;

973
	set_cpu_sibling_map(0);
974

Linus Torvalds's avatar
Linus Torvalds committed
975
976
977
978
979
980
	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config && !acpi_lapic) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
		smpboot_clear_io_apic_irqs();
		phys_cpu_present_map = physid_mask_of_physid(0);
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
		map_cpu_to_logical_apicid();
		cpu_set(0, cpu_sibling_map[0]);
		cpu_set(0, cpu_core_map[0]);
		return;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 * Makes no sense to do this check in clustered apic mode, so skip it
	 */
	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
				boot_cpu_physical_apicid);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
For faster browsing, not all history is shown. View entire blame