Skip to content

Commit ce4b1b1

Browse files
Igor Mammedov authored and
Ingo Molnar committed
x86/smpboot: Initialize secondary CPU only if master CPU will wait for it
A hang is observed on virtual machines during CPU hotplug, especially in big guests with many CPUs. (It is reproducible more often if the host is over-committed.) It happens because the master CPU gives up waiting on the secondary CPU and allows it to run wild. As a result, the AP locks up or crashes the system, for example as described here: https://lkml.org/lkml/2014/3/6/257 If the master CPU has sent the STARTUP IPI successfully, and the AP has signalled to the master CPU that it's ready to start initialization, make the master CPU wait indefinitely till the AP is onlined. To ensure that the AP won't ever run wild, make it wait at early startup till the master CPU confirms its intention to wait for the AP. If the AP doesn't respond in 10 seconds, the master CPU will time out and cancel AP onlining. Signed-off-by: Igor Mammedov <imammedo@redhat.com> Acked-by: Toshi Kani <toshi.kani@hp.com> Tested-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1403266991-12233-1-git-send-email-imammedo@redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 9e82bf0 commit ce4b1b1

File tree

3 files changed

+50
-79
lines changed

3 files changed

+50
-79
lines changed

arch/x86/kernel/cpu/common.c

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1266,6 +1266,19 @@ static void dbg_restore_debug_regs(void)
12661266
#define dbg_restore_debug_regs()
12671267
#endif /* ! CONFIG_KGDB */
12681268

1269+
static void wait_for_master_cpu(int cpu)
1270+
{
1271+
#ifdef CONFIG_SMP
1272+
/*
1273+
* wait for ACK from master CPU before continuing
1274+
* with AP initialization
1275+
*/
1276+
WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
1277+
while (!cpumask_test_cpu(cpu, cpu_callout_mask))
1278+
cpu_relax();
1279+
#endif
1280+
}
1281+
12691282
/*
12701283
* cpu_init() initializes state that is per-CPU. Some data is already
12711284
* initialized (naturally) in the bootstrap process, such as the GDT
@@ -1281,16 +1294,17 @@ void cpu_init(void)
12811294
struct task_struct *me;
12821295
struct tss_struct *t;
12831296
unsigned long v;
1284-
int cpu;
1297+
int cpu = stack_smp_processor_id();
12851298
int i;
12861299

1300+
wait_for_master_cpu(cpu);
1301+
12871302
/*
12881303
* Load microcode on this cpu if a valid microcode is available.
12891304
* This is early microcode loading procedure.
12901305
*/
12911306
load_ucode_ap();
12921307

1293-
cpu = stack_smp_processor_id();
12941308
t = &per_cpu(init_tss, cpu);
12951309
oist = &per_cpu(orig_ist, cpu);
12961310

@@ -1302,9 +1316,6 @@ void cpu_init(void)
13021316

13031317
me = current;
13041318

1305-
if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
1306-
panic("CPU#%d already initialized!\n", cpu);
1307-
13081319
pr_debug("Initializing CPU#%d\n", cpu);
13091320

13101321
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
@@ -1381,13 +1392,9 @@ void cpu_init(void)
13811392
struct tss_struct *t = &per_cpu(init_tss, cpu);
13821393
struct thread_struct *thread = &curr->thread;
13831394

1384-
show_ucode_info_early();
1395+
wait_for_master_cpu(cpu);
13851396

1386-
if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
1387-
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
1388-
for (;;)
1389-
local_irq_enable();
1390-
}
1397+
show_ucode_info_early();
13911398

13921399
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
13931400

arch/x86/kernel/smpboot.c

Lines changed: 30 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ atomic_t init_deasserted;
111111
static void smp_callin(void)
112112
{
113113
int cpuid, phys_id;
114-
unsigned long timeout;
115114

116115
/*
117116
* If waken up by an INIT in an 82489DX configuration
@@ -130,37 +129,6 @@ static void smp_callin(void)
130129
* (This works even if the APIC is not enabled.)
131130
*/
132131
phys_id = read_apic_id();
133-
if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
134-
panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
135-
phys_id, cpuid);
136-
}
137-
pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
138-
139-
/*
140-
* STARTUP IPIs are fragile beasts as they might sometimes
141-
* trigger some glue motherboard logic. Complete APIC bus
142-
* silence for 1 second, this overestimates the time the
143-
* boot CPU is spending to send the up to 2 STARTUP IPIs
144-
* by a factor of two. This should be enough.
145-
*/
146-
147-
/*
148-
* Waiting 2s total for startup (udelay is not yet working)
149-
*/
150-
timeout = jiffies + 2*HZ;
151-
while (time_before(jiffies, timeout)) {
152-
/*
153-
* Has the boot CPU finished it's STARTUP sequence?
154-
*/
155-
if (cpumask_test_cpu(cpuid, cpu_callout_mask))
156-
break;
157-
cpu_relax();
158-
}
159-
160-
if (!time_before(jiffies, timeout)) {
161-
panic("%s: CPU%d started up but did not get a callout!\n",
162-
__func__, cpuid);
163-
}
164132

165133
/*
166134
* the boot CPU has finished the init stage and is spinning
@@ -753,8 +721,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
753721
unsigned long start_ip = real_mode_header->trampoline_start;
754722

755723
unsigned long boot_error = 0;
756-
int timeout;
757724
int cpu0_nmi_registered = 0;
725+
unsigned long timeout;
758726

759727
/* Just in case we booted with a single CPU. */
760728
alternatives_enable_smp();
@@ -801,6 +769,15 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
801769
}
802770
}
803771

772+
/*
773+
* AP might wait on cpu_callout_mask in cpu_init() with
774+
* cpu_initialized_mask set if previous attempt to online
775+
* it timed-out. Clear cpu_initialized_mask so that after
776+
* INIT/SIPI it could start with a clean state.
777+
*/
778+
cpumask_clear_cpu(cpu, cpu_initialized_mask);
779+
smp_mb();
780+
804781
/*
805782
* Wake up a CPU in difference cases:
806783
* - Use the method in the APIC driver if it's defined
@@ -815,53 +792,38 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
815792

816793
if (!boot_error) {
817794
/*
818-
* allow APs to start initializing.
795+
* Wait 10s total for a response from AP
819796
*/
820-
pr_debug("Before Callout %d\n", cpu);
821-
cpumask_set_cpu(cpu, cpu_callout_mask);
822-
pr_debug("After Callout %d\n", cpu);
797+
boot_error = -1;
798+
timeout = jiffies + 10*HZ;
799+
while (time_before(jiffies, timeout)) {
800+
if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
801+
/*
802+
* Tell AP to proceed with initialization
803+
*/
804+
cpumask_set_cpu(cpu, cpu_callout_mask);
805+
boot_error = 0;
806+
break;
807+
}
808+
udelay(100);
809+
schedule();
810+
}
811+
}
823812

813+
if (!boot_error) {
824814
/*
825-
* Wait 5s total for a response
815+
* Wait till AP completes initial initialization
826816
*/
827-
for (timeout = 0; timeout < 50000; timeout++) {
828-
if (cpumask_test_cpu(cpu, cpu_callin_mask))
829-
break; /* It has booted */
830-
udelay(100);
817+
while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
831818
/*
832819
* Allow other tasks to run while we wait for the
833820
* AP to come online. This also gives a chance
834821
* for the MTRR work(triggered by the AP coming online)
835822
* to be completed in the stop machine context.
836823
*/
824+
udelay(100);
837825
schedule();
838826
}
839-
840-
if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
841-
print_cpu_msr(&cpu_data(cpu));
842-
pr_debug("CPU%d: has booted.\n", cpu);
843-
} else {
844-
boot_error = 1;
845-
if (*trampoline_status == 0xA5A5A5A5)
846-
/* trampoline started but...? */
847-
pr_err("CPU%d: Stuck ??\n", cpu);
848-
else
849-
/* trampoline code not run */
850-
pr_err("CPU%d: Not responding\n", cpu);
851-
if (apic->inquire_remote_apic)
852-
apic->inquire_remote_apic(apicid);
853-
}
854-
}
855-
856-
if (boot_error) {
857-
/* Try to put things back the way they were before ... */
858-
numa_remove_cpu(cpu); /* was set by numa_add_cpu */
859-
860-
/* was set by do_boot_cpu() */
861-
cpumask_clear_cpu(cpu, cpu_callout_mask);
862-
863-
/* was set by cpu_init() */
864-
cpumask_clear_cpu(cpu, cpu_initialized_mask);
865827
}
866828

867829
/* mark "stuck" area as not stuck */

arch/x86/xen/smp.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
360360
struct desc_struct *gdt;
361361
unsigned long gdt_mfn;
362362

363+
/* used to tell cpu_init() that it can proceed with initialization */
364+
cpumask_set_cpu(cpu, cpu_callout_mask);
363365
if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
364366
return 0;
365367

0 commit comments

Comments
 (0)