Skip to content

Commit 6f855b3

Browse files
vdonnefort authored and KAGA-KOKO committed
cpu/hotplug: Do not bail-out in DYING/STARTING sections
The DYING/STARTING callbacks are not expected to fail. However, as reported by Derek, buggy drivers such as tboot are still free to return errors within those sections, which halts the hot(un)plug and leaves the CPU in an unrecoverable state.

As there is no rollback possible, only log the failures and proceed with the following steps.

This restores the hotplug behaviour prior to commit 453e410 ("cpu/hotplug: Add cpuhp_invoke_callback_range()")

Fixes: 453e410 ("cpu/hotplug: Add cpuhp_invoke_callback_range()")
Reported-by: Derek Dolney <z23@posteo.net>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Derek Dolney <z23@posteo.net>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=215867
Link: https://lore.kernel.org/r/20220927101259.1149636-1-vdonnefort@google.com
1 parent d385feb commit 6f855b3

File tree

1 file changed

+40
-16
lines changed

1 file changed

+40
-16
lines changed

kernel/cpu.c

+40-16
Original file line number | Diff line number | Diff line change
@@ -663,21 +663,51 @@ static bool cpuhp_next_state(bool bringup,
663663
return true;
664664
}
665665

666-
static int cpuhp_invoke_callback_range(bool bringup,
667-
unsigned int cpu,
668-
struct cpuhp_cpu_state *st,
669-
enum cpuhp_state target)
666+
static int __cpuhp_invoke_callback_range(bool bringup,
667+
unsigned int cpu,
668+
struct cpuhp_cpu_state *st,
669+
enum cpuhp_state target,
670+
bool nofail)
670671
{
671672
enum cpuhp_state state;
672-
int err = 0;
673+
int ret = 0;
673674

674675
while (cpuhp_next_state(bringup, &state, st, target)) {
676+
int err;
677+
675678
err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
676-
if (err)
679+
if (!err)
680+
continue;
681+
682+
if (nofail) {
683+
pr_warn("CPU %u %s state %s (%d) failed (%d)\n",
684+
cpu, bringup ? "UP" : "DOWN",
685+
cpuhp_get_step(st->state)->name,
686+
st->state, err);
687+
ret = -1;
688+
} else {
689+
ret = err;
677690
break;
691+
}
678692
}
679693

680-
return err;
694+
return ret;
695+
}
696+
697+
static inline int cpuhp_invoke_callback_range(bool bringup,
698+
unsigned int cpu,
699+
struct cpuhp_cpu_state *st,
700+
enum cpuhp_state target)
701+
{
702+
return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false);
703+
}
704+
705+
static inline void cpuhp_invoke_callback_range_nofail(bool bringup,
706+
unsigned int cpu,
707+
struct cpuhp_cpu_state *st,
708+
enum cpuhp_state target)
709+
{
710+
__cpuhp_invoke_callback_range(bringup, cpu, st, target, true);
681711
}
682712

683713
static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
@@ -999,7 +1029,6 @@ static int take_cpu_down(void *_param)
9991029
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
10001030
enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
10011031
int err, cpu = smp_processor_id();
1002-
int ret;
10031032

10041033
/* Ensure this CPU doesn't handle any more interrupts. */
10051034
err = __cpu_disable();
@@ -1012,13 +1041,10 @@ static int take_cpu_down(void *_param)
10121041
*/
10131042
WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
10141043

1015-
/* Invoke the former CPU_DYING callbacks */
1016-
ret = cpuhp_invoke_callback_range(false, cpu, st, target);
1017-
10181044
/*
1019-
* DYING must not fail!
1045+
* Invoke the former CPU_DYING callbacks. DYING must not fail!
10201046
*/
1021-
WARN_ON_ONCE(ret);
1047+
cpuhp_invoke_callback_range_nofail(false, cpu, st, target);
10221048

10231049
/* Give up timekeeping duties */
10241050
tick_handover_do_timer();
@@ -1296,16 +1322,14 @@ void notify_cpu_starting(unsigned int cpu)
12961322
{
12971323
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
12981324
enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1299-
int ret;
13001325

13011326
rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
13021327
cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1303-
ret = cpuhp_invoke_callback_range(true, cpu, st, target);
13041328

13051329
/*
13061330
* STARTING must not fail!
13071331
*/
1308-
WARN_ON_ONCE(ret);
1332+
cpuhp_invoke_callback_range_nofail(true, cpu, st, target);
13091333
}
13101334

13111335
/*

0 commit comments

Comments
 (0)