Skip to content

Commit 7a33cd0

Browse files
Masamitsu Yamazakigregkh
authored andcommitted
ipmi: Stop timers before cleaning up the module
commit 4f7f555 upstream. System may crash after unloading ipmi_si.ko module because a timer may remain and fire after the module cleaned up resources. cleanup_one_si() contains the following processing. /* * Make sure that interrupts, the timer and the thread are * stopped and will not run again. */ if (to_clean->irq_cleanup) to_clean->irq_cleanup(to_clean); wait_for_timer_and_thread(to_clean); /* * Timeouts are stopped, now make sure the interrupts are off * in the BMC. Note that timers and CPU interrupts are off, * so no need for locks. */ while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { poll(to_clean); schedule_timeout_uninterruptible(1); } si_state changes as following in the while loop calling poll(to_clean). SI_GETTING_MESSAGES => SI_CHECKING_ENABLES => SI_SETTING_ENABLES => SI_GETTING_EVENTS => SI_NORMAL As written in the code comments above, timers are expected to stop before the polling loop and not to run again. But the timer is set again in the following process when si_state becomes SI_SETTING_ENABLES. => poll => smi_event_handler => handle_transaction_done // smi_info->si_state == SI_SETTING_ENABLES => start_getting_events => start_new_msg => smi_mod_timer => mod_timer As a result, before the timer set in start_new_msg() expires, the polling loop may see si_state becoming SI_NORMAL and the module clean-up finishes. For example, hard LOCKUP and panic occurred as following. smi_timeout was called after smi_event_handler, kcs_event and hangs at port_inb() trying to access I/O port after release. [exception RIP: port_inb+19] RIP: ffffffffc0473053 RSP: ffff88069fdc3d80 RFLAGS: 00000006 RAX: ffff8806800f8e00 RBX: ffff880682bd9400 RCX: 0000000000000000 RDX: 0000000000000ca3 RSI: 0000000000000ca3 RDI: ffff8806800f8e40 RBP: ffff88069fdc3d80 R8: ffffffff81d86dfc R9: ffffffff81e36426 R10: 00000000000509f0 R11: 0000000000100000 R12: 0000000000]:000000 R13: 0000000000000000 R14: 0000000000000246 R15: ffff8806800f8e00 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0000 --- <NMI exception stack> --- To fix the problem I defined a flag, timer_can_start, as member of struct smi_info. The flag is enabled immediately after initializing the timer and disabled immediately before waiting for timer deletion. Fixes: 0cfec91 ("ipmi: Start the timer and thread on internal msgs") Signed-off-by: Yamazaki Masamitsu <m-yamazaki@ah.jp.nec.com> [Some fairly major changes went into the IPMI driver in 4.15, so this required a backport as the code had changed and moved to a different file.] Signed-off-by: Corey Minyard <cminyard@mvista.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 0c88233 commit 7a33cd0

File tree

1 file changed

+23
-21
lines changed

1 file changed

+23
-21
lines changed

drivers/char/ipmi/ipmi_si_intf.c

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,9 @@ struct smi_info {
242242
/* The timer for this si. */
243243
struct timer_list si_timer;
244244

245+
/* This flag is set, if the timer can be set */
246+
bool timer_can_start;
247+
245248
/* This flag is set, if the timer is running (timer_pending() isn't enough) */
246249
bool timer_running;
247250

@@ -417,6 +420,8 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
417420

418421
static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
419422
{
423+
if (!smi_info->timer_can_start)
424+
return;
420425
smi_info->last_timeout_jiffies = jiffies;
421426
mod_timer(&smi_info->si_timer, new_val);
422427
smi_info->timer_running = true;
@@ -436,21 +441,18 @@ static void start_new_msg(struct smi_info *smi_info, unsigned char *msg,
436441
smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
437442
}
438443

439-
static void start_check_enables(struct smi_info *smi_info, bool start_timer)
444+
static void start_check_enables(struct smi_info *smi_info)
440445
{
441446
unsigned char msg[2];
442447

443448
msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
444449
msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
445450

446-
if (start_timer)
447-
start_new_msg(smi_info, msg, 2);
448-
else
449-
smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
451+
start_new_msg(smi_info, msg, 2);
450452
smi_info->si_state = SI_CHECKING_ENABLES;
451453
}
452454

453-
static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
455+
static void start_clear_flags(struct smi_info *smi_info)
454456
{
455457
unsigned char msg[3];
456458

@@ -459,10 +461,7 @@ static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
459461
msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
460462
msg[2] = WDT_PRE_TIMEOUT_INT;
461463

462-
if (start_timer)
463-
start_new_msg(smi_info, msg, 3);
464-
else
465-
smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
464+
start_new_msg(smi_info, msg, 3);
466465
smi_info->si_state = SI_CLEARING_FLAGS;
467466
}
468467

@@ -497,11 +496,11 @@ static void start_getting_events(struct smi_info *smi_info)
497496
* Note that we cannot just use disable_irq(), since the interrupt may
498497
* be shared.
499498
*/
500-
static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
499+
static inline bool disable_si_irq(struct smi_info *smi_info)
501500
{
502501
if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
503502
smi_info->interrupt_disabled = true;
504-
start_check_enables(smi_info, start_timer);
503+
start_check_enables(smi_info);
505504
return true;
506505
}
507506
return false;
@@ -511,7 +510,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info)
511510
{
512511
if ((smi_info->irq) && (smi_info->interrupt_disabled)) {
513512
smi_info->interrupt_disabled = false;
514-
start_check_enables(smi_info, true);
513+
start_check_enables(smi_info);
515514
return true;
516515
}
517516
return false;
@@ -529,7 +528,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info)
529528

530529
msg = ipmi_alloc_smi_msg();
531530
if (!msg) {
532-
if (!disable_si_irq(smi_info, true))
531+
if (!disable_si_irq(smi_info))
533532
smi_info->si_state = SI_NORMAL;
534533
} else if (enable_si_irq(smi_info)) {
535534
ipmi_free_smi_msg(msg);
@@ -545,7 +544,7 @@ static void handle_flags(struct smi_info *smi_info)
545544
/* Watchdog pre-timeout */
546545
smi_inc_stat(smi_info, watchdog_pretimeouts);
547546

548-
start_clear_flags(smi_info, true);
547+
start_clear_flags(smi_info);
549548
smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
550549
if (smi_info->intf)
551550
ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -928,7 +927,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
928927
* disable and messages disabled.
929928
*/
930929
if (smi_info->supports_event_msg_buff || smi_info->irq) {
931-
start_check_enables(smi_info, true);
930+
start_check_enables(smi_info);
932931
} else {
933932
smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
934933
if (!smi_info->curr_msg)
@@ -1235,6 +1234,7 @@ static int smi_start_processing(void *send_info,
12351234

12361235
/* Set up the timer that drives the interface. */
12371236
setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);
1237+
new_smi->timer_can_start = true;
12381238
smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
12391239

12401240
/* Try to claim any interrupts. */
@@ -3416,10 +3416,12 @@ static void check_for_broken_irqs(struct smi_info *smi_info)
34163416
check_set_rcv_irq(smi_info);
34173417
}
34183418

3419-
static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
3419+
static inline void stop_timer_and_thread(struct smi_info *smi_info)
34203420
{
34213421
if (smi_info->thread != NULL)
34223422
kthread_stop(smi_info->thread);
3423+
3424+
smi_info->timer_can_start = false;
34233425
if (smi_info->timer_running)
34243426
del_timer_sync(&smi_info->si_timer);
34253427
}
@@ -3605,7 +3607,7 @@ static int try_smi_init(struct smi_info *new_smi)
36053607
* Start clearing the flags before we enable interrupts or the
36063608
* timer to avoid racing with the timer.
36073609
*/
3608-
start_clear_flags(new_smi, false);
3610+
start_clear_flags(new_smi);
36093611

36103612
/*
36113613
* IRQ is defined to be set when non-zero. req_events will
@@ -3674,7 +3676,7 @@ static int try_smi_init(struct smi_info *new_smi)
36743676
return 0;
36753677

36763678
out_err_stop_timer:
3677-
wait_for_timer_and_thread(new_smi);
3679+
stop_timer_and_thread(new_smi);
36783680

36793681
out_err:
36803682
new_smi->interrupt_disabled = true;
@@ -3866,7 +3868,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
38663868
*/
38673869
if (to_clean->irq_cleanup)
38683870
to_clean->irq_cleanup(to_clean);
3869-
wait_for_timer_and_thread(to_clean);
3871+
stop_timer_and_thread(to_clean);
38703872

38713873
/*
38723874
* Timeouts are stopped, now make sure the interrupts are off
@@ -3878,7 +3880,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
38783880
schedule_timeout_uninterruptible(1);
38793881
}
38803882
if (to_clean->handlers)
3881-
disable_si_irq(to_clean, false);
3883+
disable_si_irq(to_clean);
38823884
while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
38833885
poll(to_clean);
38843886
schedule_timeout_uninterruptible(1);

0 commit comments

Comments
 (0)