Skip to content

Commit 4f7f555

Browse files
Masamitsu Yamazaki authored and cminyard committed
ipmi: Stop timers before cleaning up the module
System may crash after unloading the ipmi_si.ko module because a timer may remain pending and fire after the module has cleaned up its resources.

cleanup_one_si() contains the following processing:

        /*
         * Make sure that interrupts, the timer and the thread are
         * stopped and will not run again.
         */
        if (to_clean->irq_cleanup)
                to_clean->irq_cleanup(to_clean);
        wait_for_timer_and_thread(to_clean);

        /*
         * Timeouts are stopped, now make sure the interrupts are off
         * in the BMC. Note that timers and CPU interrupts are off,
         * so no need for locks.
         */
        while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
                poll(to_clean);
                schedule_timeout_uninterruptible(1);
        }

si_state changes as follows in the while loop calling poll(to_clean):

        SI_GETTING_MESSAGES
          => SI_CHECKING_ENABLES
          => SI_SETTING_ENABLES
          => SI_GETTING_EVENTS
          => SI_NORMAL

As written in the code comments above, timers are expected to stop before the polling loop and not to run again. But the timer is set again in the following call chain when si_state becomes SI_SETTING_ENABLES:

        => poll
          => smi_event_handler
            => handle_transaction_done
               // smi_info->si_state == SI_SETTING_ENABLES
              => start_getting_events
                => start_new_msg
                  => smi_mod_timer
                    => mod_timer

As a result, before the timer set in start_new_msg() expires, the polling loop may see si_state becoming SI_NORMAL and the module clean-up finishes.

For example, a hard LOCKUP and panic occurred as follows. smi_timeout was called after smi_event_handler and kcs_event, and hangs at port_inb() trying to access the I/O port after it was released.
        [exception RIP: port_inb+19]
        RIP: ffffffffc0473053  RSP: ffff88069fdc3d80  RFLAGS: 00000006
        RAX: ffff8806800f8e00  RBX: ffff880682bd9400  RCX: 0000000000000000
        RDX: 0000000000000ca3  RSI: 0000000000000ca3  RDI: ffff8806800f8e40
        RBP: ffff88069fdc3d80   R8: ffffffff81d86dfc   R9: ffffffff81e36426
        R10: 00000000000509f0  R11: 0000000000100000  R12: 0000000000000000
        R13: 0000000000000000  R14: 0000000000000246  R15: ffff8806800f8e00
        ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0000
        --- <NMI exception stack> ---

To fix the problem I defined a flag, timer_can_start, as a member of struct smi_info. The flag is enabled immediately after initializing the timer and disabled immediately before waiting for timer deletion.

Fixes: 0cfec91 ("ipmi: Start the timer and thread on internal msgs")
Signed-off-by: Yamazaki Masamitsu <m-yamazaki@ah.jp.nec.com>
[Adjusted for recent changes in the driver.]
Signed-off-by: Corey Minyard <cminyard@mvista.com>
1 parent 6363b3f commit 4f7f555

File tree

1 file changed

+23
-21
lines changed

1 file changed

+23
-21
lines changed

drivers/char/ipmi/ipmi_si_intf.c

Lines changed: 23 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,9 @@ struct smi_info {
199199
/* The timer for this si. */
200200
struct timer_list si_timer;
201201

202+
/* This flag is set, if the timer can be set */
203+
bool timer_can_start;
204+
202205
/* This flag is set, if the timer is running (timer_pending() isn't enough) */
203206
bool timer_running;
204207

@@ -355,6 +358,8 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
355358

356359
static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
357360
{
361+
if (!smi_info->timer_can_start)
362+
return;
358363
smi_info->last_timeout_jiffies = jiffies;
359364
mod_timer(&smi_info->si_timer, new_val);
360365
smi_info->timer_running = true;
@@ -374,21 +379,18 @@ static void start_new_msg(struct smi_info *smi_info, unsigned char *msg,
374379
smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
375380
}
376381

377-
static void start_check_enables(struct smi_info *smi_info, bool start_timer)
382+
static void start_check_enables(struct smi_info *smi_info)
378383
{
379384
unsigned char msg[2];
380385

381386
msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
382387
msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
383388

384-
if (start_timer)
385-
start_new_msg(smi_info, msg, 2);
386-
else
387-
smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
389+
start_new_msg(smi_info, msg, 2);
388390
smi_info->si_state = SI_CHECKING_ENABLES;
389391
}
390392

391-
static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
393+
static void start_clear_flags(struct smi_info *smi_info)
392394
{
393395
unsigned char msg[3];
394396

@@ -397,10 +399,7 @@ static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
397399
msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
398400
msg[2] = WDT_PRE_TIMEOUT_INT;
399401

400-
if (start_timer)
401-
start_new_msg(smi_info, msg, 3);
402-
else
403-
smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
402+
start_new_msg(smi_info, msg, 3);
404403
smi_info->si_state = SI_CLEARING_FLAGS;
405404
}
406405

@@ -435,11 +434,11 @@ static void start_getting_events(struct smi_info *smi_info)
435434
* Note that we cannot just use disable_irq(), since the interrupt may
436435
* be shared.
437436
*/
438-
static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
437+
static inline bool disable_si_irq(struct smi_info *smi_info)
439438
{
440439
if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) {
441440
smi_info->interrupt_disabled = true;
442-
start_check_enables(smi_info, start_timer);
441+
start_check_enables(smi_info);
443442
return true;
444443
}
445444
return false;
@@ -449,7 +448,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info)
449448
{
450449
if ((smi_info->io.irq) && (smi_info->interrupt_disabled)) {
451450
smi_info->interrupt_disabled = false;
452-
start_check_enables(smi_info, true);
451+
start_check_enables(smi_info);
453452
return true;
454453
}
455454
return false;
@@ -467,7 +466,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info)
467466

468467
msg = ipmi_alloc_smi_msg();
469468
if (!msg) {
470-
if (!disable_si_irq(smi_info, true))
469+
if (!disable_si_irq(smi_info))
471470
smi_info->si_state = SI_NORMAL;
472471
} else if (enable_si_irq(smi_info)) {
473472
ipmi_free_smi_msg(msg);
@@ -483,7 +482,7 @@ static void handle_flags(struct smi_info *smi_info)
483482
/* Watchdog pre-timeout */
484483
smi_inc_stat(smi_info, watchdog_pretimeouts);
485484

486-
start_clear_flags(smi_info, true);
485+
start_clear_flags(smi_info);
487486
smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
488487
if (smi_info->intf)
489488
ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -866,7 +865,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
866865
* disable and messages disabled.
867866
*/
868867
if (smi_info->supports_event_msg_buff || smi_info->io.irq) {
869-
start_check_enables(smi_info, true);
868+
start_check_enables(smi_info);
870869
} else {
871870
smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
872871
if (!smi_info->curr_msg)
@@ -1167,6 +1166,7 @@ static int smi_start_processing(void *send_info,
11671166

11681167
/* Set up the timer that drives the interface. */
11691168
setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);
1169+
new_smi->timer_can_start = true;
11701170
smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
11711171

11721172
/* Try to claim any interrupts. */
@@ -1936,10 +1936,12 @@ static void check_for_broken_irqs(struct smi_info *smi_info)
19361936
check_set_rcv_irq(smi_info);
19371937
}
19381938

1939-
static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
1939+
static inline void stop_timer_and_thread(struct smi_info *smi_info)
19401940
{
19411941
if (smi_info->thread != NULL)
19421942
kthread_stop(smi_info->thread);
1943+
1944+
smi_info->timer_can_start = false;
19431945
if (smi_info->timer_running)
19441946
del_timer_sync(&smi_info->si_timer);
19451947
}
@@ -2152,7 +2154,7 @@ static int try_smi_init(struct smi_info *new_smi)
21522154
* Start clearing the flags before we enable interrupts or the
21532155
* timer to avoid racing with the timer.
21542156
*/
2155-
start_clear_flags(new_smi, false);
2157+
start_clear_flags(new_smi);
21562158

21572159
/*
21582160
* IRQ is defined to be set when non-zero. req_events will
@@ -2238,7 +2240,7 @@ static int try_smi_init(struct smi_info *new_smi)
22382240
dev_set_drvdata(new_smi->io.dev, NULL);
22392241

22402242
out_err_stop_timer:
2241-
wait_for_timer_and_thread(new_smi);
2243+
stop_timer_and_thread(new_smi);
22422244

22432245
out_err:
22442246
new_smi->interrupt_disabled = true;
@@ -2388,7 +2390,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
23882390
*/
23892391
if (to_clean->io.irq_cleanup)
23902392
to_clean->io.irq_cleanup(&to_clean->io);
2391-
wait_for_timer_and_thread(to_clean);
2393+
stop_timer_and_thread(to_clean);
23922394

23932395
/*
23942396
* Timeouts are stopped, now make sure the interrupts are off
@@ -2400,7 +2402,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
24002402
schedule_timeout_uninterruptible(1);
24012403
}
24022404
if (to_clean->handlers)
2403-
disable_si_irq(to_clean, false);
2405+
disable_si_irq(to_clean);
24042406
while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
24052407
poll(to_clean);
24062408
schedule_timeout_uninterruptible(1);

0 commit comments

Comments
 (0)