Skip to content

Commit 9592747

Browse files
committed
x86, traps: Track entry into and exit from IST context
We currently pretend that IST context is like standard exception context, but this is incorrect. IST entries from userspace are like standard exceptions except that they use per-cpu stacks, so they are atomic. IST entries from kernel space are like NMIs from RCU's perspective -- they are not quiescent states even if they interrupted the kernel during a quiescent state. Add and use ist_enter and ist_exit to track IST context. Even though x86_32 has no IST stacks, we track these interrupts the same way. This fixes two issues: - Scheduling from an IST interrupt handler will now warn. It would previously appear to work as long as we got lucky and nothing overwrote the stack frame. (I don't know of any bugs in this that would trigger the warning, but it's good to be on the safe side.) - RCU handling in IST context was dangerous. As far as I know, only machine checks were likely to trigger this, but it's good to be on the safe side. Note that the machine check handlers appear to have been missing any context tracking at all before this patch. Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Josh Triplett <josh@joshtriplett.org> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
1 parent 48e08d0 commit 9592747

File tree

5 files changed

+61
-6
lines changed

5 files changed

+61
-6
lines changed

arch/x86/include/asm/traps.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef _ASM_X86_TRAPS_H
22
#define _ASM_X86_TRAPS_H
33

4+
#include <linux/context_tracking_state.h>
45
#include <linux/kprobes.h>
56

67
#include <asm/debugreg.h>
@@ -110,6 +111,9 @@ asmlinkage void smp_thermal_interrupt(void);
110111
asmlinkage void mce_threshold_interrupt(void);
111112
#endif
112113

114+
extern enum ctx_state ist_enter(struct pt_regs *regs);
115+
extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state);
116+
113117
/* Interrupts/Exceptions */
114118
enum {
115119
X86_TRAP_DE = 0, /* 0, Divide-by-zero */

arch/x86/kernel/cpu/mcheck/mce.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include <linux/export.h>
4444

4545
#include <asm/processor.h>
46+
#include <asm/traps.h>
4647
#include <asm/mce.h>
4748
#include <asm/msr.h>
4849

@@ -1063,6 +1064,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
10631064
{
10641065
struct mca_config *cfg = &mca_cfg;
10651066
struct mce m, *final;
1067+
enum ctx_state prev_state;
10661068
int i;
10671069
int worst = 0;
10681070
int severity;
@@ -1085,6 +1087,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
10851087
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
10861088
char *msg = "Unknown";
10871089

1090+
prev_state = ist_enter(regs);
1091+
10881092
this_cpu_inc(mce_exception_count);
10891093

10901094
if (!cfg->banks)
@@ -1216,6 +1220,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
12161220
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
12171221
out:
12181222
sync_core();
1223+
ist_exit(regs, prev_state);
12191224
}
12201225
EXPORT_SYMBOL_GPL(do_machine_check);
12211226

arch/x86/kernel/cpu/mcheck/p5.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/smp.h>
99

1010
#include <asm/processor.h>
11+
#include <asm/traps.h>
1112
#include <asm/mce.h>
1213
#include <asm/msr.h>
1314

@@ -17,8 +18,11 @@ int mce_p5_enabled __read_mostly;
1718
/* Machine check handler for Pentium class Intel CPUs: */
1819
static void pentium_machine_check(struct pt_regs *regs, long error_code)
1920
{
21+
enum ctx_state prev_state;
2022
u32 loaddr, hi, lotype;
2123

24+
prev_state = ist_enter(regs);
25+
2226
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
2327
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
2428

@@ -33,6 +37,8 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
3337
}
3438

3539
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
40+
41+
ist_exit(regs, prev_state);
3642
}
3743

3844
/* Set up machine check reporting for processors with Intel style MCE: */

arch/x86/kernel/cpu/mcheck/winchip.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,19 @@
77
#include <linux/types.h>
88

99
#include <asm/processor.h>
10+
#include <asm/traps.h>
1011
#include <asm/mce.h>
1112
#include <asm/msr.h>
1213

1314
/* Machine check handler for WinChip C6: */
1415
static void winchip_machine_check(struct pt_regs *regs, long error_code)
1516
{
17+
enum ctx_state prev_state = ist_enter(regs);
18+
1619
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
1720
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
21+
22+
ist_exit(regs, prev_state);
1823
}
1924

2025
/* Set up machine check reporting on the Winchip C6 series */

arch/x86/kernel/traps.c

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,39 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
108108
preempt_count_dec();
109109
}
110110

111+
enum ctx_state ist_enter(struct pt_regs *regs)
112+
{
113+
/*
114+
* We are atomic because we're on the IST stack (or we're on x86_32,
115+
* in which case we still shouldn't schedule.
116+
*/
117+
preempt_count_add(HARDIRQ_OFFSET);
118+
119+
if (user_mode_vm(regs)) {
120+
/* Other than that, we're just an exception. */
121+
return exception_enter();
122+
} else {
123+
/*
124+
* We might have interrupted pretty much anything. In
125+
* fact, if we're a machine check, we can even interrupt
126+
* NMI processing. We don't want in_nmi() to return true,
127+
* but we need to notify RCU.
128+
*/
129+
rcu_nmi_enter();
130+
return IN_KERNEL; /* the value is irrelevant. */
131+
}
132+
}
133+
134+
void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
135+
{
136+
preempt_count_sub(HARDIRQ_OFFSET);
137+
138+
if (user_mode_vm(regs))
139+
return exception_exit(prev_state);
140+
else
141+
rcu_nmi_exit();
142+
}
143+
111144
static nokprobe_inline int
112145
do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
113146
struct pt_regs *regs, long error_code)
@@ -251,6 +284,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
251284
* end up promoting it to a doublefault. In that case, modify
252285
* the stack to make it look like we just entered the #GP
253286
* handler from user space, similar to bad_iret.
287+
*
288+
* No need for ist_enter here because we don't use RCU.
254289
*/
255290
if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
256291
regs->cs == __KERNEL_CS &&
@@ -263,12 +298,12 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
263298
normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
264299
regs->ip = (unsigned long)general_protection;
265300
regs->sp = (unsigned long)&normal_regs->orig_ax;
301+
266302
return;
267303
}
268304
#endif
269305

270-
exception_enter();
271-
/* Return not checked because double check cannot be ignored */
306+
ist_enter(regs); /* Discard prev_state because we won't return. */
272307
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
273308

274309
tsk->thread.error_code = error_code;
@@ -434,7 +469,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
434469
if (poke_int3_handler(regs))
435470
return;
436471

437-
prev_state = exception_enter();
472+
prev_state = ist_enter(regs);
438473
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
439474
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
440475
SIGTRAP) == NOTIFY_STOP)
@@ -460,7 +495,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
460495
preempt_conditional_cli(regs);
461496
debug_stack_usage_dec();
462497
exit:
463-
exception_exit(prev_state);
498+
ist_exit(regs, prev_state);
464499
}
465500
NOKPROBE_SYMBOL(do_int3);
466501

@@ -541,7 +576,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
541576
unsigned long dr6;
542577
int si_code;
543578

544-
prev_state = exception_enter();
579+
prev_state = ist_enter(regs);
545580

546581
get_debugreg(dr6, 6);
547582

@@ -616,7 +651,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
616651
debug_stack_usage_dec();
617652

618653
exit:
619-
exception_exit(prev_state);
654+
ist_exit(regs, prev_state);
620655
}
621656
NOKPROBE_SYMBOL(do_debug);
622657

0 commit comments

Comments
 (0)