
Commit 00a5ae2 (merge, 2 parents: d6bbd51 + 2fd9c41)

Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 page table isolation fixes from Thomas Gleixner:
 "A couple of urgent fixes for PTI:

   - Fix a PTE mismatch between user and kernel visible mapping of the
     cpu entry area (differs vs. the GLB bit) and causes a TLB mismatch
     MCE on older AMD K8 machines

   - Fix the misplaced CR3 switch in the SYSCALL compat entry code which
     causes access to unmapped kernel memory resulting in double faults.

   - Fix the section mismatch of the cpu_tss_rw percpu storage caused by
     using a different mechanism for declaration and definition.

   - Two fixes for dumpstack which help to decode entry stack issues
     better

   - Enable PTI by default in Kconfig. We should have done that earlier,
     but it slipped through the cracks.

   - Exclude AMD from the PTI enforcement. Not necessarily a fix, but if
     AMD is so confident that they are not affected, then we should not
     burden users with the overhead"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/process: Define cpu_tss_rw in same section as declaration
  x86/pti: Switch to kernel CR3 at early in entry_SYSCALL_compat()
  x86/dumpstack: Print registers for first stack frame
  x86/dumpstack: Fix partial register dumps
  x86/pti: Make sure the user/kernel PTEs match
  x86/cpu, x86/pti: Do not enable PTI on AMD processors
  x86/pti: Enable PTI by default

8 files changed, +48 −25 lines

arch/x86/entry/entry_64_compat.S

+6-7
@@ -190,8 +190,13 @@ ENTRY(entry_SYSCALL_compat)
 	/* Interrupts are off on entry. */
 	swapgs
 
-	/* Stash user ESP and switch to the kernel stack. */
+	/* Stash user ESP */
 	movl	%esp, %r8d
+
+	/* Use %rsp as scratch reg. User ESP is stashed in r8 */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+	/* Switch to the kernel stack */
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/* Construct struct pt_regs on stack */
@@ -219,12 +224,6 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 	pushq	$0			/* pt_regs->r14 = 0 */
 	pushq	$0			/* pt_regs->r15 = 0 */
 
-	/*
-	 * We just saved %rdi so it is safe to clobber. It is not
-	 * preserved during the C calls inside TRACE_IRQS_OFF anyway.
-	 */
-	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
-
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
 	 * turned them off.

arch/x86/include/asm/unwind.h

+13-4
@@ -56,18 +56,27 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 
 #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
 /*
- * WARNING: The entire pt_regs may not be safe to dereference. In some cases,
- * only the iret frame registers are accessible. Use with caution!
+ * If 'partial' returns true, only the iret frame registers are valid.
  */
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+						    bool *partial)
 {
 	if (unwind_done(state))
 		return NULL;
 
+	if (partial) {
+#ifdef CONFIG_UNWINDER_ORC
+		*partial = !state->full_regs;
+#else
+		*partial = false;
+#endif
+	}
+
 	return state->regs;
 }
 #else
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+						    bool *partial)
 {
 	return NULL;
 }
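
The signature change ripples to every caller of unwind_get_entry_regs(). A minimal sketch of the intended call pattern follows (dump_entry_frame() is a hypothetical helper used only for illustration; the real callers are updated in dumpstack.c and stacktrace.c below):

	/* Sketch only -- not part of this commit. */
	static void dump_entry_frame(struct unwind_state *state)
	{
		bool partial;
		struct pt_regs *regs = unwind_get_entry_regs(state, &partial);

		if (!regs)
			return;			/* this frame has no entry regs */

		if (partial)
			show_iret_regs(regs);	/* only the iret frame (ip/cs/flags/sp/ss) is valid */
		else
			__show_regs(regs, 0);	/* the full pt_regs were saved */
	}

Callers that do not care about the distinction can pass NULL for 'partial', which is exactly what __save_stack_trace_reliable() does in the stacktrace.c hunk.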

arch/x86/kernel/cpu/common.c

+2-2
@@ -923,8 +923,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 
-	/* Assume for now that ALL x86 CPUs are insecure */
-	setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
+	if (c->x86_vendor != X86_VENDOR_AMD)
+		setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
 
 	fpu__init_system(c);
 
arch/x86/kernel/dumpstack.c

+22-9
@@ -76,12 +76,23 @@ void show_iret_regs(struct pt_regs *regs)
 		regs->sp, regs->flags);
 }
 
-static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
+static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
+				  bool partial)
 {
-	if (on_stack(info, regs, sizeof(*regs)))
+	/*
+	 * These on_stack() checks aren't strictly necessary: the unwind code
+	 * has already validated the 'regs' pointer. The checks are done for
+	 * ordering reasons: if the registers are on the next stack, we don't
+	 * want to print them out yet. Otherwise they'll be shown as part of
+	 * the wrong stack. Later, when show_trace_log_lvl() switches to the
+	 * next stack, this function will be called again with the same regs so
+	 * they can be printed in the right context.
+	 */
+	if (!partial && on_stack(info, regs, sizeof(*regs))) {
 		__show_regs(regs, 0);
-	else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
-			  IRET_FRAME_SIZE)) {
+
+	} else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+				       IRET_FRAME_SIZE)) {
 		/*
 		 * When an interrupt or exception occurs in entry code, the
 		 * full pt_regs might not have been saved yet. In that case
@@ -98,11 +109,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	struct stack_info stack_info = {0};
 	unsigned long visit_mask = 0;
 	int graph_idx = 0;
+	bool partial;
 
 	printk("%sCall Trace:\n", log_lvl);
 
 	unwind_start(&state, task, regs, stack);
 	stack = stack ? : get_stack_pointer(task, regs);
+	regs = unwind_get_entry_regs(&state, &partial);
 
 	/*
 	 * Iterate through the stacks, starting with the current stack pointer.
@@ -120,7 +133,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	 * - hardirq stack
 	 * - entry stack
 	 */
-	for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+	for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
 		const char *stack_name;
 
 		if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
@@ -140,7 +153,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			printk("%s <%s>\n", log_lvl, stack_name);
 
 		if (regs)
-			show_regs_safe(&stack_info, regs);
+			show_regs_if_on_stack(&stack_info, regs, partial);
 
 		/*
 		 * Scan the stack, printing any text addresses we find. At the
@@ -164,7 +177,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
 			/*
 			 * Don't print regs->ip again if it was already printed
-			 * by show_regs_safe() below.
+			 * by show_regs_if_on_stack().
 			 */
 			if (regs && stack == &regs->ip)
 				goto next;
@@ -199,9 +212,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			unwind_next_frame(&state);
 
 			/* if the frame has entry regs, print them */
-			regs = unwind_get_entry_regs(&state);
+			regs = unwind_get_entry_regs(&state, &partial);
 			if (regs)
-				show_regs_safe(&stack_info, regs);
+				show_regs_if_on_stack(&stack_info, regs, partial);
 		}
 
 		if (stack_name)
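
The deferral described in the new comment block boils down to a plain containment test on the stack window being dumped. A minimal, self-contained sketch of that kind of range check (illustrative only; object_on_stack() is a hypothetical helper, not the kernel's on_stack()):

	#include <stdbool.h>
	#include <stddef.h>

	/*
	 * Illustrative only: registers are printed under a stack banner only
	 * when the window [begin, end) of the stack currently being dumped
	 * actually contains them; otherwise printing is deferred until the
	 * dumper reaches that stack on a later iteration.
	 */
	static bool object_on_stack(const void *begin, const void *end,
				    const void *obj, size_t len)
	{
		const char *b = begin, *e = end, *o = obj;

		return o >= b && o + len <= e;
	}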

arch/x86/kernel/process.c

+1-1
@@ -47,7 +47,7 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
+__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
 	.x86_tss = {
 		/*
 		 * .sp0 is only used when entering ring 0 from a lower
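
The rule behind this fix is that the DECLARE_PER_CPU_*/DEFINE_PER_CPU_* pair must name the same per-cpu section variant, otherwise the variable lands in a different section than its declaration promises. The declaration side is reproduced below for context only; it lives in arch/x86/include/asm/processor.h and is not part of this diff:

	/* arch/x86/include/asm/processor.h -- declaration (unchanged, for context): */
	DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);

	/* arch/x86/kernel/process.c -- definition, now matching (as patched above): */
	__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = { /* ... */ };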

arch/x86/kernel/stacktrace.c

+1-1
@@ -102,7 +102,7 @@ __save_stack_trace_reliable(struct stack_trace *trace,
 	for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
 	     unwind_next_frame(&state)) {
 
-		regs = unwind_get_entry_regs(&state);
+		regs = unwind_get_entry_regs(&state, NULL);
 		if (regs) {
 			/*
 			 * Kernel mode registers on the stack indicate an

arch/x86/mm/pti.c

+2-1
@@ -367,7 +367,8 @@ static void __init pti_setup_espfix64(void)
 static void __init pti_clone_entry_text(void)
 {
 	pti_clone_pmds((unsigned long) __entry_text_start,
-		       (unsigned long) __irqentry_text_end, _PAGE_RW);
+		       (unsigned long) __irqentry_text_end,
+		       _PAGE_RW | _PAGE_GLOBAL);
 }
 
 /*

security/Kconfig

+1
@@ -56,6 +56,7 @@ config SECURITY_NETWORK
 
 config PAGE_TABLE_ISOLATION
 	bool "Remove the kernel mapping in user mode"
+	default y
 	depends on X86_64 && !UML
 	help
 	  This feature reduces the number of hardware side channels by
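
With the added default, a freshly generated x86-64 configuration enables the option unless it is deselected explicitly; the resulting .config fragment (symbol name taken from the option above) is simply:

	CONFIG_PAGE_TABLE_ISOLATION=y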
