Skip to content

Commit bfe6ed0

Browse files
kirylhansendc
authored and committed
x86/tdx: Add HLT support for TDX guests
The HLT instruction is a privileged instruction; executing it stops instruction execution and places the processor in a HALT state. It is used in the kernel for cases like reboot, the idle loop and exception fixup handlers. For the idle case, interrupts will be enabled (using STI) before the HLT instruction (this is also called safe_halt()). To support the HLT instruction in TDX guests, it needs to be emulated using TDVMCALL (hypercall to VMM). More details about it can be found in the Intel Trust Domain Extensions (Intel TDX) Guest-Host-Communication Interface (GHCI) specification, section TDVMCALL[Instruction.HLT]. In TDX guests, executing the HLT instruction will generate a #VE, which is used to emulate the HLT instruction. But #VE based emulation will not work for the safe_halt() flavor, because it requires the STI instruction to be executed just before the TDCALL. Since the idle loop is the only user of the safe_halt() variant, handle it as a special case. To avoid the *safe_halt() call in the idle function, define tdx_guest_idle() and use it to override the "x86_idle" function pointer for a valid TDX guest. Alternative choices like PV ops have been considered for adding safe_halt() support. But they were rejected because HLT paravirt calls only exist under PARAVIRT_XXL, and enabling it in a TDX guest just for the safe_halt() use case is not worth the cost. Co-developed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Reviewed-by: Andi Kleen <ak@linux.intel.com> Reviewed-by: Tony Luck <tony.luck@intel.com> Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com> Link: https://lkml.kernel.org/r/20220405232939.73860-9-kirill.shutemov@linux.intel.com
1 parent 9a22bf6 commit bfe6ed0

File tree

4 files changed

+112
-2
lines changed

4 files changed

+112
-2
lines changed

arch/x86/coco/tdx/tdcall.S

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,19 @@ SYM_FUNC_START(__tdx_hypercall)
139139

140140
movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx
141141

142+
/*
143+
* For the idle loop STI needs to be called directly before the TDCALL
144+
* that enters idle (EXIT_REASON_HLT case). STI instruction enables
145+
* interrupts only one instruction later. If there is a window between
146+
* STI and the instruction that emulates the HALT state, there is a
147+
* chance for interrupts to happen in this window, which can delay the
148+
* HLT operation indefinitely. Since this is not the desired
149+
* result, conditionally call STI before TDCALL.
150+
*/
151+
testq $TDX_HCALL_ISSUE_STI, %rsi
152+
jz .Lskip_sti
153+
sti
154+
.Lskip_sti:
142155
tdcall
143156

144157
/*

arch/x86/coco/tdx/tdx.c

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <linux/cpufeature.h>
88
#include <asm/coco.h>
99
#include <asm/tdx.h>
10+
#include <asm/vmx.h>
1011

1112
/* TDX module Call Leaf IDs */
1213
#define TDX_GET_INFO 1
@@ -36,6 +37,17 @@ void __tdx_hypercall_failed(void)
3637
panic("TDVMCALL failed. TDX module bug?");
3738
}
3839

40+
/*
41+
* The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
42+
* independently from but are currently matched 1:1 with VMX EXIT_REASONs.
43+
* Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
44+
* guest sides of these calls.
45+
*/
46+
static u64 hcall_func(u64 exit_reason)
47+
{
48+
return exit_reason;
49+
}
50+
3951
/*
4052
* Used for TDX guests to make calls directly to the TD module. This
4153
* should only be used for calls that have no legitimate reason to fail
@@ -74,6 +86,62 @@ static u64 get_cc_mask(void)
7486
return BIT_ULL(gpa_width - 1);
7587
}
7688

89+
static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
90+
{
91+
struct tdx_hypercall_args args = {
92+
.r10 = TDX_HYPERCALL_STANDARD,
93+
.r11 = hcall_func(EXIT_REASON_HLT),
94+
.r12 = irq_disabled,
95+
};
96+
97+
/*
98+
* Emulate HLT operation via hypercall. More info about ABI
99+
* can be found in TDX Guest-Host-Communication Interface
100+
* (GHCI), section 3.8 TDG.VP.VMCALL<Instruction.HLT>.
101+
*
102+
* The VMM uses the "IRQ disabled" param to understand IRQ
103+
* enabled status (RFLAGS.IF) of the TD guest and to determine
104+
* whether or not it should schedule the halted vCPU if an
105+
* IRQ becomes pending. E.g. if IRQs are disabled, the VMM
106+
* can keep the vCPU in virtual HLT, even if an IRQ is
107+
* pending, without hanging/breaking the guest.
108+
*/
109+
return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
110+
}
111+
112+
static bool handle_halt(void)
113+
{
114+
/*
115+
* Since non safe halt is mainly used in CPU offlining
116+
* and the guest will always stay in the halt state, don't
117+
* call the STI instruction (set do_sti as false).
118+
*/
119+
const bool irq_disabled = irqs_disabled();
120+
const bool do_sti = false;
121+
122+
if (__halt(irq_disabled, do_sti))
123+
return false;
124+
125+
return true;
126+
}
127+
128+
void __cpuidle tdx_safe_halt(void)
129+
{
130+
/*
131+
* For do_sti=true case, __tdx_hypercall() function enables
132+
* interrupts using the STI instruction before the TDCALL. So
133+
* set irq_disabled as false.
134+
*/
135+
const bool irq_disabled = false;
136+
const bool do_sti = true;
137+
138+
/*
139+
* Use WARN_ONCE() to report the failure.
140+
*/
141+
if (__halt(irq_disabled, do_sti))
142+
WARN_ONCE(1, "HLT instruction emulation failed\n");
143+
}
144+
77145
void tdx_get_ve_info(struct ve_info *ve)
78146
{
79147
struct tdx_module_output out;
@@ -104,11 +172,32 @@ void tdx_get_ve_info(struct ve_info *ve)
104172
ve->instr_info = upper_32_bits(out.r10);
105173
}
106174

175+
/* Handle the kernel #VE */
176+
static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
177+
{
178+
switch (ve->exit_reason) {
179+
case EXIT_REASON_HLT:
180+
return handle_halt();
181+
default:
182+
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
183+
return false;
184+
}
185+
}
186+
107187
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
108188
{
109-
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
189+
bool ret;
190+
191+
if (user_mode(regs))
192+
ret = false;
193+
else
194+
ret = virt_exception_kernel(regs, ve);
195+
196+
/* After successful #VE handling, move the IP */
197+
if (ret)
198+
regs->ip += ve->instr_len;
110199

111-
return false;
200+
return ret;
112201
}
113202

114203
void __init tdx_early_init(void)

arch/x86/include/asm/tdx.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define TDX_HYPERCALL_STANDARD 0
1515

1616
#define TDX_HCALL_HAS_OUTPUT BIT(0)
17+
#define TDX_HCALL_ISSUE_STI BIT(1)
1718

1819
/*
1920
* SW-defined error codes.
@@ -91,9 +92,12 @@ void tdx_get_ve_info(struct ve_info *ve);
9192

9293
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
9394

95+
void tdx_safe_halt(void);
96+
9497
#else
9598

9699
static inline void tdx_early_init(void) { };
100+
static inline void tdx_safe_halt(void) { };
97101

98102
#endif /* CONFIG_INTEL_TDX_GUEST */
99103

arch/x86/kernel/process.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include <asm/proto.h>
4747
#include <asm/frame.h>
4848
#include <asm/unwind.h>
49+
#include <asm/tdx.h>
4950

5051
#include "process.h"
5152

@@ -873,6 +874,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
873874
} else if (prefer_mwait_c1_over_halt(c)) {
874875
pr_info("using mwait in idle threads\n");
875876
x86_idle = mwait_idle;
877+
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
878+
pr_info("using TDX aware idle routine\n");
879+
x86_idle = tdx_safe_halt;
876880
} else
877881
x86_idle = default_idle;
878882
}

0 commit comments

Comments
 (0)