Skip to content

Commit 14b6320

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "x86 and selftests fixes.

  x86:

   - When emulating a guest TLB flush for a nested guest, flush vpid01, not vpid02, if L2 is active but VPID is disabled in vmcs12, i.e. if L2 and L1 are sharing VPID '0' (from L1's perspective).

   - Fix a bug in the SNP initialization flow where KVM would return '0' to userspace instead of -errno on failure.

   - Move the Intel PT virtualization (i.e. outputting host trace to host buffer and guest trace to guest buffer) behind CONFIG_BROKEN.

   - Fix memory leak on failure of KVM_SEV_SNP_LAUNCH_START

   - Fix a bug where KVM fails to inject an interrupt from the IRR after KVM_SET_LAPIC.

  Selftests:

   - Increase the timeout for the memslot performance selftest to avoid false failures on arm64 and nested x86 platforms.

   - Fix a goof in the guest_memfd selftest where a for-loop initialized a bit mask to zero instead of BIT(0).

   - Disable strict aliasing when building KVM selftests to prevent the compiler from treating things like "u64 *" to "uint64_t *" cases as undefined behavior, which can lead to nasty, hard to debug failures.

   - Force -march=x86-64-v2 for KVM x86 selftests if and only if the uarch is supported by the compiler.

   - Fix broken compilation of kvm selftests after a header sync in tools/"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: VMX: Bury Intel PT virtualization (guest/host mode) behind CONFIG_BROKEN
  KVM: x86: Unconditionally set irr_pending when updating APICv state
  kvm: svm: Fix gctx page leak on invalid inputs
  KVM: selftests: use X86_MEMTYPE_WB instead of VMX_BASIC_MEM_TYPE_WB
  KVM: SVM: Propagate error from snp_guest_req_init() to userspace
  KVM: nVMX: Treat vpid01 as current if L2 is active, but with VPID disabled
  KVM: selftests: Don't force -march=x86-64-v2 if it's unsupported
  KVM: selftests: Disable strict aliasing
  KVM: selftests: fix unintentional noop test in guest_memfd_test.c
  KVM: selftests: memslot_perf_test: increase guest sync timeout
2 parents 5456ec9 + aa0d42c commit 14b6320

File tree

8 files changed

+65
-31
lines changed

8 files changed

+65
-31
lines changed

arch/x86/kvm/lapic.c

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2629,19 +2629,26 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
26292629
{
26302630
struct kvm_lapic *apic = vcpu->arch.apic;
26312631

2632-
if (apic->apicv_active) {
2633-
/* irr_pending is always true when apicv is activated. */
2634-
apic->irr_pending = true;
2632+
/*
2633+
* When APICv is enabled, KVM must always search the IRR for a pending
2634+
* IRQ, as other vCPUs and devices can set IRR bits even if the vCPU
2635+
* isn't running. If APICv is disabled, KVM _should_ search the IRR
2636+
* for a pending IRQ. But KVM currently doesn't ensure *all* hardware,
2637+
* e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching
2638+
* the IRR at this time could race with IRQ delivery from hardware that
2639+
* still sees APICv as being enabled.
2640+
*
2641+
* FIXME: Ensure other vCPUs and devices observe the change in APICv
2642+
* state prior to updating KVM's metadata caches, so that KVM
2643+
* can safely search the IRR and set irr_pending accordingly.
2644+
*/
2645+
apic->irr_pending = true;
2646+
2647+
if (apic->apicv_active)
26352648
apic->isr_count = 1;
2636-
} else {
2637-
/*
2638-
* Don't clear irr_pending, searching the IRR can race with
2639-
* updates from the CPU as APICv is still active from hardware's
2640-
* perspective. The flag will be cleared as appropriate when
2641-
* KVM injects the interrupt.
2642-
*/
2649+
else
26432650
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2644-
}
2651+
26452652
apic->highest_isr_cache = -1;
26462653
}
26472654

arch/x86/kvm/svm/sev.c

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -450,8 +450,11 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
450450
goto e_free;
451451

452452
/* This needs to happen after SEV/SNP firmware initialization. */
453-
if (vm_type == KVM_X86_SNP_VM && snp_guest_req_init(kvm))
454-
goto e_free;
453+
if (vm_type == KVM_X86_SNP_VM) {
454+
ret = snp_guest_req_init(kvm);
455+
if (ret)
456+
goto e_free;
457+
}
455458

456459
INIT_LIST_HEAD(&sev->regions_list);
457460
INIT_LIST_HEAD(&sev->mirror_vms);
@@ -2212,10 +2215,6 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
22122215
if (sev->snp_context)
22132216
return -EINVAL;
22142217

2215-
sev->snp_context = snp_context_create(kvm, argp);
2216-
if (!sev->snp_context)
2217-
return -ENOTTY;
2218-
22192218
if (params.flags)
22202219
return -EINVAL;
22212220

@@ -2230,6 +2229,10 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
22302229
if (params.policy & SNP_POLICY_MASK_SINGLE_SOCKET)
22312230
return -EINVAL;
22322231

2232+
sev->snp_context = snp_context_create(kvm, argp);
2233+
if (!sev->snp_context)
2234+
return -ENOTTY;
2235+
22332236
start.gctx_paddr = __psp_pa(sev->snp_context);
22342237
start.policy = params.policy;
22352238
memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw));

arch/x86/kvm/vmx/nested.c

Lines changed: 25 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1197,11 +1197,14 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
11971197
kvm_hv_nested_transtion_tlb_flush(vcpu, enable_ept);
11981198

11991199
/*
1200-
* If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
1201-
* for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
1202-
* full TLB flush from the guest's perspective. This is required even
1203-
* if VPID is disabled in the host as KVM may need to synchronize the
1204-
* MMU in response to the guest TLB flush.
1200+
* If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the
1201+
* same VPID as the host, and so architecturally, linear and combined
1202+
* mappings for VPID=0 must be flushed at VM-Enter and VM-Exit. KVM
1203+
* emulates L2 sharing L1's VPID=0 by using vpid01 while running L2,
1204+
* and so KVM must also emulate TLB flush of VPID=0, i.e. vpid01. This
1205+
* is required if VPID is disabled in KVM, as a TLB flush (there are no
1206+
* VPIDs) still occurs from L1's perspective, and KVM may need to
1207+
* synchronize the MMU in response to the guest TLB flush.
12051208
*
12061209
* Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
12071210
* EPT is a special snowflake, as guest-physical mappings aren't
@@ -2315,6 +2318,17 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
23152318

23162319
vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
23172320

2321+
/*
2322+
* If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the
2323+
* same VPID as the host. Emulate this behavior by using vpid01 for L2
2324+
* if VPID is disabled in vmcs12. Note, if VPID is disabled, VM-Enter
2325+
* and VM-Exit are architecturally required to flush VPID=0, but *only*
2326+
* VPID=0. I.e. using vpid02 would be ok (so long as KVM emulates the
2327+
* required flushes), but doing so would cause KVM to over-flush. E.g.
2328+
* if L1 runs L2 X with VPID12=1, then runs L2 Y with VPID12 disabled,
2329+
* and then runs L2 X again, then KVM can and should retain TLB entries
2330+
* for VPID12=1.
2331+
*/
23182332
if (enable_vpid) {
23192333
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
23202334
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
@@ -5950,6 +5964,12 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
59505964
return nested_vmx_fail(vcpu,
59515965
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
59525966

5967+
/*
5968+
* Always flush the effective vpid02, i.e. never flush the current VPID
5969+
* and never explicitly flush vpid01. INVVPID targets a VPID, not a
5970+
* VMCS, and so whether or not the current vmcs12 has VPID enabled is
5971+
* irrelevant (and there may not be a loaded vmcs12).
5972+
*/
59535973
vpid02 = nested_get_vpid02(vcpu);
59545974
switch (type) {
59555975
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:

arch/x86/kvm/vmx/vmx.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -217,9 +217,11 @@ module_param(ple_window_shrink, uint, 0444);
217217
static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
218218
module_param(ple_window_max, uint, 0444);
219219

220-
/* Default is SYSTEM mode, 1 for host-guest mode */
220+
/* Default is SYSTEM mode, 1 for host-guest mode (which is BROKEN) */
221221
int __read_mostly pt_mode = PT_MODE_SYSTEM;
222+
#ifdef CONFIG_BROKEN
222223
module_param(pt_mode, int, S_IRUGO);
224+
#endif
223225

224226
struct x86_pmu_lbr __ro_after_init vmx_lbr_caps;
225227

@@ -3216,7 +3218,7 @@ void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
32163218

32173219
static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
32183220
{
3219-
if (is_guest_mode(vcpu))
3221+
if (is_guest_mode(vcpu) && nested_cpu_has_vpid(get_vmcs12(vcpu)))
32203222
return nested_get_vpid02(vcpu);
32213223
return to_vmx(vcpu)->vpid;
32223224
}

tools/testing/selftests/kvm/Makefile

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -241,16 +241,18 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
241241
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
242242
-fno-builtin-memcmp -fno-builtin-memcpy \
243243
-fno-builtin-memset -fno-builtin-strnlen \
244-
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
245-
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
246-
-I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
247-
$(KHDR_INCLUDES)
244+
-fno-stack-protector -fno-PIE -fno-strict-aliasing \
245+
-I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \
246+
-I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH_DIR) \
247+
-I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
248248
ifeq ($(ARCH),s390)
249249
CFLAGS += -march=z10
250250
endif
251251
ifeq ($(ARCH),x86)
252+
ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0)
252253
CFLAGS += -march=x86-64-v2
253254
endif
255+
endif
254256
ifeq ($(ARCH),arm64)
255257
tools_dir := $(top_srcdir)/tools
256258
arm64_tools_dir := $(tools_dir)/arch/arm64/tools/

tools/testing/selftests/kvm/guest_memfd_test.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ static void test_create_guest_memfd_invalid(struct kvm_vm *vm)
134134
size);
135135
}
136136

137-
for (flag = 0; flag; flag <<= 1) {
137+
for (flag = BIT(0); flag; flag <<= 1) {
138138
fd = __vm_create_guest_memfd(vm, page_size, flag);
139139
TEST_ASSERT(fd == -1 && errno == EINVAL,
140140
"guest_memfd() with flag '0x%lx' should fail with EINVAL",

tools/testing/selftests/kvm/lib/x86_64/vmx.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
200200
if (vmx->eptp_gpa) {
201201
uint64_t ept_paddr;
202202
struct eptPageTablePointer eptp = {
203-
.memory_type = VMX_BASIC_MEM_TYPE_WB,
203+
.memory_type = X86_MEMTYPE_WB,
204204
.page_walk_length = 3, /* + 1 */
205205
.ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
206206
.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,

tools/testing/selftests/kvm/memslot_perf_test.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -417,7 +417,7 @@ static bool _guest_should_exit(void)
417417
*/
418418
static noinline void host_perform_sync(struct sync_area *sync)
419419
{
420-
alarm(2);
420+
alarm(10);
421421

422422
atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
423423
while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))

0 commit comments

Comments
 (0)