Skip to content

Commit 1bcc3f8

Browse files
yanzhao56 authored and sean-jc committed
KVM: selftests: Test prefault memory during concurrent memslot removal
Expand the prefault memory selftest to add a regression test for a KVM bug where KVM's retry logic would result in a (breakable) deadlock due to the memslot deletion waiting on prefaulting to release SRCU, and prefaulting waiting on the memslot to fully disappear (KVM uses a two-step process to delete memslots, and KVM x86 retries page faults if a to-be-deleted, a.k.a. INVALID, memslot is encountered).

To exercise concurrent memslot removal, spawn a second thread to initiate memslot removal at roughly the same time as prefaulting. Test memslot removal for all testcases, i.e. don't limit concurrent removal to only the success case. There are essentially three prefault scenarios (so far) that are of interest:

 1. Success
 2. ENOENT due to no memslot
 3. EAGAIN due to INVALID memslot

For all intents and purposes, #1 and #2 are mutually exclusive, or rather, easier to test via separate testcases since writing to non-existent memory is trivial. But for #3, making it mutually exclusive with #1 _or_ #2 is actually more complex than testing memslot removal for all scenarios. The only requirement to let memslot removal coexist with other scenarios is a way to guarantee a stable result, e.g. that the "no memslot" test observes ENOENT, not EAGAIN, for the final checks.

So, rather than make memslot removal mutually exclusive with the ENOENT scenario, simply restore the memslot and retry prefaulting. For the "no memslot" case, KVM_PRE_FAULT_MEMORY should be idempotent, i.e. should always fail with ENOENT regardless of how many times userspace attempts prefaulting.

Pass in both the base GPA and the offset (instead of the "full" GPA) so that the worker can recreate the memslot.

Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20250924174255.2141847-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 6b36119 commit 1bcc3f8

File tree

1 file changed

+114
-17
lines changed

1 file changed

+114
-17
lines changed

tools/testing/selftests/kvm/pre_fault_memory_test.c

Lines changed: 114 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
#include <test_util.h>
1111
#include <kvm_util.h>
1212
#include <processor.h>
13+
#include <pthread.h>
1314

1415
/* Arbitrarily chosen values */
1516
#define TEST_SIZE (SZ_2M + PAGE_SIZE)
@@ -30,37 +31,132 @@ static void guest_code(uint64_t base_gpa)
3031
GUEST_DONE();
3132
}
3233

33-
static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
34-
u64 left)
34+
/*
 * State shared between pre_fault_memory() and the delete_slot_worker()
 * thread.  The three bool flags are accessed with READ_ONCE()/WRITE_ONCE()
 * and polled in cpu_relax() loops to sequence the two threads.
 */
struct slot_worker_data {
35+
struct kvm_vm *vm; /* VM whose TEST_SLOT memslot is deleted and re-added */
36+
u64 gpa; /* guest physical address passed when re-adding the memslot */
37+
uint32_t flags; /* memslot flags passed when re-adding the memslot */
38+
bool worker_ready; /* set by the worker as soon as it starts running */
39+
bool prefault_ready; /* worker waits for this before deleting the memslot */
40+
bool recreate_slot; /* worker waits for this before re-adding the memslot */
41+
};
42+
43+
/*
 * Thread entry point: delete TEST_SLOT once the caller is ready to prefault,
 * then re-add it when asked.
 *
 * @__data: pointer to a struct slot_worker_data shared with the spawning
 *          thread.
 *
 * Always returns NULL.
 */
static void *delete_slot_worker(void *__data)
44+
{
45+
struct slot_worker_data *data = __data;
46+
struct kvm_vm *vm = data->vm;
47+
48+
/* Announce that the worker is running. */
WRITE_ONCE(data->worker_ready, true);
49+
50+
/* Spin until the spawning thread sets prefault_ready. */
while (!READ_ONCE(data->prefault_ready))
51+
cpu_relax();
52+
53+
vm_mem_region_delete(vm, TEST_SLOT);
54+
55+
/* Hold off on restoring the slot until explicitly told to do so. */
while (!READ_ONCE(data->recreate_slot))
56+
cpu_relax();
57+
58+
/* Re-add TEST_SLOT at data->gpa with TEST_NPAGES pages and data->flags. */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa,
59+
TEST_SLOT, TEST_NPAGES, data->flags);
60+
61+
return NULL;
62+
}
63+
64+
static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset,
65+
u64 size, u64 expected_left, bool private)
3566
{
3667
struct kvm_pre_fault_memory range = {
37-
.gpa = gpa,
68+
.gpa = base_gpa + offset,
3869
.size = size,
3970
.flags = 0,
4071
};
41-
u64 prev;
72+
struct slot_worker_data data = {
73+
.vm = vcpu->vm,
74+
.gpa = base_gpa,
75+
.flags = private ? KVM_MEM_GUEST_MEMFD : 0,
76+
};
77+
bool slot_recreated = false;
78+
pthread_t slot_worker;
4279
int ret, save_errno;
80+
u64 prev;
81+
82+
/*
83+
* Concurrently delete (and recreate) the slot to test KVM's handling
84+
* of a racing memslot deletion with prefaulting.
85+
*/
86+
pthread_create(&slot_worker, NULL, delete_slot_worker, &data);
4387

44-
do {
88+
while (!READ_ONCE(data.worker_ready))
89+
cpu_relax();
90+
91+
WRITE_ONCE(data.prefault_ready, true);
92+
93+
for (;;) {
4594
prev = range.size;
4695
ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
4796
save_errno = errno;
4897
TEST_ASSERT((range.size < prev) ^ (ret < 0),
4998
"%sexpecting range.size to change on %s",
5099
ret < 0 ? "not " : "",
51100
ret < 0 ? "failure" : "success");
52-
} while (ret >= 0 ? range.size : save_errno == EINTR);
53101

54-
TEST_ASSERT(range.size == left,
55-
"Completed with %lld bytes left, expected %" PRId64,
56-
range.size, left);
102+
/*
103+
* Immediately retry prefaulting if KVM was interrupted by an
104+
* unrelated signal/event.
105+
*/
106+
if (ret < 0 && save_errno == EINTR)
107+
continue;
108+
109+
/*
110+
* Tell the worker to recreate the slot in order to complete
111+
* prefaulting (if prefault didn't already succeed before the
112+
* slot was deleted) and/or to prepare for the next testcase.
113+
* Wait for the worker to exit so that the next invocation of
114+
* prefaulting is guaranteed to complete (assuming no KVM bugs).
115+
*/
116+
if (!slot_recreated) {
117+
WRITE_ONCE(data.recreate_slot, true);
118+
pthread_join(slot_worker, NULL);
119+
slot_recreated = true;
120+
121+
/*
122+
* Retry prefaulting to get a stable result, i.e. to
123+
* avoid seeing random EAGAIN failures. Don't retry if
124+
* prefaulting already succeeded, as KVM disallows
125+
* prefaulting with size=0, i.e. blindly retrying would
126+
* result in test failures due to EINVAL. KVM should
127+
* always return success if all bytes are prefaulted,
128+
* i.e. there is no need to guard against EAGAIN being
129+
* returned.
130+
*/
131+
if (range.size)
132+
continue;
133+
}
134+
135+
/*
136+
* All done if there are no remaining bytes to prefault, or if
137+
* prefaulting failed (EINTR was handled above, and EAGAIN due
138+
* to prefaulting a memslot that's being actively deleted should
139+
* be impossible since the memslot has already been recreated).
140+
*/
141+
if (!range.size || ret < 0)
142+
break;
143+
}
57144

58-
if (left == 0)
59-
__TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
145+
TEST_ASSERT(range.size == expected_left,
146+
"Completed with %llu bytes left, expected %lu",
147+
range.size, expected_left);
148+
149+
/*
150+
* Assert success if prefaulting the entire range should succeed, i.e.
151+
* complete with no bytes remaining. Otherwise prefaulting should have
152+
* failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when
153+
* no memslot exists).
154+
*/
155+
if (!expected_left)
156+
TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
60157
else
61-
/* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */
62-
__TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
63-
"KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
158+
TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
159+
KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
64160
}
65161

66162
static void __test_pre_fault_memory(unsigned long vm_type, bool private)
@@ -97,9 +193,10 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
97193

98194
if (private)
99195
vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE);
100-
pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0);
101-
pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE);
102-
pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE);
196+
197+
pre_fault_memory(vcpu, guest_test_phys_mem, 0, SZ_2M, 0, private);
198+
pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
199+
pre_fault_memory(vcpu, guest_test_phys_mem, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
103200

104201
vcpu_args_set(vcpu, 1, guest_test_virt_mem);
105202
vcpu_run(vcpu);

0 commit comments

Comments
 (0)