Skip to content

Commit 8dd2eee

Browse files
chao-p authored and bonzini committed
KVM: x86/mmu: Handle page fault for private memory
Add support for resolving page faults on guest private memory for VMs that differentiate between "shared" and "private" memory. For such VMs, KVM_MEM_GUEST_MEMFD memslots can include both fd-based private memory and hva-based shared memory, and KVM needs to map in the "correct" variant, i.e. KVM needs to map the gfn shared/private as appropriate based on the current state of the gfn's KVM_MEMORY_ATTRIBUTE_PRIVATE flag. For AMD's SEV-SNP and Intel's TDX, the guest effectively gets to request shared vs. private via a bit in the guest page tables, i.e. what the guest wants may conflict with the current memory attributes. To support such "implicit" conversion requests, exit to user with KVM_EXIT_MEMORY_FAULT to forward the request to userspace. Add a new flag for memory faults, KVM_MEMORY_EXIT_FLAG_PRIVATE, to communicate whether the guest wants to map memory as shared vs. private. Like KVM_MEMORY_ATTRIBUTE_PRIVATE, use bit 3 for flagging private memory so that KVM can use bits 0-2 for capturing RWX behavior if/when userspace needs such information, e.g. a likely user of KVM_EXIT_MEMORY_FAULT is to exit on missing mappings when handling guest page fault VM-Exits. In that case, userspace will want to know RWX information in order to correctly/precisely resolve the fault. Note, private memory *must* be backed by guest_memfd, i.e. shared mappings always come from the host userspace page tables, and private mappings always come from a guest_memfd instance. Co-developed-by: Yu Zhang <yu.c.zhang@linux.intel.com> Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com> Co-developed-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Sean Christopherson <seanjc@google.com> Reviewed-by: Fuad Tabba <tabba@google.com> Tested-by: Fuad Tabba <tabba@google.com> Message-Id: <20231027182217.3615211-21-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent 90b4fe1 commit 8dd2eee

File tree

5 files changed

+110
-9
lines changed

5 files changed

+110
-9
lines changed

Documentation/virt/kvm/api.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6952,6 +6952,7 @@ spec refer, https://github.com/riscv/riscv-sbi-doc.
69526952

69536953
/* KVM_EXIT_MEMORY_FAULT */
69546954
struct {
6955+
#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
69556956
__u64 flags;
69566957
__u64 gpa;
69576958
__u64 size;
@@ -6960,8 +6961,11 @@ spec refer, https://github.com/riscv/riscv-sbi-doc.
69606961
KVM_EXIT_MEMORY_FAULT indicates the vCPU has encountered a memory fault that
69616962
could not be resolved by KVM. The 'gpa' and 'size' (in bytes) describe the
69626963
guest physical address range [gpa, gpa + size) of the fault. The 'flags' field
6963-
describes properties of the faulting access that are likely pertinent.
6964-
Currently, no flags are defined.
6964+
describes properties of the faulting access that are likely pertinent:
6965+
6966+
- KVM_MEMORY_EXIT_FLAG_PRIVATE - When set, indicates the memory fault occurred
6967+
on a private memory access. When clear, indicates the fault occurred on a
6968+
shared access.
69656969

69666970
Note! KVM_EXIT_MEMORY_FAULT is unique among all KVM exit reasons in that it
69676971
accompanies a return code of '-1', not '0'! errno will always be set to EFAULT

arch/x86/kvm/mmu/mmu.c

Lines changed: 96 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3147,9 +3147,9 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
31473147
return level;
31483148
}
31493149

3150-
int kvm_mmu_max_mapping_level(struct kvm *kvm,
3151-
const struct kvm_memory_slot *slot, gfn_t gfn,
3152-
int max_level)
3150+
static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
3151+
const struct kvm_memory_slot *slot,
3152+
gfn_t gfn, int max_level, bool is_private)
31533153
{
31543154
struct kvm_lpage_info *linfo;
31553155
int host_level;
@@ -3161,13 +3161,26 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
31613161
break;
31623162
}
31633163

3164+
if (is_private)
3165+
return max_level;
3166+
31643167
if (max_level == PG_LEVEL_4K)
31653168
return PG_LEVEL_4K;
31663169

31673170
host_level = host_pfn_mapping_level(kvm, gfn, slot);
31683171
return min(host_level, max_level);
31693172
}
31703173

3174+
int kvm_mmu_max_mapping_level(struct kvm *kvm,
3175+
const struct kvm_memory_slot *slot, gfn_t gfn,
3176+
int max_level)
3177+
{
3178+
bool is_private = kvm_slot_can_be_private(slot) &&
3179+
kvm_mem_is_private(kvm, gfn);
3180+
3181+
return __kvm_mmu_max_mapping_level(kvm, slot, gfn, max_level, is_private);
3182+
}
3183+
31713184
void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
31723185
{
31733186
struct kvm_memory_slot *slot = fault->slot;
@@ -3188,8 +3201,9 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
31883201
* Enforce the iTLB multihit workaround after capturing the requested
31893202
* level, which will be used to do precise, accurate accounting.
31903203
*/
3191-
fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
3192-
fault->gfn, fault->max_level);
3204+
fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot,
3205+
fault->gfn, fault->max_level,
3206+
fault->is_private);
31933207
if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
31943208
return;
31953209

@@ -4269,6 +4283,55 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
42694283
kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
42704284
}
42714285

4286+
static inline u8 kvm_max_level_for_order(int order)
4287+
{
4288+
BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
4289+
4290+
KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
4291+
order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
4292+
order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));
4293+
4294+
if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
4295+
return PG_LEVEL_1G;
4296+
4297+
if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
4298+
return PG_LEVEL_2M;
4299+
4300+
return PG_LEVEL_4K;
4301+
}
4302+
4303+
static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
4304+
struct kvm_page_fault *fault)
4305+
{
4306+
kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT,
4307+
PAGE_SIZE, fault->write, fault->exec,
4308+
fault->is_private);
4309+
}
4310+
4311+
static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
4312+
struct kvm_page_fault *fault)
4313+
{
4314+
int max_order, r;
4315+
4316+
if (!kvm_slot_can_be_private(fault->slot)) {
4317+
kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
4318+
return -EFAULT;
4319+
}
4320+
4321+
r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn,
4322+
&max_order);
4323+
if (r) {
4324+
kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
4325+
return r;
4326+
}
4327+
4328+
fault->max_level = min(kvm_max_level_for_order(max_order),
4329+
fault->max_level);
4330+
fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY);
4331+
4332+
return RET_PF_CONTINUE;
4333+
}
4334+
42724335
static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
42734336
{
42744337
struct kvm_memory_slot *slot = fault->slot;
@@ -4301,6 +4364,14 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
43014364
return RET_PF_EMULATE;
43024365
}
43034366

4367+
if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
4368+
kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
4369+
return -EFAULT;
4370+
}
4371+
4372+
if (fault->is_private)
4373+
return kvm_faultin_pfn_private(vcpu, fault);
4374+
43044375
async = false;
43054376
fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
43064377
fault->write, &fault->map_writable,
@@ -7188,6 +7259,26 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
71887259
}
71897260

71907261
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
7262+
bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
7263+
struct kvm_gfn_range *range)
7264+
{
7265+
/*
7266+
* Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only
7267+
* supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM
7268+
* can simply ignore such slots. But if userspace is making memory
7269+
* PRIVATE, then KVM must prevent the guest from accessing the memory
7270+
* as shared. And if userspace is making memory SHARED and this point
7271+
* is reached, then at least one page within the range was previously
7272+
* PRIVATE, i.e. the slot's possible hugepage ranges are changing.
7273+
* Zapping SPTEs in this case ensures KVM will reassess whether or not
7274+
* a hugepage can be used for affected ranges.
7275+
*/
7276+
if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
7277+
return false;
7278+
7279+
return kvm_unmap_gfn_range(kvm, range);
7280+
}
7281+
71917282
static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
71927283
int level)
71937284
{

arch/x86/kvm/mmu/mmu_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ struct kvm_page_fault {
201201

202202
/* Derived from mmu and global state. */
203203
const bool is_tdp;
204+
const bool is_private;
204205
const bool nx_huge_page_workaround_enabled;
205206

206207
/*

include/linux/kvm_host.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2357,14 +2357,18 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr)
23572357
#define KVM_DIRTY_RING_MAX_ENTRIES 65536
23582358

23592359
static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
2360-
gpa_t gpa, gpa_t size)
2360+
gpa_t gpa, gpa_t size,
2361+
bool is_write, bool is_exec,
2362+
bool is_private)
23612363
{
23622364
vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
23632365
vcpu->run->memory_fault.gpa = gpa;
23642366
vcpu->run->memory_fault.size = size;
23652367

2366-
/* Flags are not (yet) defined or communicated to userspace. */
2368+
/* RWX flags are not (yet) defined or communicated to userspace. */
23672369
vcpu->run->memory_fault.flags = 0;
2370+
if (is_private)
2371+
vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE;
23682372
}
23692373

23702374
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES

include/uapi/linux/kvm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,7 @@ struct kvm_run {
535535
} notify;
536536
/* KVM_EXIT_MEMORY_FAULT */
537537
struct {
538+
#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
538539
__u64 flags;
539540
__u64 gpa;
540541
__u64 size;

0 commit comments

Comments (0)