
Commit 531810c

Ben Gardon authored and bonzini (Paolo Bonzini) committed
KVM: x86/mmu: Use an rwlock for the x86 MMU
Add a read/write lock to be used in place of the MMU spinlock on x86. The rwlock will enable the TDP MMU to handle page faults, and other operations, in parallel in future commits.

Reviewed-by: Peter Feiner <pfeiner@google.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210202185734.1680553-19-bgardon@google.com>
[Introduce virt/kvm/mmu_lock.h - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent f3d4b4b commit 531810c

File tree

10 files changed (+118 -84 lines)
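
The x86 opt-in below (KVM_HAVE_MMU_RWLOCK) only works because common KVM code now picks the type of mmu_lock at compile time. That common-code change is among the ten changed files but is not reproduced on this page; the fragment below is a sketch of the idea, not a quote of the actual hunk:

/*
 * Sketch only: in common code, struct kvm's mmu_lock becomes either an
 * rwlock or a spinlock depending on whether the architecture defines
 * KVM_HAVE_MMU_RWLOCK.  The exact hunk is an assumption, not shown here.
 */
#ifdef KVM_HAVE_MMU_RWLOCK
        rwlock_t mmu_lock;      /* x86 opts in; every path in this commit takes it for write */
#else
        spinlock_t mmu_lock;    /* all other architectures keep the spinlock */
#endif /* KVM_HAVE_MMU_RWLOCK */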

arch/x86/include/asm/kvm_host.h

Lines changed: 2 additions & 0 deletions
@@ -348,6 +348,8 @@ struct kvm_mmu_root_info {
 
 #define KVM_MMU_NUM_PREV_ROOTS 3
 
+#define KVM_HAVE_MMU_RWLOCK
+
 struct kvm_mmu_page;
 
 /*
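
The commit message's note "[Introduce virt/kvm/mmu_lock.h - Paolo]" refers to a small wrapper header that lets common code lock mmu_lock without caring which lock type the architecture chose. That header is not part of this excerpt; the sketch below shows the shape such a wrapper plausibly takes, with the KVM_MMU_LOCK* macro names assumed for illustration:

/* virt/kvm/mmu_lock.h -- sketch; macro names are assumed, not quoted */
#ifndef KVM_MMU_LOCK_H
#define KVM_MMU_LOCK_H 1

/*
 * Architectures that define KVM_HAVE_MMU_RWLOCK get an rwlock-backed
 * mmu_lock; common code uses these macros so it never has to #ifdef
 * at each call site.
 */
#ifdef KVM_HAVE_MMU_RWLOCK
#define KVM_MMU_LOCK_INIT(kvm)  rwlock_init(&(kvm)->mmu_lock)
#define KVM_MMU_LOCK(kvm)       write_lock(&(kvm)->mmu_lock)
#define KVM_MMU_UNLOCK(kvm)     write_unlock(&(kvm)->mmu_lock)
#else
#define KVM_MMU_LOCK_INIT(kvm)  spin_lock_init(&(kvm)->mmu_lock)
#define KVM_MMU_LOCK(kvm)       spin_lock(&(kvm)->mmu_lock)
#define KVM_MMU_UNLOCK(kvm)     spin_unlock(&(kvm)->mmu_lock)
#endif /* KVM_HAVE_MMU_RWLOCK */

#endif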

arch/x86/kvm/mmu/mmu.c

Lines changed: 45 additions & 45 deletions
@@ -2010,9 +2010,9 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
                         flush |= kvm_sync_page(vcpu, sp, &invalid_list);
                         mmu_pages_clear_parents(&parents);
                 }
-                if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) {
+                if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
                         kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
-                        cond_resched_lock(&vcpu->kvm->mmu_lock);
+                        cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
                         flush = false;
                 }
         }
@@ -2464,7 +2464,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
  */
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
 {
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
 
         if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
                 kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages -
@@ -2475,7 +2475,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
 
         kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
 
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 }
 
 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
@@ -2486,15 +2486,15 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 
         pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
         r = 0;
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
         for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
                 pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
                          sp->role.word);
                 r = 1;
                 kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
         }
         kvm_mmu_commit_zap_page(kvm, &invalid_list);
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 
         return r;
 }
@@ -3186,7 +3186,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                 return;
         }
 
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
 
         for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
                 if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
@@ -3209,7 +3209,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
         }
 
         kvm_mmu_commit_zap_page(kvm, &invalid_list);
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
 
@@ -3230,16 +3230,16 @@ static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva,
 {
         struct kvm_mmu_page *sp;
 
-        spin_lock(&vcpu->kvm->mmu_lock);
+        write_lock(&vcpu->kvm->mmu_lock);
 
         if (make_mmu_pages_available(vcpu)) {
-                spin_unlock(&vcpu->kvm->mmu_lock);
+                write_unlock(&vcpu->kvm->mmu_lock);
                 return INVALID_PAGE;
         }
         sp = kvm_mmu_get_page(vcpu, gfn, gva, level, direct, ACC_ALL);
         ++sp->root_count;
 
-        spin_unlock(&vcpu->kvm->mmu_lock);
+        write_unlock(&vcpu->kvm->mmu_lock);
         return __pa(sp->spt);
 }
 
@@ -3410,17 +3410,17 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
                     !smp_load_acquire(&sp->unsync_children))
                         return;
 
-                spin_lock(&vcpu->kvm->mmu_lock);
+                write_lock(&vcpu->kvm->mmu_lock);
                 kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
 
                 mmu_sync_children(vcpu, sp);
 
                 kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
-                spin_unlock(&vcpu->kvm->mmu_lock);
+                write_unlock(&vcpu->kvm->mmu_lock);
                 return;
         }
 
-        spin_lock(&vcpu->kvm->mmu_lock);
+        write_lock(&vcpu->kvm->mmu_lock);
         kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
 
         for (i = 0; i < 4; ++i) {
@@ -3434,7 +3434,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
         }
 
         kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
-        spin_unlock(&vcpu->kvm->mmu_lock);
+        write_unlock(&vcpu->kvm->mmu_lock);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
 
@@ -3718,7 +3718,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
                 return r;
 
         r = RET_PF_RETRY;
-        spin_lock(&vcpu->kvm->mmu_lock);
+        write_lock(&vcpu->kvm->mmu_lock);
         if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
                 goto out_unlock;
         r = make_mmu_pages_available(vcpu);
@@ -3733,7 +3733,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
                          prefault, is_tdp);
 
 out_unlock:
-        spin_unlock(&vcpu->kvm->mmu_lock);
+        write_unlock(&vcpu->kvm->mmu_lock);
         kvm_release_pfn_clean(pfn);
         return r;
 }
@@ -4959,7 +4959,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
          */
         mmu_topup_memory_caches(vcpu, true);
 
-        spin_lock(&vcpu->kvm->mmu_lock);
+        write_lock(&vcpu->kvm->mmu_lock);
 
         gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
 
@@ -4991,7 +4991,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
         }
         kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush);
         kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
-        spin_unlock(&vcpu->kvm->mmu_lock);
+        write_unlock(&vcpu->kvm->mmu_lock);
 }
 
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
@@ -5189,14 +5189,14 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
                 if (iterator.rmap)
                         flush |= fn(kvm, iterator.rmap);
 
-                if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+                if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
                         if (flush && lock_flush_tlb) {
                                 kvm_flush_remote_tlbs_with_address(kvm,
                                                 start_gfn,
                                                 iterator.gfn - start_gfn + 1);
                                 flush = false;
                         }
-                        cond_resched_lock(&kvm->mmu_lock);
+                        cond_resched_rwlock_write(&kvm->mmu_lock);
                 }
         }
 
@@ -5346,7 +5346,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
                  * be in active use by the guest.
                  */
                 if (batch >= BATCH_ZAP_PAGES &&
-                    cond_resched_lock(&kvm->mmu_lock)) {
+                    cond_resched_rwlock_write(&kvm->mmu_lock)) {
                         batch = 0;
                         goto restart;
                 }
@@ -5379,7 +5379,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 {
         lockdep_assert_held(&kvm->slots_lock);
 
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
         trace_kvm_mmu_zap_all_fast(kvm);
 
         /*
@@ -5406,7 +5406,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
         if (kvm->arch.tdp_mmu_enabled)
                 kvm_tdp_mmu_zap_all(kvm);
 
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 }
 
 static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
@@ -5448,7 +5448,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
         int i;
         bool flush;
 
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
         for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
                 slots = __kvm_memslots(kvm, i);
                 kvm_for_each_memslot(memslot, slots) {
@@ -5472,7 +5472,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
                         kvm_flush_remote_tlbs(kvm);
         }
 
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 }
 
 static bool slot_rmap_write_protect(struct kvm *kvm,
@@ -5487,12 +5487,12 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 {
         bool flush;
 
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
         flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
                                   start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
         if (kvm->arch.tdp_mmu_enabled)
                 flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_4K);
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 
         /*
          * We can flush all the TLBs out of the mmu lock without TLB
@@ -5552,13 +5552,13 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
                                    const struct kvm_memory_slot *memslot)
 {
         /* FIXME: const-ify all uses of struct kvm_memory_slot. */
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
         slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
                          kvm_mmu_zap_collapsible_spte, true);
 
         if (kvm->arch.tdp_mmu_enabled)
                 kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 }
 
 void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
@@ -5581,11 +5581,11 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 {
         bool flush;
 
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
         flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
         if (kvm->arch.tdp_mmu_enabled)
                 flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 
         /*
          * It's also safe to flush TLBs out of mmu lock here as currently this
@@ -5603,12 +5603,12 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
 {
         bool flush;
 
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
         flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
                                         false);
         if (kvm->arch.tdp_mmu_enabled)
                 flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_2M);
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 
         if (flush)
                 kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
@@ -5620,11 +5620,11 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
 {
         bool flush;
 
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
         flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
         if (kvm->arch.tdp_mmu_enabled)
                 flush |= kvm_tdp_mmu_slot_set_dirty(kvm, memslot);
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 
         if (flush)
                 kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
@@ -5637,14 +5637,14 @@ void kvm_mmu_zap_all(struct kvm *kvm)
         LIST_HEAD(invalid_list);
         int ign;
 
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
 restart:
         list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
                 if (WARN_ON(sp->role.invalid))
                         continue;
                 if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
                         goto restart;
-                if (cond_resched_lock(&kvm->mmu_lock))
+                if (cond_resched_rwlock_write(&kvm->mmu_lock))
                         goto restart;
         }
 
@@ -5653,7 +5653,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
         if (kvm->arch.tdp_mmu_enabled)
                 kvm_tdp_mmu_zap_all(kvm);
 
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 }
 
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
@@ -5713,7 +5713,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
                         continue;
 
                 idx = srcu_read_lock(&kvm->srcu);
-                spin_lock(&kvm->mmu_lock);
+                write_lock(&kvm->mmu_lock);
 
                 if (kvm_has_zapped_obsolete_pages(kvm)) {
                         kvm_mmu_commit_zap_page(kvm,
@@ -5724,7 +5724,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
                 freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
 
 unlock:
-                spin_unlock(&kvm->mmu_lock);
+                write_unlock(&kvm->mmu_lock);
                 srcu_read_unlock(&kvm->srcu, idx);
 
                 /*
@@ -5944,7 +5944,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
         ulong to_zap;
 
         rcu_idx = srcu_read_lock(&kvm->srcu);
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
 
         ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
         to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
@@ -5969,14 +5969,14 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
                         WARN_ON_ONCE(sp->lpage_disallowed);
                 }
 
-                if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+                if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
                         kvm_mmu_commit_zap_page(kvm, &invalid_list);
-                        cond_resched_lock(&kvm->mmu_lock);
+                        cond_resched_rwlock_write(&kvm->mmu_lock);
                 }
         }
         kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
         srcu_read_unlock(&kvm->srcu, rcu_idx);
 }
 
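Every converted call site above takes mmu_lock for write, so behaviour does not change yet; what the conversion must preserve is the existing lock-break logic, which is why spin_needbreak() and cond_resched_lock() become rwlock_needbreak() and cond_resched_rwlock_write(). An annotated sketch of that pattern, modelled on kvm_mmu_zap_all() above (the function name is illustrative, not part of the commit):

/*
 * Sketch of the write-lock + voluntary-yield pattern used throughout mmu.c;
 * mmu_write_side_walk() is an illustrative name, modelled on kvm_mmu_zap_all().
 */
static void mmu_write_side_walk(struct kvm *kvm)
{
        struct kvm_mmu_page *sp, *node;
        LIST_HEAD(invalid_list);
        int ign;

        write_lock(&kvm->mmu_lock);                     /* was spin_lock() */
restart:
        list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
                if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
                        goto restart;                   /* list changed, start over */

                /*
                 * Long walks must still yield: rwlock_needbreak() replaces
                 * spin_needbreak(), and cond_resched_rwlock_write() drops and
                 * reacquires the lock for write, so waiters (and, once later
                 * commits add them, readers) can make progress.
                 */
                if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
                        kvm_mmu_commit_zap_page(kvm, &invalid_list);
                        cond_resched_rwlock_write(&kvm->mmu_lock);
                        goto restart;
                }
        }
        kvm_mmu_commit_zap_page(kvm, &invalid_list);
        write_unlock(&kvm->mmu_lock);                   /* was spin_unlock() */
}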
arch/x86/kvm/mmu/page_track.c

Lines changed: 4 additions & 4 deletions
@@ -184,9 +184,9 @@ kvm_page_track_register_notifier(struct kvm *kvm,
 
         head = &kvm->arch.track_notifier_head;
 
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
         hlist_add_head_rcu(&n->node, &head->track_notifier_list);
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
 }
 EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
 
@@ -202,9 +202,9 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
 
         head = &kvm->arch.track_notifier_head;
 
-        spin_lock(&kvm->mmu_lock);
+        write_lock(&kvm->mmu_lock);
         hlist_del_rcu(&n->node);
-        spin_unlock(&kvm->mmu_lock);
+        write_unlock(&kvm->mmu_lock);
         synchronize_srcu(&head->track_srcu);
 }
 EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
