Skip to content

Commit 4af0b13

Browse files
committed
KVM: PVM: Introduce ASID to manage host PCID for guest
Introduce ASID to manage the host PCID for the guest, where an ASID is a group of host PCIDs. Each vCPU will be assigned an ASID when it is loaded onto a pCPU, and each guest PCID on this vCPU is directly mapped to its host PCID by adding the ASID of the vCPU. This approach can improve direct switching and reduce the complexity of host PCID allocation for the guest. However, this conflicts with the global ASID allocation in the upstream, which will be addressed later. Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com> Link: #20
1 parent aa13497 commit 4af0b13

File tree

2 files changed

+98
-162
lines changed

2 files changed

+98
-162
lines changed

arch/x86/kvm/pvm/pvm.c

Lines changed: 84 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -540,155 +540,57 @@ static void pvm_switch_to_host(struct vcpu_pvm *pvm)
540540
preempt_enable();
541541
}
542542

543-
struct host_pcid_one {
544-
/*
545-
* It is struct vcpu_pvm *pvm, but it is not allowed to be
546-
* dereferenced since it might be freed.
547-
*/
548-
void *pvm;
549-
u64 root_hpa;
543+
struct pvm_asid_data {
544+
u64 asid_generation;
545+
u32 max_asid;
546+
u32 next_asid;
547+
u32 min_asid;
550548
};
551549

552-
struct host_pcid_state {
553-
struct host_pcid_one pairs[NUM_HOST_PCID_FOR_GUEST];
554-
int evict_next_round_robin;
555-
};
556-
557-
static DEFINE_PER_CPU(struct host_pcid_state, pvm_tlb_state);
558-
559-
static void host_pcid_flush_all(struct vcpu_pvm *pvm)
560-
{
561-
struct host_pcid_state *tlb_state = this_cpu_ptr(&pvm_tlb_state);
562-
int i;
563-
564-
for (i = 0; i < NUM_HOST_PCID_FOR_GUEST; i++) {
565-
if (tlb_state->pairs[i].pvm == pvm)
566-
tlb_state->pairs[i].pvm = NULL;
567-
}
568-
}
569-
570-
static inline unsigned int host_pcid_to_index(unsigned int host_pcid)
571-
{
572-
return host_pcid & ~HOST_PCID_TAG_FOR_GUEST;
573-
}
550+
static DEFINE_PER_CPU(struct pvm_asid_data, pvm_asid);
574551

575-
static inline int index_to_host_pcid(int index)
576-
{
577-
return index | HOST_PCID_TAG_FOR_GUEST;
578-
}
579-
580-
/*
581-
* Free the uncached guest pcid (not in mmu->root nor mmu->prev_root), so
582-
* that the next allocation would not evict a clean one.
583-
*
584-
* It would be better if kvm.ko notifies us when a root_pgd is freed
585-
* from the cache.
586-
*
587-
* Returns a freed index or -1 if nothing is freed.
588-
*/
589-
static int host_pcid_free_uncached(struct vcpu_pvm *pvm)
552+
static void __pvm_hwtlb_flush_all(void)
590553
{
591-
/* It is allowed to do nothing. */
592-
return -1;
554+
__flush_tlb_all();
593555
}
594556

595-
static int host_pcid_find(struct vcpu_pvm *pvm, u64 root_hpa)
557+
static void update_asid(struct vcpu_pvm *pvm)
596558
{
597-
struct host_pcid_state *tlb_state = this_cpu_ptr(&pvm_tlb_state);
598-
int i;
559+
struct pvm_asid_data *asid_data = this_cpu_ptr(&pvm_asid);
599560

600-
/* find if it is allocated. */
601-
for (i = 0; i < NUM_HOST_PCID_FOR_GUEST; i++) {
602-
struct host_pcid_one *tlb = &tlb_state->pairs[i];
561+
if (pvm->asid_generation == asid_data->asid_generation)
562+
return;
603563

604-
if (tlb->root_hpa == root_hpa && tlb->pvm == pvm)
605-
return index_to_host_pcid(i);
564+
if (asid_data->next_asid > asid_data->max_asid) {
565+
++asid_data->asid_generation;
566+
if (!asid_data->asid_generation)
567+
asid_data->asid_generation = PVM_ASID_GEN_INIT;
568+
asid_data->next_asid = asid_data->min_asid;
569+
__pvm_hwtlb_flush_all();
606570
}
607571

608-
return 0;
572+
pvm->asid_generation = asid_data->asid_generation;
573+
pvm->asid = asid_data->next_asid++;
609574
}
610575

611-
/*
612-
* Get a host pcid of the current pCPU for the specific guest pgd.
613-
* PVM vTLB is guest pgd tagged.
614-
*/
615-
static int host_pcid_get(struct vcpu_pvm *pvm, u64 root_hpa, bool *flush)
576+
static inline u32 guest_pcid_to_host_pcid(struct vcpu_pvm *pvm, u32 guest_pcid, bool is_smod)
616577
{
617-
struct host_pcid_state *tlb_state = this_cpu_ptr(&pvm_tlb_state);
618-
int i, j = -1;
619-
620-
/* find if it is allocated. */
621-
for (i = 0; i < NUM_HOST_PCID_FOR_GUEST; i++) {
622-
struct host_pcid_one *tlb = &tlb_state->pairs[i];
623-
624-
if (tlb->root_hpa == root_hpa && tlb->pvm == pvm)
625-
return index_to_host_pcid(i);
578+
u32 pcid;
626579

627-
/* if it has no owner, allocate it if not found. */
628-
if (!tlb->pvm)
629-
j = i;
630-
}
631-
632-
/*
633-
* Fallback to:
634-
* use the fallback recorded in the above loop.
635-
* use a freed uncached.
636-
* evict one (which might be still usable) by round-robin policy.
637-
*/
638-
if (j < 0)
639-
j = host_pcid_free_uncached(pvm);
640-
if (j < 0) {
641-
j = tlb_state->evict_next_round_robin;
642-
if (++tlb_state->evict_next_round_robin == NUM_HOST_PCID_FOR_GUEST)
643-
tlb_state->evict_next_round_robin = 0;
644-
}
580+
if (guest_pcid & ~PVM_GUEST_PCID_MASK)
581+
guest_pcid = 0;
582+
pcid = (pvm->asid << PVM_ASID_SHIFT) | (guest_pcid & PVM_GUEST_PCID_MASK);
583+
if (!is_smod)
584+
pcid |= PVM_GUEST_PTI_PCID_MASK;
645585

646-
/* associate the host pcid to the guest */
647-
tlb_state->pairs[j].pvm = pvm;
648-
tlb_state->pairs[j].root_hpa = root_hpa;
649-
650-
*flush = true;
651-
return index_to_host_pcid(j);
652-
}
653-
654-
static void host_pcid_free(struct vcpu_pvm *pvm, u64 root_hpa)
655-
{
656-
struct host_pcid_state *tlb_state = this_cpu_ptr(&pvm_tlb_state);
657-
int i;
658-
659-
for (i = 0; i < NUM_HOST_PCID_FOR_GUEST; i++) {
660-
struct host_pcid_one *tlb = &tlb_state->pairs[i];
661-
662-
if (tlb->root_hpa == root_hpa && tlb->pvm == pvm) {
663-
tlb->pvm = NULL;
664-
return;
665-
}
666-
}
667-
}
668-
669-
static inline void *host_pcid_owner(int host_pcid)
670-
{
671-
return this_cpu_read(pvm_tlb_state.pairs[host_pcid_to_index(host_pcid)].pvm);
672-
}
673-
674-
static inline u64 host_pcid_root(int host_pcid)
675-
{
676-
return this_cpu_read(pvm_tlb_state.pairs[host_pcid_to_index(host_pcid)].root_hpa);
677-
}
678-
679-
static void __pvm_hwtlb_flush_all(struct vcpu_pvm *pvm)
680-
{
681-
if (static_cpu_has(X86_FEATURE_PCID))
682-
host_pcid_flush_all(pvm);
586+
return pcid;
683587
}
684588

685589
static void pvm_flush_hwtlb(struct kvm_vcpu *vcpu)
686590
{
687591
struct vcpu_pvm *pvm = to_pvm(vcpu);
688592

689-
get_cpu();
690-
__pvm_hwtlb_flush_all(pvm);
691-
put_cpu();
593+
pvm->asid_generation = PVM_ASID_GEN_RESERVED;
692594
}
693595

694596
static void pvm_flush_hwtlb_guest(struct kvm_vcpu *vcpu)
@@ -706,37 +608,50 @@ static void pvm_flush_hwtlb_guest(struct kvm_vcpu *vcpu)
706608

707609
static void pvm_flush_hwtlb_current(struct kvm_vcpu *vcpu)
708610
{
611+
struct vcpu_pvm *pvm = to_pvm(vcpu);
612+
709613
/* No flush required if the current context is invalid. */
710614
if (!VALID_PAGE(vcpu->arch.mmu->root.hpa))
711615
return;
712616

713617
if (static_cpu_has(X86_FEATURE_PCID)) {
714618
get_cpu();
715-
host_pcid_free(to_pvm(vcpu), vcpu->arch.mmu->root.hpa);
619+
if (this_cpu_has(X86_FEATURE_INVPCID)) {
620+
u32 pcid = kvm_get_pcid(vcpu, vcpu->arch.cr3);
621+
622+
pcid = guest_pcid_to_host_pcid(pvm, pcid, true);
623+
invpcid_flush_single_context(pcid);
624+
invpcid_flush_single_context(pcid ^ PVM_GUEST_PTI_PCID_MASK);
625+
} else {
626+
pvm_flush_hwtlb(vcpu);
627+
}
716628
put_cpu();
717629
}
718630
}
719631

720632
static void pvm_flush_hwtlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
721633
{
634+
struct kvm_mmu *mmu = vcpu->arch.mmu;
722635
struct vcpu_pvm *pvm = to_pvm(vcpu);
723-
int max = MIN_HOST_PCID_FOR_GUEST + NUM_HOST_PCID_FOR_GUEST;
724636
int i;
725637

726638
if (!static_cpu_has(X86_FEATURE_PCID))
727639
return;
728640

729641
get_cpu();
730642
if (!this_cpu_has(X86_FEATURE_INVPCID)) {
731-
host_pcid_flush_all(pvm);
643+
pvm_flush_hwtlb(vcpu);
732644
put_cpu();
733645
return;
734646
}
735647

736-
host_pcid_free_uncached(pvm);
737-
for (i = MIN_HOST_PCID_FOR_GUEST; i < max; i++) {
738-
if (host_pcid_owner(i) == pvm)
739-
invpcid_flush_one(i, addr);
648+
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
649+
if (VALID_PAGE(mmu->prev_roots[i].hpa)) {
650+
u32 pcid = kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd);
651+
bool is_smod = root_to_sp(mmu->prev_roots[i].hpa)->role.word & ACC_USER_MASK;
652+
653+
invpcid_flush_one(guest_pcid_to_host_pcid(pvm, pcid, is_smod), addr);
654+
}
740655
}
741656

742657
put_cpu();
@@ -753,7 +668,7 @@ static u64 get_switch_hw_cr3(struct vcpu_pvm *pvm)
753668
if (is_root_usable(&mmu->prev_roots[i], pvm->vcpu.arch.cr3, switch_role)) {
754669
if (i != 0)
755670
swap(mmu->prev_roots[0], mmu->prev_roots[i]);
756-
return mmu->prev_roots[0].hpa;
671+
return __sme_set(mmu->prev_roots[0].hpa);
757672
}
758673
}
759674

@@ -762,29 +677,31 @@ static u64 get_switch_hw_cr3(struct vcpu_pvm *pvm)
762677

763678
static void pvm_set_host_cr3_for_guest(struct vcpu_pvm *pvm)
764679
{
765-
u64 hw_cr3 = pvm->vcpu.arch.mmu->root.hpa;
766-
u64 enter_hw_cr3 = hw_cr3;
680+
u64 hw_cr3 = __sme_set(pvm->vcpu.arch.mmu->root.hpa);
767681
u64 switch_hw_cr3 = get_switch_hw_cr3(pvm);
768682

769683
if (static_cpu_has(X86_FEATURE_PCID)) {
770-
bool flush = false;
771-
u32 host_pcid = host_pcid_get(pvm, hw_cr3, &flush);
772-
773-
enter_hw_cr3 |= host_pcid;
774-
if (!flush)
775-
enter_hw_cr3 |= CR3_NOFLUSH;
776-
hw_cr3 |= host_pcid | CR3_NOFLUSH;
777-
778-
if (switch_hw_cr3 != INVALID_PAGE) {
779-
host_pcid = host_pcid_find(pvm, switch_hw_cr3);
780-
if (!host_pcid)
781-
switch_hw_cr3 = INVALID_PAGE;
782-
else
783-
switch_hw_cr3 |= host_pcid | CR3_NOFLUSH;
684+
u32 pcid = pvm->vcpu.arch.cr3 & X86_CR3_PCID_MASK;
685+
686+
update_asid(pvm);
687+
688+
pcid = guest_pcid_to_host_pcid(pvm, pcid, is_smod(pvm));
689+
hw_cr3 |= pcid | CR3_NOFLUSH;
690+
if (switch_hw_cr3 != INVALID_PAGE)
691+
switch_hw_cr3 |= (pcid ^ PVM_GUEST_PTI_PCID_MASK) | CR3_NOFLUSH;
692+
693+
/*
694+
* if guest PCID is bigger than 7, use the fallback guest PCID
695+
* 0, which is assumed to always be force flushed.
696+
*/
697+
if (unlikely(!(pcid & PVM_GUEST_PCID_INDEX_MASK))) {
698+
hw_cr3 &= ~CR3_NOFLUSH;
699+
if (switch_hw_cr3 != INVALID_PAGE)
700+
switch_hw_cr3 &= ~CR3_NOFLUSH;
784701
}
785702
}
786703

787-
this_cpu_write(cpu_tss_rw.tss_ex.enter_cr3, enter_hw_cr3);
704+
this_cpu_write(cpu_tss_rw.tss_ex.enter_cr3, hw_cr3);
788705

789706
if (is_smod(pvm)) {
790707
this_cpu_write(cpu_tss_rw.tss_ex.smod_cr3, hw_cr3);
@@ -845,7 +762,7 @@ static void pvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
845762
__this_cpu_write(active_pvm_vcpu, pvm);
846763

847764
if (vcpu->cpu != cpu)
848-
__pvm_hwtlb_flush_all(pvm);
765+
pvm_flush_hwtlb(vcpu);
849766

850767
indirect_branch_prediction_barrier();
851768
}
@@ -854,7 +771,6 @@ static void pvm_vcpu_put(struct kvm_vcpu *vcpu)
854771
{
855772
struct vcpu_pvm *pvm = to_pvm(vcpu);
856773

857-
host_pcid_free_uncached(pvm);
858774
pvm_prepare_switch_to_host(pvm);
859775
}
860776

@@ -2869,9 +2785,20 @@ static int pvm_vm_init(struct kvm *kvm)
28692785
return 0;
28702786
}
28712787

2788+
static void pvm_asid_data_init(void)
2789+
{
2790+
struct pvm_asid_data *asid_data = this_cpu_ptr(&pvm_asid);
2791+
2792+
asid_data->asid_generation = PVM_ASID_GEN_INIT;
2793+
asid_data->max_asid = PVM_ASID_MAX;
2794+
asid_data->next_asid = PVM_ASID_MIN;
2795+
asid_data->min_asid = PVM_ASID_MIN;
2796+
__pvm_hwtlb_flush_all();
2797+
}
2798+
28722799
static int hardware_enable(void)
28732800
{
2874-
/* Nothing to do */
2801+
pvm_asid_data_init();
28752802
return 0;
28762803
}
28772804

@@ -3222,10 +3149,9 @@ module_exit(pvm_exit);
32223149

32233150
static int __init hardware_cap_check(void)
32243151
{
3225-
BUILD_BUG_ON(MIN_HOST_PCID_FOR_GUEST <= TLB_NR_DYN_ASIDS);
3152+
BUILD_BUG_ON(NUM_PVM_GUEST_PCID_INDEX <= TLB_NR_DYN_ASIDS);
32263153
#ifdef CONFIG_PAGE_TABLE_ISOLATION
3227-
BUILD_BUG_ON((MIN_HOST_PCID_FOR_GUEST + NUM_HOST_PCID_FOR_GUEST) >=
3228-
(1 << X86_CR3_PTI_PCID_USER_BIT));
3154+
BUILD_BUG_ON(PVM_GUEST_PTI_PCID_BIT != X86_CR3_PTI_PCID_USER_BIT);
32293155
#endif
32303156

32313157
/*

arch/x86/kvm/pvm/pvm.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,17 @@ extern u64 *host_mmu_root_pgd;
7676
void host_mmu_destroy(void);
7777
int host_mmu_init(void);
7878

79-
#define HOST_PCID_TAG_FOR_GUEST (32)
80-
81-
#define MIN_HOST_PCID_FOR_GUEST HOST_PCID_TAG_FOR_GUEST
82-
#define NUM_HOST_PCID_FOR_GUEST HOST_PCID_TAG_FOR_GUEST
79+
#define PVM_ASID_SHIFT 3
80+
#define NUM_PVM_GUEST_PCID_INDEX (1U << PVM_ASID_SHIFT)
81+
#define PVM_GUEST_PTI_PCID_BIT 11
82+
#define PVM_GUEST_PTI_PCID_MASK (1U << PVM_GUEST_PTI_PCID_BIT)
83+
#define PVM_GUEST_PCID_INDEX_MASK (NUM_PVM_GUEST_PCID_INDEX - 1)
84+
#define PVM_GUEST_PCID_MASK (PVM_GUEST_PCID_INDEX_MASK | PVM_GUEST_PTI_PCID_MASK)
85+
86+
#define PVM_ASID_MIN 1
87+
#define PVM_ASID_MAX (((1U << PVM_GUEST_PTI_PCID_BIT) - 1) / NUM_PVM_GUEST_PCID_INDEX)
88+
#define PVM_ASID_GEN_RESERVED 0
89+
#define PVM_ASID_GEN_INIT 1
8390

8491
struct vcpu_pvm {
8592
struct kvm_vcpu vcpu;
@@ -111,6 +118,9 @@ struct vcpu_pvm {
111118

112119
struct gfn_to_pfn_cache pvcs_gpc;
113120

121+
u32 asid;
122+
u64 asid_generation;
123+
114124
// emulated x86 msrs
115125
u64 msr_lstar;
116126
u64 msr_syscall_mask;

0 commit comments

Comments
 (0)