@@ -427,15 +427,19 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 	 */
 	if (!was_present && !is_present) {
 		/*
-		 * If this change does not involve a MMIO SPTE, it is
-		 * unexpected. Log the change, though it should not impact the
-		 * guest since both the former and current SPTEs are nonpresent.
+		 * If this change does not involve a MMIO SPTE or removed SPTE,
+		 * it is unexpected. Log the change, though it should not
+		 * impact the guest since both the former and current SPTEs
+		 * are nonpresent.
 		 */
-		if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte)))
+		if (WARN_ON(!is_mmio_spte(old_spte) &&
+			    !is_mmio_spte(new_spte) &&
+			    !is_removed_spte(new_spte)))
 			pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
 			       "should not be replaced with another,\n"
 			       "different nonpresent SPTE, unless one or both\n"
-			       "are MMIO SPTEs.\n"
+			       "are MMIO SPTEs, or the new SPTE is\n"
+			       "a temporary removed SPTE.\n"
 			       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
 			       as_id, gfn, old_spte, new_spte, level);
 		return;
@@ -486,6 +490,13 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
 
 	lockdep_assert_held_read(&kvm->mmu_lock);
 
+	/*
+	 * Do not change removed SPTEs. Only the thread that froze the SPTE
+	 * may modify it.
+	 */
+	if (iter->old_spte == REMOVED_SPTE)
+		return false;
+
 	if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
 		      new_spte) != iter->old_spte)
 		return false;
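
The early return added above is what keeps the freeze protocol safe while the MMU lock is held only for read: a racing writer that still holds a stale old_spte either bails out on the explicit REMOVED_SPTE check or loses the subsequent cmpxchg. As a rough, self-contained model of that pattern (a userspace sketch using C11 atomics, not KVM code; REMOVED, slot and try_set are made-up names standing in for REMOVED_SPTE, the SPTE pointer and tdp_mmu_set_spte_atomic):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REMOVED ((uint64_t)-1)	/* illustrative stand-in for REMOVED_SPTE */

/* Rough model of tdp_mmu_set_spte_atomic(): refuse frozen slots, then cmpxchg. */
static bool try_set(_Atomic uint64_t *slot, uint64_t old_val, uint64_t new_val)
{
	if (old_val == REMOVED)		/* only the freezing thread may modify it */
		return false;

	/* Fails if another thread changed the slot since old_val was read. */
	return atomic_compare_exchange_strong(slot, &old_val, new_val);
}

int main(void)
{
	_Atomic uint64_t slot = 42;

	printf("fresh set:  %d\n", try_set(&slot, 42, 100));	/* 1: wins the race */
	printf("stale set:  %d\n", try_set(&slot, 42, 200));	/* 0: slot is now 100 */
	atomic_store(&slot, REMOVED);
	printf("frozen set: %d\n", try_set(&slot, REMOVED, 7));	/* 0: refused */
	return 0;
}
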
@@ -496,6 +507,34 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
 	return true;
 }
 
+static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
+					   struct tdp_iter *iter)
+{
+	/*
+	 * Freeze the SPTE by setting it to a special,
+	 * non-present value. This will stop other threads from
+	 * immediately installing a present entry in its place
+	 * before the TLBs are flushed.
+	 */
+	if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE))
+		return false;
+
+	kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
+					   KVM_PAGES_PER_HPAGE(iter->level));
+
+	/*
+	 * No other thread can overwrite the removed SPTE as they
+	 * must either wait on the MMU lock or use
+	 * tdp_mmu_set_spte_atomic which will not overwrite the
+	 * special removed SPTE value. No bookkeeping is needed
+	 * here since the SPTE is going from non-present
+	 * to non-present.
+	 */
+	WRITE_ONCE(*iter->sptep, 0);
+
+	return true;
+}
+
 
 /*
  * __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
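
The new helper is the whole zap protocol in three steps: freeze the entry to the removed marker, flush remote TLBs while it is frozen, then clear it. A minimal sketch of that ordering, again as a userspace model rather than the kernel code (flush_remote_tlbs_stub is a hypothetical placeholder for kvm_flush_remote_tlbs_with_address, and REMOVED and zap_slot are illustrative names):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define REMOVED ((uint64_t)-1)	/* illustrative stand-in for REMOVED_SPTE */

static void flush_remote_tlbs_stub(void)
{
	/* Placeholder for the real kvm_flush_remote_tlbs_with_address() call. */
}

/* Rough model of the freeze -> flush -> clear sequence in tdp_mmu_zap_spte_atomic(). */
static bool zap_slot(_Atomic uint64_t *slot, uint64_t old_val)
{
	/* Step 1: freeze. Losing the race means another thread changed the slot. */
	if (old_val == REMOVED ||
	    !atomic_compare_exchange_strong(slot, &old_val, REMOVED))
		return false;

	/*
	 * Step 2: flush while the slot is frozen, so no new present entry can
	 * be installed before stale translations are gone.
	 */
	flush_remote_tlbs_stub();

	/* Step 3: clear. Only the thread that froze the slot may overwrite it. */
	atomic_store(slot, 0);
	return true;
}

int main(void)
{
	_Atomic uint64_t slot = 42;

	return zap_slot(&slot, 42) ? 0 : 1;	/* freezes, "flushes", then clears */
}
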
@@ -523,6 +562,15 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 
 	lockdep_assert_held_write(&kvm->mmu_lock);
 
+	/*
+	 * No thread should be using this function to set SPTEs to the
+	 * temporary removed SPTE value.
+	 * If operating under the MMU lock in read mode, tdp_mmu_set_spte_atomic
+	 * should be used. If operating under the MMU lock in write mode, the
+	 * use of the removed SPTE should not be necessary.
+	 */
+	WARN_ON(iter->old_spte == REMOVED_SPTE);
+
 	WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte);
 
 	__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
@@ -790,12 +838,9 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 		 */
 		if (is_shadow_present_pte(iter.old_spte) &&
 		    is_large_pte(iter.old_spte)) {
-			if (!tdp_mmu_set_spte_atomic(vcpu->kvm, &iter, 0))
+			if (!tdp_mmu_zap_spte_atomic(vcpu->kvm, &iter))
 				break;
 
-			kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn,
-					KVM_PAGES_PER_HPAGE(iter.level));
-
 			/*
 			 * The iter must explicitly re-read the spte here
 			 * because the new value informs the !present