Skip to content

Commit 0959e16

Browse files
committed
s390/mm: Add huge page dirty sync support
To do dirty logging with huge pages, we protect huge pmds in the gmap. When they are written to, we unprotect them and mark them dirty. We introduce the function gmap_test_and_clear_dirty_pmd which handles dirty sync for huge pages. Signed-off-by: Janosch Frank <frankja@linux.ibm.com> Acked-by: David Hildenbrand <david@redhat.com>
1 parent 6a37627 commit 0959e16

File tree

5 files changed

+148
-44
lines changed

5 files changed

+148
-44
lines changed

arch/s390/include/asm/gmap.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
/* Status bits only for huge segment entries */
1717
#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */
18+
#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */
1819

1920
/**
2021
* struct gmap_struct - guest address space
@@ -139,4 +140,6 @@ void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *,
139140
int gmap_mprotect_notify(struct gmap *, unsigned long start,
140141
unsigned long len, int prot);
141142

143+
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
144+
unsigned long gaddr, unsigned long vmaddr);
142145
#endif /* _ASM_S390_GMAP_H */

arch/s390/include/asm/pgtable.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1103,7 +1103,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
11031103
pte_t *sptep, pte_t *tptep, pte_t pte);
11041104
void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);
11051105

1106-
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
1106+
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address,
1107+
pte_t *ptep);
11071108
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
11081109
unsigned char key, bool nq);
11091110
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,

arch/s390/kvm/kvm-s390.c

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -511,19 +511,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
511511
}
512512

513513
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
514-
struct kvm_memory_slot *memslot)
514+
struct kvm_memory_slot *memslot)
515515
{
516+
int i;
516517
gfn_t cur_gfn, last_gfn;
517-
unsigned long address;
518+
unsigned long gaddr, vmaddr;
518519
struct gmap *gmap = kvm->arch.gmap;
520+
DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
519521

520-
/* Loop over all guest pages */
522+
/* Loop over all guest segments */
523+
cur_gfn = memslot->base_gfn;
521524
last_gfn = memslot->base_gfn + memslot->npages;
522-
for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
523-
address = gfn_to_hva_memslot(memslot, cur_gfn);
525+
for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
526+
gaddr = gfn_to_gpa(cur_gfn);
527+
vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
528+
if (kvm_is_error_hva(vmaddr))
529+
continue;
530+
531+
bitmap_zero(bitmap, _PAGE_ENTRIES);
532+
gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
533+
for (i = 0; i < _PAGE_ENTRIES; i++) {
534+
if (test_bit(i, bitmap))
535+
mark_page_dirty(kvm, cur_gfn + i);
536+
}
524537

525-
if (test_and_clear_guest_dirty(gmap->mm, address))
526-
mark_page_dirty(kvm, cur_gfn);
527538
if (fatal_signal_pending(current))
528539
return;
529540
cond_resched();

arch/s390/mm/gmap.c

Lines changed: 122 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,9 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table,
521521
rcu_read_unlock();
522522
}
523523

524+
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
525+
unsigned long gaddr);
526+
524527
/**
525528
* gmap_link - set up shadow page tables to connect a host to a guest address
526529
* @gmap: pointer to guest mapping meta data structure
@@ -541,6 +544,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
541544
p4d_t *p4d;
542545
pud_t *pud;
543546
pmd_t *pmd;
547+
u64 unprot;
544548
int rc;
545549

546550
BUG_ON(gmap_is_shadow(gmap));
@@ -598,12 +602,19 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
598602
vmaddr >> PMD_SHIFT, table);
599603
if (!rc) {
600604
if (pmd_large(*pmd)) {
601-
*table = pmd_val(*pmd) &
602-
_SEGMENT_ENTRY_HARDWARE_BITS_LARGE;
605+
*table = (pmd_val(*pmd) &
606+
_SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
607+
| _SEGMENT_ENTRY_GMAP_UC;
603608
} else
604609
*table = pmd_val(*pmd) &
605610
_SEGMENT_ENTRY_HARDWARE_BITS;
606611
}
612+
} else if (*table & _SEGMENT_ENTRY_PROTECT &&
613+
!(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
614+
unprot = (u64)*table;
615+
unprot &= ~_SEGMENT_ENTRY_PROTECT;
616+
unprot |= _SEGMENT_ENTRY_GMAP_UC;
617+
gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
607618
}
608619
spin_unlock(&gmap->guest_table_lock);
609620
spin_unlock(ptl);
@@ -930,11 +941,23 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
930941
{
931942
int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
932943
int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
944+
pmd_t new = *pmdp;
933945

934946
/* Fixup needed */
935947
if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
936948
return -EAGAIN;
937949

950+
if (prot == PROT_NONE && !pmd_i) {
951+
pmd_val(new) |= _SEGMENT_ENTRY_INVALID;
952+
gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
953+
}
954+
955+
if (prot == PROT_READ && !pmd_p) {
956+
pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID;
957+
pmd_val(new) |= _SEGMENT_ENTRY_PROTECT;
958+
gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
959+
}
960+
938961
if (bits & GMAP_NOTIFY_MPROT)
939962
pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN;
940963

@@ -2228,6 +2251,32 @@ static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
22282251
gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
22292252
}
22302253

2254+
/**
2255+
* gmap_pmdp_xchg - exchange a gmap pmd with another
2256+
* @gmap: pointer to the guest address space structure
2257+
* @pmdp: pointer to the pmd entry
2258+
* @new: replacement entry
2259+
* @gaddr: the affected guest address
2260+
*
2261+
* This function is assumed to be called with the guest_table_lock
2262+
* held.
2263+
*/
2264+
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
2265+
unsigned long gaddr)
2266+
{
2267+
gaddr &= HPAGE_MASK;
2268+
pmdp_notify_gmap(gmap, pmdp, gaddr);
2269+
pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN;
2270+
if (MACHINE_HAS_TLB_GUEST)
2271+
__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
2272+
IDTE_GLOBAL);
2273+
else if (MACHINE_HAS_IDTE)
2274+
__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
2275+
else
2276+
__pmdp_csp(pmdp);
2277+
*pmdp = new;
2278+
}
2279+
22312280
static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
22322281
int purge)
22332282
{
@@ -2243,7 +2292,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
22432292
if (pmdp) {
22442293
gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
22452294
pmdp_notify_gmap(gmap, pmdp, gaddr);
2246-
WARN_ON(pmd_val(*pmdp) & ~_SEGMENT_ENTRY_HARDWARE_BITS_LARGE);
2295+
WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
2296+
_SEGMENT_ENTRY_GMAP_UC));
22472297
if (purge)
22482298
__pmdp_csp(pmdp);
22492299
pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
@@ -2296,7 +2346,8 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
22962346
pmdp = (pmd_t *)entry;
22972347
gaddr = __gmap_segment_gaddr(entry);
22982348
pmdp_notify_gmap(gmap, pmdp, gaddr);
2299-
WARN_ON(*entry & ~_SEGMENT_ENTRY_HARDWARE_BITS_LARGE);
2349+
WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
2350+
_SEGMENT_ENTRY_GMAP_UC));
23002351
if (MACHINE_HAS_TLB_GUEST)
23012352
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
23022353
gmap->asce, IDTE_LOCAL);
@@ -2330,7 +2381,8 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
23302381
pmdp = (pmd_t *)entry;
23312382
gaddr = __gmap_segment_gaddr(entry);
23322383
pmdp_notify_gmap(gmap, pmdp, gaddr);
2333-
WARN_ON(*entry & ~_SEGMENT_ENTRY_HARDWARE_BITS_LARGE);
2384+
WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
2385+
_SEGMENT_ENTRY_GMAP_UC));
23342386
if (MACHINE_HAS_TLB_GUEST)
23352387
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
23362388
gmap->asce, IDTE_GLOBAL);
@@ -2346,6 +2398,71 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
23462398
}
23472399
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
23482400

2401+
/**
2402+
* gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
2403+
* @gmap: pointer to guest address space
2404+
* @pmdp: pointer to the pmd to be tested
2405+
* @gaddr: virtual address in the guest address space
2406+
*
2407+
* This function is assumed to be called with the guest_table_lock
2408+
* held.
2409+
*/
2410+
bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
2411+
unsigned long gaddr)
2412+
{
2413+
if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
2414+
return false;
2415+
2416+
/* Already protected memory, which did not change is clean */
2417+
if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
2418+
!(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
2419+
return false;
2420+
2421+
/* Clear UC indication and reset protection */
2422+
pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC;
2423+
gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
2424+
return true;
2425+
}
2426+
2427+
/**
2428+
* gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
2429+
* @gmap: pointer to guest address space
2430+
* @bitmap: dirty bitmap for this pmd
2431+
* @gaddr: virtual address in the guest address space
2432+
* @vmaddr: virtual address in the host address space
2433+
*
2434+
* This function is assumed to be called with the guest_table_lock
2435+
* held.
2436+
*/
2437+
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
2438+
unsigned long gaddr, unsigned long vmaddr)
2439+
{
2440+
int i;
2441+
pmd_t *pmdp;
2442+
pte_t *ptep;
2443+
spinlock_t *ptl;
2444+
2445+
pmdp = gmap_pmd_op_walk(gmap, gaddr);
2446+
if (!pmdp)
2447+
return;
2448+
2449+
if (pmd_large(*pmdp)) {
2450+
if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
2451+
bitmap_fill(bitmap, _PAGE_ENTRIES);
2452+
} else {
2453+
for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
2454+
ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
2455+
if (!ptep)
2456+
continue;
2457+
if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
2458+
set_bit(i, bitmap);
2459+
spin_unlock(ptl);
2460+
}
2461+
}
2462+
gmap_pmd_op_end(gmap, pmdp);
2463+
}
2464+
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
2465+
23492466
static inline void thp_split_mm(struct mm_struct *mm)
23502467
{
23512468
#ifdef CONFIG_TRANSPARENT_HUGEPAGE

arch/s390/mm/pgtable.c

Lines changed: 3 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -704,40 +704,14 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
704704
/*
705705
* Test and reset if a guest page is dirty
706706
*/
707-
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
707+
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
708+
pte_t *ptep)
708709
{
709-
spinlock_t *ptl;
710-
pgd_t *pgd;
711-
p4d_t *p4d;
712-
pud_t *pud;
713-
pmd_t *pmd;
714710
pgste_t pgste;
715-
pte_t *ptep;
716711
pte_t pte;
717712
bool dirty;
718713
int nodat;
719714

720-
pgd = pgd_offset(mm, addr);
721-
p4d = p4d_alloc(mm, pgd, addr);
722-
if (!p4d)
723-
return false;
724-
pud = pud_alloc(mm, p4d, addr);
725-
if (!pud)
726-
return false;
727-
pmd = pmd_alloc(mm, pud, addr);
728-
if (!pmd)
729-
return false;
730-
/* We can't run guests backed by huge pages, but userspace can
731-
* still set them up and then try to migrate them without any
732-
* migration support.
733-
*/
734-
if (pmd_large(*pmd))
735-
return true;
736-
737-
ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
738-
if (unlikely(!ptep))
739-
return false;
740-
741715
pgste = pgste_get_lock(ptep);
742716
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
743717
pgste_val(pgste) &= ~PGSTE_UC_BIT;
@@ -753,11 +727,9 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
753727
*ptep = pte;
754728
}
755729
pgste_set_unlock(ptep, pgste);
756-
757-
spin_unlock(ptl);
758730
return dirty;
759731
}
760-
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
732+
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
761733

762734
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
763735
unsigned char key, bool nq)

0 commit comments

Comments
 (0)