Commit 11bb6c5

kiryl authored and sfrothwell committed

mm: prepare page_referenced() and page_idle to new THP refcounting

Both page_referenced() and page_idle_clear_pte_refs_one() assume that THP
can only be mapped with PMD, so there's no reason to look at PTEs for
PageTransHuge() pages. That is no longer true: THP can be mapped with PTEs
too. The patch removes the PageTransHuge() test from both functions and
open-codes the page table check instead.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
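For orientation, here is a condensed sketch of the shared shape of the new
open-coded check. It is illustrative only and not part of the patch: the real
hunks below additionally retry at the PTE level when a huge PMD is split under
the lock (the map_pte path) and snapshot the PMD with barrier() in the
non-huge branch; the function name page_young_walk is made up for this sketch.

	/*
	 * Sketch only, not from the patch: walk to the PMD locklessly,
	 * handle a huge PMD under the PMD lock, otherwise fall through
	 * to the PTE level.
	 */
	static bool page_young_walk(struct page *page,
				    struct vm_area_struct *vma,
				    unsigned long addr)
	{
		struct mm_struct *mm = vma->vm_mm;
		spinlock_t *ptl;
		pgd_t *pgd;
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;
		bool referenced = false;

		/* Walk to the PMD entry covering @addr without taking locks. */
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd))
			return false;
		pud = pud_offset(pgd, addr);
		if (!pud_present(*pud))
			return false;
		pmd = pmd_offset(pud, addr);

		if (pmd_trans_huge(*pmd)) {
			/* PMD-mapped THP: recheck and clear young under the PMD lock. */
			ptl = pmd_lock(mm, pmd);
			if (pmd_present(*pmd) && pmd_trans_huge(*pmd) &&
			    pmd_page(*pmd) == page)
				referenced = pmdp_clear_young_notify(vma, addr, pmd);
			spin_unlock(ptl);
			return referenced;
		}

		/* Not (or no longer) PMD-mapped: the THP may still be
		 * mapped with PTEs, so check the PTE level instead. */
		if (!pmd_present(*pmd))
			return false;
		pte = pte_offset_map(pmd, addr);
		if (!pte_present(*pte)) {
			pte_unmap(pte);
			return false;
		}
		ptl = pte_lockptr(mm, pmd);
		spin_lock(ptl);
		/* A THP can be referenced through any of its subpages. */
		if (pte_present(*pte) &&
		    pte_pfn(*pte) - page_to_pfn(page) < hpage_nr_pages(page))
			referenced = ptep_clear_young_notify(vma, addr, pte);
		pte_unmap_unlock(pte, ptl);
		return referenced;
	}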
1 parent 8beb2d5 commit 11bb6c5

File tree

5 files changed: +156 -104 lines changed

include/linux/huge_mm.h

Lines changed: 0 additions & 5 deletions
@@ -48,11 +48,6 @@ enum transparent_hugepage_flag {
 #endif
 };
 
-extern pmd_t *page_check_address_pmd(struct page *page,
-				     struct mm_struct *mm,
-				     unsigned long address,
-				     spinlock_t **ptl);
-
 #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)

include/linux/mm.h

Lines changed: 19 additions & 0 deletions
@@ -448,6 +448,25 @@ static inline int page_mapcount(struct page *page)
 	return ret;
 }
 
+static inline int total_mapcount(struct page *page)
+{
+	int i, ret;
+
+	VM_BUG_ON_PAGE(PageTail(page), page);
+
+	if (likely(!PageCompound(page)))
+		return atomic_read(&page->_mapcount) + 1;
+
+	ret = compound_mapcount(page);
+	if (PageHuge(page))
+		return ret;
+	for (i = 0; i < HPAGE_PMD_NR; i++)
+		ret += atomic_read(&page[i]._mapcount) + 1;
+	if (PageDoubleMap(page))
+		ret -= HPAGE_PMD_NR;
+	return ret;
+}
+
 static inline int page_count(struct page *page)
 {
 	return atomic_read(&compound_head(page)->_count);
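The DoubleMap correction in total_mapcount() is easiest to sanity-check with
numbers. A worked example, as commentary only (not part of the patch; it
assumes HPAGE_PMD_NR == 512, and that once PageDoubleMap is set every
subpage's _mapcount also accounts the PMD mapping, which is what the final
subtraction compensates for):

	/*
	 * Worked example: a THP mapped once via PMD, with one subpage
	 * additionally mapped via PTE, so PageDoubleMap is set.
	 *
	 *   ret = compound_mapcount(page);     // ret = 1 (the PMD mapping)
	 *   for (i = 0; i < 512; i++)
	 *       ret += _mapcount[i] + 1;       // ret = 1 + (512 + 1): every
	 *                                      // subpage carries the PMD
	 *                                      // mapping, one also a PTE
	 *   ret -= HPAGE_PMD_NR;               // ret = 514 - 512 = 2
	 *
	 * Total: two mappings, one PMD plus one PTE, as expected.
	 */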

mm/huge_memory.c

Lines changed: 0 additions & 54 deletions
@@ -1679,46 +1679,6 @@ bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 	return false;
 }
 
-/*
- * This function returns whether a given @page is mapped onto the @address
- * in the virtual space of @mm.
- *
- * When it's true, this function returns *pmd with holding the page table lock
- * and passing it back to the caller via @ptl.
- * If it's false, returns NULL without holding the page table lock.
- */
-pmd_t *page_check_address_pmd(struct page *page,
-			      struct mm_struct *mm,
-			      unsigned long address,
-			      spinlock_t **ptl)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	if (address & ~HPAGE_PMD_MASK)
-		return NULL;
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		return NULL;
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		return NULL;
-	pmd = pmd_offset(pud, address);
-
-	*ptl = pmd_lock(mm, pmd);
-	if (!pmd_present(*pmd))
-		goto unlock;
-	if (pmd_page(*pmd) != page)
-		goto unlock;
-	if (pmd_trans_huge(*pmd))
-		return pmd;
-unlock:
-	spin_unlock(*ptl);
-	return NULL;
-}
-
 #define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
 
 int hugepage_madvise(struct vm_area_struct *vma,

@@ -3156,20 +3116,6 @@ static void unfreeze_page(struct anon_vma *anon_vma, struct page *page)
 	}
 }
 
-static int total_mapcount(struct page *page)
-{
-	int i, ret;
-
-	ret = compound_mapcount(page);
-	for (i = 0; i < HPAGE_PMD_NR; i++)
-		ret += atomic_read(&page[i]._mapcount) + 1;
-
-	if (PageDoubleMap(page))
-		ret -= HPAGE_PMD_NR;
-
-	return ret;
-}
-
 static int __split_huge_page_tail(struct page *head, int tail,
 		struct lruvec *lruvec, struct list_head *list)
 {

mm/page_idle.c

Lines changed: 55 additions & 9 deletions
@@ -56,23 +56,69 @@ static int page_idle_clear_pte_refs_one(struct page *page,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
+	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	bool referenced = false;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd = page_check_address_pmd(page, mm, addr, &ptl);
-		if (pmd) {
-			referenced = pmdp_clear_young_notify(vma, addr, pmd);
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, addr);
+
+	if (pmd_trans_huge(*pmd)) {
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
 			spin_unlock(ptl);
+			goto map_pte;
 		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		referenced = pmdp_clear_young_notify(vma, addr, pmd);
+		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return SWAP_AGAIN;
 	} else {
-		pte = page_check_address(page, mm, addr, &ptl, 0);
-		if (pte) {
-			referenced = ptep_clear_young_notify(vma, addr, pte);
-			pte_unmap_unlock(pte, ptl);
-		}
+		pmd_t pmde = *pmd;
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
+			return SWAP_AGAIN;
+
+	}
+map_pte:
+	pte = pte_offset_map(pmd, addr);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
 	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+
+	if (!pte_present(*pte)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	referenced = ptep_clear_young_notify(vma, addr, pte);
+	pte_unmap_unlock(pte, ptl);
+found:
 	if (referenced) {
 		clear_page_idle(page);
 		/*
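One idiom above is worth a note: in the non-huge branch the PMD is read into a
local exactly once, and barrier() keeps the compiler from reloading *pmd
between the checks, since the THP could be split (or the mapping torn down)
concurrently. The same four lines, with comments added here (the comments are
not in the patch):

	pmd_t pmde = *pmd;	/* snapshot the PMD entry once */
	barrier();		/* no compiler re-reads of *pmd after this */
	if (!pmd_present(pmde) || pmd_trans_huge(pmde))
		return SWAP_AGAIN;	/* gone, or (re)became huge meanwhile */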

mm/rmap.c

Lines changed: 82 additions & 36 deletions
@@ -814,58 +814,104 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int referenced = 0;
 	struct page_referenced_arg *pra = arg;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd_t *pmd;
-
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address_pmd().
-		 */
-		pmd = page_check_address_pmd(page, mm, address, &ptl);
-		if (!pmd)
+	if (unlikely(PageHuge(page))) {
+		/* when pud is not present, pte will be NULL */
+		pte = huge_pte_offset(mm, address);
+		if (!pte)
 			return SWAP_AGAIN;
 
-		if (vma->vm_flags & VM_LOCKED) {
+		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+		goto check_pte;
+	}
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, address);
+
+	if (pmd_trans_huge(*pmd)) {
+		int ret = SWAP_AGAIN;
+
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
 			spin_unlock(ptl);
+			goto map_pte;
+		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		if (vma->vm_flags & VM_LOCKED) {
 			pra->vm_flags |= VM_LOCKED;
-			return SWAP_FAIL; /* To break the loop */
+			ret = SWAP_FAIL; /* To break the loop */
+			goto unlock_pmd;
 		}
 
 		if (pmdp_clear_flush_young_notify(vma, address, pmd))
 			referenced++;
 		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return ret;
 	} else {
-		pte_t *pte;
-
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address().
-		 */
-		pte = page_check_address(page, mm, address, &ptl, 0);
-		if (!pte)
+		pmd_t pmde = *pmd;
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
 			return SWAP_AGAIN;
+	}
+map_pte:
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
+	}
 
-		if (vma->vm_flags & VM_LOCKED) {
-			pte_unmap_unlock(pte, ptl);
-			pra->vm_flags |= VM_LOCKED;
-			return SWAP_FAIL; /* To break the loop */
-		}
+	ptl = pte_lockptr(mm, pmd);
+check_pte:
+	spin_lock(ptl);
 
-		if (ptep_clear_flush_young_notify(vma, address, pte)) {
-			/*
-			 * Don't treat a reference through a sequentially read
-			 * mapping as such. If the page has been used in
-			 * another mapping, we will catch it; if this other
-			 * mapping is already gone, the unmap path will have
-			 * set PG_referenced or activated the page.
-			 */
-			if (likely(!(vma->vm_flags & VM_SEQ_READ)))
-				referenced++;
-		}
+	if (!pte_present(*pte)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	if (vma->vm_flags & VM_LOCKED) {
 		pte_unmap_unlock(pte, ptl);
+		pra->vm_flags |= VM_LOCKED;
+		return SWAP_FAIL; /* To break the loop */
 	}
 
+	if (ptep_clear_flush_young_notify(vma, address, pte)) {
+		/*
+		 * Don't treat a reference through a sequentially read
+		 * mapping as such. If the page has been used in
+		 * another mapping, we will catch it; if this other
+		 * mapping is already gone, the unmap path will have
+		 * set PG_referenced or activated the page.
+		 */
+		if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+			referenced++;
+	}
+	pte_unmap_unlock(pte, ptl);
+
+found:
 	if (referenced)
 		clear_page_idle(page);
 	if (test_and_clear_page_young(page))

@@ -912,7 +958,7 @@ int page_referenced(struct page *page,
 	int ret;
 	int we_locked = 0;
 	struct page_referenced_arg pra = {
-		.mapcount = page_mapcount(page),
+		.mapcount = total_mapcount(page),
 		.memcg = memcg,
 	};
 	struct rmap_walk_control rwc = {
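The switch from page_mapcount() to total_mapcount() in the last hunk matters
because the rmap walk treats pra.mapcount as a countdown and stops once every
expected mapping has been visited; a THP can now be mapped by PTEs in addition
to a PMD, so the initial count has to cover both. For reference, the countdown
as it appears in page_referenced_one() of this vintage (unchanged by the
patch):

	pra->mapcount--;
	if (!pra->mapcount)
		return SWAP_SUCCESS; /* To break the loop */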
