Skip to content

Commit

Permalink
thp: avoid breaking huge pmd invariants in case of vma_adjust failures
Browse files Browse the repository at this point in the history
A huge pmd can only be mapped if the corresponding 2M virtual range is
fully contained in the vma.  At times the VM calls split_vma twice; if the
first split_vma succeeds and the second fails, the first split_vma remains
in effect and is not rolled back.  For split_vma or vma_adjust to fail, an
allocation failure is needed, so it's a very unlikely event (the out of
memory killer would normally fire before any allocation failure is visible
to kernel and userland, and if an out of memory condition happens it's
unlikely to happen exactly here).  Nevertheless, it's safer to ensure that
no huge pmd can be left around if the vma is adjusted in a way that can't
fit hugepages anymore at the new vm_start/vm_end address.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
aagit authored and torvalds committed Jan 14, 2011
1 parent bc83501 commit 94fcc58
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 2 deletions.
19 changes: 19 additions & 0 deletions include/linux/huge_mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,19 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
#error "hugepages can't be allocated by the buddy allocator"
#endif
extern int hugepage_madvise(unsigned long *vm_flags);
/*
 * Out-of-line half of vma_adjust_trans_huge(); this prototype is only
 * visible in the CONFIG_TRANSPARENT_HUGEPAGE branch of this header.
 *
 * @vma is the vma being adjusted, @start and @end are its new boundaries,
 * and @adjust_next is a page count (the implementation shifts it by
 * PAGE_SHIFT) applied to the start of the following vma when positive.
 */
extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
long adjust_next);
/*
 * Inline filter in front of __vma_adjust_trans_huge().
 *
 * Receives the vma together with its new @start/@end and the @adjust_next
 * value, and forwards them unchanged.  The out-of-line helper is only
 * invoked for a vma that has an anon_vma and has neither vm_ops nor a
 * vm_file; every other vma is left alone.
 */
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
					 unsigned long start,
					 unsigned long end,
					 long adjust_next)
{
	if (vma->anon_vma && !vma->vm_ops && !vma->vm_file)
		__vma_adjust_trans_huge(vma, start, end, adjust_next);
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUG(); 0; })
Expand All @@ -125,6 +138,12 @@ static inline int hugepage_madvise(unsigned long *vm_flags)
BUG();
return 0;
}
/*
 * !CONFIG_TRANSPARENT_HUGEPAGE variant: same signature as the real hook,
 * but with an empty body, so callers need no #ifdef around the call.
 */
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
					 unsigned long start, unsigned long end,
					 long adjust_next)
{
	/* Nothing to do in this configuration. */
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#endif /* _LINUX_HUGE_MM_H */
80 changes: 78 additions & 2 deletions mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -1075,8 +1075,16 @@ pmd_t *page_check_address_pmd(struct page *page,
goto out;
if (pmd_page(*pmd) != page)
goto out;
VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
pmd_trans_splitting(*pmd));
/*
* split_vma() may create temporary aliased mappings. There is
* no risk as long as all huge pmd are found and have their
* splitting bit set before __split_huge_page_refcount
* runs. Finding the same huge pmd more than once during the
* same rmap walk is not a problem.
*/
if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
pmd_trans_splitting(*pmd))
goto out;
if (pmd_trans_huge(*pmd)) {
VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
!pmd_trans_splitting(*pmd));
Expand Down Expand Up @@ -2196,3 +2204,71 @@ void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd)
put_page(page);
BUG_ON(pmd_trans_huge(*pmd));
}

/*
 * Walk the page tables of @mm down to the pmd that maps @address and hand
 * that pmd to split_huge_page_pmd().  If the pgd, pud or pmd entry on the
 * way is not present, the walk stops and nothing is done.
 *
 * @address must NOT be hpage aligned: an aligned address trips the
 * VM_BUG_ON() below.
 */
static void split_huge_page_address(struct mm_struct *mm,
				    unsigned long address)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;

	VM_BUG_ON(!(address & ~HPAGE_PMD_MASK));

	pgdp = pgd_offset(mm, address);
	if (pgd_present(*pgdp)) {
		pudp = pud_offset(pgdp, address);
		if (pud_present(*pudp)) {
			pmdp = pmd_offset(pudp, address);
			/*
			 * The caller holds mmap_sem in write mode, so no
			 * huge pmd can appear here behind our back.
			 */
			if (pmd_present(*pmdp))
				split_huge_page_pmd(mm, pmdp);
		}
	}
}

/*
 * Split the huge pmds that a pending vma boundary change would cut through.
 *
 * @vma:         vma about to be adjusted
 * @start:       new start address of @vma
 * @end:         new end address of @vma
 * @adjust_next: when positive, number of pages added to
 *               vma->vm_next->vm_start as part of the same adjustment
 *
 * The same test is applied to every new boundary: if the address is not
 * hpage aligned and the hpage sized range containing it lies completely
 * inside the current extent of the vma, that range could have been mapped
 * by a huge pmd, so the address is passed to split_huge_page_address().
 */
void __vma_adjust_trans_huge(struct vm_area_struct *vma,
			     unsigned long start,
			     unsigned long end,
			     long adjust_next)
{
	struct vm_area_struct *next;
	unsigned long nstart;

	/* New start address of the vma. */
	if (start & ~HPAGE_PMD_MASK) {
		unsigned long hbase = start & HPAGE_PMD_MASK;

		if (hbase >= vma->vm_start &&
		    hbase + HPAGE_PMD_SIZE <= vma->vm_end)
			split_huge_page_address(vma->vm_mm, start);
	}

	/* New end address of the vma. */
	if (end & ~HPAGE_PMD_MASK) {
		unsigned long hbase = end & HPAGE_PMD_MASK;

		if (hbase >= vma->vm_start &&
		    hbase + HPAGE_PMD_SIZE <= vma->vm_end)
			split_huge_page_address(vma->vm_mm, end);
	}

	/* Only a positive adjust_next moves the next vma's start. */
	if (adjust_next <= 0)
		return;

	/*
	 * New start address of the following vma: it is not hpage aligned
	 * and could previously sit inside a hugepage of that vma, so check
	 * it the same way against the next vma's current extent.
	 */
	next = vma->vm_next;
	nstart = next->vm_start + (adjust_next << PAGE_SHIFT);
	if (nstart & ~HPAGE_PMD_MASK) {
		unsigned long hbase = nstart & HPAGE_PMD_MASK;

		if (hbase >= next->vm_start &&
		    hbase + HPAGE_PMD_SIZE <= next->vm_end)
			split_huge_page_address(next->vm_mm, nstart);
	}
}
2 changes: 2 additions & 0 deletions mm/mmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,8 @@ again: remove_next = 1 + (end > next->vm_end);
}
}

vma_adjust_trans_huge(vma, start, end, adjust_next);

/*
* When changing only vma->vm_end, we don't really need anon_vma
* lock. This is a fairly rare case by itself, but the anon_vma
Expand Down

0 comments on commit 94fcc58

Please sign in to comment.