Skip to content

Commit

Permalink
Merge branch 'akpm' (patches from Andrew)
Browse files Browse the repository at this point in the history
Merge fixes from Andrew Morton:
 "11 fixes.

  The presence of 'thp: reduce indentation level in change_huge_pmd()'
  is unfortunate. But the patchset had been decently reviewed and tested
  before we decided it was needed in -stable and I felt it best not to
  churn things at the last minute"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mailmap: add Martin Kepplinger's email
  zsmalloc: expand class bit
  zram: do not use copy_page with non-page aligned address
  zram: fix operator precedence to get offset
  hugetlbfs: fix offset overflow in hugetlbfs mmap
  thp: fix MADV_DONTNEED vs clear soft dirty race
  thp: fix MADV_DONTNEED vs. MADV_FREE race
  mm: drop unused pmdp_huge_get_and_clear_notify()
  thp: fix MADV_DONTNEED vs. numa balancing race
  thp: reduce indentation level in change_huge_pmd()
  z3fold: fix page locking in z3fold_alloc()
  • Loading branch information
torvalds committed Apr 14, 2017
2 parents d8a6e3a + 5714320 commit a232591
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 51 deletions.
2 changes: 2 additions & 0 deletions .mailmap
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ Linas Vepstas <linas@austin.ibm.com>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
Mark Brown <broonie@sirena.org.uk>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
Matthieu CASTET <castet.matthieu@free.fr>
Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@brturbo.com.br>
Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com>
Expand Down
6 changes: 3 additions & 3 deletions drivers/block/zram/zram_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)

cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
if (size == PAGE_SIZE) {
copy_page(mem, cmem);
memcpy(mem, cmem, PAGE_SIZE);
} else {
struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);

Expand Down Expand Up @@ -717,7 +717,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,

if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
src = kmap_atomic(page);
copy_page(cmem, src);
memcpy(cmem, src, PAGE_SIZE);
kunmap_atomic(src);
} else {
memcpy(cmem, src, clen);
Expand Down Expand Up @@ -928,7 +928,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
}

index = sector >> SECTORS_PER_PAGE_SHIFT;
offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

bv.bv_page = page;
bv.bv_len = PAGE_SIZE;
Expand Down
15 changes: 12 additions & 3 deletions fs/hugetlbfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,17 +136,26 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma->vm_ops = &hugetlb_vm_ops;

/*
* Offset passed to mmap (before page shift) could have been
* negative when represented as a (l)off_t.
*/
if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
return -EINVAL;

if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;

vma_len = (loff_t)(vma->vm_end - vma->vm_start);
len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
/* check for overflow */
if (len < vma_len)
return -EINVAL;

inode_lock(inode);
file_accessed(file);

ret = -ENOMEM;
len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);

if (hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
Expand All @@ -155,7 +164,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)

ret = 0;
if (vma->vm_flags & VM_WRITE && inode->i_size < len)
inode->i_size = len;
i_size_write(inode, len);
out:
inode_unlock(inode);

Expand Down
9 changes: 8 additions & 1 deletion fs/proc/task_mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -900,7 +900,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
pmd_t pmd = *pmdp;

/* See comment in change_huge_pmd() */
pmdp_invalidate(vma, addr, pmdp);
if (pmd_dirty(*pmdp))
pmd = pmd_mkdirty(pmd);
if (pmd_young(*pmdp))
pmd = pmd_mkyoung(pmd);

pmd = pmd_wrprotect(pmd);
pmd = pmd_clear_soft_dirty(pmd);
Expand Down
13 changes: 0 additions & 13 deletions include/linux/mmu_notifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -394,18 +394,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
___pud; \
})

/*
 * pmdp_huge_get_and_clear_notify() - get-and-clear a huge pmd, then notify.
 *
 * Calls pmdp_huge_get_and_clear() with the address as passed in, then calls
 * mmu_notifier_invalidate_range() on the HPAGE_PMD_SIZE-sized range starting
 * at that address masked with HPAGE_PMD_MASK. The statement expression
 * evaluates to the pmd value returned by pmdp_huge_get_and_clear().
 *
 * NOTE(review): __haddr is expanded twice and is not parenthesized in the
 * mask expression, so callers should pass a simple, side-effect-free
 * expression.
 */
#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \
({ \
unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
pmd_t ___pmd; \
\
___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \
mmu_notifier_invalidate_range(__mm, ___haddr, \
___haddr + HPAGE_PMD_SIZE); \
\
___pmd; \
})

/*
* set_pte_at_notify() sets the pte _after_ running the notifier.
* This is safe to start by updating the secondary MMUs, because the primary MMU
Expand Down Expand Up @@ -489,7 +477,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */
Expand Down
87 changes: 59 additions & 28 deletions mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -1568,8 +1568,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
deactivate_page(page);

if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
tlb->fullmm);
pmdp_invalidate(vma, addr, pmd);
orig_pmd = pmd_mkold(orig_pmd);
orig_pmd = pmd_mkclean(orig_pmd);

Expand Down Expand Up @@ -1724,37 +1723,69 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
{
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
int ret = 0;
pmd_t entry;
bool preserve_write;
int ret;

ptl = __pmd_trans_huge_lock(pmd, vma);
if (ptl) {
pmd_t entry;
bool preserve_write = prot_numa && pmd_write(*pmd);
ret = 1;
if (!ptl)
return 0;

/*
* Avoid trapping faults against the zero page. The read-only
* data is likely to be read-cached on the local CPU and
* local/remote hits to the zero page are not interesting.
*/
if (prot_numa && is_huge_zero_pmd(*pmd)) {
spin_unlock(ptl);
return ret;
}
preserve_write = prot_numa && pmd_write(*pmd);
ret = 1;

if (!prot_numa || !pmd_protnone(*pmd)) {
entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
entry = pmd_modify(entry, newprot);
if (preserve_write)
entry = pmd_mk_savedwrite(entry);
ret = HPAGE_PMD_NR;
set_pmd_at(mm, addr, pmd, entry);
BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
pmd_write(entry));
}
spin_unlock(ptl);
}
/*
* Avoid trapping faults against the zero page. The read-only
* data is likely to be read-cached on the local CPU and
* local/remote hits to the zero page are not interesting.
*/
if (prot_numa && is_huge_zero_pmd(*pmd))
goto unlock;

if (prot_numa && pmd_protnone(*pmd))
goto unlock;

/*
* In the prot_numa case we only hold mmap_sem for read (down_read).
* It is critical not to clear the pmd, even transiently, to avoid a
* race with MADV_DONTNEED, which also runs under down_read(mmap_sem):
*
* CPU0:                                         CPU1:
*                                               change_huge_pmd(prot_numa=1)
*                                                pmdp_huge_get_and_clear_notify()
* madvise_dontneed()
*  zap_pmd_range()
*   pmd_trans_huge(*pmd) == 0 (without ptl)
*   // skip the pmd
*                                                set_pmd_at();
*                                                // pmd is re-established
*
* The race makes MADV_DONTNEED miss the huge pmd and fail to clear it,
* which may break userspace.
*
* pmdp_invalidate() is required to make sure we don't miss
* dirty/young flags set by hardware.
*/
entry = *pmd;
pmdp_invalidate(vma, addr, pmd);

/*
* Recover dirty/young flags. It relies on pmdp_invalidate to not
* corrupt them.
*/
if (pmd_dirty(*pmd))
entry = pmd_mkdirty(entry);
if (pmd_young(*pmd))
entry = pmd_mkyoung(entry);

entry = pmd_modify(entry, newprot);
if (preserve_write)
entry = pmd_mk_savedwrite(entry);
ret = HPAGE_PMD_NR;
set_pmd_at(mm, addr, pmd, entry);
BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
unlock:
spin_unlock(ptl);
return ret;
}

Expand Down
9 changes: 7 additions & 2 deletions mm/z3fold.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ static inline void z3fold_page_lock(struct z3fold_header *zhdr)
spin_lock(&zhdr->page_lock);
}

/*
 * Try to lock a z3fold page.
 *
 * Thin wrapper that forwards to spin_trylock() on the header's page_lock
 * and returns its result unchanged. The caller in z3fold_alloc() treats a
 * zero return as "lock not taken" and moves on to the next candidate
 * instead of waiting for this page.
 */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
return spin_trylock(&zhdr->page_lock);
}

/* Unlock a z3fold page */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
Expand Down Expand Up @@ -385,7 +391,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
spin_lock(&pool->lock);
zhdr = list_first_entry_or_null(&pool->unbuddied[i],
struct z3fold_header, buddy);
if (!zhdr) {
if (!zhdr || !z3fold_page_trylock(zhdr)) {
spin_unlock(&pool->lock);
continue;
}
Expand All @@ -394,7 +400,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
spin_unlock(&pool->lock);

page = virt_to_page(zhdr);
z3fold_page_lock(zhdr);
if (zhdr->first_chunks == 0) {
if (zhdr->middle_chunks != 0 &&
chunks >= zhdr->start_middle)
Expand Down
2 changes: 1 addition & 1 deletion mm/zsmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ struct zs_pool {
struct zspage {
struct {
unsigned int fullness:FULLNESS_BITS;
unsigned int class:CLASS_BITS;
unsigned int class:CLASS_BITS + 1;
unsigned int isolated:ISOLATED_BITS;
unsigned int magic:MAGIC_VAL_BITS;
};
Expand Down

0 comments on commit a232591

Please sign in to comment.