
Commit c20f6d0

mjkravetz authored and gregkh committed
hugetlb: use same fault hash key for shared and private mappings
commit 1b426bac66e6cc83c9f2d92b96e4e72acf43419a upstream.

hugetlb uses a fault mutex hash table to prevent page faults of the
same pages concurrently.  The key for shared and private mappings is
different.  Shared mappings key off the address_space and file index.
Private mappings key off the mm and virtual address.  Consider a
private mapping of a populated hugetlbfs file.  A fault will map the
page from the file and if needed do a COW to map a writable page.

Hugetlbfs hole punch uses the fault mutex to prevent mappings of file
pages.  It uses the address_space file index key.  However, private
mappings will use a different key and could race with this code to map
the file page.  This causes problems (BUG) for the page cache remove
code as it expects the page to be unmapped.  A sample stack is:

  page dumped because: VM_BUG_ON_PAGE(page_mapped(page))
  kernel BUG at mm/filemap.c:169!
  ...
  RIP: 0010:unaccount_page_cache_page+0x1b8/0x200
  ...
  Call Trace:
  __delete_from_page_cache+0x39/0x220
  delete_from_page_cache+0x45/0x70
  remove_inode_hugepages+0x13c/0x380
  ? __add_to_page_cache_locked+0x162/0x380
  hugetlbfs_fallocate+0x403/0x540
  ? _cond_resched+0x15/0x30
  ? __inode_security_revalidate+0x5d/0x70
  ? selinux_file_permission+0x100/0x130
  vfs_fallocate+0x13f/0x270
  ksys_fallocate+0x3c/0x80
  __x64_sys_fallocate+0x1a/0x20
  do_syscall_64+0x5b/0x180
  entry_SYSCALL_64_after_hwframe+0x44/0xa9

There seems to be another potential COW issue/race with this approach
of different private and shared keys as noted in commit 8382d91 ("mm,
hugetlb: improve page-fault scalability").

Since every hugetlb mapping (even anon and private) is actually a file
mapping, just use the address_space index key for all mappings.  This
results in potentially more hash collisions.  However, this should not
be the common case.

Link: http://lkml.kernel.org/r/20190328234704.27083-3-mike.kravetz@oracle.com
Link: http://lkml.kernel.org/r/20190412165235.t4sscoujczfhuiyt@linux-r8p5
Fixes: b5cec28 ("hugetlbfs: truncate_hugepages() takes a range of pages")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent d989d2a commit c20f6d0
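To make the keying problem concrete: under the old scheme, a hole punch
on a file page and a private fault on the same page derived their mutex
indexes from different keys, so they could proceed under different
mutexes.  The userspace sketch below contrasts the two schemes; it is an
illustration only, where toy_hash is a hypothetical stand-in for the
kernel's jhash2() and NUM_FAULT_MUTEXES is an arbitrary table size.

#include <stdint.h>
#include <stdio.h>

#define NUM_FAULT_MUTEXES 64	/* illustrative table size */

/* toy_hash stands in for the kernel's jhash2(); any mixing function works */
static uint32_t toy_hash(const unsigned long key[2])
{
	uint64_t x = (uint64_t)key[0] * 0x9e3779b97f4a7c15ULL ^ key[1];
	return (uint32_t)(x >> 32);
}

/* Old scheme: shared mappings key off (mapping, index), private off (mm, addr) */
static uint32_t old_hash(int shared, void *mm, void *mapping,
			 unsigned long idx, unsigned long addr_hpage)
{
	unsigned long key[2];

	if (shared) {
		key[0] = (unsigned long)mapping;
		key[1] = idx;
	} else {
		key[0] = (unsigned long)mm;
		key[1] = addr_hpage;
	}
	return toy_hash(key) % NUM_FAULT_MUTEXES;
}

/* New scheme: every hugetlb mapping is a file mapping, so always (mapping, index) */
static uint32_t new_hash(void *mapping, unsigned long idx)
{
	unsigned long key[2] = { (unsigned long)mapping, idx };

	return toy_hash(key) % NUM_FAULT_MUTEXES;
}

int main(void)
{
	int mm, file_mapping;	/* addresses serve as stand-in identities */
	unsigned long idx = 3;	/* same huge page of the same file */

	/* Hole punch always used the (mapping, index) key ... */
	uint32_t punch = old_hash(1, NULL, &file_mapping, idx, 0);
	/* ... but a private fault on that page used (mm, address), usually
	 * selecting a different mutex, so the two paths did not exclude
	 * each other. */
	uint32_t fault = old_hash(0, &mm, &file_mapping, idx, 42);

	printf("old: punch=%u fault=%u (may differ -> race)\n", punch, fault);
	printf("new: punch=%u fault=%u (always equal)\n",
	       new_hash(&file_mapping, idx), new_hash(&file_mapping, idx));
	return 0;
}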

File tree: 4 files changed, +11 −27 lines


fs/hugetlbfs/inode.c

+2 −6

@@ -436,9 +436,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 			u32 hash;
 
 			index = page->index;
-			hash = hugetlb_fault_mutex_hash(h, current->mm,
-							&pseudo_vma,
-							mapping, index, 0);
+			hash = hugetlb_fault_mutex_hash(h, mapping, index, 0);
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 			/*
@@ -557,7 +555,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 	struct address_space *mapping = inode->i_mapping;
 	struct hstate *h = hstate_inode(inode);
 	struct vm_area_struct pseudo_vma;
-	struct mm_struct *mm = current->mm;
 	loff_t hpage_size = huge_page_size(h);
 	unsigned long hpage_shift = huge_page_shift(h);
 	pgoff_t start, index, end;
@@ -621,8 +618,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		addr = index * hpage_size;
 
 		/* mutex taken here, fault path and hole punch */
-		hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
-						index, addr);
+		hash = hugetlb_fault_mutex_hash(h, mapping, index, addr);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 		/* See if already present in mapping to avoid alloc/free */

include/linux/hugetlb.h

+1 −3

@@ -122,9 +122,7 @@ void putback_active_hugepage(struct page *page);
 void free_huge_page(struct page *page);
 void hugetlb_fix_reserve_counts(struct inode *inode);
 extern struct mutex *hugetlb_fault_mutex_table;
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
-				struct vm_area_struct *vma,
-				struct address_space *mapping,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
 				pgoff_t idx, unsigned long address);
 
 pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);

mm/hugetlb.c

+7 −16

@@ -3729,8 +3729,8 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 * handling userfault. Reacquire after handling
 			 * fault to make calling code simpler.
 			 */
-			hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping,
-							idx, address);
+			hash = hugetlb_fault_mutex_hash(h, mapping, idx,
+							address);
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 			ret = handle_userfault(&vmf, VM_UFFD_MISSING);
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -3842,21 +3842,14 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 #ifdef CONFIG_SMP
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
-			    struct vm_area_struct *vma,
-			    struct address_space *mapping,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
 			    pgoff_t idx, unsigned long address)
 {
 	unsigned long key[2];
 	u32 hash;
 
-	if (vma->vm_flags & VM_SHARED) {
-		key[0] = (unsigned long) mapping;
-		key[1] = idx;
-	} else {
-		key[0] = (unsigned long) mm;
-		key[1] = address >> huge_page_shift(h);
-	}
+	key[0] = (unsigned long) mapping;
+	key[1] = idx;
 
 	hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0);
 
@@ -3867,9 +3860,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
  * For uniprocesor systems we always use a single mutex, so just
  * return 0 and avoid the hashing overhead.
  */
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
-			    struct vm_area_struct *vma,
-			    struct address_space *mapping,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
 			    pgoff_t idx, unsigned long address)
 {
 	return 0;
@@ -3915,7 +3906,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * get spurious allocation failures if two CPUs race to instantiate
 	 * the same page in the page cache.
 	 */
-	hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, address);
+	hash = hugetlb_fault_mutex_hash(h, mapping, idx, address);
 	mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 	entry = huge_ptep_get(ptep);
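With the new signature, every call site in this commit follows the same
convention.  A condensed sketch of that pattern, assembled from the hunks
above (not literal kernel code; the identifiers are the real ones from the
diff, but setup and error handling are omitted):

	u32 hash;

	/* All paths touching the same file page compute the same key ... */
	hash = hugetlb_fault_mutex_hash(h, mapping, idx, address);
	mutex_lock(&hugetlb_fault_mutex_table[hash]);

	/* ... so fault-in, hole punch, and userfaultfd copy now serialize
	 * against each other for that (mapping, idx) pair. */

	mutex_unlock(&hugetlb_fault_mutex_table[hash]);

Note that address still appears in the signature but, as the CONFIG_SMP
hunk shows, it no longer influences the hash; only the (mapping, idx)
pair does.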

mm/userfaultfd.c

+1 −2

@@ -272,8 +272,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 		 */
 		idx = linear_page_index(dst_vma, dst_addr);
 		mapping = dst_vma->vm_file->f_mapping;
-		hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping,
-						idx, dst_addr);
+		hash = hugetlb_fault_mutex_hash(h, mapping, idx, dst_addr);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 		err = -ENOMEM;
