Skip to content

Commit f95de70

Browse files
Eric B Munson authored and hnaz committed
mm: introduce VM_LOCKONFAULT
The cost of faulting in all memory to be locked can be very high when working with large mappings. If only portions of the mapping will be used this can incur a high penalty for locking. For the example of a large file, this is the usage pattern for a large statical language model (probably applies to other statical or graphical models as well). For the security example, any application transacting in data that cannot be swapped out (credit card data, medical records, etc). This patch introduces the ability to request that pages are not pre-faulted, but are placed on the unevictable LRU when they are finally faulted in. The VM_LOCKONFAULT flag will be used together with VM_LOCKED and has no effect when set without VM_LOCKED. Setting the VM_LOCKONFAULT flag for a VMA will cause pages faulted into that VMA to be added to the unevictable LRU when they are faulted or if they are already present, but will not cause any missing pages to be faulted in. Exposing this new lock state means that we cannot overload the meaning of the FOLL_POPULATE flag any longer. Prior to this patch it was used to mean that the VMA for a fault was locked. This means we need the new FOLL_MLOCK flag to communicate the locked state of a VMA. FOLL_POPULATE will now only control if the VMA should be populated and in the case of VM_LOCKONFAULT, it will not be set. Signed-off-by: Eric B Munson <emunson@akamai.com> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Michal Hocko <mhocko@suse.cz> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Guenter Roeck <linux@roeck-us.net> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Shuah Khan <shuahkh@osg.samsung.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent af296ec commit f95de70

File tree

11 files changed

+28
-11
lines changed

11 files changed

+28
-11
lines changed

drivers/gpu/drm/drm_vm.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -699,9 +699,15 @@ int drm_vma_info(struct seq_file *m, void *data)
699699
(void *)(unsigned long)virt_to_phys(high_memory));
700700

701701
list_for_each_entry(pt, &dev->vmalist, head) {
702+
char lock_flag = '-';
703+
702704
vma = pt->vma;
703705
if (!vma)
704706
continue;
707+
if (vma->vm_flags & VM_LOCKONFAULT)
708+
lock_flag = 'f';
709+
else if (vma->vm_flags & VM_LOCKED)
710+
lock_flag = 'l';
705711
seq_printf(m,
706712
"\n%5d 0x%pK-0x%pK %c%c%c%c%c%c 0x%08lx000",
707713
pt->pid,
@@ -710,7 +716,7 @@ int drm_vma_info(struct seq_file *m, void *data)
710716
vma->vm_flags & VM_WRITE ? 'w' : '-',
711717
vma->vm_flags & VM_EXEC ? 'x' : '-',
712718
vma->vm_flags & VM_MAYSHARE ? 's' : 'p',
713-
vma->vm_flags & VM_LOCKED ? 'l' : '-',
719+
lock_flag,
714720
vma->vm_flags & VM_IO ? 'i' : '-',
715721
vma->vm_pgoff);
716722

fs/proc/task_mmu.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
579579
#ifdef CONFIG_X86_INTEL_MPX
580580
[ilog2(VM_MPX)] = "mp",
581581
#endif
582+
[ilog2(VM_LOCKONFAULT)] = "lf",
582583
[ilog2(VM_LOCKED)] = "lo",
583584
[ilog2(VM_IO)] = "io",
584585
[ilog2(VM_SEQ_READ)] = "sr",

include/linux/mm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ extern unsigned int kobjsize(const void *objp);
129129
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
130130
#define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */
131131

132+
#define VM_LOCKONFAULT 0x00001000 /* Lock the pages covered when they are faulted in */
132133
#define VM_LOCKED 0x00002000
133134
#define VM_IO 0x00004000 /* Memory mapped I/O or similar */
134135

@@ -2045,6 +2046,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
20452046
#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
20462047
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
20472048
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
2049+
#define FOLL_MLOCK 0x1000 /* lock present pages */
20482050

20492051
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
20502052
void *data);

kernel/fork.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
455455
tmp->vm_mm = mm;
456456
if (anon_vma_fork(tmp, mpnt))
457457
goto fail_nomem_anon_vma_fork;
458-
tmp->vm_flags &= ~(VM_LOCKED|VM_UFFD_MISSING|VM_UFFD_WP);
458+
tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT |
459+
VM_UFFD_MISSING | VM_UFFD_WP);
459460
tmp->vm_next = tmp->vm_prev = NULL;
460461
tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
461462
file = tmp->vm_file;

mm/debug.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ static const struct trace_print_flags vmaflags_names[] = {
121121
{VM_GROWSDOWN, "growsdown" },
122122
{VM_PFNMAP, "pfnmap" },
123123
{VM_DENYWRITE, "denywrite" },
124+
{VM_LOCKONFAULT, "lockonfault" },
124125
{VM_LOCKED, "locked" },
125126
{VM_IO, "io" },
126127
{VM_SEQ_READ, "seqread" },

mm/gup.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
9292
*/
9393
mark_page_accessed(page);
9494
}
95-
if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) {
95+
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
9696
/*
9797
* The preliminary mapping check is mainly to avoid the
9898
* pointless overhead of lock_page on the ZERO_PAGE
@@ -265,6 +265,9 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
265265
unsigned int fault_flags = 0;
266266
int ret;
267267

268+
/* mlock all present pages, but do not fault in new pages */
269+
if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
270+
return -ENOENT;
268271
/* For mm_populate(), just skip the stack guard page. */
269272
if ((*flags & FOLL_POPULATE) &&
270273
(stack_guard_page_start(vma, address) ||
@@ -850,7 +853,10 @@ long populate_vma_page_range(struct vm_area_struct *vma,
850853
VM_BUG_ON_VMA(end > vma->vm_end, vma);
851854
VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
852855

853-
gup_flags = FOLL_TOUCH | FOLL_POPULATE;
856+
gup_flags = FOLL_TOUCH | FOLL_MLOCK;
857+
if ((vma->vm_flags & (VM_LOCKED | VM_LOCKONFAULT)) == VM_LOCKED)
858+
gup_flags |= FOLL_POPULATE;
859+
854860
/*
855861
* We want to touch writable mappings with a write fault in order
856862
* to break COW, except for shared mappings because these don't COW

mm/huge_memory.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1265,7 +1265,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
12651265
pmd, _pmd, 1))
12661266
update_mmu_cache_pmd(vma, addr, pmd);
12671267
}
1268-
if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) {
1268+
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
12691269
if (page->mapping && trylock_page(page)) {
12701270
lru_add_drain();
12711271
if (page->mapping)

mm/hugetlb.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3764,8 +3764,8 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
37643764
unsigned long s_end = sbase + PUD_SIZE;
37653765

37663766
/* Allow segments to share if only one is marked locked */
3767-
unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
3768-
unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
3767+
unsigned long vm_flags = vma->vm_flags & ~(VM_LOCKED | VM_LOCKONFAULT);
3768+
unsigned long svm_flags = svma->vm_flags & ~(VM_LOCKED | VM_LOCKONFAULT);
37693769

37703770
/*
37713771
* match the virtual addresses, permission and the alignment of the

mm/mlock.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
422422
void munlock_vma_pages_range(struct vm_area_struct *vma,
423423
unsigned long start, unsigned long end)
424424
{
425-
vma->vm_flags &= ~VM_LOCKED;
425+
vma->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
426426

427427
while (start < end) {
428428
struct page *page = NULL;

mm/mmap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1664,7 +1664,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
16641664
vma == get_gate_vma(current->mm)))
16651665
mm->locked_vm += (len >> PAGE_SHIFT);
16661666
else
1667-
vma->vm_flags &= ~VM_LOCKED;
1667+
vma->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
16681668
}
16691669

16701670
if (file)

0 commit comments

Comments
 (0)