Skip to content

Commit

Permalink
mm: avoid using vma_merge() for new VMAs
Browse files Browse the repository at this point in the history
Abstract vma_merge_new_vma() to use vma_merge_struct and rename the
resultant function vma_merge_new_range() to be clear what the purpose of
this function is - a new VMA is desired in the specified range, and we
wish to see if it is possible to 'merge' surrounding VMAs into this range
rather than having to allocate a new VMA.

Note that this function uses vma_expand() exclusively, so it adopts its
requirement that the iterator point at or before the gap.  We add an
assert to this effect.

This is as opposed to vma_merge_existing_range(), which will be introduced
in a subsequent commit, and provide the same functionality for cases in
which we are modifying an existing VMA.

In mmap_region() and do_brk_flags() we open code scenarios where we prefer
to use vma_expand() rather than invoke a full vma_merge() operation.

Abstract this logic and eliminate all of the open-coding. Additionally,
apply the same logic to all cases where we add new VMAs: rather than
ultimately invoking vma_merge(), use vma_expand() instead.

Doing so removes duplication and simplifies VMA merging in all such cases,
laying the ground for us to eliminate the merging of new VMAs in
vma_merge() altogether.

Also add the ability for the vmg to track state and to report errors,
allowing us to differentiate a failed merge from an inability to
allocate memory in callers.

This makes it far easier to understand what is happening in these cases,
avoiding confusion and bugs, and allowing for future optimisation.

Also introduce vma_iter_next_rewind() to allow for retrieval of the next,
and (optionally) the prev VMA, rewinding to the start of the previous gap.

Introduce are_anon_vmas_compatible() to abstract individual VMA anon_vma
comparison for the case of merging on both sides, where the anon_vma of
the VMA being merged may be compatible with prev and next, but prev and
next's anon_vmas may not be compatible with each other.

Finally also introduce can_vma_merge_left() / can_vma_merge_right() to
check adjacent VMA compatibility and that they are indeed adjacent.

Link: https://lkml.kernel.org/r/49d37c0769b6b9dc03b27fe4d059173832556392.1725040657.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Tested-by: Mark Brown <broonie@kernel.org>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Bert Karwatzki <spasswolf@web.de>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
  • Loading branch information
lorenzo-stoakes authored and akpm00 committed Sep 4, 2024
1 parent fc21959 commit cacded5
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 100 deletions.
92 changes: 20 additions & 72 deletions mm/mmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -1364,8 +1364,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma = NULL;
struct vm_area_struct *next, *prev, *merge;
pgoff_t pglen = PHYS_PFN(len);
struct vm_area_struct *merge;
unsigned long charged = 0;
struct vma_munmap_struct vms;
struct ma_state mas_detach;
Expand All @@ -1389,14 +1389,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
if (error)
goto gather_failed;

next = vmg.next = vms.next;
prev = vmg.prev = vms.prev;
vmg.next = vms.next;
vmg.prev = vms.prev;
vma = NULL;
} else {
next = vmg.next = vma_next(&vmi);
prev = vmg.prev = vma_prev(&vmi);
if (prev)
vma_iter_next_range(&vmi);
vmg.next = vma_iter_next_rewind(&vmi, &vmg.prev);
}

/* Check against address space limit. */
Expand All @@ -1417,46 +1414,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vmg.flags = vm_flags;
}

if (vm_flags & VM_SPECIAL)
goto cannot_expand;

/* Attempt to expand an old mapping */
/* Check next */
if (next && next->vm_start == end && can_vma_merge_before(&vmg)) {
vmg.end = next->vm_end;
vma = vmg.vma = next;
vmg.pgoff = next->vm_pgoff - pglen;
/*
* We set this here so if we will merge with the previous VMA in
* the code below, can_vma_merge_after() ensures anon_vma
* compatibility between prev and next.
*/
vmg.anon_vma = vma->anon_vma;
vmg.uffd_ctx = vma->vm_userfaultfd_ctx;
}

/* Check prev */
if (prev && prev->vm_end == addr && can_vma_merge_after(&vmg)) {
vmg.start = prev->vm_start;
vma = vmg.vma = prev;
vmg.pgoff = prev->vm_pgoff;
vma_prev(&vmi); /* Equivalent to going to the previous range */
}

if (vma) {
/* Actually expand, if possible */
if (!vma_expand(&vmg)) {
khugepaged_enter_vma(vma, vm_flags);
goto expanded;
}

/* If the expand fails, then reposition the vma iterator */
if (unlikely(vma == prev))
vma_iter_set(&vmi, addr);
}

cannot_expand:

vma = vma_merge_new_range(&vmg);
if (vma)
goto expanded;
/*
* Determine the object being mapped and call the appropriate
* specific mapper. the address has already been validated, but
Expand Down Expand Up @@ -1503,10 +1463,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
* If vm_flags changed after call_mmap(), we should try merge
* vma again as we may succeed this time.
*/
if (unlikely(vm_flags != vma->vm_flags && prev)) {
merge = vma_merge_new_vma(&vmi, prev, vma,
vma->vm_start, vma->vm_end,
vma->vm_pgoff);
if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) {
vmg.flags = vma->vm_flags;
/* If this fails, state is reset ready for a reattempt. */
merge = vma_merge_new_range(&vmg);

if (merge) {
/*
* ->mmap() can change vma->vm_file and fput
Expand All @@ -1522,6 +1483,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vm_flags = vma->vm_flags;
goto unmap_writable;
}
vma_iter_config(&vmi, addr, end);
}

vm_flags = vma->vm_flags;
Expand Down Expand Up @@ -1554,7 +1516,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vma_link_file(vma);

/*
* vma_merge() calls khugepaged_enter_vma() either, the below
* vma_merge_new_range() calls khugepaged_enter_vma() too, the below
* call covers the non-merge case.
*/
khugepaged_enter_vma(vma, vma->vm_flags);
Expand Down Expand Up @@ -1609,7 +1571,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,

vma_iter_set(&vmi, vma->vm_end);
/* Undo any partial mapping done by a device driver. */
unmap_region(&vmi.mas, vma, prev, next);
unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
}
if (writable_file_mapping)
mapping_unmap_writable(file->f_mapping);
Expand Down Expand Up @@ -1756,7 +1718,6 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, unsigned long len, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vma_prepare vp;

/*
* Check against address space limits by the changed size
Expand All @@ -1780,25 +1741,12 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
VMG_STATE(vmg, mm, vmi, addr, addr + len, flags, PHYS_PFN(addr));

vmg.prev = vma;
if (can_vma_merge_after(&vmg)) {
vma_iter_config(vmi, vma->vm_start, addr + len);
if (vma_iter_prealloc(vmi, vma))
goto unacct_fail;

vma_start_write(vma);

init_vma_prep(&vp, vma);
vma_prepare(&vp);
vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
vma->vm_end = addr + len;
vm_flags_set(vma, VM_SOFTDIRTY);
vma_iter_store(vmi, vma);

vma_complete(&vp, vmi, mm);
validate_mm(mm);
khugepaged_enter_vma(vma, flags);
vma_iter_next_range(vmi);

if (vma_merge_new_range(&vmg))
goto out;
}
else if (vmg_nomem(&vmg))
goto unacct_fail;
}

if (vma)
Expand Down
Loading

0 comments on commit cacded5

Please sign in to comment.