Skip to content

Commit

Permalink
arch: mm: pass userspace fault flag to generic fault handler
Browse files Browse the repository at this point in the history
Unlike global OOM handling, memory cgroup code will invoke the OOM killer
in any OOM situation because it has no way of telling faults occuring in
kernel context - which could be handled more gracefully - from
user-triggered faults.

Pass a flag that identifies faults originating in user space from the
architecture-specific fault handlers to generic code so that memcg OOM
handling can be improved.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
hnaz authored and torvalds committed Sep 12, 2013
1 parent 8713410 commit 759496b
Show file tree
Hide file tree
Showing 29 changed files with 135 additions and 64 deletions.
7 changes: 4 additions & 3 deletions arch/alpha/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
const struct exception_table_entry *fixup;
int fault, si_code = SEGV_MAPERR;
siginfo_t info;
unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(cause > 0 ? FAULT_FLAG_WRITE : 0));
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
(or is suppressed by the PALcode). Support that for older CPUs
Expand All @@ -115,7 +114,8 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
if (address >= TASK_SIZE)
goto vmalloc_fault;
#endif

if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
Expand All @@ -142,6 +142,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
} else {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
}

/* If for any reason at all we couldn't handle the fault,
Expand Down
6 changes: 4 additions & 2 deletions arch/arc/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
siginfo_t info;
int fault, ret;
int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

/*
* We fault-in kernel-space virtual memory on-demand. The
Expand Down Expand Up @@ -89,6 +88,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
if (in_atomic() || !mm)
goto no_context;

if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
Expand Down Expand Up @@ -117,6 +118,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
if (write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
Expand Down
9 changes: 6 additions & 3 deletions arch/arm/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
struct task_struct *tsk;
struct mm_struct *mm;
int fault, sig, code;
int write = fsr & FSR_WRITE;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

if (notify_page_fault(regs, fsr))
return 0;
Expand All @@ -282,6 +280,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (in_atomic() || !mm)
goto no_context;

if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (fsr & FSR_WRITE)
flags |= FAULT_FLAG_WRITE;

/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
Expand Down
17 changes: 10 additions & 7 deletions arch/arm64/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

if (esr & ESR_LNX_EXEC) {
vm_flags = VM_EXEC;
} else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}

tsk = current;
mm = tsk->mm;

Expand All @@ -220,6 +213,16 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (in_atomic() || !mm)
goto no_context;

if (user_mode(regs))
mm_flags |= FAULT_FLAG_USER;

if (esr & ESR_LNX_EXEC) {
vm_flags = VM_EXEC;
} else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}

/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
Expand Down
2 changes: 2 additions & 0 deletions arch/avr32/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)

local_irq_enable();

if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);

Expand Down
6 changes: 4 additions & 2 deletions arch/cris/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
struct vm_area_struct * vma;
siginfo_t info;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
((writeaccess & 1) ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

D(printk(KERN_DEBUG
"Page fault for %lX on %X at %lX, prot %d write %d\n",
Expand Down Expand Up @@ -117,6 +116,8 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
if (in_atomic() || !mm)
goto no_context;

if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
Expand Down Expand Up @@ -155,6 +156,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
} else if (writeaccess == 1) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
Expand Down
10 changes: 6 additions & 4 deletions arch/frv/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
struct vm_area_struct *vma;
struct mm_struct *mm;
unsigned long _pme, lrai, lrad, fixup;
unsigned long flags = 0;
siginfo_t info;
pgd_t *pge;
pud_t *pue;
pte_t *pte;
int write;
int fault;

#if 0
Expand Down Expand Up @@ -81,6 +81,9 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
if (in_atomic() || !mm)
goto no_context;

if (user_mode(__frame))
flags |= FAULT_FLAG_USER;

down_read(&mm->mmap_sem);

vma = find_vma(mm, ear0);
Expand Down Expand Up @@ -129,7 +132,6 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
*/
good_area:
info.si_code = SEGV_ACCERR;
write = 0;
switch (esr0 & ESR0_ATXC) {
default:
/* handle write to write protected page */
Expand All @@ -140,7 +142,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
#endif
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
write = 1;
flags |= FAULT_FLAG_WRITE;
break;

/* handle read from protected page */
Expand All @@ -162,7 +164,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0);
fault = handle_mm_fault(mm, vma, ear0, flags);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
Expand Down
6 changes: 4 additions & 2 deletions arch/hexagon/mm/vm_fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
int si_code = SEGV_MAPERR;
int fault;
const struct exception_table_entry *fixup;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(cause > 0 ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

/*
* If we're in an interrupt or have no user context,
Expand All @@ -65,6 +64,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)

local_irq_enable();

if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
Expand Down Expand Up @@ -96,6 +97,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
case FLT_STORE:
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
break;
}

Expand Down
6 changes: 4 additions & 2 deletions arch/ia64/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));

flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0);

/* mmap_sem is performance critical.... */
prefetchw(&mm->mmap_sem);

Expand Down Expand Up @@ -119,6 +117,10 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
if (notify_page_fault(regs, TRAP_BRKPT))
return;

if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (mask & VM_WRITE)
flags |= FAULT_FLAG_WRITE;
retry:
down_read(&mm->mmap_sem);

Expand Down
10 changes: 6 additions & 4 deletions arch/m32r/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
struct mm_struct *mm;
struct vm_area_struct * vma;
unsigned long page, addr;
int write;
unsigned long flags = 0;
int fault;
siginfo_t info;

Expand Down Expand Up @@ -117,6 +117,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
if (in_atomic() || !mm)
goto bad_area_nosemaphore;

if (error_code & ACE_USERMODE)
flags |= FAULT_FLAG_USER;

/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
Expand Down Expand Up @@ -166,14 +169,13 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
*/
good_area:
info.si_code = SEGV_ACCERR;
write = 0;
switch (error_code & (ACE_WRITE|ACE_PROTECTION)) {
default: /* 3: write, present */
/* fall through */
case ACE_WRITE: /* write, not present */
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
write++;
flags |= FAULT_FLAG_WRITE;
break;
case ACE_PROTECTION: /* read, present */
case 0: /* read, not present */
Expand All @@ -194,7 +196,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
*/
addr = (address & PAGE_MASK);
set_thread_fault_code(error_code);
fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0);
fault = handle_mm_fault(mm, vma, addr, flags);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
Expand Down
2 changes: 2 additions & 0 deletions arch/m68k/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
if (in_atomic() || !mm)
goto no_context;

if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);

Expand Down
6 changes: 4 additions & 2 deletions arch/metag/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
struct vm_area_struct *vma, *prev_vma;
siginfo_t info;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write_access ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

tsk = current;

Expand Down Expand Up @@ -109,6 +108,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
if (in_atomic() || !mm)
goto no_context;

if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);

Expand All @@ -121,6 +122,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
if (write_access) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
goto bad_area;
Expand Down
7 changes: 5 additions & 2 deletions arch/microblaze/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
int code = SEGV_MAPERR;
int is_write = error_code & ESR_S;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(is_write ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

regs->ear = address;
regs->esr = error_code;
Expand Down Expand Up @@ -121,6 +120,9 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
die("Weird page fault", regs, SIGSEGV);
}

if (user_mode(regs))
flags |= FAULT_FLAG_USER;

/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
Expand Down Expand Up @@ -199,6 +201,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
if (unlikely(is_write)) {
if (unlikely(!(vma->vm_flags & VM_WRITE)))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
/* a read */
} else {
/* protection fault */
Expand Down
6 changes: 4 additions & 2 deletions arch/mips/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
const int field = sizeof(unsigned long) * 2;
siginfo_t info;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

#if 0
printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
Expand Down Expand Up @@ -93,6 +92,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
if (in_atomic() || !mm)
goto bad_area_nosemaphore;

if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
Expand All @@ -114,6 +115,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
if (write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
} else {
if (cpu_has_rixi) {
if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
Expand Down
2 changes: 2 additions & 0 deletions arch/mn10300/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code,
if (in_atomic() || !mm)
goto no_context;

if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);

Expand Down
1 change: 1 addition & 0 deletions arch/openrisc/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
if (user_mode(regs)) {
/* Exception was in userspace: reenable interrupts */
local_irq_enable();
flags |= FAULT_FLAG_USER;
} else {
/* If exception was in a syscall, then IRQ's may have
* been enabled or disabled. If they were enabled,
Expand Down
Loading

0 comments on commit 759496b

Please sign in to comment.