Skip to content

Commit 456f998

Browse files
yinghan authored and torvalds committed
memcg: add the pagefault count into memcg stats
Add two new stats to the per-memcg memory.stat which track the number of page faults and the number of major page faults: "pgfault" and "pgmajfault". They are different from the "pgpgin"/"pgpgout" stats, which count the number of pages charged/discharged to the cgroup and say nothing about reading/writing pages to disk. It is valuable to track the two stats both for measuring an application's performance and for measuring the efficiency of the kernel page reclaim path. Counting page faults per process is useful, but we also need the aggregated value, since processes are monitored and controlled on a cgroup basis in memcg. Functional test: check the total number of pgfault/pgmajfault of all memcgs and compare with global vmstat value: $ cat /proc/vmstat | grep fault pgfault 1070751 pgmajfault 553 $ cat /dev/cgroup/memory.stat | grep fault pgfault 1071138 pgmajfault 553 total_pgfault 1071142 total_pgmajfault 553 $ cat /dev/cgroup/A/memory.stat | grep fault pgfault 199 pgmajfault 0 total_pgfault 199 total_pgmajfault 0 Performance test: run the page fault test (pft) with 16 threads, faulting in 15G of anon pages in a 16G container. 
There is no regression noticed on the "flt/cpu/s" Sample output from pft: TAG pft:anon-sys-default: Gb Thr CLine User System Wall flt/cpu/s fault/wsec 15 16 1 0.67s 233.41s 14.76s 16798.546 266356.260 +-------------------------------------------------------------------------+ N Min Max Median Avg Stddev x 10 16682.962 17344.027 16913.524 16928.812 166.5362 + 10 16695.568 16923.896 16820.604 16824.652 84.816568 No difference proven at 95.0% confidence [akpm@linux-foundation.org: fix build] [hughd@google.com: shmem fix] Signed-off-by: Ying Han <yinghan@google.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Reviewed-by: Minchan Kim <minchan.kim@gmail.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 406eb0c commit 456f998

File tree

6 files changed

+65
-5
lines changed

6 files changed

+65
-5
lines changed

fs/ncpfs/mmap.c

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <linux/mman.h>
1717
#include <linux/string.h>
1818
#include <linux/fcntl.h>
19+
#include <linux/memcontrol.h>
1920

2021
#include <asm/uaccess.h>
2122
#include <asm/system.h>
@@ -92,6 +93,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
9293
* -- wli
9394
*/
9495
count_vm_event(PGMAJFAULT);
96+
mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT);
9597
return VM_FAULT_MAJOR;
9698
}
9799

include/linux/memcontrol.h

+7
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#ifndef _LINUX_MEMCONTROL_H
2121
#define _LINUX_MEMCONTROL_H
2222
#include <linux/cgroup.h>
23+
#include <linux/vm_event_item.h>
24+
2325
struct mem_cgroup;
2426
struct page_cgroup;
2527
struct page;
@@ -149,6 +151,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
149151
unsigned long *total_scanned);
150152
u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
151153

154+
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
152155
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
153156
void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
154157
#endif
@@ -357,6 +360,10 @@ static inline void mem_cgroup_split_huge_fixup(struct page *head,
357360
{
358361
}
359362

363+
/*
 * No-op variant of mem_cgroup_count_vm_event(): accepts the same arguments
 * as the real implementation and does nothing with them.
 * NOTE(review): the enclosing #if/#else is outside this hunk; presumably
 * this is the branch used when the memory controller is not configured --
 * confirm against the full header.
 */
static inline
364+
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
365+
{
366+
}
360367
#endif /* CONFIG_CGROUP_MEM_CONT */
361368

362369
#if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)

mm/filemap.c

+1
Original file line numberDiff line numberDiff line change
@@ -1661,6 +1661,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
16611661
/* No page in the page cache at all */
16621662
do_sync_mmap_readahead(vma, ra, file, offset);
16631663
count_vm_event(PGMAJFAULT);
1664+
mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
16641665
ret = VM_FAULT_MAJOR;
16651666
retry_find:
16661667
page = find_get_page(mapping, offset);

mm/memcontrol.c

+47
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ enum mem_cgroup_events_index {
9494
MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */
9595
MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */
9696
MEM_CGROUP_EVENTS_COUNT, /* # of pages paged in/out */
97+
MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */
98+
MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */
9799
MEM_CGROUP_EVENTS_NSTATS,
98100
};
99101
/*
@@ -590,6 +592,16 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
590592
this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
591593
}
592594

595+
/*
 * Add @val to the current CPU's MEM_CGROUP_EVENTS_PGFAULT event counter
 * of @mem.
 * NOTE(review): this function has external linkage, but no prototype for
 * it appears in the memcontrol.h hunk of this commit; it may be intended
 * to be static -- confirm there are no callers outside mm/memcontrol.c.
 */
void mem_cgroup_pgfault(struct mem_cgroup *mem, int val)
596+
{
597+
this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
598+
}
599+
600+
/*
 * Add @val to the current CPU's MEM_CGROUP_EVENTS_PGMAJFAULT event counter
 * of @mem.
 * NOTE(review): this function has external linkage, but no prototype for
 * it appears in the memcontrol.h hunk of this commit; it may be intended
 * to be static -- confirm there are no callers outside mm/memcontrol.c.
 */
void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val)
601+
{
602+
this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
603+
}
604+
593605
static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,
594606
enum mem_cgroup_events_index idx)
595607
{
@@ -827,6 +839,33 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
827839
return (mem == root_mem_cgroup);
828840
}
829841

842+
/*
 * mem_cgroup_count_vm_event - account a VM event to a memory cgroup.
 * @mm:  address space the event is attributed to; a NULL @mm is ignored.
 * @idx: event to count. Only PGFAULT and PGMAJFAULT are handled; any other
 *       value reaches BUG().
 *
 * The cgroup is looked up from @mm->owner via mem_cgroup_from_task() inside
 * an RCU read-side section. If no cgroup is found, nothing is counted.
 * Otherwise the matching per-cgroup event counter is incremented by one.
 */
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
843+
{
844+
struct mem_cgroup *mem;
845+
846+
if (!mm)
847+
return;
848+
849+
/* mm->owner is read with rcu_dereference(), so hold the RCU read lock. */
rcu_read_lock();
850+
mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
851+
if (unlikely(!mem))
852+
goto out;
853+
854+
switch (idx) {
855+
case PGMAJFAULT:
856+
mem_cgroup_pgmajfault(mem, 1);
857+
break;
858+
case PGFAULT:
859+
mem_cgroup_pgfault(mem, 1);
860+
break;
861+
default:
862+
/* Callers must pass only the two fault events handled above. */
BUG();
863+
}
864+
out:
865+
rcu_read_unlock();
866+
}
867+
EXPORT_SYMBOL(mem_cgroup_count_vm_event);
868+
830869
/*
831870
* Following LRU functions are allowed to be used without PCG_LOCK.
832871
* Operations are called by routine of global LRU independently from memcg.
@@ -3958,6 +3997,8 @@ enum {
39583997
MCS_PGPGIN,
39593998
MCS_PGPGOUT,
39603999
MCS_SWAP,
4000+
MCS_PGFAULT,
4001+
MCS_PGMAJFAULT,
39614002
MCS_INACTIVE_ANON,
39624003
MCS_ACTIVE_ANON,
39634004
MCS_INACTIVE_FILE,
@@ -3980,6 +4021,8 @@ struct {
39804021
{"pgpgin", "total_pgpgin"},
39814022
{"pgpgout", "total_pgpgout"},
39824023
{"swap", "total_swap"},
4024+
{"pgfault", "total_pgfault"},
4025+
{"pgmajfault", "total_pgmajfault"},
39834026
{"inactive_anon", "total_inactive_anon"},
39844027
{"active_anon", "total_active_anon"},
39854028
{"inactive_file", "total_inactive_file"},
@@ -4008,6 +4051,10 @@ mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
40084051
val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
40094052
s->stat[MCS_SWAP] += val * PAGE_SIZE;
40104053
}
4054+
val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT);
4055+
s->stat[MCS_PGFAULT] += val;
4056+
val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT);
4057+
s->stat[MCS_PGMAJFAULT] += val;
40114058

40124059
/* per zone stat */
40134060
val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);

mm/memory.c

+2
Original file line numberDiff line numberDiff line change
@@ -2874,6 +2874,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
28742874
/* Had to read the page from swap area: Major fault */
28752875
ret = VM_FAULT_MAJOR;
28762876
count_vm_event(PGMAJFAULT);
2877+
mem_cgroup_count_vm_event(mm, PGMAJFAULT);
28772878
} else if (PageHWPoison(page)) {
28782879
/*
28792880
* hwpoisoned dirty swapcache pages are kept for killing
@@ -3413,6 +3414,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
34133414
__set_current_state(TASK_RUNNING);
34143415

34153416
count_vm_event(PGFAULT);
3417+
mem_cgroup_count_vm_event(mm, PGFAULT);
34163418

34173419
/* do counter updates before entering really critical section. */
34183420
check_sync_rss_stat(current);

mm/shmem.c

+6-5
Original file line numberDiff line numberDiff line change
@@ -1305,12 +1305,10 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
13051305
swappage = lookup_swap_cache(swap);
13061306
if (!swappage) {
13071307
shmem_swp_unmap(entry);
1308+
spin_unlock(&info->lock);
13081309
/* here we actually do the io */
1309-
if (type && !(*type & VM_FAULT_MAJOR)) {
1310-
__count_vm_event(PGMAJFAULT);
1310+
if (type)
13111311
*type |= VM_FAULT_MAJOR;
1312-
}
1313-
spin_unlock(&info->lock);
13141312
swappage = shmem_swapin(swap, gfp, info, idx);
13151313
if (!swappage) {
13161314
spin_lock(&info->lock);
@@ -1549,7 +1547,10 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
15491547
error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
15501548
if (error)
15511549
return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1552-
1550+
if (ret & VM_FAULT_MAJOR) {
1551+
count_vm_event(PGMAJFAULT);
1552+
mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
1553+
}
15531554
return ret | VM_FAULT_LOCKED;
15541555
}
15551556

0 commit comments

Comments
 (0)