Skip to content

Commit 0ae5e89

Browse files
yinghan authored and torvalds committed
memcg: count the soft_limit reclaim in global background reclaim
The global kswapd scans per-zone LRU and reclaims pages regardless of the cgroup. It breaks memory isolation since one cgroup can end up reclaiming pages from another cgroup. Instead we should rely on memcg-aware target reclaim including per-memcg kswapd and soft_limit hierarchical reclaim under memory pressure. In the global background reclaim, we do soft reclaim before scanning the per-zone LRU. However, the return value is ignored. This patch is the first step to skip shrink_zone() if soft_limit reclaim does enough work. This is part of the effort which tries to reduce reclaiming pages in global LRU in memcg. The per-memcg background reclaim patchset further enhances the per-cgroup targetting reclaim, which I should have V4 posted shortly. Try running multiple memory intensive workloads within seperate memcgs. Watch the counters of soft_steal in memory.stat. $ cat /dev/cgroup/A/memory.stat | grep 'soft' soft_steal 240000 soft_scan 240000 total_soft_steal 240000 total_soft_scan 240000 This patch: In the global background reclaim, we do soft reclaim before scanning the per-zone LRU. However, the return value is ignored. We would like to skip shrink_zone() if soft_limit reclaim does enough work. Also, we need to make the memory pressure balanced across per-memcg zones, like the logic vm-core. This patch is the first step where we start with counting the nr_scanned and nr_reclaimed from soft_limit reclaim into the global scan_control. Signed-off-by: Ying Han <yinghan@google.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: Rik van Riel <riel@redhat.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent f042e70 commit 0ae5e89

File tree

4 files changed

+39
-15
lines changed

4 files changed

+39
-15
lines changed

include/linux/memcontrol.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
144144
}
145145

146146
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
147-
gfp_t gfp_mask);
147+
gfp_t gfp_mask,
148+
unsigned long *total_scanned);
148149
u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
149150

150151
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -338,7 +339,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
338339

339340
static inline
340341
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
341-
gfp_t gfp_mask)
342+
gfp_t gfp_mask,
343+
unsigned long *total_scanned)
342344
{
343345
return 0;
344346
}

include/linux/swap.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,8 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
257257
extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
258258
gfp_t gfp_mask, bool noswap,
259259
unsigned int swappiness,
260-
struct zone *zone);
260+
struct zone *zone,
261+
unsigned long *nr_scanned);
261262
extern int __isolate_lru_page(struct page *page, int mode, int file);
262263
extern unsigned long shrink_all_memory(unsigned long nr_pages);
263264
extern int vm_swappiness;

mm/memcontrol.c

+20-9
Original file line numberDiff line numberDiff line change
@@ -1433,7 +1433,8 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
14331433
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
14341434
struct zone *zone,
14351435
gfp_t gfp_mask,
1436-
unsigned long reclaim_options)
1436+
unsigned long reclaim_options,
1437+
unsigned long *total_scanned)
14371438
{
14381439
struct mem_cgroup *victim;
14391440
int ret, total = 0;
@@ -1442,6 +1443,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
14421443
bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
14431444
bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
14441445
unsigned long excess;
1446+
unsigned long nr_scanned;
14451447

14461448
excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
14471449

@@ -1484,10 +1486,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
14841486
continue;
14851487
}
14861488
/* we use swappiness of local cgroup */
1487-
if (check_soft)
1489+
if (check_soft) {
14881490
ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
1489-
noswap, get_swappiness(victim), zone);
1490-
else
1491+
noswap, get_swappiness(victim), zone,
1492+
&nr_scanned);
1493+
*total_scanned += nr_scanned;
1494+
} else
14911495
ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
14921496
noswap, get_swappiness(victim));
14931497
css_put(&victim->css);
@@ -1928,7 +1932,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
19281932
return CHARGE_WOULDBLOCK;
19291933

19301934
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
1931-
gfp_mask, flags);
1935+
gfp_mask, flags, NULL);
19321936
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
19331937
return CHARGE_RETRY;
19341938
/*
@@ -3211,7 +3215,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
32113215
break;
32123216

32133217
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
3214-
MEM_CGROUP_RECLAIM_SHRINK);
3218+
MEM_CGROUP_RECLAIM_SHRINK,
3219+
NULL);
32153220
curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
32163221
/* Usage is reduced ? */
32173222
if (curusage >= oldusage)
@@ -3271,7 +3276,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
32713276

32723277
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
32733278
MEM_CGROUP_RECLAIM_NOSWAP |
3274-
MEM_CGROUP_RECLAIM_SHRINK);
3279+
MEM_CGROUP_RECLAIM_SHRINK,
3280+
NULL);
32753281
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
32763282
/* Usage is reduced ? */
32773283
if (curusage >= oldusage)
@@ -3285,14 +3291,16 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
32853291
}
32863292

32873293
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
3288-
gfp_t gfp_mask)
3294+
gfp_t gfp_mask,
3295+
unsigned long *total_scanned)
32893296
{
32903297
unsigned long nr_reclaimed = 0;
32913298
struct mem_cgroup_per_zone *mz, *next_mz = NULL;
32923299
unsigned long reclaimed;
32933300
int loop = 0;
32943301
struct mem_cgroup_tree_per_zone *mctz;
32953302
unsigned long long excess;
3303+
unsigned long nr_scanned;
32963304

32973305
if (order > 0)
32983306
return 0;
@@ -3311,10 +3319,13 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
33113319
if (!mz)
33123320
break;
33133321

3322+
nr_scanned = 0;
33143323
reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
33153324
gfp_mask,
3316-
MEM_CGROUP_RECLAIM_SOFT);
3325+
MEM_CGROUP_RECLAIM_SOFT,
3326+
&nr_scanned);
33173327
nr_reclaimed += reclaimed;
3328+
*total_scanned += nr_scanned;
33183329
spin_lock(&mctz->lock);
33193330

33203331
/*

mm/vmscan.c

+13-3
Original file line numberDiff line numberDiff line change
@@ -2171,9 +2171,11 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
21712171
unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
21722172
gfp_t gfp_mask, bool noswap,
21732173
unsigned int swappiness,
2174-
struct zone *zone)
2174+
struct zone *zone,
2175+
unsigned long *nr_scanned)
21752176
{
21762177
struct scan_control sc = {
2178+
.nr_scanned = 0,
21772179
.nr_to_reclaim = SWAP_CLUSTER_MAX,
21782180
.may_writepage = !laptop_mode,
21792181
.may_unmap = 1,
@@ -2182,6 +2184,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
21822184
.order = 0,
21832185
.mem_cgroup = mem,
21842186
};
2187+
21852188
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
21862189
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
21872190

@@ -2200,6 +2203,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
22002203

22012204
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
22022205

2206+
*nr_scanned = sc.nr_scanned;
22032207
return sc.nr_reclaimed;
22042208
}
22052209

@@ -2347,6 +2351,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
23472351
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
23482352
unsigned long total_scanned;
23492353
struct reclaim_state *reclaim_state = current->reclaim_state;
2354+
unsigned long nr_soft_reclaimed;
2355+
unsigned long nr_soft_scanned;
23502356
struct scan_control sc = {
23512357
.gfp_mask = GFP_KERNEL,
23522358
.may_unmap = 1,
@@ -2439,11 +2445,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
24392445

24402446
sc.nr_scanned = 0;
24412447

2448+
nr_soft_scanned = 0;
24422449
/*
24432450
* Call soft limit reclaim before calling shrink_zone.
2444-
* For now we ignore the return value
24452451
*/
2446-
mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
2452+
nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
2453+
order, sc.gfp_mask,
2454+
&nr_soft_scanned);
2455+
sc.nr_reclaimed += nr_soft_reclaimed;
2456+
total_scanned += nr_soft_scanned;
24472457

24482458
/*
24492459
* We put equal pressure on every zone, unless

0 commit comments

Comments
 (0)