Skip to content

Commit 31d8fca

Browse files
hnaz authored and torvalds committed
mm: workingset: age nonresident information alongside anonymous pages
Patch series "fix for "mm: balance LRU lists based on relative thrashing" patchset"

This patchset fixes some problems of the patchset, "mm: balance LRU lists based on relative thrashing", which is now merged on the mainline.

Patch "mm: workingset: let cache workingset challenge anon fix" is the result of discussion with Johannes. See following link.

http://lkml.kernel.org/r/20200520232525.798933-6-hannes@cmpxchg.org

And, the other two are minor things which are found when I try to rebase my patchset.

This patch (of 3):

After ("mm: workingset: let cache workingset challenge anon fix"), we compare refault distances to active_file + anon. But age of the non-resident information is only driven by the file LRU. As a result, we may overestimate the recency of any incoming refaults and activate them too eagerly, causing unnecessary LRU churn in certain situations.

Make anon aging drive nonresident age as well to address that.

Link: http://lkml.kernel.org/r/1592288204-27734-1-git-send-email-iamjoonsoo.kim@lge.com
Link: http://lkml.kernel.org/r/1592288204-27734-2-git-send-email-iamjoonsoo.kim@lge.com
Fixes: 34e58ca ("mm: workingset: let cache workingset challenge anon")
Reported-by: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 2a8bef3 commit 31d8fca

File tree

4 files changed

+33
-21
lines changed

4 files changed

+33
-21
lines changed

include/linux/mmzone.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,8 +257,8 @@ struct lruvec {
257257
*/
258258
unsigned long anon_cost;
259259
unsigned long file_cost;
260-
/* Evictions & activations on the inactive file list */
261-
atomic_long_t inactive_age;
260+
/* Non-resident age, driven by LRU movement */
261+
atomic_long_t nonresident_age;
262262
/* Refaults at the time of last reclaim cycle */
263263
unsigned long refaults;
264264
/* Various lruvec state flags (enum lruvec_flags) */

include/linux/swap.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ struct vma_swap_readahead {
313313
};
314314

315315
/* linux/mm/workingset.c */
316+
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
316317
void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
317318
void workingset_refault(struct page *page, void *shadow);
318319
void workingset_activation(struct page *page);

mm/vmscan.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -904,6 +904,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
904904
__delete_from_swap_cache(page, swap);
905905
xa_unlock_irqrestore(&mapping->i_pages, flags);
906906
put_swap_page(page, swap);
907+
workingset_eviction(page, target_memcg);
907908
} else {
908909
void (*freepage)(struct page *);
909910
void *shadow = NULL;
@@ -1884,6 +1885,8 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
18841885
list_add(&page->lru, &pages_to_free);
18851886
} else {
18861887
nr_moved += nr_pages;
1888+
if (PageActive(page))
1889+
workingset_age_nonresident(lruvec, nr_pages);
18871890
}
18881891
}
18891892

mm/workingset.c

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,8 @@
156156
*
157157
* Implementation
158158
*
159-
* For each node's file LRU lists, a counter for inactive evictions
160-
* and activations is maintained (node->inactive_age).
159+
* For each node's LRU lists, a counter for inactive evictions and
160+
* activations is maintained (node->nonresident_age).
161161
*
162162
* On eviction, a snapshot of this counter (along with some bits to
163163
* identify the node) is stored in the now empty page cache
@@ -213,7 +213,17 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
213213
*workingsetp = workingset;
214214
}
215215

216-
static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
216+
/**
217+
* workingset_age_nonresident - age non-resident entries as LRU ages
218+
* @memcg: the lruvec that was aged
219+
* @nr_pages: the number of pages to count
220+
*
221+
* As in-memory pages are aged, non-resident pages need to be aged as
222+
* well, in order for the refault distances later on to be comparable
223+
* to the in-memory dimensions. This function allows reclaim and LRU
224+
* operations to drive the non-resident aging along in parallel.
225+
*/
226+
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages)
217227
{
218228
/*
219229
* Reclaiming a cgroup means reclaiming all its children in a
@@ -227,11 +237,8 @@ static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
227237
* the root cgroup's, age as well.
228238
*/
229239
do {
230-
struct lruvec *lruvec;
231-
232-
lruvec = mem_cgroup_lruvec(memcg, pgdat);
233-
atomic_long_inc(&lruvec->inactive_age);
234-
} while (memcg && (memcg = parent_mem_cgroup(memcg)));
240+
atomic_long_add(nr_pages, &lruvec->nonresident_age);
241+
} while ((lruvec = parent_lruvec(lruvec)));
235242
}
236243

237244
/**
@@ -254,12 +261,11 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
254261
VM_BUG_ON_PAGE(page_count(page), page);
255262
VM_BUG_ON_PAGE(!PageLocked(page), page);
256263

257-
advance_inactive_age(page_memcg(page), pgdat);
258-
259264
lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
265+
workingset_age_nonresident(lruvec, hpage_nr_pages(page));
260266
/* XXX: target_memcg can be NULL, go through lruvec */
261267
memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
262-
eviction = atomic_long_read(&lruvec->inactive_age);
268+
eviction = atomic_long_read(&lruvec->nonresident_age);
263269
return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
264270
}
265271

@@ -309,20 +315,20 @@ void workingset_refault(struct page *page, void *shadow)
309315
if (!mem_cgroup_disabled() && !eviction_memcg)
310316
goto out;
311317
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
312-
refault = atomic_long_read(&eviction_lruvec->inactive_age);
318+
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
313319

314320
/*
315321
* Calculate the refault distance
316322
*
317323
* The unsigned subtraction here gives an accurate distance
318-
* across inactive_age overflows in most cases. There is a
324+
* across nonresident_age overflows in most cases. There is a
319325
* special case: usually, shadow entries have a short lifetime
320326
* and are either refaulted or reclaimed along with the inode
321327
* before they get too old. But it is not impossible for the
322-
* inactive_age to lap a shadow entry in the field, which can
323-
* then result in a false small refault distance, leading to a
324-
* false activation should this old entry actually refault
325-
* again. However, earlier kernels used to deactivate
328+
* nonresident_age to lap a shadow entry in the field, which
329+
* can then result in a false small refault distance, leading
330+
* to a false activation should this old entry actually
331+
* refault again. However, earlier kernels used to deactivate
326332
* unconditionally with *every* reclaim invocation for the
327333
* longest time, so the occasional inappropriate activation
328334
* leading to pressure on the active list is not a problem.
@@ -359,7 +365,7 @@ void workingset_refault(struct page *page, void *shadow)
359365
goto out;
360366

361367
SetPageActive(page);
362-
advance_inactive_age(memcg, pgdat);
368+
workingset_age_nonresident(lruvec, hpage_nr_pages(page));
363369
inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
364370

365371
/* Page was active prior to eviction */
@@ -382,6 +388,7 @@ void workingset_refault(struct page *page, void *shadow)
382388
void workingset_activation(struct page *page)
383389
{
384390
struct mem_cgroup *memcg;
391+
struct lruvec *lruvec;
385392

386393
rcu_read_lock();
387394
/*
@@ -394,7 +401,8 @@ void workingset_activation(struct page *page)
394401
memcg = page_memcg_rcu(page);
395402
if (!mem_cgroup_disabled() && !memcg)
396403
goto out;
397-
advance_inactive_age(memcg, page_pgdat(page));
404+
lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
405+
workingset_age_nonresident(lruvec, hpage_nr_pages(page));
398406
out:
399407
rcu_read_unlock();
400408
}

0 commit comments

Comments
 (0)