Skip to content

Commit

Permalink
mm: memcontrol: generalize locking for the page->mem_cgroup binding
Browse files Browse the repository at this point in the history
These patches tag the page cache radix tree eviction entries with the
memcg an evicted page belonged to, thus making per-cgroup LRU reclaim
work properly and be as adaptive to new cache workingsets as global
reclaim already is.

This should have been part of the original thrash detection patch
series, but was deferred due to the complexity of those patches.

This patch (of 5):

So far the only sites that needed to exclude charge migration to
stabilize page->mem_cgroup have been per-cgroup page statistics, hence
the name mem_cgroup_begin_page_stat().  But per-cgroup thrash detection
will add another site that needs to ensure page->mem_cgroup lifetime.

Rename these locking functions to the more generic lock_page_memcg() and
unlock_page_memcg().  Since charge migration is a cgroup1 feature only,
we might be able to delete it at some point, together with these now
easy-to-identify locking sites.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Suggested-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
hnaz authored and torvalds committed Mar 15, 2016
1 parent 0db2cb8 commit 81f8c3a
Show file tree
Hide file tree
Showing 9 changed files with 67 additions and 67 deletions.
14 changes: 7 additions & 7 deletions fs/buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
* If warn is true, then emit a warning if the page is not uptodate and has
* not been truncated.
*
* The caller must hold mem_cgroup_begin_page_stat() lock.
* The caller must hold lock_page_memcg().
*/
static void __set_page_dirty(struct page *page, struct address_space *mapping,
struct mem_cgroup *memcg, int warn)
Expand Down Expand Up @@ -683,17 +683,17 @@ int __set_page_dirty_buffers(struct page *page)
} while (bh != head);
}
/*
* Use mem_group_begin_page_stat() to keep PageDirty synchronized with
* per-memcg dirty page counters.
* Lock out page->mem_cgroup migration to keep PageDirty
* synchronized with per-memcg dirty page counters.
*/
memcg = mem_cgroup_begin_page_stat(page);
memcg = lock_page_memcg(page);
newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock);

if (newly_dirty)
__set_page_dirty(page, mapping, memcg, 1);

mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);

if (newly_dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
Expand Down Expand Up @@ -1169,13 +1169,13 @@ void mark_buffer_dirty(struct buffer_head *bh)
struct address_space *mapping = NULL;
struct mem_cgroup *memcg;

memcg = mem_cgroup_begin_page_stat(page);
memcg = lock_page_memcg(page);
if (!TestSetPageDirty(page)) {
mapping = page_mapping(page);
if (mapping)
__set_page_dirty(page, mapping, memcg, 0);
}
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);
if (mapping)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
Expand Down
8 changes: 4 additions & 4 deletions fs/xfs/xfs_aops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1978,10 +1978,10 @@ xfs_vm_set_page_dirty(
} while (bh != head);
}
/*
* Use mem_group_begin_page_stat() to keep PageDirty synchronized with
* per-memcg dirty page counters.
* Lock out page->mem_cgroup migration to keep PageDirty
* synchronized with per-memcg dirty page counters.
*/
memcg = mem_cgroup_begin_page_stat(page);
memcg = lock_page_memcg(page);
newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock);

Expand All @@ -1998,7 +1998,7 @@ xfs_vm_set_page_dirty(
}
spin_unlock_irqrestore(&mapping->tree_lock, flags);
}
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);
if (newly_dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
return newly_dirty;
Expand Down
16 changes: 11 additions & 5 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,16 +429,22 @@ bool mem_cgroup_oom_synchronize(bool wait);
extern int do_swap_account;
#endif

struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page);
void mem_cgroup_end_page_stat(struct mem_cgroup *memcg);
struct mem_cgroup *lock_page_memcg(struct page *page);
void unlock_page_memcg(struct mem_cgroup *memcg);

/**
* mem_cgroup_update_page_stat - update page state statistics
* @memcg: memcg to account against
* @idx: page state item to account
* @val: number of pages (positive or negative)
*
* See mem_cgroup_begin_page_stat() for locking requirements.
* Callers must use lock_page_memcg() to prevent double accounting
* when the page is concurrently being moved to another memcg:
*
* memcg = lock_page_memcg(page);
* if (TestClearPageState(page))
* mem_cgroup_update_page_stat(memcg, state, -1);
* unlock_page_memcg(memcg);
*/
static inline void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx, int val)
Expand Down Expand Up @@ -613,12 +619,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}

static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page)
static inline struct mem_cgroup *lock_page_memcg(struct page *page)
{
return NULL;
}

static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
static inline void unlock_page_memcg(struct mem_cgroup *memcg)
{
}

Expand Down
12 changes: 6 additions & 6 deletions mm/filemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
* ->tree_lock (page_remove_rmap->set_page_dirty)
* bdi.wb->list_lock (page_remove_rmap->set_page_dirty)
* ->inode->i_lock (page_remove_rmap->set_page_dirty)
* ->memcg->move_lock (page_remove_rmap->mem_cgroup_begin_page_stat)
* ->memcg->move_lock (page_remove_rmap->lock_page_memcg)
* bdi.wb->list_lock (zap_pte_range->set_page_dirty)
* ->inode->i_lock (zap_pte_range->set_page_dirty)
* ->private_lock (zap_pte_range->__set_page_dirty_buffers)
Expand Down Expand Up @@ -177,7 +177,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
* Delete a page from the page cache and free it. Caller has to make
* sure the page is locked and that nobody else uses it - or that usage
* is safe. The caller must hold the mapping's tree_lock and
* mem_cgroup_begin_page_stat().
* lock_page_memcg().
*/
void __delete_from_page_cache(struct page *page, void *shadow,
struct mem_cgroup *memcg)
Expand Down Expand Up @@ -263,11 +263,11 @@ void delete_from_page_cache(struct page *page)

freepage = mapping->a_ops->freepage;

memcg = mem_cgroup_begin_page_stat(page);
memcg = lock_page_memcg(page);
spin_lock_irqsave(&mapping->tree_lock, flags);
__delete_from_page_cache(page, NULL, memcg);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);

if (freepage)
freepage(page);
Expand Down Expand Up @@ -561,7 +561,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
new->mapping = mapping;
new->index = offset;

memcg = mem_cgroup_begin_page_stat(old);
memcg = lock_page_memcg(old);
spin_lock_irqsave(&mapping->tree_lock, flags);
__delete_from_page_cache(old, NULL, memcg);
error = radix_tree_insert(&mapping->page_tree, offset, new);
Expand All @@ -576,7 +576,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
if (PageSwapBacked(new))
__inc_zone_page_state(new, NR_SHMEM);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);
mem_cgroup_replace_page(old, new);
radix_tree_preload_end();
if (freepage)
Expand Down
34 changes: 14 additions & 20 deletions mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -1709,19 +1709,13 @@ bool mem_cgroup_oom_synchronize(bool handle)
}

/**
* mem_cgroup_begin_page_stat - begin a page state statistics transaction
* @page: page that is going to change accounted state
*
* This function must mark the beginning of an accounted page state
* change to prevent double accounting when the page is concurrently
* being moved to another memcg:
* lock_page_memcg - lock a page->mem_cgroup binding
* @page: the page
*
* memcg = mem_cgroup_begin_page_stat(page);
* if (TestClearPageState(page))
* mem_cgroup_update_page_stat(memcg, state, -1);
* mem_cgroup_end_page_stat(memcg);
* This function protects unlocked LRU pages from being moved to
* another cgroup and stabilizes their page->mem_cgroup binding.
*/
struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page)
struct mem_cgroup *lock_page_memcg(struct page *page)
{
struct mem_cgroup *memcg;
unsigned long flags;
Expand Down Expand Up @@ -1759,20 +1753,20 @@ struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page)
/*
* When charge migration first begins, we can have locked and
* unlocked page stat updates happening concurrently. Track
* the task who has the lock for mem_cgroup_end_page_stat().
* the task who has the lock for unlock_page_memcg().
*/
memcg->move_lock_task = current;
memcg->move_lock_flags = flags;

return memcg;
}
EXPORT_SYMBOL(mem_cgroup_begin_page_stat);
EXPORT_SYMBOL(lock_page_memcg);

/**
* mem_cgroup_end_page_stat - finish a page state statistics transaction
* @memcg: the memcg that was accounted against
* unlock_page_memcg - unlock a page->mem_cgroup binding
* @memcg: the memcg returned by lock_page_memcg()
*/
void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
void unlock_page_memcg(struct mem_cgroup *memcg)
{
if (memcg && memcg->move_lock_task == current) {
unsigned long flags = memcg->move_lock_flags;
Expand All @@ -1785,7 +1779,7 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)

rcu_read_unlock();
}
EXPORT_SYMBOL(mem_cgroup_end_page_stat);
EXPORT_SYMBOL(unlock_page_memcg);

/*
* size of first charge trial. "32" comes from vmscan.c's magic value.
Expand Down Expand Up @@ -4923,9 +4917,9 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)

lru_add_drain_all();
/*
* Signal mem_cgroup_begin_page_stat() to take the memcg's
* move_lock while we're moving its pages to another memcg.
* Then wait for already started RCU-only updates to finish.
* Signal lock_page_memcg() to take the memcg's move_lock
* while we're moving its pages to another memcg. Then wait
* for already started RCU-only updates to finish.
*/
atomic_inc(&mc.from->moving_account);
synchronize_rcu();
Expand Down
28 changes: 14 additions & 14 deletions mm/page-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -2410,7 +2410,7 @@ int __set_page_dirty_no_writeback(struct page *page)
/*
* Helper function for set_page_dirty family.
*
* Caller must hold mem_cgroup_begin_page_stat().
* Caller must hold lock_page_memcg().
*
* NOTE: This relies on being atomic wrt interrupts.
*/
Expand Down Expand Up @@ -2442,7 +2442,7 @@ EXPORT_SYMBOL(account_page_dirtied);
/*
* Helper function for deaccounting dirty page without writeback.
*
* Caller must hold mem_cgroup_begin_page_stat().
* Caller must hold lock_page_memcg().
*/
void account_page_cleaned(struct page *page, struct address_space *mapping,
struct mem_cgroup *memcg, struct bdi_writeback *wb)
Expand Down Expand Up @@ -2471,13 +2471,13 @@ int __set_page_dirty_nobuffers(struct page *page)
{
struct mem_cgroup *memcg;

memcg = mem_cgroup_begin_page_stat(page);
memcg = lock_page_memcg(page);
if (!TestSetPageDirty(page)) {
struct address_space *mapping = page_mapping(page);
unsigned long flags;

if (!mapping) {
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);
return 1;
}

Expand All @@ -2488,15 +2488,15 @@ int __set_page_dirty_nobuffers(struct page *page)
radix_tree_tag_set(&mapping->page_tree, page_index(page),
PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);

if (mapping->host) {
/* !PageAnon && !swapper_space */
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
return 1;
}
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);
return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);
Expand Down Expand Up @@ -2629,14 +2629,14 @@ void cancel_dirty_page(struct page *page)
struct mem_cgroup *memcg;
bool locked;

memcg = mem_cgroup_begin_page_stat(page);
memcg = lock_page_memcg(page);
wb = unlocked_inode_to_wb_begin(inode, &locked);

if (TestClearPageDirty(page))
account_page_cleaned(page, mapping, memcg, wb);

unlocked_inode_to_wb_end(inode, locked);
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);
} else {
ClearPageDirty(page);
}
Expand Down Expand Up @@ -2705,7 +2705,7 @@ int clear_page_dirty_for_io(struct page *page)
* always locked coming in here, so we get the desired
* exclusion.
*/
memcg = mem_cgroup_begin_page_stat(page);
memcg = lock_page_memcg(page);
wb = unlocked_inode_to_wb_begin(inode, &locked);
if (TestClearPageDirty(page)) {
mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
Expand All @@ -2714,7 +2714,7 @@ int clear_page_dirty_for_io(struct page *page)
ret = 1;
}
unlocked_inode_to_wb_end(inode, locked);
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);
return ret;
}
return TestClearPageDirty(page);
Expand All @@ -2727,7 +2727,7 @@ int test_clear_page_writeback(struct page *page)
struct mem_cgroup *memcg;
int ret;

memcg = mem_cgroup_begin_page_stat(page);
memcg = lock_page_memcg(page);
if (mapping) {
struct inode *inode = mapping->host;
struct backing_dev_info *bdi = inode_to_bdi(inode);
Expand Down Expand Up @@ -2755,7 +2755,7 @@ int test_clear_page_writeback(struct page *page)
dec_zone_page_state(page, NR_WRITEBACK);
inc_zone_page_state(page, NR_WRITTEN);
}
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);
return ret;
}

Expand All @@ -2765,7 +2765,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
struct mem_cgroup *memcg;
int ret;

memcg = mem_cgroup_begin_page_stat(page);
memcg = lock_page_memcg(page);
if (mapping) {
struct inode *inode = mapping->host;
struct backing_dev_info *bdi = inode_to_bdi(inode);
Expand Down Expand Up @@ -2796,7 +2796,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
inc_zone_page_state(page, NR_WRITEBACK);
}
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);
return ret;

}
Expand Down
8 changes: 4 additions & 4 deletions mm/rmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -1289,19 +1289,19 @@ void page_add_file_rmap(struct page *page)
{
struct mem_cgroup *memcg;

memcg = mem_cgroup_begin_page_stat(page);
memcg = lock_page_memcg(page);
if (atomic_inc_and_test(&page->_mapcount)) {
__inc_zone_page_state(page, NR_FILE_MAPPED);
mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
}
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);
}

static void page_remove_file_rmap(struct page *page)
{
struct mem_cgroup *memcg;

memcg = mem_cgroup_begin_page_stat(page);
memcg = lock_page_memcg(page);

/* Hugepages are not counted in NR_FILE_MAPPED for now. */
if (unlikely(PageHuge(page))) {
Expand All @@ -1325,7 +1325,7 @@ static void page_remove_file_rmap(struct page *page)
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
out:
mem_cgroup_end_page_stat(memcg);
unlock_page_memcg(memcg);
}

static void page_remove_anon_compound_rmap(struct page *page)
Expand Down
Loading

0 comments on commit 81f8c3a

Please sign in to comment.