Skip to content

Commit

Permalink
mm: memcg/percpu: account percpu memory to memory cgroups
Browse files Browse the repository at this point in the history
Percpu memory is becoming more and more widely used by various subsystems,
and the total amount of memory controlled by the percpu allocator can make
a good part of the total memory.

As an example, bpf maps can consume a lot of percpu memory, and they are
created by a user.  Also, some cgroup internals (e.g.  memory controller
statistics) can be quite large.  On a machine with many CPUs and big
number of cgroups they can consume hundreds of megabytes.

So the lack of memcg accounting is creating a breach in the memory
isolation.  Similar to the slab memory, percpu memory should be accounted
by default.

To implement the percpu accounting it's possible to take the slab memory
accounting as a model to follow.  Let's introduce two types of percpu
chunks: root and memcg.  What makes memcg chunks different is an
additional space allocated to store memcg membership information.  If
__GFP_ACCOUNT is passed on allocation, a memcg chunk should be used.
If it's possible to charge the corresponding size to the target memory
cgroup, allocation is performed, and the memcg ownership data is recorded.
System-wide allocations are performed using root chunks, so there is no
additional memory overhead.

To implement a fast reparenting of percpu memory on memcg removal, we
don't store mem_cgroup pointers directly: instead we use obj_cgroup API,
introduced for slab accounting.

[akpm@linux-foundation.org: fix CONFIG_MEMCG_KMEM=n build errors and warning]
[akpm@linux-foundation.org: move unreachable code, per Roman]
[cuibixuan@huawei.com: mm/percpu: fix 'defined but not used' warning]
  Link: http://lkml.kernel.org/r/6d41b939-a741-b521-a7a2-e7296ec16219@huawei.com

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Bixuan Cui <cuibixuan@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Dennis Zhou <dennis@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Tobin C. Harding <tobin@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Waiman Long <longman@redhat.com>
Cc: Bixuan Cui <cuibixuan@huawei.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Link: http://lkml.kernel.org/r/20200623184515.4132564-3-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
rgushchin authored and torvalds committed Aug 12, 2020
1 parent 5b32af9 commit 3c7be18
Show file tree
Hide file tree
Showing 5 changed files with 246 additions and 40 deletions.
55 changes: 54 additions & 1 deletion mm/percpu-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,25 @@
#include <linux/types.h>
#include <linux/percpu.h>

/*
 * There are two chunk types: root and memcg-aware.
 * Chunks of each type have a separate slots list.
 *
 * Memcg-aware chunks have an attached vector of obj_cgroup pointers, which is
 * used to store memcg membership data of a percpu object.  Obj_cgroups are
 * ref-counted pointers to a memory cgroup with an ability to switch
 * dynamically to the parent memory cgroup.  This allows a deleted memory
 * cgroup to be reclaimed without reclaiming all outstanding objects, which
 * hold a reference to it.
 */
enum pcpu_chunk_type {
	PCPU_CHUNK_ROOT,	/* system-wide allocations, no memcg data */
#ifdef CONFIG_MEMCG_KMEM
	PCPU_CHUNK_MEMCG,	/* carries an obj_cgroup vector per object */
#endif
	PCPU_NR_CHUNK_TYPES,
	/* sentinel, not a real chunk type — used to signal allocation failure */
	PCPU_FAIL_ALLOC = PCPU_NR_CHUNK_TYPES
};

/*
* pcpu_block_md is the metadata block struct.
* Each chunk's bitmap is split into a number of full blocks.
Expand Down Expand Up @@ -54,6 +73,9 @@ struct pcpu_chunk {
int end_offset; /* additional area required to
have the region end page
aligned */
#ifdef CONFIG_MEMCG_KMEM
struct obj_cgroup **obj_cgroups; /* vector of object cgroups */
#endif

int nr_pages; /* # of pages served by this chunk */
int nr_populated; /* # of populated pages */
Expand All @@ -63,7 +85,7 @@ struct pcpu_chunk {

extern spinlock_t pcpu_lock;

extern struct list_head *pcpu_slot;
extern struct list_head *pcpu_chunk_lists;
extern int pcpu_nr_slots;
extern int pcpu_nr_empty_pop_pages;

Expand Down Expand Up @@ -106,6 +128,37 @@ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk)
return pcpu_nr_pages_to_map_bits(chunk->nr_pages);
}

#ifdef CONFIG_MEMCG_KMEM
/*
 * Classify a chunk: a chunk is memcg-aware iff it carries an obj_cgroup
 * vector for recording per-object memcg membership.
 */
static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk)
{
	return chunk->obj_cgroups ? PCPU_CHUNK_MEMCG : PCPU_CHUNK_ROOT;
}

/* Return true iff @chunk_type denotes a memcg-aware chunk. */
static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type)
{
	return chunk_type == PCPU_CHUNK_MEMCG;
}

#else
/* Without CONFIG_MEMCG_KMEM every chunk is a root chunk. */
static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk)
{
	return PCPU_CHUNK_ROOT;
}

static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type)
{
	return false;
}
#endif

/*
 * Return the slots array for @chunk_type: pcpu_chunk_lists holds the
 * pcpu_nr_slots root slots first, followed by the memcg-aware slots.
 */
static inline struct list_head *pcpu_chunk_list(enum pcpu_chunk_type chunk_type)
{
	int offset = pcpu_is_memcg_chunk(chunk_type) ? pcpu_nr_slots : 0;

	return &pcpu_chunk_lists[offset];
}

#ifdef CONFIG_PERCPU_STATS

#include <linux/spinlock.h>
Expand Down
5 changes: 3 additions & 2 deletions mm/percpu-km.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,16 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
/* nada */
}

static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
gfp_t gfp)
{
const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
struct pcpu_chunk *chunk;
struct page *pages;
unsigned long flags;
int i;

chunk = pcpu_alloc_chunk(gfp);
chunk = pcpu_alloc_chunk(type, gfp);
if (!chunk)
return NULL;

Expand Down
36 changes: 22 additions & 14 deletions mm/percpu-stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,15 @@ static int find_max_nr_alloc(void)
{
struct pcpu_chunk *chunk;
int slot, max_nr_alloc;
enum pcpu_chunk_type type;

max_nr_alloc = 0;
for (slot = 0; slot < pcpu_nr_slots; slot++)
list_for_each_entry(chunk, &pcpu_slot[slot], list)
max_nr_alloc = max(max_nr_alloc, chunk->nr_alloc);
for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
for (slot = 0; slot < pcpu_nr_slots; slot++)
list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot],
list)
max_nr_alloc = max(max_nr_alloc,
chunk->nr_alloc);

return max_nr_alloc;
}
Expand Down Expand Up @@ -129,6 +133,9 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
P("cur_min_alloc", cur_min_alloc);
P("cur_med_alloc", cur_med_alloc);
P("cur_max_alloc", cur_max_alloc);
#ifdef CONFIG_MEMCG_KMEM
P("memcg_aware", pcpu_is_memcg_chunk(pcpu_chunk_type(chunk)));
#endif
seq_putc(m, '\n');
}

Expand All @@ -137,6 +144,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
struct pcpu_chunk *chunk;
int slot, max_nr_alloc;
int *buffer;
enum pcpu_chunk_type type;

alloc_buffer:
spin_lock_irq(&pcpu_lock);
Expand Down Expand Up @@ -202,18 +210,18 @@ static int percpu_stats_show(struct seq_file *m, void *v)
chunk_map_stats(m, pcpu_reserved_chunk, buffer);
}

for (slot = 0; slot < pcpu_nr_slots; slot++) {
list_for_each_entry(chunk, &pcpu_slot[slot], list) {
if (chunk == pcpu_first_chunk) {
seq_puts(m, "Chunk: <- First Chunk\n");
chunk_map_stats(m, chunk, buffer);


} else {
seq_puts(m, "Chunk:\n");
chunk_map_stats(m, chunk, buffer);
for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) {
for (slot = 0; slot < pcpu_nr_slots; slot++) {
list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot],
list) {
if (chunk == pcpu_first_chunk) {
seq_puts(m, "Chunk: <- First Chunk\n");
chunk_map_stats(m, chunk, buffer);
} else {
seq_puts(m, "Chunk:\n");
chunk_map_stats(m, chunk, buffer);
}
}

}
}

Expand Down
5 changes: 3 additions & 2 deletions mm/percpu-vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -328,12 +328,13 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
pcpu_free_pages(chunk, pages, page_start, page_end);
}

static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
gfp_t gfp)
{
struct pcpu_chunk *chunk;
struct vm_struct **vms;

chunk = pcpu_alloc_chunk(gfp);
chunk = pcpu_alloc_chunk(type, gfp);
if (!chunk)
return NULL;

Expand Down
Loading

0 comments on commit 3c7be18

Please sign in to comment.