Skip to content

bpf: switch to memcg-based memory accounting #339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
cc1adde
adding ci files
kernel-patches-bot Nov 14, 2020
3c6e849
bpf: memcg-based memory accounting for bpf progs
rgushchin Nov 12, 2020
1939351
bpf: prepare for memcg-based memory accounting for bpf maps
rgushchin Nov 12, 2020
8e69610
bpf: memcg-based memory accounting for bpf maps
rgushchin Nov 12, 2020
dbfa57e
bpf: refine memcg-based memory accounting for arraymap maps
rgushchin Nov 12, 2020
8f3a2c8
bpf: refine memcg-based memory accounting for cpumap maps
rgushchin Nov 12, 2020
4dcb9e4
bpf: memcg-based memory accounting for cgroup storage maps
rgushchin Nov 12, 2020
c9e8bb1
bpf: refine memcg-based memory accounting for devmap maps
rgushchin Nov 12, 2020
16fd379
bpf: refine memcg-based memory accounting for hashtab maps
rgushchin Nov 12, 2020
ea538da
bpf: memcg-based memory accounting for lpm_trie maps
rgushchin Nov 12, 2020
8572159
bpf: memcg-based memory accounting for bpf ringbuffer
rgushchin Nov 12, 2020
574b147
bpf: memcg-based memory accounting for bpf local storage maps
rgushchin Nov 12, 2020
d1f4fbc
bpf: refine memcg-based memory accounting for sockmap and sockhash maps
rgushchin Nov 12, 2020
c4b10e1
bpf: refine memcg-based memory accounting for xskmap maps
rgushchin Nov 12, 2020
b1a6042
bpf: eliminate rlimit-based memory accounting for arraymap maps
rgushchin Nov 12, 2020
c1c0890
bpf: eliminate rlimit-based memory accounting for bpf_struct_ops maps
rgushchin Nov 12, 2020
fbaadfb
bpf: eliminate rlimit-based memory accounting for cpumap maps
rgushchin Nov 12, 2020
0dc5ae4
bpf: eliminate rlimit-based memory accounting for cgroup storage maps
rgushchin Nov 12, 2020
5f35f05
bpf: eliminate rlimit-based memory accounting for devmap maps
rgushchin Nov 12, 2020
c93b8ac
bpf: eliminate rlimit-based memory accounting for hashtab maps
rgushchin Nov 12, 2020
bdc789a
bpf: eliminate rlimit-based memory accounting for lpm_trie maps
rgushchin Nov 12, 2020
ce88993
bpf: eliminate rlimit-based memory accounting for queue_stack_maps maps
rgushchin Nov 12, 2020
543bdb0
bpf: eliminate rlimit-based memory accounting for reuseport_array maps
rgushchin Nov 12, 2020
dc50dd7
bpf: eliminate rlimit-based memory accounting for bpf ringbuffer
rgushchin Nov 12, 2020
e69e475
bpf: eliminate rlimit-based memory accounting for sockmap and sockhash maps
rgushchin Nov 12, 2020
f6f21ed
bpf: eliminate rlimit-based memory accounting for stackmap maps
rgushchin Nov 12, 2020
17768a5
bpf: eliminate rlimit-based memory accounting for xskmap maps
rgushchin Nov 12, 2020
a5fcf2a
bpf: eliminate rlimit-based memory accounting for bpf local storage maps
rgushchin Nov 12, 2020
b08040d
bpf: eliminate rlimit-based memory accounting infra for bpf maps
rgushchin Nov 12, 2020
ab88586
bpf: eliminate rlimit-based memory accounting for bpf progs
rgushchin Nov 12, 2020
19564bd
bpf: samples: do not touch RLIMIT_MEMLOCK
rgushchin Nov 12, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
sudo: required
language: bash
dist: bionic
services:
- docker

env:
global:
- PROJECT_NAME='libbpf'
- AUTHOR_EMAIL="$(git log -1 --pretty=\"%aE\")"
- REPO_ROOT="$TRAVIS_BUILD_DIR"
- CI_ROOT="$REPO_ROOT/travis-ci"
- VMTEST_ROOT="$CI_ROOT/vmtest"

addons:
apt:
packages:
- qemu-kvm
- zstd
- binutils-dev
- elfutils
- libcap-dev
- libelf-dev
- libdw-dev
- python3-docutils

jobs:
include:
- stage: Builds & Tests
name: Kernel LATEST + selftests
language: bash
env: KERNEL=LATEST
script: $CI_ROOT/vmtest/run_vmtest.sh || travis_terminate 1
27 changes: 4 additions & 23 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct bpf_iter_aux_info;
struct bpf_local_storage;
struct bpf_local_storage_map;
struct kobject;
struct mem_cgroup;

extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
Expand Down Expand Up @@ -135,11 +136,6 @@ struct bpf_map_ops {
const struct bpf_iter_seq_info *iter_seq_info;
};

struct bpf_map_memory {
u32 pages;
struct user_struct *user;
};

struct bpf_map {
/* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
Expand All @@ -160,7 +156,9 @@ struct bpf_map {
u32 btf_key_type_id;
u32 btf_value_type_id;
struct btf *btf;
struct bpf_map_memory memory;
#ifdef CONFIG_MEMCG_KMEM
struct mem_cgroup *memcg;
#endif
char name[BPF_OBJ_NAME_LEN];
u32 btf_vmlinux_value_type_id;
bool bypass_spec_v1;
Expand Down Expand Up @@ -1202,8 +1200,6 @@ void bpf_prog_sub(struct bpf_prog *prog, int i);
void bpf_prog_inc(struct bpf_prog *prog);
struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
int __bpf_prog_charge(struct user_struct *user, u32 pages);
void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
void __bpf_free_used_maps(struct bpf_prog_aux *aux,
struct bpf_map **used_maps, u32 len);

Expand All @@ -1218,12 +1214,6 @@ void bpf_map_inc_with_uref(struct bpf_map *map);
struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size);
void bpf_map_charge_finish(struct bpf_map_memory *mem);
void bpf_map_charge_move(struct bpf_map_memory *dst,
struct bpf_map_memory *src);
void *bpf_map_area_alloc(u64 size, int numa_node);
void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
void bpf_map_area_free(void *base);
Expand Down Expand Up @@ -1490,15 +1480,6 @@ bpf_prog_inc_not_zero(struct bpf_prog *prog)
return ERR_PTR(-EOPNOTSUPP);
}

static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
return 0;
}

static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
}

static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
const struct bpf_link_ops *ops,
struct bpf_prog *prog)
Expand Down
30 changes: 7 additions & 23 deletions kernel/bpf/arraymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ static void bpf_array_free_percpu(struct bpf_array *array)

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
const gfp_t gfp = GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT;
void __percpu *ptr;
int i;

for (i = 0; i < array->map.max_entries; i++) {
ptr = __alloc_percpu_gfp(array->elem_size, 8,
GFP_USER | __GFP_NOWARN);
ptr = __alloc_percpu_gfp(array->elem_size, 8, gfp);
if (!ptr) {
bpf_array_free_percpu(array);
return -ENOMEM;
Expand Down Expand Up @@ -81,11 +81,10 @@ int array_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int ret, numa_node = bpf_map_attr_numa_node(attr);
int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool bypass_spec_v1 = bpf_bypass_spec_v1();
u64 cost, array_size, mask64;
struct bpf_map_memory mem;
u64 array_size, mask64;
struct bpf_array *array;

elem_size = round_up(attr->value_size, 8);
Expand Down Expand Up @@ -126,44 +125,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
}
}

/* make sure there is no u32 overflow later in round_up() */
cost = array_size;
if (percpu)
cost += (u64)attr->max_entries * elem_size * num_possible_cpus();

ret = bpf_map_charge_init(&mem, cost);
if (ret < 0)
return ERR_PTR(ret);

/* allocate all map elements and zero-initialize them */
if (attr->map_flags & BPF_F_MMAPABLE) {
void *data;

/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
data = bpf_map_area_mmapable_alloc(array_size, numa_node);
if (!data) {
bpf_map_charge_finish(&mem);
if (!data)
return ERR_PTR(-ENOMEM);
}
array = data + PAGE_ALIGN(sizeof(struct bpf_array))
- offsetof(struct bpf_array, value);
} else {
array = bpf_map_area_alloc(array_size, numa_node);
}
if (!array) {
bpf_map_charge_finish(&mem);
if (!array)
return ERR_PTR(-ENOMEM);
}
array->index_mask = index_mask;
array->map.bypass_spec_v1 = bypass_spec_v1;

/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
bpf_map_charge_move(&array->map.memory, &mem);
array->elem_size = elem_size;

if (percpu && bpf_array_alloc_percpu(array)) {
bpf_map_charge_finish(&array->map.memory);
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
Expand Down Expand Up @@ -1018,7 +1002,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
struct bpf_array_aux *aux;
struct bpf_map *map;

aux = kzalloc(sizeof(*aux), GFP_KERNEL);
aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
if (!aux)
return ERR_PTR(-ENOMEM);

Expand Down
18 changes: 4 additions & 14 deletions kernel/bpf/bpf_local_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (charge_mem && mem_charge(smap, owner, smap->elem_size))
return NULL;

selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN |
__GFP_ACCOUNT);
if (selem) {
if (value)
memcpy(SDATA(selem)->data, value, smap->map.value_size);
Expand Down Expand Up @@ -543,10 +544,8 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
struct bpf_local_storage_map *smap;
unsigned int i;
u32 nbuckets;
u64 cost;
int ret;

smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!smap)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
Expand All @@ -555,18 +554,9 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
nbuckets = max_t(u32, 2, nbuckets);
smap->bucket_log = ilog2(nbuckets);
cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);

ret = bpf_map_charge_init(&smap->map.memory, cost);
if (ret < 0) {
kfree(smap);
return ERR_PTR(ret);
}

smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
GFP_USER | __GFP_NOWARN);
GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!smap->buckets) {
bpf_map_charge_finish(&smap->map.memory);
kfree(smap);
return ERR_PTR(-ENOMEM);
}
Expand Down
19 changes: 3 additions & 16 deletions kernel/bpf/bpf_struct_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -548,12 +548,10 @@ static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
{
const struct bpf_struct_ops *st_ops;
size_t map_total_size, st_map_size;
size_t st_map_size;
struct bpf_struct_ops_map *st_map;
const struct btf_type *t, *vt;
struct bpf_map_memory mem;
struct bpf_map *map;
int err;

if (!bpf_capable())
return ERR_PTR(-EPERM);
Expand All @@ -573,20 +571,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
* struct bpf_struct_ops_tcp_congestions_ops
*/
(vt->size - sizeof(struct bpf_struct_ops_value));
map_total_size = st_map_size +
/* uvalue */
sizeof(vt->size) +
/* struct bpf_progs **progs */
btf_type_vlen(t) * sizeof(struct bpf_prog *);
err = bpf_map_charge_init(&mem, map_total_size);
if (err < 0)
return ERR_PTR(err);

st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
if (!st_map) {
bpf_map_charge_finish(&mem);
if (!st_map)
return ERR_PTR(-ENOMEM);
}

st_map->st_ops = st_ops;
map = &st_map->map;

Expand All @@ -597,14 +586,12 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
if (!st_map->uvalue || !st_map->progs || !st_map->image) {
bpf_struct_ops_map_free(map);
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}

mutex_init(&st_map->lock);
set_vm_flush_reset_perms(st_map->image);
bpf_map_init_from_attr(map, attr);
bpf_map_charge_move(&map->memory, &mem);

return map;
}
Expand Down
22 changes: 7 additions & 15 deletions kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns

struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
{
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog_aux *aux;
struct bpf_prog *fp;

Expand All @@ -86,7 +86,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
if (fp == NULL)
return NULL;

aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags);
if (aux == NULL) {
vfree(fp);
return NULL;
Expand All @@ -106,7 +106,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag

struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *prog;
int cpu;

Expand Down Expand Up @@ -138,7 +138,7 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)

prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo,
sizeof(*prog->aux->jited_linfo),
GFP_KERNEL | __GFP_NOWARN);
GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!prog->aux->jited_linfo)
return -ENOMEM;

Expand Down Expand Up @@ -219,25 +219,17 @@ void bpf_prog_free_linfo(struct bpf_prog *prog)
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags)
{
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *fp;
u32 pages, delta;
int ret;
u32 pages;

size = round_up(size, PAGE_SIZE);
pages = size / PAGE_SIZE;
if (pages <= fp_old->pages)
return fp_old;

delta = pages - fp_old->pages;
ret = __bpf_prog_charge(fp_old->aux->user, delta);
if (ret)
return NULL;

fp = __vmalloc(size, gfp_flags);
if (fp == NULL) {
__bpf_prog_uncharge(fp_old->aux->user, delta);
} else {
if (fp) {
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
fp->pages = pages;
fp->aux->prog = fp;
Expand Down
Loading