Skip to content

Commit 4d7d7f6

Browse files
kkdwivediAlexei Starovoitov
authored andcommitted
bpf: Adapt copy_map_value for multiple offset case
Since there might now be at most 10 offsets that need handling in copy_map_value, the manual shuffling and special casing is no longer going to work. Hence, let's generalise the copy_map_value function by using a sorted array of offsets to skip regions that must be avoided while copying into and out of a map value. When the map is created, we populate the offset array in struct bpf_map. Then, copy_map_value uses this sorted offset array to memcpy while skipping timer, spin lock, and kptr. The array is allocated separately because in most cases none of these special fields are present in the map value, hence we can save space in the common case by not embedding the entire object inside the bpf_map struct. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20220424214901.2743946-6-memxor@gmail.com
1 parent 6efe152 commit 4d7d7f6

File tree

2 files changed

+117
-27
lines changed

2 files changed

+117
-27
lines changed

include/linux/bpf.h

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@ struct bpf_map_ops {
158158
/* Limits on the special fields that may live inside a BPF map value.
 * BPF_MAP_OFF_ARR_MAX sizes the arrays in struct bpf_map_off_arr: one slot
 * per pointer plus one each for bpf_spin_lock and bpf_timer.
 */
enum {
159159
/* Support at most 8 pointers in a BPF map value */
160160
BPF_MAP_VALUE_OFF_MAX = 8,
161+
BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX +
162+
1 + /* for bpf_spin_lock */
163+
1, /* for bpf_timer */
161164
};
162165

163166
enum bpf_kptr_type {
@@ -179,6 +182,12 @@ struct bpf_map_value_off {
179182
struct bpf_map_value_off_desc off[];
180183
};
181184

185+
/* Table of the regions inside a map value that copy_map_value() must skip.
 * bpf_map_alloc_off_arr() fills it with the spin lock, timer and kptr
 * fields and sorts it by ascending offset, keeping each size paired with
 * its offset.
 */
struct bpf_map_off_arr {
186+
u32 cnt; /* number of valid entries in field_off[] and field_sz[] */
187+
u32 field_off[BPF_MAP_OFF_ARR_MAX]; /* byte offset of each field within the value */
188+
u8 field_sz[BPF_MAP_OFF_ARR_MAX]; /* size in bytes of the field at field_off[i] */
189+
};
190+
182191
struct bpf_map {
183192
/* The first two cachelines with read-mostly members of which some
184193
* are also accessed in fast-path (e.g. ops, max_entries).
@@ -207,10 +216,7 @@ struct bpf_map {
207216
struct mem_cgroup *memcg;
208217
#endif
209218
char name[BPF_OBJ_NAME_LEN];
210-
bool bypass_spec_v1;
211-
bool frozen; /* write-once; write-protected by freeze_mutex */
212-
/* 6 bytes hole */
213-
219+
struct bpf_map_off_arr *off_arr;
214220
/* The 3rd and 4th cacheline with misc members to avoid false sharing
215221
* particularly with refcounting.
216222
*/
@@ -230,6 +236,8 @@ struct bpf_map {
230236
bool jited;
231237
bool xdp_has_frags;
232238
} owner;
239+
bool bypass_spec_v1;
240+
bool frozen; /* write-once; write-protected by freeze_mutex */
233241
};
234242

235243
static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -253,37 +261,33 @@ static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
253261
memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
254262
if (unlikely(map_value_has_timer(map)))
255263
memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
264+
if (unlikely(map_value_has_kptrs(map))) {
265+
struct bpf_map_value_off *tab = map->kptr_off_tab;
266+
int i;
267+
268+
for (i = 0; i < tab->nr_off; i++)
269+
*(u64 *)(dst + tab->off[i].offset) = 0;
270+
}
256271
}
257272

258273
/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
259274
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
260275
{
261-
u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;
276+
u32 curr_off = 0;
277+
int i;
262278

263-
if (unlikely(map_value_has_spin_lock(map))) {
264-
s_off = map->spin_lock_off;
265-
s_sz = sizeof(struct bpf_spin_lock);
266-
}
267-
if (unlikely(map_value_has_timer(map))) {
268-
t_off = map->timer_off;
269-
t_sz = sizeof(struct bpf_timer);
279+
if (likely(!map->off_arr)) {
280+
memcpy(dst, src, map->value_size);
281+
return;
270282
}
271283

272-
if (unlikely(s_sz || t_sz)) {
273-
if (s_off < t_off || !s_sz) {
274-
swap(s_off, t_off);
275-
swap(s_sz, t_sz);
276-
}
277-
memcpy(dst, src, t_off);
278-
memcpy(dst + t_off + t_sz,
279-
src + t_off + t_sz,
280-
s_off - t_off - t_sz);
281-
memcpy(dst + s_off + s_sz,
282-
src + s_off + s_sz,
283-
map->value_size - s_off - s_sz);
284-
} else {
285-
memcpy(dst, src, map->value_size);
284+
for (i = 0; i < map->off_arr->cnt; i++) {
285+
u32 next_off = map->off_arr->field_off[i];
286+
287+
memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
288+
curr_off += map->off_arr->field_sz[i];
286289
}
290+
memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off);
287291
}
288292
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
289293
bool lock_src);

kernel/bpf/syscall.c

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <linux/pgtable.h>
3131
#include <linux/bpf_lsm.h>
3232
#include <linux/poll.h>
33+
#include <linux/sort.h>
3334
#include <linux/bpf-netns.h>
3435
#include <linux/rcupdate_trace.h>
3536
#include <linux/memcontrol.h>
@@ -551,6 +552,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
551552
struct bpf_map *map = container_of(work, struct bpf_map, work);
552553

553554
security_bpf_map_free(map);
555+
kfree(map->off_arr);
554556
bpf_map_free_kptr_off_tab(map);
555557
bpf_map_release_memcg(map);
556558
/* implementation dependent freeing */
@@ -840,6 +842,84 @@ int map_check_no_btf(const struct bpf_map *map,
840842
return -ENOTSUPP;
841843
}
842844

845+
static int map_off_arr_cmp(const void *_a, const void *_b, const void *priv)
846+
{
847+
const u32 a = *(const u32 *)_a;
848+
const u32 b = *(const u32 *)_b;
849+
850+
if (a < b)
851+
return -1;
852+
else if (a > b)
853+
return 1;
854+
return 0;
855+
}
856+
857+
static void map_off_arr_swap(void *_a, void *_b, int size, const void *priv)
858+
{
859+
struct bpf_map *map = (struct bpf_map *)priv;
860+
u32 *off_base = map->off_arr->field_off;
861+
u32 *a = _a, *b = _b;
862+
u8 *sz_a, *sz_b;
863+
864+
sz_a = map->off_arr->field_sz + (a - off_base);
865+
sz_b = map->off_arr->field_sz + (b - off_base);
866+
867+
swap(*a, *b);
868+
swap(*sz_a, *sz_b);
869+
}
870+
871+
static int bpf_map_alloc_off_arr(struct bpf_map *map)
872+
{
873+
bool has_spin_lock = map_value_has_spin_lock(map);
874+
bool has_timer = map_value_has_timer(map);
875+
bool has_kptrs = map_value_has_kptrs(map);
876+
struct bpf_map_off_arr *off_arr;
877+
u32 i;
878+
879+
if (!has_spin_lock && !has_timer && !has_kptrs) {
880+
map->off_arr = NULL;
881+
return 0;
882+
}
883+
884+
off_arr = kmalloc(sizeof(*map->off_arr), GFP_KERNEL | __GFP_NOWARN);
885+
if (!off_arr)
886+
return -ENOMEM;
887+
map->off_arr = off_arr;
888+
889+
off_arr->cnt = 0;
890+
if (has_spin_lock) {
891+
i = off_arr->cnt;
892+
893+
off_arr->field_off[i] = map->spin_lock_off;
894+
off_arr->field_sz[i] = sizeof(struct bpf_spin_lock);
895+
off_arr->cnt++;
896+
}
897+
if (has_timer) {
898+
i = off_arr->cnt;
899+
900+
off_arr->field_off[i] = map->timer_off;
901+
off_arr->field_sz[i] = sizeof(struct bpf_timer);
902+
off_arr->cnt++;
903+
}
904+
if (has_kptrs) {
905+
struct bpf_map_value_off *tab = map->kptr_off_tab;
906+
u32 *off = &off_arr->field_off[off_arr->cnt];
907+
u8 *sz = &off_arr->field_sz[off_arr->cnt];
908+
909+
for (i = 0; i < tab->nr_off; i++) {
910+
*off++ = tab->off[i].offset;
911+
*sz++ = sizeof(u64);
912+
}
913+
off_arr->cnt += tab->nr_off;
914+
}
915+
916+
if (off_arr->cnt == 1)
917+
return 0;
918+
sort_r(off_arr->field_off, off_arr->cnt, sizeof(off_arr->field_off[0]),
919+
map_off_arr_cmp, map_off_arr_swap, map);
920+
return 0;
921+
}
922+
843923
static int map_check_btf(struct bpf_map *map, const struct btf *btf,
844924
u32 btf_key_id, u32 btf_value_id)
845925
{
@@ -1009,10 +1089,14 @@ static int map_create(union bpf_attr *attr)
10091089
attr->btf_vmlinux_value_type_id;
10101090
}
10111091

1012-
err = security_bpf_map_alloc(map);
1092+
err = bpf_map_alloc_off_arr(map);
10131093
if (err)
10141094
goto free_map;
10151095

1096+
err = security_bpf_map_alloc(map);
1097+
if (err)
1098+
goto free_map_off_arr;
1099+
10161100
err = bpf_map_alloc_id(map);
10171101
if (err)
10181102
goto free_map_sec;
@@ -1035,6 +1119,8 @@ static int map_create(union bpf_attr *attr)
10351119

10361120
free_map_sec:
10371121
security_bpf_map_free(map);
1122+
free_map_off_arr:
1123+
kfree(map->off_arr);
10381124
free_map:
10391125
btf_put(map->btf);
10401126
map->ops->map_free(map);

0 commit comments

Comments
 (0)