Skip to content

Commit 6813466

Browse files
Alexei Starovoitovborkmann
Alexei Starovoitov
authored and committed
bpf: Add map side support for bpf timers.
Restrict bpf timers to array, hash (both preallocated and kmalloced), and lru map types. The per-cpu maps with timers don't make sense, since 'struct bpf_timer' is a part of map value. bpf timers in per-cpu maps would mean that the number of timers depends on number of possible cpus and timers would not be accessible from all cpus. lpm map support can be added in the future. The timers in inner maps are supported. The bpf_map_update/delete_elem() helpers and sys_bpf commands cancel and free bpf_timer in a given map element. Similar to 'struct bpf_spin_lock' BTF is required and it is used to validate that map element indeed contains 'struct bpf_timer'. Make check_and_init_map_value() init both bpf_spin_lock and bpf_timer when map element data is reused in preallocated htab and lru maps. Teach copy_map_value() to support both bpf_spin_lock and bpf_timer in a single map element. There could be one of each, but not more than one. Due to 'one bpf_timer in one element' restriction do not support timers in global data, since global data is a map of single element, but from bpf program side it's seen as many global variables and restriction of single global timer would be odd. The sys_bpf map_freeze and sys_mmap syscalls are not allowed on maps with timers, since user space could have corrupted mmap element and crashed the kernel. The maps with timers cannot be readonly. Due to these restrictions search for bpf_timer in datasec BTF in case it was placed in the global data to report clear error. The previous patch allowed 'struct bpf_timer' as a first field in a map element only. Relax this restriction. Refactor lru map to s/bpf_lru_push_free/htab_lru_push_free/ to cancel and free the timer when lru map deletes an element as a part of it eviction algorithm. Make sure that bpf program cannot access 'struct bpf_timer' via direct load/store. The timer operation are done through helpers only. This is similar to 'struct bpf_spin_lock'. 
Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Yonghong Song <yhs@fb.com> Acked-by: Martin KaFai Lau <kafai@fb.com> Acked-by: Andrii Nakryiko <andrii@kernel.org> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210715005417.78572-5-alexei.starovoitov@gmail.com
1 parent b00628b commit 6813466

File tree

9 files changed

+259
-46
lines changed

9 files changed

+259
-46
lines changed

include/linux/bpf.h

+33-11
Original file line numberDiff line numberDiff line change
@@ -198,24 +198,46 @@ static inline bool map_value_has_spin_lock(const struct bpf_map *map)
198198
return map->spin_lock_off >= 0;
199199
}
200200

201-
static inline void check_and_init_map_lock(struct bpf_map *map, void *dst)
201+
static inline bool map_value_has_timer(const struct bpf_map *map)
202202
{
203-
if (likely(!map_value_has_spin_lock(map)))
204-
return;
205-
*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
206-
(struct bpf_spin_lock){};
203+
return map->timer_off >= 0;
207204
}
208205

209-
/* copy everything but bpf_spin_lock */
206+
static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
207+
{
208+
if (unlikely(map_value_has_spin_lock(map)))
209+
*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
210+
(struct bpf_spin_lock){};
211+
if (unlikely(map_value_has_timer(map)))
212+
*(struct bpf_timer *)(dst + map->timer_off) =
213+
(struct bpf_timer){};
214+
}
215+
216+
/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
210217
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
211218
{
219+
u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;
220+
212221
if (unlikely(map_value_has_spin_lock(map))) {
213-
u32 off = map->spin_lock_off;
222+
s_off = map->spin_lock_off;
223+
s_sz = sizeof(struct bpf_spin_lock);
224+
} else if (unlikely(map_value_has_timer(map))) {
225+
t_off = map->timer_off;
226+
t_sz = sizeof(struct bpf_timer);
227+
}
214228

215-
memcpy(dst, src, off);
216-
memcpy(dst + off + sizeof(struct bpf_spin_lock),
217-
src + off + sizeof(struct bpf_spin_lock),
218-
map->value_size - off - sizeof(struct bpf_spin_lock));
229+
if (unlikely(s_sz || t_sz)) {
230+
if (s_off < t_off || !s_sz) {
231+
swap(s_off, t_off);
232+
swap(s_sz, t_sz);
233+
}
234+
memcpy(dst, src, t_off);
235+
memcpy(dst + t_off + t_sz,
236+
src + t_off + t_sz,
237+
s_off - t_off - t_sz);
238+
memcpy(dst + s_off + s_sz,
239+
src + s_off + s_sz,
240+
map->value_size - s_off - s_sz);
219241
} else {
220242
memcpy(dst, src, map->value_size);
221243
}

include/linux/btf.h

+1
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
9999
const struct btf_member *m,
100100
u32 expected_offset, u32 expected_size);
101101
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
102+
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
102103
bool btf_type_is_void(const struct btf_type *t);
103104
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
104105
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,

kernel/bpf/arraymap.c

+21
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,12 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key
287287
return 0;
288288
}
289289

290+
static void check_and_free_timer_in_array(struct bpf_array *arr, void *val)
291+
{
292+
if (unlikely(map_value_has_timer(&arr->map)))
293+
bpf_timer_cancel_and_free(val + arr->map.timer_off);
294+
}
295+
290296
/* Called from syscall or from eBPF program */
291297
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
292298
u64 map_flags)
@@ -321,6 +327,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
321327
copy_map_value_locked(map, val, value, false);
322328
else
323329
copy_map_value(map, val, value);
330+
check_and_free_timer_in_array(array, val);
324331
}
325332
return 0;
326333
}
@@ -374,6 +381,19 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
374381
return (void *)round_down((unsigned long)array, PAGE_SIZE);
375382
}
376383

384+
static void array_map_free_timers(struct bpf_map *map)
385+
{
386+
struct bpf_array *array = container_of(map, struct bpf_array, map);
387+
int i;
388+
389+
if (likely(!map_value_has_timer(map)))
390+
return;
391+
392+
for (i = 0; i < array->map.max_entries; i++)
393+
bpf_timer_cancel_and_free(array->value + array->elem_size * i +
394+
map->timer_off);
395+
}
396+
377397
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
378398
static void array_map_free(struct bpf_map *map)
379399
{
@@ -668,6 +688,7 @@ const struct bpf_map_ops array_map_ops = {
668688
.map_alloc = array_map_alloc,
669689
.map_free = array_map_free,
670690
.map_get_next_key = array_map_get_next_key,
691+
.map_release_uref = array_map_free_timers,
671692
.map_lookup_elem = array_map_lookup_elem,
672693
.map_update_elem = array_map_update_elem,
673694
.map_delete_elem = array_map_delete_elem,

kernel/bpf/btf.c

+63-14
Original file line numberDiff line numberDiff line change
@@ -3046,43 +3046,92 @@ static void btf_struct_log(struct btf_verifier_env *env,
30463046
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
30473047
}
30483048

3049-
/* find 'struct bpf_spin_lock' in map value.
3050-
* return >= 0 offset if found
3051-
* and < 0 in case of error
3052-
*/
3053-
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
3049+
static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t,
3050+
const char *name, int sz, int align)
30543051
{
30553052
const struct btf_member *member;
30563053
u32 i, off = -ENOENT;
30573054

3058-
if (!__btf_type_is_struct(t))
3059-
return -EINVAL;
3060-
30613055
for_each_member(i, t, member) {
30623056
const struct btf_type *member_type = btf_type_by_id(btf,
30633057
member->type);
30643058
if (!__btf_type_is_struct(member_type))
30653059
continue;
3066-
if (member_type->size != sizeof(struct bpf_spin_lock))
3060+
if (member_type->size != sz)
30673061
continue;
3068-
if (strcmp(__btf_name_by_offset(btf, member_type->name_off),
3069-
"bpf_spin_lock"))
3062+
if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
30703063
continue;
30713064
if (off != -ENOENT)
3072-
/* only one 'struct bpf_spin_lock' is allowed */
3065+
/* only one such field is allowed */
30733066
return -E2BIG;
30743067
off = btf_member_bit_offset(t, member);
30753068
if (off % 8)
30763069
/* valid C code cannot generate such BTF */
30773070
return -EINVAL;
30783071
off /= 8;
3079-
if (off % __alignof__(struct bpf_spin_lock))
3080-
/* valid struct bpf_spin_lock will be 4 byte aligned */
3072+
if (off % align)
3073+
return -EINVAL;
3074+
}
3075+
return off;
3076+
}
3077+
3078+
static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
3079+
const char *name, int sz, int align)
3080+
{
3081+
const struct btf_var_secinfo *vsi;
3082+
u32 i, off = -ENOENT;
3083+
3084+
for_each_vsi(i, t, vsi) {
3085+
const struct btf_type *var = btf_type_by_id(btf, vsi->type);
3086+
const struct btf_type *var_type = btf_type_by_id(btf, var->type);
3087+
3088+
if (!__btf_type_is_struct(var_type))
3089+
continue;
3090+
if (var_type->size != sz)
3091+
continue;
3092+
if (vsi->size != sz)
3093+
continue;
3094+
if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
3095+
continue;
3096+
if (off != -ENOENT)
3097+
/* only one such field is allowed */
3098+
return -E2BIG;
3099+
off = vsi->offset;
3100+
if (off % align)
30813101
return -EINVAL;
30823102
}
30833103
return off;
30843104
}
30853105

3106+
static int btf_find_field(const struct btf *btf, const struct btf_type *t,
3107+
const char *name, int sz, int align)
3108+
{
3109+
3110+
if (__btf_type_is_struct(t))
3111+
return btf_find_struct_field(btf, t, name, sz, align);
3112+
else if (btf_type_is_datasec(t))
3113+
return btf_find_datasec_var(btf, t, name, sz, align);
3114+
return -EINVAL;
3115+
}
3116+
3117+
/* find 'struct bpf_spin_lock' in map value.
3118+
* return >= 0 offset if found
3119+
* and < 0 in case of error
3120+
*/
3121+
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
3122+
{
3123+
return btf_find_field(btf, t, "bpf_spin_lock",
3124+
sizeof(struct bpf_spin_lock),
3125+
__alignof__(struct bpf_spin_lock));
3126+
}
3127+
3128+
int btf_find_timer(const struct btf *btf, const struct btf_type *t)
3129+
{
3130+
return btf_find_field(btf, t, "bpf_timer",
3131+
sizeof(struct bpf_timer),
3132+
__alignof__(struct bpf_timer));
3133+
}
3134+
30863135
static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
30873136
u32 type_id, void *data, u8 bits_offset,
30883137
struct btf_show *show)

0 commit comments

Comments
 (0)