Skip to content

Commit 4b3ccca

Browse files
author
Alexei Starovoitov
committed
Merge branch 'bpf-refcount-followups-2-owner-field'
Dave Marchevsky says: ==================== BPF Refcount followups 2: owner field This series adds an 'owner' field to bpf_{list,rb}_node structs, to be used by the runtime to determine whether insertion or removal operations are valid in shared ownership scenarios. Both the races which the series fixes and the fix itself are inspired by Kumar's suggestions in [0]. Aside from insertion and removal having more reasons to fail, there are no user-facing changes as a result of this series. * Patch 1 reverts disabling of bpf_refcount_acquire so that the fixed logic can be exercised by CI. It should _not_ be applied. * Patch 2 adds internal definitions of bpf_{rb,list}_node so that their fields are easier to access. * Patch 3 is the meat of the series - it adds 'owner' field and enforcement of correct owner to insertion and removal helpers. * Patch 4 adds a test based on Kumar's examples. * Patch 5 disables the test until bpf_refcount_acquire is re-enabled. * Patch 6 reverts disabling of the test added in this series so that its logic can be exercised by CI. It should _not_ be applied. [0]: https://lore.kernel.org/bpf/d7hyspcow5wtjcmw4fugdgyp3fwhljwuscp3xyut5qnwivyeru@ysdq543otzv2/ Changelog: v1 -> v2: lore.kernel.org/bpf/20230711175945.3298231-1-davemarchevsky@fb.com/ Patch 2 ("Introduce internal definitions for UAPI-opaque bpf_{rb,list}_node") * Rename bpf_{rb,list}_node_internal -> bpf_{list,rb}_node_kern (Alexei) Patch 3 ("bpf: Add 'owner' field to bpf_{list,rb}_node") * WARN_ON_ONCE in __bpf_list_del when node has wrong owner. This shouldn't happen, but worth checking regardless (Alexei, offline convo) * Continue previous patch's renaming changes ==================== Link: https://lore.kernel.org/r/20230718083813.3416104-1-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2 parents 60cc1f7 + f3514a5 commit 4b3ccca

File tree

6 files changed

+187
-53
lines changed

6 files changed

+187
-53
lines changed

include/linux/bpf.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,18 @@ struct btf_record {
228228
struct btf_field fields[];
229229
};
230230

231+
/* Non-opaque version of bpf_rb_node in uapi/linux/bpf.h */
232+
struct bpf_rb_node_kern {
233+
struct rb_node rb_node;
234+
void *owner;
235+
} __attribute__((aligned(8)));
236+
237+
/* Non-opaque version of bpf_list_node in uapi/linux/bpf.h */
238+
struct bpf_list_node_kern {
239+
struct list_head list_head;
240+
void *owner;
241+
} __attribute__((aligned(8)));
242+
231243
struct bpf_map {
232244
/* The first two cachelines with read-mostly members of which some
233245
* are also accessed in fast-path (e.g. ops, max_entries).

include/uapi/linux/bpf.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7052,6 +7052,7 @@ struct bpf_list_head {
70527052
struct bpf_list_node {
70537053
__u64 :64;
70547054
__u64 :64;
7055+
__u64 :64;
70557056
} __attribute__((aligned(8)));
70567057

70577058
struct bpf_rb_root {
@@ -7063,6 +7064,7 @@ struct bpf_rb_node {
70637064
__u64 :64;
70647065
__u64 :64;
70657066
__u64 :64;
7067+
__u64 :64;
70667068
} __attribute__((aligned(8)));
70677069

70687070
struct bpf_refcount {

kernel/bpf/helpers.c

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1942,23 +1942,29 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta
19421942
return (void *)p__refcounted_kptr;
19431943
}
19441944

1945-
static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head,
1945+
static int __bpf_list_add(struct bpf_list_node_kern *node,
1946+
struct bpf_list_head *head,
19461947
bool tail, struct btf_record *rec, u64 off)
19471948
{
1948-
struct list_head *n = (void *)node, *h = (void *)head;
1949+
struct list_head *n = &node->list_head, *h = (void *)head;
19491950

19501951
/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
19511952
* called on its fields, so init here
19521953
*/
19531954
if (unlikely(!h->next))
19541955
INIT_LIST_HEAD(h);
1955-
if (!list_empty(n)) {
1956+
1957+
/* node->owner != NULL implies !list_empty(n), no need to separately
1958+
* check the latter
1959+
*/
1960+
if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
19561961
/* Only called from BPF prog, no need to migrate_disable */
19571962
__bpf_obj_drop_impl((void *)n - off, rec);
19581963
return -EINVAL;
19591964
}
19601965

19611966
tail ? list_add_tail(n, h) : list_add(n, h);
1967+
WRITE_ONCE(node->owner, head);
19621968

19631969
return 0;
19641970
}
@@ -1967,25 +1973,26 @@ __bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
19671973
struct bpf_list_node *node,
19681974
void *meta__ign, u64 off)
19691975
{
1976+
struct bpf_list_node_kern *n = (void *)node;
19701977
struct btf_struct_meta *meta = meta__ign;
19711978

1972-
return __bpf_list_add(node, head, false,
1973-
meta ? meta->record : NULL, off);
1979+
return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
19741980
}
19751981

19761982
__bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
19771983
struct bpf_list_node *node,
19781984
void *meta__ign, u64 off)
19791985
{
1986+
struct bpf_list_node_kern *n = (void *)node;
19801987
struct btf_struct_meta *meta = meta__ign;
19811988

1982-
return __bpf_list_add(node, head, true,
1983-
meta ? meta->record : NULL, off);
1989+
return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
19841990
}
19851991

19861992
static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
19871993
{
19881994
struct list_head *n, *h = (void *)head;
1995+
struct bpf_list_node_kern *node;
19891996

19901997
/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
19911998
* called on its fields, so init here
@@ -1994,8 +2001,14 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tai
19942001
INIT_LIST_HEAD(h);
19952002
if (list_empty(h))
19962003
return NULL;
2004+
19972005
n = tail ? h->prev : h->next;
2006+
node = container_of(n, struct bpf_list_node_kern, list_head);
2007+
if (WARN_ON_ONCE(READ_ONCE(node->owner) != head))
2008+
return NULL;
2009+
19982010
list_del_init(n);
2011+
WRITE_ONCE(node->owner, NULL);
19992012
return (struct bpf_list_node *)n;
20002013
}
20012014

@@ -2012,29 +2025,38 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
20122025
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
20132026
struct bpf_rb_node *node)
20142027
{
2028+
struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
20152029
struct rb_root_cached *r = (struct rb_root_cached *)root;
2016-
struct rb_node *n = (struct rb_node *)node;
2030+
struct rb_node *n = &node_internal->rb_node;
20172031

2018-
if (RB_EMPTY_NODE(n))
2032+
/* node_internal->owner != root implies either RB_EMPTY_NODE(n) or
2033+
* n is owned by some other tree. No need to check RB_EMPTY_NODE(n)
2034+
*/
2035+
if (READ_ONCE(node_internal->owner) != root)
20192036
return NULL;
20202037

20212038
rb_erase_cached(n, r);
20222039
RB_CLEAR_NODE(n);
2040+
WRITE_ONCE(node_internal->owner, NULL);
20232041
return (struct bpf_rb_node *)n;
20242042
}
20252043

20262044
/* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
20272045
* program
20282046
*/
2029-
static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
2047+
static int __bpf_rbtree_add(struct bpf_rb_root *root,
2048+
struct bpf_rb_node_kern *node,
20302049
void *less, struct btf_record *rec, u64 off)
20312050
{
20322051
struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
2033-
struct rb_node *parent = NULL, *n = (struct rb_node *)node;
2052+
struct rb_node *parent = NULL, *n = &node->rb_node;
20342053
bpf_callback_t cb = (bpf_callback_t)less;
20352054
bool leftmost = true;
20362055

2037-
if (!RB_EMPTY_NODE(n)) {
2056+
/* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately
2057+
* check the latter
2058+
*/
2059+
if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
20382060
/* Only called from BPF prog, no need to migrate_disable */
20392061
__bpf_obj_drop_impl((void *)n - off, rec);
20402062
return -EINVAL;
@@ -2052,6 +2074,7 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
20522074

20532075
rb_link_node(n, parent, link);
20542076
rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost);
2077+
WRITE_ONCE(node->owner, root);
20552078
return 0;
20562079
}
20572080

@@ -2060,8 +2083,9 @@ __bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node
20602083
void *meta__ign, u64 off)
20612084
{
20622085
struct btf_struct_meta *meta = meta__ign;
2086+
struct bpf_rb_node_kern *n = (void *)node;
20632087

2064-
return __bpf_rbtree_add(root, node, (void *)less, meta ? meta->record : NULL, off);
2088+
return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off);
20652089
}
20662090

20672091
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)

0 commit comments

Comments
 (0)