
Commit 636d549

Author: Alexei Starovoitov (committed)
Merge branch 'bpf: misc performance improvements for cgroup'
Stanislav Fomichev says:

====================

First patch adds custom getsockopt for TCP_ZEROCOPY_RECEIVE to remove kmalloc and lock_sock overhead from the data path.

Second patch removes kzalloc/kfree from getsockopt for the common cases.

Third patch switches cgroup_bpf_enabled to be per attach type, so the overhead is added only for the cgroup attach types actually used on the system.

No visible user-side changes.

v9:
- include linux/tcp.h instead of netinet/tcp.h in sockopt_sk.c
- note that v9 depends on the commit 4be34f3 ("bpf: Don't leak memory in bpf getsockopt when optlen == 0") from the bpf tree

v8:
- add bpi.h to tools/include/uapi in the same patch (Martin KaFai Lau)
- kmalloc instead of kzalloc when exporting buffer (Martin KaFai Lau)
- note that v8 depends on the commit 4be34f3 ("bpf: Don't leak memory in bpf getsockopt when optlen == 0") from the bpf tree

v7:
- add comment about buffer contents for retval != 0 (Martin KaFai Lau)
- export tcp.h into tools/include/uapi (Martin KaFai Lau)
- note that v7 depends on the commit 4be34f3 ("bpf: Don't leak memory in bpf getsockopt when optlen == 0") from the bpf tree

v6:
- avoid indirect cost for new bpf_bypass_getsockopt (Eric Dumazet)

v5:
- reorder patches to reduce the churn (Martin KaFai Lau)

v4:
- update performance numbers
- bypass_bpf_getsockopt (Martin KaFai Lau)

v3:
- remove extra newline, add comment about sizeof tcp_zerocopy_receive (Martin KaFai Lau)
- add another patch to remove lock_sock overhead from TCP_ZEROCOPY_RECEIVE; technically, this makes patch #1 obsolete, but I'd still prefer to keep it to help with other socket options

v2:
- perf numbers for getsockopt kmalloc reduction (Song Liu)
- (sk) in BPF_CGROUP_PRE_CONNECT_ENABLED (Song Liu)
- 128 -> 64 buffer size, BUILD_BUG_ON (Martin KaFai Lau)

====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2 parents 13ca51d + a9ed15d commit 636d549
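For context on the data path the first patch targets: TCP zero-copy receive is driven entirely through getsockopt(TCP_ZEROCOPY_RECEIVE), so every received chunk pays for whatever the cgroup BPF getsockopt hook adds. Below is a minimal user-space sketch of that hot loop; it assumes fd is a connected TCP socket and that addr points at a page-aligned region previously mmap()ed on the socket, and it elides error handling.

/* Hot-path sketch: each chunk of payload is pulled in via getsockopt(),
 * so any per-call kmalloc or lock_sock taken by the cgroup BPF hook is
 * paid on every receive. Assumes addr was mmap()ed on fd beforehand.
 */
#include <linux/tcp.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

static long zerocopy_receive_once(int fd, void *addr, unsigned int chunk)
{
	struct tcp_zerocopy_receive zc;
	socklen_t zc_len = sizeof(zc);

	memset(&zc, 0, sizeof(zc));
	zc.address = (__u64)(unsigned long)addr;	/* page-aligned mmap()ed area */
	zc.length = chunk;

	if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len))
		return -1;

	return zc.length;	/* bytes now mapped at addr */
}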

File tree: 21 files changed, +597 −55 lines


include/linux/bpf-cgroup.h

Lines changed: 42 additions & 21 deletions
@@ -23,8 +23,8 @@ struct ctl_table_header;
 
 #ifdef CONFIG_CGROUP_BPF
 
-extern struct static_key_false cgroup_bpf_enabled_key;
-#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
+extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
+#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
 
 DECLARE_PER_CPU(struct bpf_cgroup_storage*,
 		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
@@ -147,6 +147,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 				       int __user *optlen, int max_optlen,
 				       int retval);
 
+int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
+					    int optname, void *optval,
+					    int *optlen, int retval);
+
 static inline enum bpf_cgroup_storage_type cgroup_storage_type(
 	struct bpf_map *map)
 {
@@ -185,7 +189,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled)						      \
+	if (cgroup_bpf_enabled(BPF_CGROUP_INET_INGRESS))		      \
 		__ret = __cgroup_bpf_run_filter_skb(sk, skb,		      \
 						    BPF_CGROUP_INET_INGRESS); \
 									      \
@@ -195,7 +199,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb)			      \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled && sk && sk == skb->sk) {		      \
+	if (cgroup_bpf_enabled(BPF_CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
 		typeof(sk) __sk = sk_to_full_sk(sk);			      \
 		if (sk_fullsock(__sk))					      \
 			__ret = __cgroup_bpf_run_filter_skb(__sk, skb,	      \
@@ -207,7 +211,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_SK_PROG(sk, type)				      \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled) {					      \
+	if (cgroup_bpf_enabled(type)) {					      \
 		__ret = __cgroup_bpf_run_filter_sk(sk, type);		      \
 	}								      \
 	__ret;								      \
@@ -228,7 +232,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type)				      \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled)						      \
+	if (cgroup_bpf_enabled(type))					      \
 		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,    \
 							  NULL);	      \
 	__ret;								      \
@@ -237,7 +241,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx)		      \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled) {					      \
+	if (cgroup_bpf_enabled(type)) {					      \
 		lock_sock(sk);						      \
 		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,    \
 							  t_ctx);	      \
@@ -252,8 +256,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)			      \
 	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
 
-#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
-					    sk->sk_prot->pre_connect)
+#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk)				      \
+	((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) ||		      \
+	  cgroup_bpf_enabled(BPF_CGROUP_INET6_CONNECT)) &&		      \
+	 (sk)->sk_prot->pre_connect)
 
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr)			      \
 	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
@@ -297,7 +303,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk)			      \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled)						      \
+	if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS))			      \
 		__ret = __cgroup_bpf_run_filter_sock_ops(sk,		      \
 							 sock_ops,	      \
 							 BPF_CGROUP_SOCK_OPS); \
@@ -307,7 +313,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops)				      \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled && (sock_ops)->sk) {			      \
+	if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS) && (sock_ops)->sk) {      \
 		typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk);	      \
 		if (__sk && sk_fullsock(__sk))				      \
 			__ret = __cgroup_bpf_run_filter_sock_ops(__sk,	      \
@@ -320,7 +326,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access)	      \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled)						      \
+	if (cgroup_bpf_enabled(BPF_CGROUP_DEVICE))			      \
 		__ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
 							  access,	      \
 							  BPF_CGROUP_DEVICE); \
@@ -332,7 +338,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos)       \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled)						      \
+	if (cgroup_bpf_enabled(BPF_CGROUP_SYSCTL))			      \
 		__ret = __cgroup_bpf_run_filter_sysctl(head, table, write,    \
 						       buf, count, pos,	      \
 						       BPF_CGROUP_SYSCTL);    \
@@ -343,7 +349,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 				       kernel_optval)			      \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled)						      \
+	if (cgroup_bpf_enabled(BPF_CGROUP_SETSOCKOPT))			      \
 		__ret = __cgroup_bpf_run_filter_setsockopt(sock, level,       \
 							   optname, optval,   \
 							   optlen,	      \
@@ -354,7 +360,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen)			      \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled)						      \
+	if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT))			      \
 		get_user(__ret, optlen);				      \
 	__ret;								      \
 })
@@ -363,11 +369,24 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 				       max_optlen, retval)		      \
 ({									      \
 	int __ret = retval;						      \
-	if (cgroup_bpf_enabled)						      \
-		__ret = __cgroup_bpf_run_filter_getsockopt(sock, level,       \
-							   optname, optval,   \
-							   optlen, max_optlen, \
-							   retval);	      \
+	if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT))			      \
+		if (!(sock)->sk_prot->bpf_bypass_getsockopt ||		      \
+		    !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
+					  tcp_bpf_bypass_getsockopt,	      \
+					  level, optname))		      \
+			__ret = __cgroup_bpf_run_filter_getsockopt(	      \
+				sock, level, optname, optval, optlen,	      \
+				max_optlen, retval);			      \
+	__ret;								      \
+})
+
+#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval,     \
+					    optlen, retval)		      \
+({									      \
+	int __ret = retval;						      \
+	if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT))			      \
+		__ret = __cgroup_bpf_run_filter_getsockopt_kern(	      \
+			sock, level, optname, optval, optlen, retval);	      \
 	__ret;								      \
 })
 
@@ -427,7 +446,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 	return 0;
 }
 
-#define cgroup_bpf_enabled (0)
+#define cgroup_bpf_enabled(type) (0)
 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; })
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
@@ -452,6 +471,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
 				       optlen, max_optlen, retval) ({ retval; })
+#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
+					    optlen, retval) ({ retval; })
 #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
 				       kernel_optval) ({ 0; })
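The header changes above only declare the per-attach-type keys; the kernel/bpf/cgroup.c side of the series, which is not part of this excerpt, is what flips them as programs are attached and detached. A rough sketch of that bookkeeping, with illustrative helper names, follows.

/* Sketch only: how attach/detach would keep the per-type keys in sync,
 * so cgroup_bpf_enabled(type) stays false for attach types nothing on
 * the system uses. Helper names are not from this diff.
 */
#include <linux/bpf-cgroup.h>
#include <linux/jump_label.h>

static void sketch_mark_type_attached(enum bpf_attach_type type)
{
	/* first program of this type: enable the hooks for it */
	static_branch_inc(&cgroup_bpf_enabled_key[type]);
}

static void sketch_mark_type_detached(enum bpf_attach_type type)
{
	/* last program of this type gone: branch goes back to disabled */
	static_branch_dec(&cgroup_bpf_enabled_key[type]);
}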

include/linux/filter.h

Lines changed: 5 additions & 0 deletions
@@ -1298,6 +1298,11 @@ struct bpf_sysctl_kern {
 	u64 tmp_reg;
 };
 
+#define BPF_SOCKOPT_KERN_BUF_SIZE	32
+struct bpf_sockopt_buf {
+	u8		data[BPF_SOCKOPT_KERN_BUF_SIZE];
+};
+
 struct bpf_sockopt_kern {
 	struct sock	*sk;
 	u8		*optval;
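The 32-byte bpf_sockopt_buf added here is what lets the getsockopt path in kernel/bpf/cgroup.c (not shown in this excerpt) keep small option values on the caller's stack instead of allocating. A hedged sketch of that selection logic, with an illustrative helper name:

/* Sketch: pick the on-stack buffer for common small optlens and only
 * fall back to the heap for oversized ones. Helper name is illustrative.
 */
#include <linux/filter.h>
#include <linux/slab.h>

static void *sockopt_pick_buffer(struct bpf_sockopt_buf *buf, int max_optlen)
{
	if (max_optlen <= (int)sizeof(buf->data))
		return buf->data;		/* no kzalloc/kfree on this path */

	return kzalloc(max_optlen, GFP_USER);	/* rare: larger than BPF_SOCKOPT_KERN_BUF_SIZE */
}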

include/linux/indirect_call_wrapper.h

Lines changed: 6 additions & 0 deletions
@@ -60,4 +60,10 @@
 #define INDIRECT_CALL_INET(f, f2, f1, ...) f(__VA_ARGS__)
 #endif
 
+#if IS_ENABLED(CONFIG_INET)
+#define INDIRECT_CALL_INET_1(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_INET_1(f, f1, ...) f(__VA_ARGS__)
+#endif
+
 #endif
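For readers unfamiliar with the wrapper being reused here: INDIRECT_CALL_1(), already defined earlier in this header, compares a function pointer against its single likely target and calls that target directly, so on retpoline kernels the common case skips the indirect-call thunk. A standalone user-space model of the pattern, with purely illustrative names:

/* Standalone model of the INDIRECT_CALL_1() pattern; names below are
 * illustrative, not the kernel's. The direct-call branch is what the
 * getsockopt fast path relies on for sk_prot->bpf_bypass_getsockopt.
 */
#include <stdbool.h>

#define MODEL_INDIRECT_CALL_1(f, f1, ...) \
	((f) == (f1) ? f1(__VA_ARGS__) : (f)(__VA_ARGS__))

static bool model_tcp_bypass(int level, int optname)
{
	return false;	/* stand-in for the real predicate */
}

static bool run_bypass(bool (*cb)(int, int), int level, int optname)
{
	/* compiles to a compare plus a direct call when cb == model_tcp_bypass */
	return MODEL_INDIRECT_CALL_1(cb, model_tcp_bypass, level, optname);
}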

include/net/sock.h

Lines changed: 2 additions & 0 deletions
@@ -1174,6 +1174,8 @@ struct proto {
 
 	int			(*backlog_rcv) (struct sock *sk,
 						struct sk_buff *skb);
+	bool			(*bpf_bypass_getsockopt)(int level,
+						int optname);
 
 	void		(*release_cb)(struct sock *sk);
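struct proto only gains the hook here; a protocol still has to fill it in for the fast path to ever trigger. For TCP that wiring lives in net/ipv4/tcp_ipv4.c elsewhere in the series (not included in this excerpt); under that assumption, the designated-initializer fragment presumably looks roughly like this:

/* Sketch, not the diff itself: the relevant members shown in isolation.
 * The real tcp_prot initializer has many more fields.
 */
#include <net/sock.h>
#include <net/tcp.h>

static struct proto sketch_tcp_prot_fragment = {
	.name			= "TCP",
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
};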

include/net/tcp.h

Lines changed: 1 addition & 0 deletions
@@ -403,6 +403,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock,
 		      struct poll_table_struct *wait);
 int tcp_getsockopt(struct sock *sk, int level, int optname,
 		   char __user *optval, int __user *optlen);
+bool tcp_bpf_bypass_getsockopt(int level, int optname);
 int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
 		   unsigned int optlen);
 void tcp_set_keepalive(struct sock *sk, int val);
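The declaration above is all this excerpt shows of tcp_bpf_bypass_getsockopt; its definition lands in net/ipv4/tcp.c elsewhere in the series. Going by the cover letter, it presumably reduces to a level/optname check along these lines:

/* Hedged reconstruction, not the diff itself: bypass the cgroup BPF
 * getsockopt hook only for the TCP_ZEROCOPY_RECEIVE hot path.
 */
#include <net/tcp.h>

bool tcp_bpf_bypass_getsockopt(int level, int optname)
{
	return level == SOL_TCP && optname == TCP_ZEROCOPY_RECEIVE;
}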
