Skip to content

Commit ccdf7fa

Browse files
committed
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2020-10-12 The main changes are: 1) The BPF verifier improvements to track register allocation pattern, from Alexei and Yonghong. 2) libbpf relocation support for different size load/store, from Andrii. 3) bpf_redirect_peer() helper and support for inner map array with different max_entries, from Daniel. 4) BPF support for per-cpu variables, from Hao. 5) sockmap improvements, from John. ==================== Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents a308283 + 376dcfe commit ccdf7fa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+4339
-772
lines changed

Documentation/bpf/bpf_devel_QA.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,13 @@ Q: Where can I find patches currently under discussion for BPF subsystem?
6060
A: All patches that are Cc'ed to netdev are queued for review under netdev
6161
patchwork project:
6262

63-
http://patchwork.ozlabs.org/project/netdev/list/
63+
https://patchwork.kernel.org/project/netdevbpf/list/
6464

6565
Those patches which target BPF, are assigned to a 'bpf' delegate for
6666
further processing from BPF maintainers. The current queue with
6767
patches under review can be found at:
6868

69-
https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
69+
https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173
7070

7171
Once the patches have been reviewed by the BPF community as a whole
7272
and approved by the BPF maintainers, their status in patchwork will be

MAINTAINERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3263,7 +3263,7 @@ M: Daniel Borkmann <daniel@iogearbox.net>
32633263
R: Martin KaFai Lau <kafai@fb.com>
32643264
R: Song Liu <songliubraving@fb.com>
32653265
R: Yonghong Song <yhs@fb.com>
3266-
R: Andrii Nakryiko <andriin@fb.com>
3266+
R: Andrii Nakryiko <andrii@kernel.org>
32673267
R: John Fastabend <john.fastabend@gmail.com>
32683268
R: KP Singh <kpsingh@chromium.org>
32693269
L: netdev@vger.kernel.org

drivers/net/veth.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,14 @@ static int veth_select_rxq(struct net_device *dev)
420420
return smp_processor_id() % dev->real_num_rx_queues;
421421
}
422422

423+
static struct net_device *veth_peer_dev(struct net_device *dev)
424+
{
425+
struct veth_priv *priv = netdev_priv(dev);
426+
427+
/* Callers must be under RCU read side. */
428+
return rcu_dereference(priv->peer);
429+
}
430+
423431
static int veth_xdp_xmit(struct net_device *dev, int n,
424432
struct xdp_frame **frames,
425433
u32 flags, bool ndo_xmit)
@@ -1224,6 +1232,7 @@ static const struct net_device_ops veth_netdev_ops = {
12241232
.ndo_set_rx_headroom = veth_set_rx_headroom,
12251233
.ndo_bpf = veth_xdp,
12261234
.ndo_xdp_xmit = veth_ndo_xdp_xmit,
1235+
.ndo_get_peer_dev = veth_peer_dev,
12271236
};
12281237

12291238
#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \

include/linux/bpf.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ struct bpf_map_ops {
8282
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
8383
int fd);
8484
void (*map_fd_put_ptr)(void *ptr);
85-
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
85+
int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
8686
u32 (*map_fd_sys_lookup_elem)(void *ptr);
8787
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
8888
struct seq_file *m);
@@ -293,6 +293,7 @@ enum bpf_arg_type {
293293
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
294294
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
295295
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
296+
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
296297
__BPF_ARG_TYPE_MAX,
297298
};
298299

@@ -307,6 +308,8 @@ enum bpf_return_type {
307308
RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
308309
RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */
309310
RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */
311+
RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
312+
RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */
310313
};
311314

312315
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -405,6 +408,7 @@ enum bpf_reg_type {
405408
PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
406409
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
407410
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
411+
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
408412
};
409413

410414
/* The information passed from prog-specific *_is_valid_access
@@ -1828,6 +1832,8 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
18281832
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
18291833
extern const struct bpf_func_proto bpf_copy_from_user_proto;
18301834
extern const struct bpf_func_proto bpf_snprintf_btf_proto;
1835+
extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
1836+
extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
18311837

18321838
const struct bpf_func_proto *bpf_tracing_func_proto(
18331839
enum bpf_func_id func_id, const struct bpf_prog *prog);

include/linux/bpf_verifier.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,13 @@ struct bpf_insn_aux_data {
308308
u32 map_index; /* index into used_maps[] */
309309
u32 map_off; /* offset from value base address */
310310
};
311+
struct {
312+
enum bpf_reg_type reg_type; /* type of pseudo_btf_id */
313+
union {
314+
u32 btf_id; /* btf_id for struct typed var */
315+
u32 mem_size; /* mem_size for non-struct typed var */
316+
};
317+
} btf_var;
311318
};
312319
u64 map_key_state; /* constant (32 bit) key tracking for maps */
313320
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */

include/linux/btf.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
110110
i < btf_type_vlen(struct_type); \
111111
i++, member++)
112112

113+
#define for_each_vsi(i, datasec_type, member) \
114+
for (i = 0, member = btf_type_var_secinfo(datasec_type); \
115+
i < btf_type_vlen(datasec_type); \
116+
i++, member++)
117+
113118
static inline bool btf_type_is_ptr(const struct btf_type *t)
114119
{
115120
return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
@@ -145,6 +150,21 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t)
145150
return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
146151
}
147152

153+
static inline bool btf_type_is_var(const struct btf_type *t)
154+
{
155+
return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
156+
}
157+
158+
/* union is only a special case of struct:
159+
* all its offsetof(member) == 0
160+
*/
161+
static inline bool btf_type_is_struct(const struct btf_type *t)
162+
{
163+
u8 kind = BTF_INFO_KIND(t->info);
164+
165+
return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
166+
}
167+
148168
static inline u16 btf_type_vlen(const struct btf_type *t)
149169
{
150170
return BTF_INFO_VLEN(t->info);
@@ -179,6 +199,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t)
179199
return (const struct btf_member *)(t + 1);
180200
}
181201

202+
static inline const struct btf_var_secinfo *btf_type_var_secinfo(
203+
const struct btf_type *t)
204+
{
205+
return (const struct btf_var_secinfo *)(t + 1);
206+
}
207+
182208
#ifdef CONFIG_BPF_SYSCALL
183209
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
184210
const char *btf_name_by_offset(const struct btf *btf, u32 offset);

include/linux/netdevice.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,6 +1276,9 @@ struct netdev_net_notifier {
12761276
* int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
12771277
* int cmd);
12781278
* Add, change, delete or get information on an IPv4 tunnel.
1279+
* struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
1280+
* If a device is paired with a peer device, return the peer instance.
1281+
* The caller must be under RCU read context.
12791282
*/
12801283
struct net_device_ops {
12811284
int (*ndo_init)(struct net_device *dev);
@@ -1483,6 +1486,7 @@ struct net_device_ops {
14831486
struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
14841487
int (*ndo_tunnel_ctl)(struct net_device *dev,
14851488
struct ip_tunnel_parm *p, int cmd);
1489+
struct net_device * (*ndo_get_peer_dev)(struct net_device *dev);
14861490
};
14871491

14881492
/**

include/linux/skmsg.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node);
308308
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
309309
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
310310
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
311+
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
312+
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
311313

312314
int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
313315
struct sk_msg *msg);

include/net/tcp.h

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2228,34 +2228,6 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
22282228
#endif /* CONFIG_NET_SOCK_MSG */
22292229

22302230
#ifdef CONFIG_CGROUP_BPF
2231-
/* Copy the listen sk's HDR_OPT_CB flags to its child.
2232-
*
2233-
* During 3-Way-HandShake, the synack is usually sent from
2234-
* the listen sk with the HDR_OPT_CB flags set so that
2235-
* bpf-prog will be called to write the BPF hdr option.
2236-
*
2237-
* In fastopen, the child sk is used to send synack instead
2238-
* of the listen sk. Thus, inheriting the HDR_OPT_CB flags
2239-
* from the listen sk gives the bpf-prog a chance to write
2240-
* BPF hdr option in the synack pkt during fastopen.
2241-
*
2242-
* Both fastopen and non-fastopen child will inherit the
2243-
* HDR_OPT_CB flags to keep the bpf-prog having a consistent
2244-
* behavior when deciding to clear this cb flags (or not)
2245-
* during the PASSIVE_ESTABLISHED_CB.
2246-
*
2247-
* In the future, other cb flags could be inherited here also.
2248-
*/
2249-
static inline void bpf_skops_init_child(const struct sock *sk,
2250-
struct sock *child)
2251-
{
2252-
tcp_sk(child)->bpf_sock_ops_cb_flags =
2253-
tcp_sk(sk)->bpf_sock_ops_cb_flags &
2254-
(BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG |
2255-
BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
2256-
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
2257-
}
2258-
22592231
static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
22602232
struct sk_buff *skb,
22612233
unsigned int end_offset)
@@ -2264,11 +2236,6 @@ static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
22642236
skops->skb_data_end = skb->data + end_offset;
22652237
}
22662238
#else
2267-
static inline void bpf_skops_init_child(const struct sock *sk,
2268-
struct sock *child)
2269-
{
2270-
}
2271-
22722239
static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
22732240
struct sk_buff *skb,
22742241
unsigned int end_offset)

include/uapi/linux/bpf.h

Lines changed: 87 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -356,18 +356,36 @@ enum bpf_link_type {
356356
#define BPF_F_SLEEPABLE (1U << 4)
357357

358358
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
359-
* two extensions:
360-
*
361-
* insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
362-
* insn[0].imm: map fd map fd
363-
* insn[1].imm: 0 offset into value
364-
* insn[0].off: 0 0
365-
* insn[1].off: 0 0
366-
* ldimm64 rewrite: address of map address of map[0]+offset
367-
* verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
359+
* the following extensions:
360+
*
361+
* insn[0].src_reg: BPF_PSEUDO_MAP_FD
362+
* insn[0].imm: map fd
363+
* insn[1].imm: 0
364+
* insn[0].off: 0
365+
* insn[1].off: 0
366+
* ldimm64 rewrite: address of map
367+
* verifier type: CONST_PTR_TO_MAP
368368
*/
369369
#define BPF_PSEUDO_MAP_FD 1
370+
/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE
371+
* insn[0].imm: map fd
372+
* insn[1].imm: offset into value
373+
* insn[0].off: 0
374+
* insn[1].off: 0
375+
* ldimm64 rewrite: address of map[0]+offset
376+
* verifier type: PTR_TO_MAP_VALUE
377+
*/
370378
#define BPF_PSEUDO_MAP_VALUE 2
379+
/* insn[0].src_reg: BPF_PSEUDO_BTF_ID
380+
* insn[0].imm: kernel btf id of VAR
381+
* insn[1].imm: 0
382+
* insn[0].off: 0
383+
* insn[1].off: 0
384+
* ldimm64 rewrite: address of the kernel variable
385+
* verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
386+
* is struct/union.
387+
*/
388+
#define BPF_PSEUDO_BTF_ID 3
371389

372390
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
373391
* offset to another bpf function
@@ -417,6 +435,9 @@ enum {
417435

418436
/* Share perf_event among processes */
419437
BPF_F_PRESERVE_ELEMS = (1U << 11),
438+
439+
/* Create a map that is suitable to be an inner map with dynamic max entries */
440+
BPF_F_INNER_MAP = (1U << 12),
420441
};
421442

422443
/* Flags for BPF_PROG_QUERY. */
@@ -1680,7 +1701,7 @@ union bpf_attr {
16801701
* **TCP_CONGESTION**, **TCP_BPF_IW**,
16811702
* **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
16821703
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
1683-
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
1704+
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
16841705
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
16851706
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
16861707
* Return
@@ -2235,7 +2256,7 @@ union bpf_attr {
22352256
* Description
22362257
* This helper is used in programs implementing policies at the
22372258
* skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
2238-
* if the verdeict eBPF program returns **SK_PASS**), redirect it
2259+
* if the verdict eBPF program returns **SK_PASS**), redirect it
22392260
* to the socket referenced by *map* (of type
22402261
* **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
22412262
* egress interfaces can be used for redirection. The
@@ -3661,10 +3682,59 @@ union bpf_attr {
36613682
* Redirect the packet to another net device of index *ifindex*
36623683
* and fill in L2 addresses from neighboring subsystem. This helper
36633684
* is somewhat similar to **bpf_redirect**\ (), except that it
3664-
* fills in e.g. MAC addresses based on the L3 information from
3665-
* the packet. This helper is supported for IPv4 and IPv6 protocols.
3685+
* populates L2 addresses as well, meaning, internally, the helper
3686+
* performs a FIB lookup based on the skb's networking header to
3687+
* get the address of the next hop and then relies on the neighbor
3688+
* lookup for the L2 address of the nexthop.
3689+
*
3690+
* The *flags* argument is reserved and must be 0. The helper is
3691+
* currently only supported for tc BPF program types, and enabled
3692+
* for IPv4 and IPv6 protocols.
3693+
* Return
3694+
* The helper returns **TC_ACT_REDIRECT** on success or
3695+
* **TC_ACT_SHOT** on error.
3696+
*
3697+
* void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
3698+
* Description
3699+
* Take a pointer to a percpu ksym, *percpu_ptr*, and return a
3700+
* pointer to the percpu kernel variable on *cpu*. A ksym is an
3701+
* extern variable decorated with '__ksym'. For ksym, there is a
3702+
* global var (either static or global) defined of the same name
3703+
* in the kernel. The ksym is percpu if the global var is percpu.
3704+
* The returned pointer points to the global percpu var on *cpu*.
3705+
*
3706+
* bpf_per_cpu_ptr() has the same semantics as per_cpu_ptr() in the
3707+
* kernel, except that bpf_per_cpu_ptr() may return NULL. This
3708+
* happens if *cpu* is larger than or equal to nr_cpu_ids. The caller of
3709+
* bpf_per_cpu_ptr() must check the returned value.
3710+
* Return
3711+
* A pointer pointing to the kernel percpu variable on *cpu*, or
3712+
* NULL, if *cpu* is invalid.
3713+
*
3714+
* void *bpf_this_cpu_ptr(const void *percpu_ptr)
3715+
* Description
3716+
* Take a pointer to a percpu ksym, *percpu_ptr*, and return a
3717+
* pointer to the percpu kernel variable on this cpu. See the
3718+
* description of 'ksym' in **bpf_per_cpu_ptr**\ ().
3719+
*
3720+
* bpf_this_cpu_ptr() has the same semantics as this_cpu_ptr() in
3721+
* the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
3722+
* never return NULL.
3723+
* Return
3724+
* A pointer pointing to the kernel percpu variable on this cpu.
3725+
*
3726+
* long bpf_redirect_peer(u32 ifindex, u64 flags)
3727+
* Description
3728+
* Redirect the packet to another net device of index *ifindex*.
3729+
* This helper is somewhat similar to **bpf_redirect**\ (), except
3730+
* that the redirection happens to the *ifindex*' peer device and
3731+
* the netns switch takes place from ingress to ingress without
3732+
* going through the CPU's backlog queue.
3733+
*
36663734
* The *flags* argument is reserved and must be 0. The helper is
3667-
* currently only supported for tc BPF program types.
3735+
* currently only supported for tc BPF program types at the ingress
3736+
* hook and for veth device types. The peer device must reside in a
3737+
* different network namespace.
36683738
* Return
36693739
* The helper returns **TC_ACT_REDIRECT** on success or
36703740
* **TC_ACT_SHOT** on error.
@@ -3823,6 +3893,9 @@ union bpf_attr {
38233893
FN(seq_printf_btf), \
38243894
FN(skb_cgroup_classid), \
38253895
FN(redirect_neigh), \
3896+
FN(bpf_per_cpu_ptr), \
3897+
FN(bpf_this_cpu_ptr), \
3898+
FN(redirect_peer), \
38263899
/* */
38273900

38283901
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

0 commit comments

Comments
 (0)