
Commit 6402528

Niklas Söderlund authored and davem330 committed
nfp: xsk: add AF_XDP zero-copy Rx and Tx support
This patch adds zero-copy Rx and Tx support for AF_XDP sockets. It does so by adding a separate NAPI poll function that is attached to each channel when the XSK socket is attached with XDP_SETUP_XSK_POOL, and restored when the XSK socket is terminated; this is done per channel.

Support for XDP_TX is implemented: the XDP buffer can safely be moved from the Rx to the Tx queue, and is correctly freed and returned to the XSK pool once it has been transmitted.

Note that when AF_XDP zero-copy is enabled, the XDP action XDP_PASS will allocate a new buffer and copy the zero-copy frame prior to passing it to the kernel stack.

This patch is based on previous work by Jakub Kicinski.

Signed-off-by: Niklas Söderlund <niklas.soderlund@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
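For context (not part of the commit): the zero-copy path described above is exercised from userspace by binding an AF_XDP socket with the XDP_ZEROCOPY bind flag. Below is a minimal, hypothetical sketch using libbpf's xsk helpers (moved to libxdp's <xdp/xsk.h> in newer trees); the interface name "eth0" and queue 0 are placeholders, and error handling is abbreviated.

/* Hypothetical usage sketch, not part of this commit. Binding in
 * zero-copy mode on an nfp netdev reaches the code paths added here
 * (XDP_SETUP_XSK_POOL, nfp_net_xsk_poll).
 */
#include <bpf/xsk.h>            /* libbpf xsk helpers; newer trees: <xdp/xsk.h> */
#include <linux/if_xdp.h>       /* XDP_ZEROCOPY */
#include <stdlib.h>
#include <unistd.h>

#define NUM_FRAMES 4096

int main(void)
{
        struct xsk_ring_prod fq, tx;
        struct xsk_ring_cons cq, rx;
        struct xsk_umem *umem;
        struct xsk_socket *xsk;
        struct xsk_socket_config cfg = {
                .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
                .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
                .bind_flags = XDP_ZEROCOPY,     /* request the zero-copy path */
        };
        size_t len = (size_t)NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
        void *bufs;

        if (posix_memalign(&bufs, getpagesize(), len))
                return 1;
        if (xsk_umem__create(&umem, bufs, len, &fq, &cq, NULL))
                return 1;
        /* Triggers ndo_bpf with XDP_SETUP_XSK_POOL in the driver. */
        if (xsk_socket__create(&xsk, "eth0", 0, umem, &rx, &tx, &cfg))
                return 1;

        /* ... produce to fq, consume rx, kick tx with sendto() ... */

        xsk_socket__delete(xsk);
        xsk_umem__delete(umem);
        free(bufs);
        return 0;
}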
1 parent 9c91a36 commit 6402528

File tree

6 files changed: +756, -28 lines


drivers/net/ethernet/netronome/nfp/Makefile

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ nfp-objs := \
 	    nfp_net_main.o \
 	    nfp_net_repr.o \
 	    nfp_net_sriov.o \
+	    nfp_net_xsk.o \
 	    nfp_netvf_main.o \
 	    nfp_port.o \
 	    nfp_shared_buf.o \

drivers/net/ethernet/netronome/nfp/nfp_net.h

Lines changed: 28 additions & 3 deletions
@@ -171,22 +171,33 @@ struct nfp_net_tx_desc {
  * struct nfp_net_tx_buf - software TX buffer descriptor
  * @skb:	normal ring, sk_buff associated with this buffer
  * @frag:	XDP ring, page frag associated with this buffer
+ * @xdp:	XSK buffer pool handle (for AF_XDP)
  * @dma_addr:	DMA mapping address of the buffer
  * @fidx:	Fragment index (-1 for the head and [0..nr_frags-1] for frags)
  * @pkt_cnt:	Number of packets to be produced out of the skb associated
  *		with this buffer (valid only on the head's buffer).
  *		Will be 1 for all non-TSO packets.
+ * @is_xsk_tx:	Flag if buffer is a RX buffer after a XDP_TX action and not a
+ *		buffer from the TX queue (for AF_XDP).
  * @real_len:	Number of bytes which to be produced out of the skb (valid only
  *		on the head's buffer). Equal to skb->len for non-TSO packets.
  */
 struct nfp_net_tx_buf {
 	union {
 		struct sk_buff *skb;
 		void *frag;
+		struct xdp_buff *xdp;
 	};
 	dma_addr_t dma_addr;
-	short int fidx;
-	u16 pkt_cnt;
+	union {
+		struct {
+			short int fidx;
+			u16 pkt_cnt;
+		};
+		struct {
+			bool is_xsk_tx;
+		};
+	};
 	u32 real_len;
 };

@@ -315,6 +326,16 @@ struct nfp_net_rx_buf {
 	dma_addr_t dma_addr;
 };

+/**
+ * struct nfp_net_xsk_rx_buf - software RX XSK buffer descriptor
+ * @dma_addr:	DMA mapping address of the buffer
+ * @xdp:	XSK buffer pool handle (for AF_XDP)
+ */
+struct nfp_net_xsk_rx_buf {
+	dma_addr_t dma_addr;
+	struct xdp_buff *xdp;
+};
+
 /**
  * struct nfp_net_rx_ring - RX ring structure
  * @r_vec:      Back pointer to ring vector structure
@@ -325,6 +346,7 @@ struct nfp_net_rx_buf {
  * @fl_qcidx:   Queue Controller Peripheral (QCP) queue index for the freelist
  * @qcp_fl:     Pointer to base of the QCP freelist queue
  * @rxbufs:     Array of transmitted FL/RX buffers
+ * @xsk_rxbufs: Array of transmitted FL/RX buffers (for AF_XDP)
  * @rxds:       Virtual address of FL/RX ring in host memory
  * @xdp_rxq:    RX-ring info avail for XDP
  * @dma:        DMA address of the FL/RX ring
@@ -343,6 +365,7 @@ struct nfp_net_rx_ring {
 	u8 __iomem *qcp_fl;

 	struct nfp_net_rx_buf *rxbufs;
+	struct nfp_net_xsk_rx_buf *xsk_rxbufs;
 	struct nfp_net_rx_desc *rxds;

 	struct xdp_rxq_info xdp_rxq;
@@ -361,6 +384,7 @@ struct nfp_net_rx_ring {
  * @tx_ring:        Pointer to TX ring
  * @rx_ring:        Pointer to RX ring
  * @xdp_ring:       Pointer to an extra TX ring for XDP
+ * @xsk_pool:       XSK buffer pool active on vector queue pair (for AF_XDP)
  * @irq_entry:      MSI-X table entry (use for talking to the device)
  * @event_ctr:      Number of interrupt
  * @rx_dim:         Dynamic interrupt moderation structure for RX
@@ -432,6 +456,7 @@ struct nfp_net_r_vector {
 	u64 rx_replace_buf_alloc_fail;

 	struct nfp_net_tx_ring *xdp_ring;
+	struct xsk_buff_pool *xsk_pool;

 	struct u64_stats_sync tx_sync;
 	u64 tx_pkts;
@@ -502,7 +527,7 @@ struct nfp_stat_pair {
  * @num_stack_tx_rings:	Number of TX rings used by the stack (not XDP)
  * @num_rx_rings:	Currently configured number of RX rings
  * @mtu:		Device MTU
- * @xsk_pools:		AF_XDP UMEM table (@num_r_vecs in size)
+ * @xsk_pools:		XSK buffer pools, @max_r_vecs in size (for AF_XDP).
  */
 struct nfp_net_dp {
 	struct device *dev;
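The @is_xsk_tx flag above lets TX completion tell a recycled XDP_TX Rx buffer apart from a descriptor produced by the AF_XDP Tx ring. The handler itself lives in nfp_net_xsk.c, which this page does not render; a hedged sketch of the branch it would take, where txbuf and done_pkts are hypothetical names while xsk_buff_free() and xsk_tx_completed() are the stock kernel APIs:

/* Illustrative completion-path branch, inferred from the @is_xsk_tx
 * kernel-doc above; not the driver's actual code.
 */
if (txbuf->is_xsk_tx)
        xsk_buff_free(txbuf->xdp);      /* XDP_TX: return buffer to the XSK pool */
else
        done_pkts++;                    /* Tx ring: later reported via xsk_tx_completed() */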

drivers/net/ethernet/netronome/nfp/nfp_net_common.c

Lines changed: 85 additions & 13 deletions
@@ -46,6 +46,7 @@
 #include "nfp_net_ctrl.h"
 #include "nfp_net.h"
 #include "nfp_net_sriov.h"
+#include "nfp_net_xsk.h"
 #include "nfp_port.h"
 #include "crypto/crypto.h"
 #include "crypto/fw.h"
@@ -1316,6 +1317,9 @@ nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
 		tx_ring->rd_p++;
 	}

+	if (tx_ring->is_xdp)
+		nfp_net_xsk_tx_bufs_free(tx_ring);
+
 	memset(tx_ring->txds, 0, tx_ring->size);
 	tx_ring->wr_p = 0;
 	tx_ring->rd_p = 0;
@@ -1504,10 +1508,14 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
 	/* Move the empty entry to the end of the list */
 	wr_idx = D_IDX(rx_ring, rx_ring->wr_p);
 	last_idx = rx_ring->cnt - 1;
-	rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr;
-	rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag;
-	rx_ring->rxbufs[last_idx].dma_addr = 0;
-	rx_ring->rxbufs[last_idx].frag = NULL;
+	if (rx_ring->r_vec->xsk_pool) {
+		rx_ring->xsk_rxbufs[wr_idx] = rx_ring->xsk_rxbufs[last_idx];
+		memset(&rx_ring->xsk_rxbufs[last_idx], 0,
+		       sizeof(*rx_ring->xsk_rxbufs));
+	} else {
+		rx_ring->rxbufs[wr_idx] = rx_ring->rxbufs[last_idx];
+		memset(&rx_ring->rxbufs[last_idx], 0, sizeof(*rx_ring->rxbufs));
+	}

 	memset(rx_ring->rxds, 0, rx_ring->size);
 	rx_ring->wr_p = 0;
@@ -1529,6 +1537,9 @@ nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp,
 {
 	unsigned int i;

+	if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
+		return;
+
 	for (i = 0; i < rx_ring->cnt - 1; i++) {
 		/* NULL skb can only happen when initial filling of the ring
 		 * fails to allocate enough buffers and calls here to free
@@ -1556,6 +1567,9 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp,
 	struct nfp_net_rx_buf *rxbufs;
 	unsigned int i;

+	if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
+		return 0;
+
 	rxbufs = rx_ring->rxbufs;

 	for (i = 0; i < rx_ring->cnt - 1; i++) {
@@ -1580,6 +1594,9 @@ nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp,
 {
 	unsigned int i;

+	if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
+		return nfp_net_xsk_rx_ring_fill_freelist(rx_ring);
+
 	for (i = 0; i < rx_ring->cnt - 1; i++)
 		nfp_net_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
 				    rx_ring->rxbufs[i].dma_addr);
@@ -2560,14 +2577,19 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)

 	if (dp->netdev)
 		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
-	kvfree(rx_ring->rxbufs);
+
+	if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
+		kvfree(rx_ring->xsk_rxbufs);
+	else
+		kvfree(rx_ring->rxbufs);

 	if (rx_ring->rxds)
 		dma_free_coherent(dp->dev, rx_ring->size,
 				  rx_ring->rxds, rx_ring->dma);

 	rx_ring->cnt = 0;
 	rx_ring->rxbufs = NULL;
+	rx_ring->xsk_rxbufs = NULL;
 	rx_ring->rxds = NULL;
 	rx_ring->dma = 0;
 	rx_ring->size = 0;
@@ -2583,15 +2605,29 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 static int
 nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
 {
+	enum xdp_mem_type mem_type;
+	size_t rxbuf_sw_desc_sz;
 	int err;

+	if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) {
+		mem_type = MEM_TYPE_XSK_BUFF_POOL;
+		rxbuf_sw_desc_sz = sizeof(*rx_ring->xsk_rxbufs);
+	} else {
+		mem_type = MEM_TYPE_PAGE_ORDER0;
+		rxbuf_sw_desc_sz = sizeof(*rx_ring->rxbufs);
+	}
+
 	if (dp->netdev) {
 		err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev,
 				       rx_ring->idx, rx_ring->r_vec->napi.napi_id);
 		if (err < 0)
 			return err;
 	}

+	err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, mem_type, NULL);
+	if (err)
+		goto err_alloc;
+
 	rx_ring->cnt = dp->rxd_cnt;
 	rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds));
 	rx_ring->rxds = dma_alloc_coherent(dp->dev, rx_ring->size,
@@ -2603,10 +2639,17 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
 		goto err_alloc;
 	}

-	rx_ring->rxbufs = kvcalloc(rx_ring->cnt, sizeof(*rx_ring->rxbufs),
-				   GFP_KERNEL);
-	if (!rx_ring->rxbufs)
-		goto err_alloc;
+	if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) {
+		rx_ring->xsk_rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz,
+					       GFP_KERNEL);
+		if (!rx_ring->xsk_rxbufs)
+			goto err_alloc;
+	} else {
+		rx_ring->rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz,
+					   GFP_KERNEL);
+		if (!rx_ring->rxbufs)
+			goto err_alloc;
+	}

 	return 0;

@@ -2659,11 +2702,13 @@ static void nfp_net_rx_rings_free(struct nfp_net_dp *dp)
 }

 static void
-nfp_net_napi_add(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec)
+nfp_net_napi_add(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, int idx)
 {
 	if (dp->netdev)
 		netif_napi_add(dp->netdev, &r_vec->napi,
-			       nfp_net_poll, NAPI_POLL_WEIGHT);
+			       nfp_net_has_xsk_pool_slow(dp, idx) ?
+			       nfp_net_xsk_poll : nfp_net_poll,
+			       NAPI_POLL_WEIGHT);
 	else
 		tasklet_enable(&r_vec->tasklet);
 }
@@ -2687,6 +2732,17 @@ nfp_net_vector_assign_rings(struct nfp_net_dp *dp,

 	r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ?
 			  &dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL;
+
+	if (nfp_net_has_xsk_pool_slow(dp, idx) || r_vec->xsk_pool) {
+		r_vec->xsk_pool = dp->xdp_prog ? dp->xsk_pools[idx] : NULL;
+
+		if (r_vec->xsk_pool)
+			xsk_pool_set_rxq_info(r_vec->xsk_pool,
+					      &r_vec->rx_ring->xdp_rxq);
+
+		nfp_net_napi_del(dp, r_vec);
+		nfp_net_napi_add(dp, r_vec, idx);
+	}
 }

 static int
@@ -2695,7 +2751,7 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 {
 	int err;

-	nfp_net_napi_add(&nn->dp, r_vec);
+	nfp_net_napi_add(&nn->dp, r_vec, idx);

 	snprintf(r_vec->name, sizeof(r_vec->name),
 		 "%s-rxtx-%d", nfp_net_name(nn), idx);
@@ -2834,8 +2890,11 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
 	if (err)
 		nn_err(nn, "Could not disable device: %d\n", err);

-	for (r = 0; r < nn->dp.num_rx_rings; r++)
+	for (r = 0; r < nn->dp.num_rx_rings; r++) {
 		nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]);
+		if (nfp_net_has_xsk_pool_slow(&nn->dp, nn->dp.rx_rings[r].idx))
+			nfp_net_xsk_rx_bufs_free(&nn->dp.rx_rings[r]);
+	}
 	for (r = 0; r < nn->dp.num_tx_rings; r++)
 		nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]);
 	for (r = 0; r < nn->dp.num_r_vecs; r++)
@@ -3771,6 +3830,9 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 		return nfp_net_xdp_setup_drv(nn, xdp);
 	case XDP_SETUP_PROG_HW:
 		return nfp_net_xdp_setup_hw(nn, xdp);
+	case XDP_SETUP_XSK_POOL:
+		return nfp_net_xsk_setup_pool(netdev, xdp->xsk.pool,
+					      xdp->xsk.queue_id);
 	default:
 		return nfp_app_bpf(nn->app, nn, xdp);
 	}
@@ -3821,6 +3883,7 @@ const struct net_device_ops nfp_net_netdev_ops = {
 	.ndo_features_check	= nfp_net_features_check,
 	.ndo_get_phys_port_name	= nfp_net_get_phys_port_name,
 	.ndo_bpf		= nfp_net_xdp,
+	.ndo_xsk_wakeup		= nfp_net_xsk_wakeup,
 	.ndo_get_devlink_port	= nfp_devlink_get_devlink_port,
 };

@@ -3948,6 +4011,14 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev,
 	nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings);
 	nn->dp.num_r_vecs = min_t(unsigned int,
 				  nn->dp.num_r_vecs, num_online_cpus());
+	nn->max_r_vecs = nn->dp.num_r_vecs;
+
+	nn->dp.xsk_pools = kcalloc(nn->max_r_vecs, sizeof(nn->dp.xsk_pools),
+				   GFP_KERNEL);
+	if (!nn->dp.xsk_pools) {
+		err = -ENOMEM;
+		goto err_free_nn;
+	}

 	nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
 	nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
@@ -3987,6 +4058,7 @@ void nfp_net_free(struct nfp_net *nn)
 	WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted);
 	nfp_ccm_mbox_free(nn);

+	kfree(nn->dp.xsk_pools);
 	if (nn->dp.netdev)
 		free_netdev(nn->dp.netdev);
 	else
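nfp_net_has_xsk_pool_slow() is called throughout the hunks above but is defined in nfp_net_xsk.h, one of the two changed files this page does not render. Judging from its call sites (true only while an XDP program is attached and a pool is registered for the queue), a plausible shape, offered as an assumption rather than the actual definition:

/* Assumed helper shape, inferred from call sites; the real definition
 * lives in nfp_net_xsk.h, which is not shown on this page.
 */
static inline bool nfp_net_has_xsk_pool_slow(struct nfp_net_dp *dp,
                                             unsigned int qid)
{
        return dp->xdp_prog && dp->xsk_pools[qid];
}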

drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c

Lines changed: 21 additions & 12 deletions
@@ -42,13 +42,19 @@ static int nfp_rx_q_show(struct seq_file *file, void *data)
 		seq_printf(file, "%04d: 0x%08x 0x%08x", i,
 			   rxd->vals[0], rxd->vals[1]);

-		frag = READ_ONCE(rx_ring->rxbufs[i].frag);
-		if (frag)
-			seq_printf(file, " frag=%p", frag);
-
-		if (rx_ring->rxbufs[i].dma_addr)
-			seq_printf(file, " dma_addr=%pad",
-				   &rx_ring->rxbufs[i].dma_addr);
+		if (!r_vec->xsk_pool) {
+			frag = READ_ONCE(rx_ring->rxbufs[i].frag);
+			if (frag)
+				seq_printf(file, " frag=%p", frag);
+
+			if (rx_ring->rxbufs[i].dma_addr)
+				seq_printf(file, " dma_addr=%pad",
+					   &rx_ring->rxbufs[i].dma_addr);
+		} else {
+			if (rx_ring->xsk_rxbufs[i].dma_addr)
+				seq_printf(file, " dma_addr=%pad",
+					   &rx_ring->xsk_rxbufs[i].dma_addr);
+		}

 		if (i == rx_ring->rd_p % rxd_cnt)
 			seq_puts(file, " H_RD ");
@@ -103,20 +109,23 @@ static int nfp_tx_q_show(struct seq_file *file, void *data)
 		   tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p);

 	for (i = 0; i < txd_cnt; i++) {
+		struct xdp_buff *xdp;
+		struct sk_buff *skb;
+
 		txd = &tx_ring->txds[i];
 		seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", i,
 			   txd->vals[0], txd->vals[1],
 			   txd->vals[2], txd->vals[3]);

-		if (tx_ring == r_vec->tx_ring) {
-			struct sk_buff *skb = READ_ONCE(tx_ring->txbufs[i].skb);
-
+		if (!tx_ring->is_xdp) {
+			skb = READ_ONCE(tx_ring->txbufs[i].skb);
 			if (skb)
 				seq_printf(file, " skb->head=%p skb->data=%p",
 					   skb->head, skb->data);
 		} else {
-			seq_printf(file, " frag=%p",
-				   READ_ONCE(tx_ring->txbufs[i].frag));
+			xdp = READ_ONCE(tx_ring->txbufs[i].xdp);
+			if (xdp)
+				seq_printf(file, " xdp->data=%p", xdp->data);
 		}

 		if (tx_ring->txbufs[i].dma_addr)
