Skip to content

Commit 2d4238f

Browse files
kkazimieJeff Kirsher
authored and
Jeff Kirsher
committed
ice: Add support for AF_XDP
Add zero copy AF_XDP support. This patch adds zero copy support for Tx and Rx; code for zero copy is added to ice_xsk.h and ice_xsk.c. For Tx, implement ndo_xsk_wakeup. As with other drivers, reuse existing XDP Tx queues for this task, since XDP_REDIRECT guarantees mutual exclusion between different NAPI contexts based on CPU ID. In turn, a netdev can XDP_REDIRECT to another netdev with a different NAPI context, since the operation is bound to a specific core and each core has its own hardware ring. For Rx, allocate frames as MEM_TYPE_ZERO_COPY on queues on which AF_XDP is enabled. Signed-off-by: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com> Co-developed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
1 parent 0891d6d commit 2d4238f

File tree

11 files changed

+1456
-27
lines changed

11 files changed

+1456
-27
lines changed

drivers/net/ethernet/intel/ice/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ ice-y := ice_main.o \
2121
ice_ethtool.o
2222
ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
2323
ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o
24+
ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o

drivers/net/ethernet/intel/ice/ice.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include <linux/bpf.h>
3636
#include <linux/avf/virtchnl.h>
3737
#include <net/ipv6.h>
38+
#include <net/xdp_sock.h>
3839
#include "ice_devids.h"
3940
#include "ice_type.h"
4041
#include "ice_txrx.h"
@@ -44,6 +45,7 @@
4445
#include "ice_sched.h"
4546
#include "ice_virtchnl_pf.h"
4647
#include "ice_sriov.h"
48+
#include "ice_xsk.h"
4749

4850
extern const char ice_drv_ver[];
4951
#define ICE_BAR0 0
@@ -287,6 +289,9 @@ struct ice_vsi {
287289
struct ice_ring **xdp_rings; /* XDP ring array */
288290
u16 num_xdp_txq; /* Used XDP queues */
289291
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
292+
struct xdp_umem **xsk_umems;
293+
u16 num_xsk_umems_used;
294+
u16 num_xsk_umems;
290295
} ____cacheline_internodealigned_in_smp;
291296

292297
/* struct that defines an interrupt vector */
@@ -440,6 +445,27 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring)
440445
ring->flags |= ICE_TX_FLAGS_RING_XDP;
441446
}
442447

448+
/**
449+
* ice_xsk_umem - get XDP UMEM bound to a ring
450+
* @ring: ring to use
451+
*
452+
* Returns a pointer to the xdp_umem structure if there is a UMEM present,
453+
* NULL otherwise.
454+
*/
455+
static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
456+
{
457+
struct xdp_umem **umems = ring->vsi->xsk_umems;
458+
int qid = ring->q_index;
459+
460+
if (ice_ring_is_xdp(ring))
461+
qid -= ring->vsi->num_xdp_txq;
462+
463+
if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi))
464+
return NULL;
465+
466+
return umems[qid];
467+
}
468+
443469
/**
444470
* ice_get_main_vsi - Get the PF VSI
445471
* @pf: PF instance

drivers/net/ethernet/intel/ice/ice_base.c

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -276,14 +276,17 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
276276
*/
277277
int ice_setup_rx_ctx(struct ice_ring *ring)
278278
{
279+
int chain_len = ICE_MAX_CHAINED_RX_BUFS;
279280
struct ice_vsi *vsi = ring->vsi;
280-
struct ice_hw *hw = &vsi->back->hw;
281281
u32 rxdid = ICE_RXDID_FLEX_NIC;
282282
struct ice_rlan_ctx rlan_ctx;
283+
struct ice_hw *hw;
283284
u32 regval;
284285
u16 pf_q;
285286
int err;
286287

288+
hw = &vsi->back->hw;
289+
287290
/* what is Rx queue number in global space of 2K Rx queues */
288291
pf_q = vsi->rxq_map[ring->q_index];
289292

@@ -297,10 +300,38 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
297300
xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
298301
ring->q_index);
299302

300-
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
301-
MEM_TYPE_PAGE_SHARED, NULL);
302-
if (err)
303-
return err;
303+
ring->xsk_umem = ice_xsk_umem(ring);
304+
if (ring->xsk_umem) {
305+
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
306+
307+
ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
308+
XDP_PACKET_HEADROOM;
309+
/* For AF_XDP ZC, we do not allow packets to span
310+
* multiple buffers, thus letting us skip that
311+
* handling in the fast-path.
312+
*/
313+
chain_len = 1;
314+
ring->zca.free = ice_zca_free;
315+
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
316+
MEM_TYPE_ZERO_COPY,
317+
&ring->zca);
318+
if (err)
319+
return err;
320+
321+
dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
322+
ring->q_index);
323+
} else {
324+
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
325+
xdp_rxq_info_reg(&ring->xdp_rxq,
326+
ring->netdev,
327+
ring->q_index);
328+
329+
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
330+
MEM_TYPE_PAGE_SHARED,
331+
NULL);
332+
if (err)
333+
return err;
334+
}
304335
}
305336
/* Receive Queue Base Address.
306337
* Indicates the starting address of the descriptor queue defined in
@@ -340,7 +371,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
340371
* than 5 x DBUF
341372
*/
342373
rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
343-
ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
374+
chain_len * ring->rx_buf_len);
344375

345376
/* Rx queue threshold in units of 64 */
346377
rlan_ctx.lrxqthresh = 1;
@@ -378,7 +409,15 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
378409
/* init queue specific tail register */
379410
ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
380411
writel(0, ring->tail);
381-
ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
412+
413+
err = ring->xsk_umem ?
414+
ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
415+
ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
416+
if (err)
417+
dev_info(&vsi->back->pdev->dev,
418+
"Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
419+
ring->xsk_umem ? "UMEM enabled " : "",
420+
ring->q_index, pf_q);
382421

383422
return 0;
384423
}

drivers/net/ethernet/intel/ice/ice_ethtool.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2612,6 +2612,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
26122612
return 0;
26132613
}
26142614

2615+
/* If there is an AF_XDP UMEM attached to any of the Rx rings,
2616+
* disallow changing the number of descriptors -- regardless
2617+
* of whether the netdev is running or not.
2618+
*/
2619+
if (ice_xsk_any_rx_ring_ena(vsi))
2620+
return -EBUSY;
2621+
26152622
while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
26162623
timeout--;
26172624
if (!timeout)

drivers/net/ethernet/intel/ice/ice_lib.c

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1283,7 +1283,17 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
12831283
*/
12841284
int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
12851285
{
1286-
return ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
1286+
int ret;
1287+
int i;
1288+
1289+
ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
1290+
if (ret)
1291+
return ret;
1292+
1293+
for (i = 0; i < vsi->num_xdp_txq; i++)
1294+
vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]);
1295+
1296+
return ret;
12871297
}
12881298

12891299
/**
@@ -2514,6 +2524,51 @@ char *ice_nvm_version_str(struct ice_hw *hw)
25142524
return buf;
25152525
}
25162526

2527+
/**
2528+
* ice_update_ring_stats - Update ring statistics
2529+
* @ring: ring to update
2530+
* @cont: used to increment per-vector counters
2531+
* @pkts: number of processed packets
2532+
* @bytes: number of processed bytes
2533+
*
2534+
* This function assumes that the caller has acquired a u64_stats_sync lock.
2535+
*/
2536+
static void
2537+
ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont,
2538+
u64 pkts, u64 bytes)
2539+
{
2540+
ring->stats.bytes += bytes;
2541+
ring->stats.pkts += pkts;
2542+
cont->total_bytes += bytes;
2543+
cont->total_pkts += pkts;
2544+
}
2545+
2546+
/**
2547+
* ice_update_tx_ring_stats - Update Tx ring specific counters
2548+
* @tx_ring: ring to update
2549+
* @pkts: number of processed packets
2550+
* @bytes: number of processed bytes
2551+
*/
2552+
void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes)
2553+
{
2554+
u64_stats_update_begin(&tx_ring->syncp);
2555+
ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes);
2556+
u64_stats_update_end(&tx_ring->syncp);
2557+
}
2558+
2559+
/**
2560+
* ice_update_rx_ring_stats - Update Rx ring specific counters
2561+
* @rx_ring: ring to update
2562+
* @pkts: number of processed packets
2563+
* @bytes: number of processed bytes
2564+
*/
2565+
void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
2566+
{
2567+
u64_stats_update_begin(&rx_ring->syncp);
2568+
ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes);
2569+
u64_stats_update_end(&rx_ring->syncp);
2570+
}
2571+
25172572
/**
25182573
* ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI
25192574
* @vsi: the VSI being configured MAC filter

drivers/net/ethernet/intel/ice/ice_lib.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
8383

8484
int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
8585

86+
void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
87+
88+
void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
89+
8690
void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
8791

8892
u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);

drivers/net/ethernet/intel/ice/ice_main.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1692,6 +1692,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
16921692
if (ice_setup_tx_ring(xdp_ring))
16931693
goto free_xdp_rings;
16941694
ice_set_ring_xdp(xdp_ring);
1695+
xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
16951696
}
16961697

16971698
return 0;
@@ -1934,6 +1935,17 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
19341935
if (if_running)
19351936
ret = ice_up(vsi);
19361937

1938+
if (!ret && prog && vsi->xsk_umems) {
1939+
int i;
1940+
1941+
ice_for_each_rxq(vsi, i) {
1942+
struct ice_ring *rx_ring = vsi->rx_rings[i];
1943+
1944+
if (rx_ring->xsk_umem)
1945+
napi_schedule(&rx_ring->q_vector->napi);
1946+
}
1947+
}
1948+
19371949
return (ret || xdp_ring_err) ? -ENOMEM : 0;
19381950
}
19391951

@@ -1959,6 +1971,9 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
19591971
case XDP_QUERY_PROG:
19601972
xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
19611973
return 0;
1974+
case XDP_SETUP_XSK_UMEM:
1975+
return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
1976+
xdp->xsk.queue_id);
19621977
default:
19631978
return -EINVAL;
19641979
}
@@ -5205,4 +5220,5 @@ static const struct net_device_ops ice_netdev_ops = {
52055220
.ndo_tx_timeout = ice_tx_timeout,
52065221
.ndo_bpf = ice_xdp,
52075222
.ndo_xdp_xmit = ice_xdp_xmit,
5223+
.ndo_xsk_wakeup = ice_xsk_wakeup,
52085224
};

drivers/net/ethernet/intel/ice/ice_txrx.c

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "ice_lib.h"
1212
#include "ice.h"
1313
#include "ice_dcb_lib.h"
14+
#include "ice_xsk.h"
1415

1516
#define ICE_RX_HDR_SIZE 256
1617

@@ -58,6 +59,11 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
5859
{
5960
u16 i;
6061

62+
if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
63+
ice_xsk_clean_xdp_ring(tx_ring);
64+
goto tx_skip_free;
65+
}
66+
6167
/* ring already cleared, nothing to do */
6268
if (!tx_ring->tx_buf)
6369
return;
@@ -66,6 +72,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
6672
for (i = 0; i < tx_ring->count; i++)
6773
ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
6874

75+
tx_skip_free:
6976
memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
7077

7178
/* Zero out the descriptor ring */
@@ -198,12 +205,8 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
198205

199206
i += tx_ring->count;
200207
tx_ring->next_to_clean = i;
201-
u64_stats_update_begin(&tx_ring->syncp);
202-
tx_ring->stats.bytes += total_bytes;
203-
tx_ring->stats.pkts += total_pkts;
204-
u64_stats_update_end(&tx_ring->syncp);
205-
tx_ring->q_vector->tx.total_bytes += total_bytes;
206-
tx_ring->q_vector->tx.total_pkts += total_pkts;
208+
209+
ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
207210

208211
if (ice_ring_is_xdp(tx_ring))
209212
return !!budget;
@@ -286,6 +289,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
286289
if (!rx_ring->rx_buf)
287290
return;
288291

292+
if (rx_ring->xsk_umem) {
293+
ice_xsk_clean_rx_ring(rx_ring);
294+
goto rx_skip_free;
295+
}
296+
289297
/* Free all the Rx ring sk_buffs */
290298
for (i = 0; i < rx_ring->count; i++) {
291299
struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
@@ -313,6 +321,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
313321
rx_buf->page_offset = 0;
314322
}
315323

324+
rx_skip_free:
316325
memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
317326

318327
/* Zero out the descriptor ring */
@@ -1073,13 +1082,7 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
10731082
if (xdp_prog)
10741083
ice_finalize_xdp_rx(rx_ring, xdp_xmit);
10751084

1076-
/* update queue and vector specific stats */
1077-
u64_stats_update_begin(&rx_ring->syncp);
1078-
rx_ring->stats.pkts += total_rx_pkts;
1079-
rx_ring->stats.bytes += total_rx_bytes;
1080-
u64_stats_update_end(&rx_ring->syncp);
1081-
rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
1082-
rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1085+
ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
10831086

10841087
/* guarantee a trip back through this routine if there was a failure */
10851088
return failure ? budget : (int)total_rx_pkts;
@@ -1457,9 +1460,14 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
14571460
/* Since the actual Tx work is minimal, we can give the Tx a larger
14581461
* budget and be more aggressive about cleaning up the Tx descriptors.
14591462
*/
1460-
ice_for_each_ring(ring, q_vector->tx)
1461-
if (!ice_clean_tx_irq(ring, budget))
1463+
ice_for_each_ring(ring, q_vector->tx) {
1464+
bool wd = ring->xsk_umem ?
1465+
ice_clean_tx_irq_zc(ring, budget) :
1466+
ice_clean_tx_irq(ring, budget);
1467+
1468+
if (!wd)
14621469
clean_complete = false;
1470+
}
14631471

14641472
/* Handle case where we are called by netpoll with a budget of 0 */
14651473
if (unlikely(budget <= 0))
@@ -1479,7 +1487,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
14791487
ice_for_each_ring(ring, q_vector->rx) {
14801488
int cleaned;
14811489

1482-
cleaned = ice_clean_rx_irq(ring, budget_per_ring);
1490+
/* A dedicated path for zero-copy allows making a single
1491+
* comparison in the irq context instead of many inside the
1492+
* ice_clean_rx_irq function and makes the codebase cleaner.
1493+
*/
1494+
cleaned = ring->xsk_umem ?
1495+
ice_clean_rx_irq_zc(ring, budget_per_ring) :
1496+
ice_clean_rx_irq(ring, budget_per_ring);
14831497
work_done += cleaned;
14841498
/* if we clean as many as budgeted, we must not be done */
14851499
if (cleaned >= budget_per_ring)

0 commit comments

Comments
 (0)