
Commit 94d79f7

tianx666 authored and NipaLocal committed
xsc: Add ndo_start_xmit
This patch adds the core data transmission path, centered on the
ndo_start_xmit interface. The main steps are:

1. Transmission entry: the entry point selects the appropriate transmit
queue (SQ) and verifies hardware readiness before calling
xsc_eth_xmit_frame to transmit the packet.

2. Packet processing: supports TCP/UDP GSO and calculates the MSS and
IHS; linearizes the SKB when necessary and handles checksum offload.
Data is mapped for DMA with dma_map_single() and skb_frag_dma_map().

3. Descriptor generation: builds the control (cseg) and data (dseg)
segments, setting the operation code, segment counts, and DMA
addresses.

Hardware notification and queue management:

4. Notifies the hardware through a doorbell register and manages queue
flow to avoid overloading.

5. Coalesces small packets using netdev_xmit_more() to reduce doorbell
writes, and supports zero-copy transmission for efficiency.

Co-developed-by: Honggang Wei <weihg@yunsilicon.com>
Signed-off-by: Honggang Wei <weihg@yunsilicon.com>
Co-developed-by: Lei Yan <jacky@yunsilicon.com>
Signed-off-by: Lei Yan <jacky@yunsilicon.com>
Signed-off-by: Xin Tian <tianx@yunsilicon.com>
Signed-off-by: NipaLocal <nipa@local>
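For orientation, ndo_start_xmit follows the standard netdev transmit contract. A minimal sketch of that contract is below; only the netdev API names are real, and the example_-prefixed helpers are hypothetical placeholders:

#include <linux/netdevice.h>

/* Sketch of the contract this patch implements; example_hw_ready() and
 * example_build_wqe_and_ring_doorbell() are illustrative stand-ins.
 */
static netdev_tx_t example_start_xmit(struct sk_buff *skb,
                                      struct net_device *netdev)
{
        /* pick the SQ matching the queue the stack selected */
        u16 qid = skb_get_queue_mapping(skb);

        if (!example_hw_ready(netdev, qid))
                return NETDEV_TX_BUSY;  /* stack will retry the skb */

        example_build_wqe_and_ring_doorbell(netdev, qid, skb);
        return NETDEV_TX_OK;
}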
1 parent 1f95510 commit 94d79f7

File tree

9 files changed: +552, −4 lines

drivers/net/ethernet/yunsilicon/xsc/common/xsc_core.h

Lines changed: 5 additions & 0 deletions
@@ -490,4 +490,9 @@ static inline u8 xsc_get_user_mode(struct xsc_core_device *xdev)
 	return xdev->user_mode;
 }

+static inline u8 get_cqe_opcode(struct xsc_cqe *cqe)
+{
+	return FIELD_GET(XSC_CQE_MSG_OPCD_MASK, le32_to_cpu(cqe->data0));
+}
+
 #endif
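get_cqe_opcode() relies on the bitfield.h helpers. As a quick illustration of the pattern (the mask below is a made-up stand-in; the real XSC_CQE_MSG_OPCD_MASK is defined elsewhere in the driver and may have a different width and position):

#include <linux/bitfield.h>

/* Illustrative mask only, not the driver's real definition */
#define EXAMPLE_OPCD_MASK GENMASK(7, 0)

static u8 example_roundtrip_opcode(void)
{
        u32 word = FIELD_PREP(EXAMPLE_OPCD_MASK, 0x7); /* pack opcode 7 */

        return FIELD_GET(EXAMPLE_OPCD_MASK, word);     /* returns 0x7 */
}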

drivers/net/ethernet/yunsilicon/xsc/net/Makefile

Lines changed: 1 addition & 1 deletion
@@ -6,4 +6,4 @@ ccflags-y += -I$(srctree)/drivers/net/ethernet/yunsilicon/xsc

 obj-$(CONFIG_YUNSILICON_XSC_ETH) += xsc_eth.o

-xsc_eth-y := main.o xsc_eth_wq.o xsc_eth_txrx.o xsc_eth_rx.o
+xsc_eth-y := main.o xsc_eth_wq.o xsc_eth_txrx.o xsc_eth_tx.o xsc_eth_rx.o

drivers/net/ethernet/yunsilicon/xsc/net/main.c

Lines changed: 1 addition & 0 deletions
@@ -1683,6 +1683,7 @@ static int xsc_eth_set_hw_mtu(struct xsc_core_device *xdev,
 static const struct net_device_ops xsc_netdev_ops = {
 	.ndo_open = xsc_eth_open,
 	.ndo_stop = xsc_eth_close,
+	.ndo_start_xmit = xsc_eth_xmit_start,
 };

 static void xsc_eth_build_nic_netdev(struct xsc_adapter *adapter)

drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth.h

Lines changed: 2 additions & 0 deletions
@@ -6,6 +6,8 @@
 #ifndef __XSC_ETH_H
 #define __XSC_ETH_H

+#include <linux/udp.h>
+
 #include "common/xsc_device.h"
 #include "xsc_eth_common.h"
drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_common.h

Lines changed: 8 additions & 0 deletions
@@ -178,4 +178,12 @@ struct xsc_eth_channels {
 	u32 rqn_base;
 };

+union xsc_send_doorbell {
+	struct {
+		s32 next_pid : 16;
+		u32 qp_num : 15;
+	};
+	u32 send_data;
+};
+
 #endif
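The anonymous struct inside the union lets the driver fill the two bitfields and then issue a single 32-bit doorbell write. A minimal sketch of the usage, mirroring xsc_sq_notify_hw() further down in this commit (the parameters here are illustrative stand-ins):

/* Sketch only: pc, send_ds_num, sqn and db_reg are example parameters */
static void example_ring_tx_doorbell(u16 pc, int send_ds_num, u32 sqn,
                                     void __iomem *db_reg)
{
        union xsc_send_doorbell db = {};

        db.next_pid = pc << ilog2(send_ds_num); /* producer index in ds units */
        db.qp_num = sqn;                        /* send queue number, 15 bits */

        wmb();  /* descriptors must be visible before the doorbell */
        writel(db.send_data, db_reg);           /* one 32-bit MMIO write */
}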
drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_tx.c

Lines changed: 315 additions & 0 deletions

@@ -0,0 +1,315 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2021-2025, Shanghai Yunsilicon Technology Co., Ltd.
 * All rights reserved.
 */

#include <linux/tcp.h>

#include "xsc_eth.h"
#include "xsc_eth_txrx.h"

#define XSC_OPCODE_RAW 7

static void xsc_dma_push(struct xsc_sq *sq, dma_addr_t addr, u32 size,
			 enum xsc_dma_map_type map_type)
{
	struct xsc_sq_dma *dma = xsc_dma_get(sq, sq->dma_fifo_pc++);

	dma->addr = addr;
	dma->size = size;
	dma->type = map_type;
}

static void xsc_dma_unmap_wqe(struct xsc_sq *sq, u8 num_dma)
{
	struct xsc_adapter *adapter = sq->channel->adapter;
	struct device *dev = adapter->dev;
	int i;

	for (i = 0; i < num_dma; i++) {
		struct xsc_sq_dma *last_pushed_dma;

		last_pushed_dma = xsc_dma_get(sq, --sq->dma_fifo_pc);
		xsc_tx_dma_unmap(dev, last_pushed_dma);
	}
}

static void *xsc_sq_fetch_wqe(struct xsc_sq *sq, size_t size, u16 *pi)
{
	struct xsc_wq_cyc *wq = &sq->wq;
	void *wqe;

	/* caution: sq->pc defaults to zero */
	*pi = xsc_wq_cyc_ctr2ix(wq, sq->pc);
	wqe = xsc_wq_cyc_get_wqe(wq, *pi);
	memset(wqe, 0, size);

	return wqe;
}
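
/* Note: xsc_wq_cyc_ctr2ix() (defined elsewhere in the driver) maps the
 * free-running counter sq->pc onto a ring slot, typically
 * ctr & (ring_size - 1) for power-of-two rings; sq->pc itself only
 * advances later, in xsc_txwqe_complete().
 */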

static u16 xsc_tx_get_gso_ihs(struct xsc_sq *sq, struct sk_buff *skb)
{
	u16 ihs;

	if (skb->encapsulation) {
		ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
	} else {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
			ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
		else
			ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
	}

	return ihs;
}
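
/* Worked example: for a plain (non-encapsulated) IPv4 TCP GSO skb,
 * skb_transport_offset() covers ETH(14) + IPv4(20) = 34 bytes and
 * tcp_hdrlen() typically adds 20 more, giving ihs = 54. For UDP GSO
 * (SKB_GSO_UDP_L4) only the fixed 8-byte UDP header is added.
 */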

static void xsc_txwqe_build_cseg_csum(struct xsc_sq *sq,
				      struct sk_buff *skb,
				      struct xsc_send_wqe_ctrl_seg *cseg)
{
	u32 val = le32_to_cpu(cseg->data0);

	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		if (skb->encapsulation)
			val |= FIELD_PREP(XSC_WQE_CTRL_SEG_CSUM_EN_MASK,
					  XSC_ETH_WQE_INNER_AND_OUTER_CSUM);
		else
			val |= FIELD_PREP(XSC_WQE_CTRL_SEG_CSUM_EN_MASK,
					  XSC_ETH_WQE_OUTER_CSUM);
	} else {
		val |= FIELD_PREP(XSC_WQE_CTRL_SEG_CSUM_EN_MASK,
				  XSC_ETH_WQE_NONE_CSUM);
	}
	cseg->data0 = cpu_to_le32(val);
}

static void xsc_txwqe_build_csegs(struct xsc_sq *sq, struct sk_buff *skb,
				  u16 mss, u16 ihs, u16 headlen,
				  u8 opcode, u16 ds_cnt, u32 msglen,
				  struct xsc_send_wqe_ctrl_seg *cseg)
{
	struct xsc_core_device *xdev = sq->cq.xdev;
	int send_wqe_ds_num_log;
	u32 val = 0;

	send_wqe_ds_num_log = ilog2(xdev->caps.send_ds_num);
	xsc_txwqe_build_cseg_csum(sq, skb, cseg);

	if (mss != 0) {
		val |= XSC_WQE_CTRL_SEG_HAS_PPH |
		       XSC_WQE_CTRL_SEG_SO_TYPE |
		       FIELD_PREP(XSC_WQE_CTRL_SEG_SO_HDR_LEN_MASK, ihs) |
		       FIELD_PREP(XSC_WQE_CTRL_SEG_SO_DATA_SIZE_MASK, mss);
		cseg->data2 = cpu_to_le32(val);
	}

	val = le32_to_cpu(cseg->data0);
	val |= FIELD_PREP(XSC_WQE_CTRL_SEG_MSG_OPCODE_MASK, opcode) |
	       FIELD_PREP(XSC_WQE_CTRL_SEG_WQE_ID_MASK,
			  sq->pc << send_wqe_ds_num_log) |
	       FIELD_PREP(XSC_WQE_CTRL_SEG_DS_DATA_NUM_MASK,
			  ds_cnt - XSC_SEND_WQEBB_CTRL_NUM_DS);
	cseg->data0 = cpu_to_le32(val);
	cseg->msg_len = cpu_to_le32(msglen);
	cseg->data3 = cpu_to_le32(XSC_WQE_CTRL_SEG_CE);
}
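
/* Note: the WQE id appears to be expressed at data-segment granularity:
 * the WQE counter is shifted by log2(send_ds_num), matching the
 * doorbell's next_pid in xsc_sq_notify_hw() below. DS_DATA_NUM counts
 * only data segments, so the control segment is subtracted out.
 */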

static int xsc_txwqe_build_dsegs(struct xsc_sq *sq, struct sk_buff *skb,
				 u16 ihs, u16 headlen,
				 struct xsc_wqe_data_seg *dseg)
{
	struct xsc_adapter *adapter = sq->channel->adapter;
	struct device *dev = adapter->dev;
	dma_addr_t dma_addr = 0;
	u8 num_dma = 0;
	int i;

	if (headlen) {
		dma_addr = dma_map_single(dev, skb->data, headlen,
					  DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(dev, dma_addr)))
			goto err_dma_unmap_wqe;

		dseg->va = cpu_to_le64(dma_addr);
		dseg->mkey = cpu_to_le32(be32_to_cpu(sq->mkey_be));
		dseg->data0 |=
			cpu_to_le32(FIELD_PREP(XSC_WQE_DATA_SEG_SEG_LEN_MASK,
					       headlen));

		xsc_dma_push(sq, dma_addr, headlen, XSC_DMA_MAP_SINGLE);
		num_dma++;
		dseg++;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		int fsz = skb_frag_size(frag);

		dma_addr = skb_frag_dma_map(dev, frag, 0, fsz, DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(dev, dma_addr)))
			goto err_dma_unmap_wqe;

		dseg->va = cpu_to_le64(dma_addr);
		dseg->mkey = cpu_to_le32(be32_to_cpu(sq->mkey_be));
		dseg->data0 |=
			cpu_to_le32(FIELD_PREP(XSC_WQE_DATA_SEG_SEG_LEN_MASK,
					       fsz));

		xsc_dma_push(sq, dma_addr, fsz, XSC_DMA_MAP_PAGE);
		num_dma++;
		dseg++;
	}

	return num_dma;

err_dma_unmap_wqe:
	xsc_dma_unmap_wqe(sq, num_dma);
	return -ENOMEM;
}

static void xsc_sq_notify_hw(struct xsc_wq_cyc *wq, u16 pc,
			     struct xsc_sq *sq)
{
	struct xsc_adapter *adapter = sq->channel->adapter;
	struct xsc_core_device *xdev = adapter->xdev;
	union xsc_send_doorbell doorbell_value;
	int send_ds_num_log;

	send_ds_num_log = ilog2(xdev->caps.send_ds_num);
	/* convert the wqe index to a ds index */
	doorbell_value.next_pid = pc << send_ds_num_log;
	doorbell_value.qp_num = sq->sqn;

	/* Make sure that descriptors are written before
	 * updating doorbell record and ringing the doorbell
	 */
	wmb();
	writel(doorbell_value.send_data, XSC_REG_ADDR(xdev, xdev->regs.tx_db));
}

static void xsc_txwqe_complete(struct xsc_sq *sq, struct sk_buff *skb,
			       u8 opcode, u16 ds_cnt,
			       u8 num_wqebbs, u32 num_bytes, u8 num_dma,
			       struct xsc_tx_wqe_info *wi)
{
	struct xsc_wq_cyc *wq = &sq->wq;

	wi->num_bytes = num_bytes;
	wi->num_dma = num_dma;
	wi->num_wqebbs = num_wqebbs;
	wi->skb = skb;

	netdev_tx_sent_queue(sq->txq, num_bytes);

	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;

	sq->pc += wi->num_wqebbs;

	if (unlikely(!xsc_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room)))
		netif_tx_stop_queue(sq->txq);

	if (!netdev_xmit_more() || netif_xmit_stopped(sq->txq))
		xsc_sq_notify_hw(wq, sq->pc, sq);
}
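
/* Note: netdev_tx_sent_queue() feeds byte-queue limits (BQL), and the
 * netdev_xmit_more() check implements doorbell batching: when the stack
 * signals that more packets are queued, the doorbell write is deferred
 * until the last skb of the burst (or until the queue is stopped).
 */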

static uint32_t xsc_eth_xmit_frame(struct sk_buff *skb,
				   struct xsc_sq *sq,
				   struct xsc_tx_wqe *wqe,
				   u16 pi)
{
	struct xsc_core_device *xdev = sq->cq.xdev;
	struct xsc_send_wqe_ctrl_seg *cseg;
	struct xsc_wqe_data_seg *dseg;
	struct xsc_tx_wqe_info *wi;
	u16 mss, ihs, headlen;
	u32 num_bytes;
	u8 num_wqebbs;
	int num_dma;
	u16 ds_cnt;
	u8 opcode;

retry_send:
	/* Calculate ihs and ds_cnt; no writes to the wqe yet.
	 * The control ds is subtracted back out in ds_data_num.
	 */
	ds_cnt = XSC_SEND_WQEBB_CTRL_NUM_DS;

	if (skb_is_gso(skb)) {
		opcode = XSC_OPCODE_RAW;
		mss = skb_shinfo(skb)->gso_size;
		ihs = xsc_tx_get_gso_ihs(sq, skb);
		num_bytes = skb->len +
			    (skb_shinfo(skb)->gso_segs - 1) * ihs;
	} else {
		opcode = XSC_OPCODE_RAW;
		mss = 0;
		ihs = 0;
		num_bytes = skb->len;
	}

	/* linear data in the skb */
	headlen = skb->len - skb->data_len;
	ds_cnt += !!headlen;
	ds_cnt += skb_shinfo(skb)->nr_frags;

	/* Check packet size. */
	if (unlikely(mss == 0 && skb->len > sq->hw_mtu))
		goto err_drop;

	num_wqebbs = DIV_ROUND_UP(ds_cnt, xdev->caps.send_ds_num);
	/* if ds_cnt exceeds one wqe, linearize the skb and retry */
	if (num_wqebbs != 1) {
		if (skb_linearize(skb))
			goto err_drop;
		goto retry_send;
	}

	/* fill wqe */
	wi = (struct xsc_tx_wqe_info *)&sq->db.wqe_info[pi];
	cseg = &wqe->ctrl;
	dseg = &wqe->data[0];

	if (unlikely(skb->len == 0))
		goto err_drop;

	xsc_txwqe_build_csegs(sq, skb, mss, ihs, headlen,
			      opcode, ds_cnt, skb->len, cseg);

	/* the inline header is also transferred via DMA */
	num_dma = xsc_txwqe_build_dsegs(sq, skb, ihs, headlen, dseg);
	if (unlikely(num_dma < 0))
		goto err_drop;

	xsc_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
			   num_dma, wi);

	return NETDEV_TX_OK;

err_drop:
	dev_kfree_skb_any(skb);

	return NETDEV_TX_OK;
}
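
/* Worked example of the GSO byte accounting above: a 60 KiB TCP payload
 * with mss = 1448 gives gso_segs = 43; every segment after the first
 * repeats the ihs-byte headers (say ihs = 54), so the wire total is
 * num_bytes = skb->len + 42 * 54, which is what BQL is credited with.
 */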

netdev_tx_t xsc_eth_xmit_start(struct sk_buff *skb, struct net_device *netdev)
{
	struct xsc_adapter *adapter = netdev_priv(netdev);
	struct xsc_tx_wqe *wqe;
	struct xsc_sq *sq;
	u32 ds_num;
	u16 pi;

	if (adapter->status != XSCALE_ETH_DRIVER_OK)
		return NETDEV_TX_BUSY;

	sq = adapter->txq2sq[skb_get_queue_mapping(skb)];
	if (unlikely(!sq))
		return NETDEV_TX_BUSY;

	ds_num = adapter->xdev->caps.send_ds_num;
	wqe = xsc_sq_fetch_wqe(sq, ds_num * XSC_SEND_WQE_DS, &pi);

	return xsc_eth_xmit_frame(skb, sq, wqe, pi);
}
