Skip to content

Commit

Permalink
ravb: minimize TX data copying
Browse files Browse the repository at this point in the history
Renesas Ethernet AVB controller requires that all data are aligned on 4-byte
boundary.  While it's  easily achievable for  the RX  data with  the help of
skb_reserve() (we even align on 128-byte boundary as recommended by the manual),
we  can't  do the same with the TX data, and it always comes  unaligned from
the networking core. Originally we solved it an easy way, copying all packet
to  a  preallocated  aligned buffer; however, it's enough to copy only up to
3 first bytes from each packet, doing the transfer using 2 TX descriptors
instead of just 1. Here's an implementation of the new  TX algorithm that
significantly reduces the driver's memory requirements.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Sergei Shtylyov authored and davem330 committed Jul 27, 2015
1 parent c22995c commit 2f45d19
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 45 deletions.
5 changes: 4 additions & 1 deletion drivers/net/ethernet/renesas/ravb.h
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,8 @@ struct ravb_desc {
__le32 dptr; /* Descriptor pointer */
};

#define DPTR_ALIGN 4 /* Required descriptor pointer alignment */

enum DIE_DT {
/* Frame data */
DT_FMID = 0x40,
Expand Down Expand Up @@ -739,6 +741,7 @@ enum RAVB_QUEUE {
#define RX_QUEUE_OFFSET 4
#define NUM_RX_QUEUE 2
#define NUM_TX_QUEUE 2
#define NUM_TX_DESC 2 /* TX descriptors per packet */

struct ravb_tstamp_skb {
struct list_head list;
Expand Down Expand Up @@ -777,9 +780,9 @@ struct ravb_private {
dma_addr_t tx_desc_dma[NUM_TX_QUEUE];
struct ravb_ex_rx_desc *rx_ring[NUM_RX_QUEUE];
struct ravb_tx_desc *tx_ring[NUM_TX_QUEUE];
void *tx_align[NUM_TX_QUEUE];
struct sk_buff **rx_skb[NUM_RX_QUEUE];
struct sk_buff **tx_skb[NUM_TX_QUEUE];
void **tx_buffers[NUM_TX_QUEUE];
u32 rx_over_errors;
u32 rx_fifo_errors;
struct net_device_stats stats[NUM_RX_QUEUE];
Expand Down
104 changes: 60 additions & 44 deletions drivers/net/ethernet/renesas/ravb_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,12 +195,8 @@ static void ravb_ring_free(struct net_device *ndev, int q)
priv->tx_skb[q] = NULL;

/* Free aligned TX buffers */
if (priv->tx_buffers[q]) {
for (i = 0; i < priv->num_tx_ring[q]; i++)
kfree(priv->tx_buffers[q][i]);
}
kfree(priv->tx_buffers[q]);
priv->tx_buffers[q] = NULL;
kfree(priv->tx_align[q]);
priv->tx_align[q] = NULL;

if (priv->rx_ring[q]) {
ring_size = sizeof(struct ravb_ex_rx_desc) *
Expand All @@ -212,7 +208,7 @@ static void ravb_ring_free(struct net_device *ndev, int q)

if (priv->tx_ring[q]) {
ring_size = sizeof(struct ravb_tx_desc) *
(priv->num_tx_ring[q] + 1);
(priv->num_tx_ring[q] * NUM_TX_DESC + 1);
dma_free_coherent(NULL, ring_size, priv->tx_ring[q],
priv->tx_desc_dma[q]);
priv->tx_ring[q] = NULL;
Expand All @@ -227,7 +223,8 @@ static void ravb_ring_format(struct net_device *ndev, int q)
struct ravb_tx_desc *tx_desc;
struct ravb_desc *desc;
int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q];
int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q];
int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q] *
NUM_TX_DESC;
dma_addr_t dma_addr;
int i;

Expand Down Expand Up @@ -260,11 +257,12 @@ static void ravb_ring_format(struct net_device *ndev, int q)

memset(priv->tx_ring[q], 0, tx_ring_size);
/* Build TX ring buffer */
for (i = 0; i < priv->num_tx_ring[q]; i++) {
tx_desc = &priv->tx_ring[q][i];
for (i = 0, tx_desc = priv->tx_ring[q]; i < priv->num_tx_ring[q];
i++, tx_desc++) {
tx_desc->die_dt = DT_EEMPTY;
tx_desc++;
tx_desc->die_dt = DT_EEMPTY;
}
tx_desc = &priv->tx_ring[q][i];
tx_desc->dptr = cpu_to_le32((u32)priv->tx_desc_dma[q]);
tx_desc->die_dt = DT_LINKFIX; /* type */

Expand All @@ -285,7 +283,6 @@ static int ravb_ring_init(struct net_device *ndev, int q)
struct ravb_private *priv = netdev_priv(ndev);
struct sk_buff *skb;
int ring_size;
void *buffer;
int i;

/* Allocate RX and TX skb rings */
Expand All @@ -305,19 +302,11 @@ static int ravb_ring_init(struct net_device *ndev, int q)
}

/* Allocate rings for the aligned buffers */
priv->tx_buffers[q] = kcalloc(priv->num_tx_ring[q],
sizeof(*priv->tx_buffers[q]), GFP_KERNEL);
if (!priv->tx_buffers[q])
priv->tx_align[q] = kmalloc(DPTR_ALIGN * priv->num_tx_ring[q] +
DPTR_ALIGN - 1, GFP_KERNEL);
if (!priv->tx_align[q])
goto error;

for (i = 0; i < priv->num_tx_ring[q]; i++) {
buffer = kmalloc(PKT_BUF_SZ + RAVB_ALIGN - 1, GFP_KERNEL);
if (!buffer)
goto error;
/* Aligned TX buffer */
priv->tx_buffers[q][i] = buffer;
}

/* Allocate all RX descriptors. */
ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1);
priv->rx_ring[q] = dma_alloc_coherent(NULL, ring_size,
Expand All @@ -329,7 +318,8 @@ static int ravb_ring_init(struct net_device *ndev, int q)
priv->dirty_rx[q] = 0;

/* Allocate all TX descriptors. */
ring_size = sizeof(struct ravb_tx_desc) * (priv->num_tx_ring[q] + 1);
ring_size = sizeof(struct ravb_tx_desc) *
(priv->num_tx_ring[q] * NUM_TX_DESC + 1);
priv->tx_ring[q] = dma_alloc_coherent(NULL, ring_size,
&priv->tx_desc_dma[q],
GFP_KERNEL);
Expand Down Expand Up @@ -443,22 +433,27 @@ static int ravb_tx_free(struct net_device *ndev, int q)
u32 size;

for (; priv->cur_tx[q] - priv->dirty_tx[q] > 0; priv->dirty_tx[q]++) {
entry = priv->dirty_tx[q] % priv->num_tx_ring[q];
entry = priv->dirty_tx[q] % (priv->num_tx_ring[q] *
NUM_TX_DESC);
desc = &priv->tx_ring[q][entry];
if (desc->die_dt != DT_FEMPTY)
break;
/* Descriptor type must be checked before all other reads */
dma_rmb();
size = le16_to_cpu(desc->ds_tagl) & TX_DS;
/* Free the original skb. */
if (priv->tx_skb[q][entry]) {
if (priv->tx_skb[q][entry / NUM_TX_DESC]) {
dma_unmap_single(&ndev->dev, le32_to_cpu(desc->dptr),
size, DMA_TO_DEVICE);
dev_kfree_skb_any(priv->tx_skb[q][entry]);
priv->tx_skb[q][entry] = NULL;
/* Last packet descriptor? */
if (entry % NUM_TX_DESC == NUM_TX_DESC - 1) {
entry /= NUM_TX_DESC;
dev_kfree_skb_any(priv->tx_skb[q][entry]);
priv->tx_skb[q][entry] = NULL;
stats->tx_packets++;
}
free_num++;
}
stats->tx_packets++;
stats->tx_bytes += size;
desc->die_dt = DT_EEMPTY;
}
Expand Down Expand Up @@ -1284,37 +1279,53 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
u32 dma_addr;
void *buffer;
u32 entry;
u32 len;

spin_lock_irqsave(&priv->lock, flags);
if (priv->cur_tx[q] - priv->dirty_tx[q] >= priv->num_tx_ring[q]) {
if (priv->cur_tx[q] - priv->dirty_tx[q] > (priv->num_tx_ring[q] - 1) *
NUM_TX_DESC) {
netif_err(priv, tx_queued, ndev,
"still transmitting with the full ring!\n");
netif_stop_subqueue(ndev, q);
spin_unlock_irqrestore(&priv->lock, flags);
return NETDEV_TX_BUSY;
}
entry = priv->cur_tx[q] % priv->num_tx_ring[q];
priv->tx_skb[q][entry] = skb;
entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC);
priv->tx_skb[q][entry / NUM_TX_DESC] = skb;

if (skb_put_padto(skb, ETH_ZLEN))
goto drop;

buffer = PTR_ALIGN(priv->tx_buffers[q][entry], RAVB_ALIGN);
memcpy(buffer, skb->data, skb->len);
desc = &priv->tx_ring[q][entry];
desc->ds_tagl = cpu_to_le16(skb->len);
dma_addr = dma_map_single(&ndev->dev, buffer, skb->len, DMA_TO_DEVICE);
buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) +
entry / NUM_TX_DESC * DPTR_ALIGN;
len = PTR_ALIGN(skb->data, DPTR_ALIGN) - skb->data;
memcpy(buffer, skb->data, len);
dma_addr = dma_map_single(&ndev->dev, buffer, len, DMA_TO_DEVICE);
if (dma_mapping_error(&ndev->dev, dma_addr))
goto drop;

desc = &priv->tx_ring[q][entry];
desc->ds_tagl = cpu_to_le16(len);
desc->dptr = cpu_to_le32(dma_addr);

buffer = skb->data + len;
len = skb->len - len;
dma_addr = dma_map_single(&ndev->dev, buffer, len, DMA_TO_DEVICE);
if (dma_mapping_error(&ndev->dev, dma_addr))
goto unmap;

desc++;
desc->ds_tagl = cpu_to_le16(len);
desc->dptr = cpu_to_le32(dma_addr);

/* TX timestamp required */
if (q == RAVB_NC) {
ts_skb = kmalloc(sizeof(*ts_skb), GFP_ATOMIC);
if (!ts_skb) {
dma_unmap_single(&ndev->dev, dma_addr, skb->len,
desc--;
dma_unmap_single(&ndev->dev, dma_addr, len,
DMA_TO_DEVICE);
goto drop;
goto unmap;
}
ts_skb->skb = skb;
ts_skb->tag = priv->ts_skb_tag++;
Expand All @@ -1330,23 +1341,28 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)

/* Descriptor type must be set after all the above writes */
dma_wmb();
desc->die_dt = DT_FSINGLE;
desc->die_dt = DT_FEND;
desc--;
desc->die_dt = DT_FSTART;

ravb_write(ndev, ravb_read(ndev, TCCR) | (TCCR_TSRQ0 << q), TCCR);

priv->cur_tx[q]++;
if (priv->cur_tx[q] - priv->dirty_tx[q] >= priv->num_tx_ring[q] &&
!ravb_tx_free(ndev, q))
priv->cur_tx[q] += NUM_TX_DESC;
if (priv->cur_tx[q] - priv->dirty_tx[q] >
(priv->num_tx_ring[q] - 1) * NUM_TX_DESC && !ravb_tx_free(ndev, q))
netif_stop_subqueue(ndev, q);

exit:
mmiowb();
spin_unlock_irqrestore(&priv->lock, flags);
return NETDEV_TX_OK;

unmap:
dma_unmap_single(&ndev->dev, le32_to_cpu(desc->dptr),
le16_to_cpu(desc->ds_tagl), DMA_TO_DEVICE);
drop:
dev_kfree_skb_any(skb);
priv->tx_skb[q][entry] = NULL;
priv->tx_skb[q][entry / NUM_TX_DESC] = NULL;
goto exit;
}

Expand Down

0 comments on commit 2f45d19

Please sign in to comment.