From e015593573b3e3f74bd8a63c05fa92902194a354 Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <[email protected]>
Date: Tue, 4 Jan 2022 16:11:44 +0100
Subject: [PATCH 715/715] net: lantiq_xrx200: convert to build_skb

We can increase the efficiency of the Rx path by using raw buffers to
receive packets and then building SKBs around them just before passing
them into the network stack. In contrast, preallocating SKBs too early
reduces CPU cache efficiency.

NAT performance results on BT Home Hub 5A (kernel 5.10.89, MTU 1500):

	        Down        Up
	Before  577 Mbps    648 Mbps
	After   624 Mbps    695 Mbps
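
For reference, the resulting Rx flow boils down to the usual
build_skb() recipe. The sketch below is illustrative only: it covers
the single-fragment case, omits the frag_list assembly and ring
bookkeeping, and the helper names (rx_frag_size, rx_refill,
rx_complete) and parameter plumbing are invented for this example;
the kernel APIs and the NET_SKB_PAD + NET_IP_ALIGN headroom match the
hunks that follow.

	#include <linux/dma-mapping.h>
	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	/* One raw Rx buffer: headroom plus payload in front, room for
	 * struct skb_shared_info behind, both cacheline-aligned so the
	 * fragment can later be handed to build_skb() directly.
	 */
	static unsigned int rx_frag_size(u16 buf_size)
	{
		return SKB_DATA_ALIGN(buf_size + NET_SKB_PAD + NET_IP_ALIGN) +
		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	}

	/* Refill a ring slot with a raw page fragment, not an SKB. */
	static int rx_refill(struct device *dev, void **slot,
			     dma_addr_t *hw_addr, u16 frag_size, u16 buf_size)
	{
		void *buf = napi_alloc_frag(frag_size);
		dma_addr_t mapping;

		if (!buf)
			return -ENOMEM;

		mapping = dma_map_single(dev, buf, buf_size, DMA_FROM_DEVICE);
		if (dma_mapping_error(dev, mapping)) {
			skb_free_frag(buf);
			return -ENOMEM;
		}

		*slot = buf;
		/* hardware writes behind the reserved headroom */
		*hw_addr = mapping + NET_SKB_PAD + NET_IP_ALIGN;
		return 0;
	}

	/* Completion: only now wrap the filled buffer in an sk_buff. */
	static struct sk_buff *rx_complete(void *buf, unsigned int len,
					   u16 frag_size)
	{
		struct sk_buff *skb = build_skb(buf, frag_size);

		if (!skb)
			return NULL;	/* caller keeps/recycles the frag */

		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
		skb_put(skb, len);
		return skb;
	}

Because the buffer is an anonymous page fragment while DMA is in
flight, the sk_buff metadata is allocated, written and cached only
moments before the stack reads it.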

Signed-off-by: Aleksander Jan Bajkowski <[email protected]>
Signed-off-by: Jakub Kicinski <[email protected]>
---
 drivers/net/ethernet/lantiq_xrx200.c | 56 ++++++++++++++++++----------
 1 file changed, 36 insertions(+), 20 deletions(-)

--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -63,7 +63,11 @@ struct xrx200_chan {
 
 	struct napi_struct napi;
 	struct ltq_dma_channel dma;
-	struct sk_buff *skb[LTQ_DESC_NUM];
+
+	union {
+		struct sk_buff *skb[LTQ_DESC_NUM];
+		void *rx_buff[LTQ_DESC_NUM];
+	};
 
 	struct sk_buff *skb_head;
 	struct sk_buff *skb_tail;
@@ -78,6 +82,7 @@ struct xrx200_priv {
 	struct xrx200_chan chan_rx;
 
 	u16 rx_buf_size;
+	u16 rx_skb_size;
 
 	struct net_device *net_dev;
 	struct device *dev;
@@ -115,6 +120,12 @@ static int xrx200_buffer_size(int mtu)
 	return round_up(xrx200_max_frame_len(mtu), 4 * XRX200_DMA_BURST_LEN);
 }
 
+static int xrx200_skb_size(u16 buf_size)
+{
+	return SKB_DATA_ALIGN(buf_size + NET_SKB_PAD + NET_IP_ALIGN) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
 /* drop all the packets from the DMA ring */
 static void xrx200_flush_dma(struct xrx200_chan *ch)
 {
@@ -173,30 +184,29 @@ static int xrx200_close(struct net_devic
 	return 0;
 }
 
-static int xrx200_alloc_skb(struct xrx200_chan *ch)
+static int xrx200_alloc_buf(struct xrx200_chan *ch, void *(*alloc)(unsigned int size))
 {
-	struct sk_buff *skb = ch->skb[ch->dma.desc];
+	void *buf = ch->rx_buff[ch->dma.desc];
 	struct xrx200_priv *priv = ch->priv;
 	dma_addr_t mapping;
 	int ret = 0;
 
-	ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(priv->net_dev,
-							  priv->rx_buf_size);
-	if (!ch->skb[ch->dma.desc]) {
+	ch->rx_buff[ch->dma.desc] = alloc(priv->rx_skb_size);
+	if (!ch->rx_buff[ch->dma.desc]) {
 		ret = -ENOMEM;
 		goto skip;
 	}
 
-	mapping = dma_map_single(priv->dev, ch->skb[ch->dma.desc]->data,
+	mapping = dma_map_single(priv->dev, ch->rx_buff[ch->dma.desc],
 				 priv->rx_buf_size, DMA_FROM_DEVICE);
 	if (unlikely(dma_mapping_error(priv->dev, mapping))) {
-		dev_kfree_skb_any(ch->skb[ch->dma.desc]);
-		ch->skb[ch->dma.desc] = skb;
+		skb_free_frag(ch->rx_buff[ch->dma.desc]);
+		ch->rx_buff[ch->dma.desc] = buf;
 		ret = -ENOMEM;
 		goto skip;
 	}
 
-	ch->dma.desc_base[ch->dma.desc].addr = mapping;
+	ch->dma.desc_base[ch->dma.desc].addr = mapping + NET_SKB_PAD + NET_IP_ALIGN;
 	/* Make sure the address is written before we give it to HW */
 	wmb();
 skip:
@@ -210,13 +220,14 @@ static int xrx200_hw_receive(struct xrx2
 {
 	struct xrx200_priv *priv = ch->priv;
 	struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
-	struct sk_buff *skb = ch->skb[ch->dma.desc];
+	void *buf = ch->rx_buff[ch->dma.desc];
 	u32 ctl = desc->ctl;
 	int len = (ctl & LTQ_DMA_SIZE_MASK);
 	struct net_device *net_dev = priv->net_dev;
+	struct sk_buff *skb;
 	int ret;
 
-	ret = xrx200_alloc_skb(ch);
+	ret = xrx200_alloc_buf(ch, napi_alloc_frag);
 
 	ch->dma.desc++;
 	ch->dma.desc %= LTQ_DESC_NUM;
@@ -227,19 +238,21 @@ static int xrx200_hw_receive(struct xrx2
 		return ret;
 	}
 
+	skb = build_skb(buf, priv->rx_skb_size);
+	skb_reserve(skb, NET_SKB_PAD);
 	skb_put(skb, len);
 
 	/* add buffers to skb via skb->frag_list */
 	if (ctl & LTQ_DMA_SOP) {
 		ch->skb_head = skb;
 		ch->skb_tail = skb;
+		skb_reserve(skb, NET_IP_ALIGN);
 	} else if (ch->skb_head) {
 		if (ch->skb_head == ch->skb_tail)
 			skb_shinfo(ch->skb_tail)->frag_list = skb;
 		else
 			ch->skb_tail->next = skb;
 		ch->skb_tail = skb;
-		skb_reserve(ch->skb_tail, -NET_IP_ALIGN);
 		ch->skb_head->len += skb->len;
 		ch->skb_head->data_len += skb->len;
 		ch->skb_head->truesize += skb->truesize;
@@ -395,12 +408,13 @@ xrx200_change_mtu(struct net_device *net
 	struct xrx200_chan *ch_rx = &priv->chan_rx;
 	int old_mtu = net_dev->mtu;
 	bool running = false;
-	struct sk_buff *skb;
+	void *buff;
 	int curr_desc;
 	int ret = 0;
 
 	net_dev->mtu = new_mtu;
 	priv->rx_buf_size = xrx200_buffer_size(new_mtu);
+	priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);
 
 	if (new_mtu <= old_mtu)
 		return ret;
@@ -416,14 +430,15 @@ xrx200_change_mtu(struct net_device *net
 
 	for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM;
 	     ch_rx->dma.desc++) {
-		skb = ch_rx->skb[ch_rx->dma.desc];
-		ret = xrx200_alloc_skb(ch_rx);
+		buff = ch_rx->rx_buff[ch_rx->dma.desc];
+		ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag);
 		if (ret) {
 			net_dev->mtu = old_mtu;
 			priv->rx_buf_size = xrx200_buffer_size(old_mtu);
+			priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);
 			break;
 		}
-		dev_kfree_skb_any(skb);
+		skb_free_frag(buff);
 	}
 
 	ch_rx->dma.desc = curr_desc;
@@ -476,7 +491,7 @@ static int xrx200_dma_init(struct xrx200
 	ltq_dma_alloc_rx(&ch_rx->dma);
 	for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM;
 	     ch_rx->dma.desc++) {
-		ret = xrx200_alloc_skb(ch_rx);
+		ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag);
 		if (ret)
 			goto rx_free;
 	}
@@ -511,7 +526,7 @@ rx_ring_free:
 	/* free the allocated RX ring */
 	for (i = 0; i < LTQ_DESC_NUM; i++) {
 		if (priv->chan_rx.skb[i])
-			dev_kfree_skb_any(priv->chan_rx.skb[i]);
+			skb_free_frag(priv->chan_rx.rx_buff[i]);
 	}
 
 rx_free:
@@ -528,7 +543,7 @@ static void xrx200_hw_cleanup(struct xrx
 
 	/* free the allocated RX ring */
 	for (i = 0; i < LTQ_DESC_NUM; i++)
-		dev_kfree_skb_any(priv->chan_rx.skb[i]);
+		skb_free_frag(priv->chan_rx.rx_buff[i]);
 }
 
 static int xrx200_probe(struct platform_device *pdev)
@@ -554,6 +569,7 @@ static int xrx200_probe(struct platform_
 	net_dev->min_mtu = ETH_ZLEN;
 	net_dev->max_mtu = XRX200_DMA_DATA_LEN - xrx200_max_frame_len(0);
 	priv->rx_buf_size = xrx200_buffer_size(ETH_DATA_LEN);
+	priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);
 
 	/* load the memory ranges */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);