
ramips: improve ethernet driver performance with GRO/TSO

GRO stores packets as a fraglist. If they are routed back to the ethernet
device, they need to be re-segmented if the driver does not support
sending fraglists.
Add the missing support for that, along with a missing feature flag that
allows full routed GRO->TSO offload.
This considerably reduces CPU utilization for routing.

Signed-off-by: Felix Fietkau <[email protected]>
commit 9a4253b81f
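
For context: a GRO-merged skb keeps its first segment in the usual linear/frags area and chains the remaining segments off skb_shinfo(skb)->frag_list. A minimal sketch of walking such an skb with the kernel's skb_walk_frags() helper (the function name is illustrative, not part of the driver):

	/* Count the segments of a possibly-fraglist skb: the head
	 * itself plus every skb chained off its frag_list. */
	static unsigned int count_fraglist_segs(struct sk_buff *skb)
	{
		struct sk_buff *iter;
		unsigned int segs = 1;

		skb_walk_frags(skb, iter)	/* no-op without a frag_list */
			segs++;

		return segs;
	}

fe_next_frag() in the diff below flattens exactly this head-then-frag_list walk into a single iterator.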

target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/mtk_eth_soc.c | +101 -66

@@ -307,24 +307,20 @@ no_rx_mem:
 
 static void fe_txd_unmap(struct device *dev, struct fe_tx_buf *tx_buf)
 {
-	if (tx_buf->flags & FE_TX_FLAGS_SINGLE0) {
-		dma_unmap_single(dev,
-				 dma_unmap_addr(tx_buf, dma_addr0),
-				 dma_unmap_len(tx_buf, dma_len0),
-				 DMA_TO_DEVICE);
-	} else if (tx_buf->flags & FE_TX_FLAGS_PAGE0) {
+	if (dma_unmap_len(tx_buf, dma_len0))
 		dma_unmap_page(dev,
 			       dma_unmap_addr(tx_buf, dma_addr0),
 			       dma_unmap_len(tx_buf, dma_len0),
 			       DMA_TO_DEVICE);
-	}
-	if (tx_buf->flags & FE_TX_FLAGS_PAGE1)
+
+	if (dma_unmap_len(tx_buf, dma_len1))
 		dma_unmap_page(dev,
 			       dma_unmap_addr(tx_buf, dma_addr1),
 			       dma_unmap_len(tx_buf, dma_len1),
 			       DMA_TO_DEVICE);
 
-	tx_buf->flags = 0;
+	dma_unmap_len_set(tx_buf, dma_len0, 0);
+	dma_unmap_len_set(tx_buf, dma_len1, 0);
 	if (tx_buf->skb && (tx_buf->skb != (struct sk_buff *)DMA_DUMMY_DESC))
 		dev_kfree_skb_any(tx_buf->skb);
 	tx_buf->skb = NULL;
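
With the FE_TX_FLAGS_* enum gone, a non-zero dma_len is now the "slot holds a live mapping" marker, and since the linear area is also mapped page-based (see fe_tx_dma_map_skb() below), dma_unmap_page() covers every case. A sketch of the resulting convention, assuming the tx_buf lengths start out zeroed:

	/* Unmap a slot only if its recorded length is non-zero, then
	 * clear the length so a repeated unmap is a harmless no-op. */
	if (dma_unmap_len(tx_buf, dma_len0)) {
		dma_unmap_page(dev, dma_unmap_addr(tx_buf, dma_addr0),
			       dma_unmap_len(tx_buf, dma_len0),
			       DMA_TO_DEVICE);
		dma_unmap_len_set(tx_buf, dma_len0, 0);
	}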
@@ -559,6 +555,54 @@ static inline u32 fe_empty_txd(struct fe_tx_ring *ring)
 			 (ring->tx_ring_size - 1)));
 }
 
+static int fe_tx_dma_map_page(struct device *dev, struct fe_tx_buf *tx_buf,
+			      struct fe_tx_dma *txd, int idx,
+			      struct page *page, size_t offset, size_t size)
+{
+	dma_addr_t mapped_addr;
+
+	mapped_addr = dma_map_page(dev, page, offset, size, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, mapped_addr)))
+		return -EIO;
+
+	if (idx & 1) {
+		txd->txd3 = mapped_addr;
+		txd->txd2 |= TX_DMA_PLEN1(size);
+		dma_unmap_addr_set(tx_buf, dma_addr1, mapped_addr);
+		dma_unmap_len_set(tx_buf, dma_len1, size);
+	} else {
+		tx_buf->skb = (struct sk_buff *)DMA_DUMMY_DESC;
+		txd->txd1 = mapped_addr;
+		txd->txd2 = TX_DMA_PLEN0(size);
+		dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
+		dma_unmap_len_set(tx_buf, dma_len0, size);
+	}
+	return 0;
+}
+
+static int fe_tx_dma_map_skb(struct device *dev, struct fe_tx_buf *tx_buf,
+			     struct fe_tx_dma *txd, int idx,
+			     struct sk_buff *skb)
+{
+	struct page *page = virt_to_page(skb->data);
+	size_t offset = offset_in_page(skb->data);
+	size_t size = skb_headlen(skb);
+
+	return fe_tx_dma_map_page(dev, tx_buf, txd, idx, page, offset, size);
+}
+
+static inline struct sk_buff *
+fe_next_frag(struct sk_buff *head, struct sk_buff *skb)
+{
+	if (skb != head)
+		return skb->next;
+
+	if (skb_has_frag_list(skb))
+		return skb_shinfo(skb)->frag_list;
+
+	return NULL;
+}
+
 static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
 			 int tx_num, struct fe_tx_ring *ring)
 {
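
fe_tx_dma_map_page() packs two buffers into each hardware descriptor: even values of idx fill slot 0 (txd1/TX_DMA_PLEN0) and tag the tx_buf with DMA_DUMMY_DESC, odd values fill slot 1 (txd3/TX_DMA_PLEN1). An illustrative helper (not in the driver) showing where piece k of a packet lands, relative to the packet's first descriptor:

	/* Two pieces per descriptor: piece k goes to descriptor k/2,
	 * slot k%2 (slot 0 = txd1/PLEN0, slot 1 = txd3/PLEN1). */
	static void piece_position(int k, int *desc, int *slot)
	{
		*desc = k >> 1;
		*slot = k & 1;
	}

So a head plus three frag pieces (k = 0..3) occupies exactly two descriptors, matching the DIV_ROUND_UP(nfrags, 2) budget in fe_cal_txd_req().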
@@ -566,7 +610,7 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
 	struct skb_frag_struct *frag;
 	struct fe_tx_dma txd, *ptxd;
 	struct fe_tx_buf *tx_buf;
-	dma_addr_t mapped_addr;
+	struct sk_buff *head = skb;
 	unsigned int nr_frags;
 	u32 def_txd4;
 	int i, j, k, frag_size, frag_map_size, offset;
@@ -574,7 +618,6 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
 	tx_buf = &ring->tx_buf[ring->tx_next_idx];
 	memset(tx_buf, 0, sizeof(*tx_buf));
 	memset(&txd, 0, sizeof(txd));
-	nr_frags = skb_shinfo(skb)->nr_frags;
 
 	/* init tx descriptor */
 	if (priv->soc->tx_dma)
@@ -613,82 +656,68 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
 		}
 	}
 
-	mapped_addr = dma_map_single(&dev->dev, skb->data,
-				     skb_headlen(skb), DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
-		goto err_out;
-	txd.txd1 = mapped_addr;
-	txd.txd2 = TX_DMA_PLEN0(skb_headlen(skb));
+	k = 0;
+	j = ring->tx_next_idx;
 
-	tx_buf->flags |= FE_TX_FLAGS_SINGLE0;
-	dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
-	dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
+next_frag:
+	if (skb_headlen(skb)) {
+		if (fe_tx_dma_map_skb(&dev->dev, tx_buf, &txd, k++, skb))
+			goto err_dma;
+	}
 
 	/* TX SG offload */
-	j = ring->tx_next_idx;
-	k = 0;
+	nr_frags = skb_shinfo(skb)->nr_frags;
 	for (i = 0; i < nr_frags; i++) {
-		offset = 0;
+		struct page *page;
+
 		frag = &skb_shinfo(skb)->frags[i];
 		frag_size = skb_frag_size(frag);
+		offset = frag->page_offset;
+		page = skb_frag_page(frag);
 
 		while (frag_size > 0) {
 			frag_map_size = min(frag_size, TX_DMA_BUF_LEN);
-			mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset,
-						       frag_map_size,
-						       DMA_TO_DEVICE);
-			if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
-				goto err_dma;
-
-			if (k & 0x1) {
-				j = NEXT_TX_DESP_IDX(j);
-				txd.txd1 = mapped_addr;
-				txd.txd2 = TX_DMA_PLEN0(frag_map_size);
+			if (!(k & 0x1)) {
+				fe_set_txd(&txd, &ring->tx_dma[j]);
+				memset(&txd, 0, sizeof(txd));
 				txd.txd4 = def_txd4;
-
+				j = NEXT_TX_DESP_IDX(j);
 				tx_buf = &ring->tx_buf[j];
-				memset(tx_buf, 0, sizeof(*tx_buf));
-
-				tx_buf->flags |= FE_TX_FLAGS_PAGE0;
-				dma_unmap_addr_set(tx_buf, dma_addr0,
-						   mapped_addr);
-				dma_unmap_len_set(tx_buf, dma_len0,
-						  frag_map_size);
-			} else {
-				txd.txd3 = mapped_addr;
-				txd.txd2 |= TX_DMA_PLEN1(frag_map_size);
-
-				tx_buf->skb = (struct sk_buff *)DMA_DUMMY_DESC;
-				tx_buf->flags |= FE_TX_FLAGS_PAGE1;
-				dma_unmap_addr_set(tx_buf, dma_addr1,
-						   mapped_addr);
-				dma_unmap_len_set(tx_buf, dma_len1,
-						  frag_map_size);
-
-				if (!((i == (nr_frags - 1)) &&
-				      (frag_map_size == frag_size))) {
-					fe_set_txd(&txd, &ring->tx_dma[j]);
-					memset(&txd, 0, sizeof(txd));
-				}
 			}
+
+			if (fe_tx_dma_map_page(&dev->dev, tx_buf, &txd, k++,
+					       page, offset, frag_map_size))
+				goto err_dma;
+
 			frag_size -= frag_map_size;
 			offset += frag_map_size;
-			k++;
 		}
 	}
 
+	skb = fe_next_frag(head, skb);
+	if (skb) {
+		if (!(k & 0x1)) {
+			fe_set_txd(&txd, &ring->tx_dma[j]);
+			memset(&txd, 0, sizeof(txd));
+			txd.txd4 = def_txd4;
+			j = NEXT_TX_DESP_IDX(j);
+			tx_buf = &ring->tx_buf[j];
+		}
+		goto next_frag;
+	}
+
 	/* set last segment */
 	if (k & 0x1)
-		txd.txd2 |= TX_DMA_LS1;
-	else
 		txd.txd2 |= TX_DMA_LS0;
+	else
+		txd.txd2 |= TX_DMA_LS1;
 	fe_set_txd(&txd, &ring->tx_dma[j]);
 
 	/* store skb to cleanup */
-	tx_buf->skb = skb;
+	tx_buf->skb = head;
 
-	netdev_sent_queue(dev, skb->len);
-	skb_tx_timestamp(skb);
+	netdev_sent_queue(dev, head->len);
+	skb_tx_timestamp(head);
 
 	ring->tx_next_idx = NEXT_TX_DESP_IDX(j);
 	/* make sure that all changes to the dma ring are flushed before we
@@ -702,7 +731,7 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
 			netif_wake_queue(dev);
 	}
 
-	if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !skb->xmit_more)
+	if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !head->xmit_more)
 		fe_reg_w32(ring->tx_next_idx, FE_REG_TX_CTX_IDX0);
 
 	return 0;
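
Taken together, the rewritten fe_tx_map_dma() walks the head skb and then every fraglist skb via the next_frag label; a simplified outline (comment-only sketch, error paths elided):

	/*
	 * skb = head;
	 * do {
	 *	map the linear area (if any) as piece k++;
	 *	map each page frag in TX_DMA_BUF_LEN-sized chunks,
	 *	one piece per k++, two pieces per descriptor;
	 *	skb = fe_next_frag(head, skb);
	 * } while (skb);
	 *
	 * Only the final descriptor gets TX_DMA_LS0/LS1, and only the
	 * final tx_buf stores the head skb, so TX completion frees the
	 * whole chain exactly once.
	 */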
@@ -762,10 +791,12 @@ static inline int fe_skb_padto(struct sk_buff *skb, struct fe_priv *priv)
 
 static inline int fe_cal_txd_req(struct sk_buff *skb)
 {
-	int i, nfrags;
+	struct sk_buff *head = skb;
+	int i, nfrags = 0;
 	struct skb_frag_struct *frag;
 
-	nfrags = 1;
+next_frag:
+	nfrags++;
 	if (skb_is_gso(skb)) {
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 			frag = &skb_shinfo(skb)->frags[i];
@@ -775,6 +806,10 @@ static inline int fe_cal_txd_req(struct sk_buff *skb)
 		nfrags += skb_shinfo(skb)->nr_frags;
 	}
 
+	skb = fe_next_frag(head, skb);
+	if (skb)
+		goto next_frag;
+
 	return DIV_ROUND_UP(nfrags, 2);
 }
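
A worked instance of that descriptor budget, as a standalone userspace sketch (assuming TX_DMA_BUF_LEN is the driver's 14-bit PLEN limit of 0x3fff):

	#include <stdio.h>

	#define TX_DMA_BUF_LEN	0x3fff			/* assumed PLEN limit */
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	int main(void)
	{
		/* A GSO skb with one linear piece and a 32 KiB frag: the
		 * frag splits into DIV_ROUND_UP(32768, 0x3fff) = 3 pieces,
		 * four pieces in total. */
		int pieces = 1 + DIV_ROUND_UP(32768, TX_DMA_BUF_LEN);

		/* Two pieces per descriptor, as in fe_cal_txd_req(). */
		printf("descriptors needed: %d\n", DIV_ROUND_UP(pieces, 2));
		return 0;
	}

This prints "descriptors needed: 2".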
 

target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/mtk_eth_soc.h | +2 -9

@@ -435,19 +435,12 @@ struct fe_hw_stats {
 #undef _FE
 };
 
-enum fe_tx_flags {
-	FE_TX_FLAGS_SINGLE0	= 0x01,
-	FE_TX_FLAGS_PAGE0	= 0x02,
-	FE_TX_FLAGS_PAGE1	= 0x04,
-};
-
 struct fe_tx_buf {
 	struct sk_buff *skb;
-	u32 flags;
 	DEFINE_DMA_UNMAP_ADDR(dma_addr0);
-	DEFINE_DMA_UNMAP_LEN(dma_len0);
 	DEFINE_DMA_UNMAP_ADDR(dma_addr1);
-	DEFINE_DMA_UNMAP_LEN(dma_len1);
+	u16 dma_len0;
+	u16 dma_len1;
 };
 
 struct fe_tx_ring {
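
The raw u16 length fields still satisfy the dma_unmap_len()/dma_unmap_len_set() calls above: with CONFIG_NEED_DMA_MAP_STATE set (as on these MIPS targets) those macros are plain field accesses, and lengths never exceed TX_DMA_BUF_LEN, so 16 bits suffice. From include/linux/dma-mapping.h:

	#define dma_unmap_len(PTR, LEN_NAME)		((PTR)->LEN_NAME)
	#define dma_unmap_len_set(PTR, LEN_NAME, VAL)	(((PTR)->LEN_NAME) = (VAL))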

target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/soc_mt7621.c | +2 -1

@@ -143,7 +143,8 @@ static void mt7621_init_data(struct fe_soc_data *data,
 
 	netdev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_TSO |
-		NETIF_F_TSO6 | NETIF_F_IPV6_CSUM;
+		NETIF_F_TSO6 | NETIF_F_IPV6_CSUM | NETIF_F_FRAGLIST |
+		NETIF_F_TSO_MANGLEID;
 }
 
 static void mt7621_set_mac(struct fe_priv *priv, unsigned char *mac)
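
NETIF_F_FRAGLIST is the "missing feature flag" from the commit message: without it the core linearizes fraglist skbs before ndo_start_xmit, defeating the routed GRO->TSO path. The relevant core check (an abridged sketch of net/core/dev.c as of 4.14):

	static bool skb_needs_linearize(struct sk_buff *skb,
					netdev_features_t features)
	{
		return skb_is_nonlinear(skb) &&
		       ((skb_has_frag_list(skb) &&
			 !(features & NETIF_F_FRAGLIST)) ||
			(skb_shinfo(skb)->nr_frags &&
			 !(features & NETIF_F_SG)));
	}

NETIF_F_TSO_MANGLEID additionally keeps TSO usable for GRO'd flows whose IP IDs did not increment as expected (SKB_GSO_TCP_FIXEDID), which forwarded traffic commonly triggers.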