|
|
@@ -0,0 +1,267 @@
|
|
|
+From: Felix Fietkau <[email protected]>
|
|
|
+Date: Mon, 23 Mar 2015 02:42:26 +0100
|
|
|
+Subject: [PATCH] bgmac: implement scatter/gather support
|
|
|
+
|
|
|
+Always use software checksumming, since the hardware does not have any
|
|
|
+checksum offload support.
|
|
|
+This significantly improves local TCP tx performance.
|
|
|
+
|
|
|
+Signed-off-by: Felix Fietkau <[email protected]>
|
|
|
+---
|
|
|
+
|
|
|
+--- a/drivers/net/ethernet/broadcom/bgmac.c
|
|
|
++++ b/drivers/net/ethernet/broadcom/bgmac.c
|
|
|
+@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct b
|
|
|
+ bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
|
|
|
+ }
|
|
|
+
|
|
|
++static void /* fill TX descriptor i from the address stored in ring->slots[i] */
|
|
|
++bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
|
|
|
++ int i, int len, u32 ctl0) /* bgmac is not referenced in the body */
|
|
|
++{
|
|
|
++ struct bgmac_slot_info *slot;
|
|
|
++ struct bgmac_dma_desc *dma_desc;
|
|
|
++ u32 ctl1;
|
|
|
++
|
|
|
++ if (i == ring->num_slots - 1) /* final slot of the ring */
|
|
|
++ ctl0 |= BGMAC_DESC_CTL0_EOT;
|
|
|
++
|
|
|
++ ctl1 = len & BGMAC_DESC_CTL1_LEN; /* keep only the length field bits */
|
|
|
++
|
|
|
++ slot = &ring->slots[i];
|
|
|
++ dma_desc = &ring->cpu_base[i];
|
|
|
++ dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr)); /* slot->dma_addr is only read here; the caller maps it first */
|
|
|
++ dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
|
|
|
++ dma_desc->ctl0 = cpu_to_le32(ctl0); /* descriptor words are stored little-endian */
|
|
|
++ dma_desc->ctl1 = cpu_to_le32(ctl1);
|
|
|
++}
|
|
|
++
|
|
|
+ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
|
|
|
+ struct bgmac_dma_ring *ring,
|
|
|
+ struct sk_buff *skb)
|
|
|
+ {
|
|
|
+ struct device *dma_dev = bgmac->core->dma_dev;
|
|
|
+ struct net_device *net_dev = bgmac->net_dev;
|
|
|
+- struct bgmac_dma_desc *dma_desc;
|
|
|
+- struct bgmac_slot_info *slot;
|
|
|
+- u32 ctl0, ctl1;
|
|
|
++ struct bgmac_slot_info *slot = &ring->slots[ring->end];
|
|
|
+ int free_slots;
|
|
|
++ int nr_frags;
|
|
|
++ u32 flags;
|
|
|
++ int index = ring->end;
|
|
|
++ int i;
|
|
|
+
|
|
|
+ if (skb->len > BGMAC_DESC_CTL1_LEN) {
|
|
|
+ bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
|
|
|
+- goto err_stop_drop;
|
|
|
++ goto err_drop;
|
|
|
+ }
|
|
|
+
|
|
|
++ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
|
|
|
++ goto err_drop; /* software checksum failed: do not send a bad frame */
|
|
|
++
|
|
|
++ nr_frags = skb_shinfo(skb)->nr_frags;
|
|
|
++
|
|
|
+ if (ring->start <= ring->end)
|
|
|
+ free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
|
|
|
+ else
|
|
|
+ free_slots = ring->start - ring->end;
|
|
|
+- if (free_slots == 1) {
|
|
|
++
|
|
|
++ if (free_slots <= nr_frags + 1) {
|
|
|
+ bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
|
|
|
+ netif_stop_queue(net_dev);
|
|
|
+ return NETDEV_TX_BUSY;
|
|
|
+ }
|
|
|
+
|
|
|
+- slot = &ring->slots[ring->end];
|
|
|
+- slot->skb = skb;
|
|
|
+- slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
|
|
|
++ slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
|
|
|
+ DMA_TO_DEVICE);
|
|
|
+- if (dma_mapping_error(dma_dev, slot->dma_addr)) {
|
|
|
+- bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
|
|
|
+- ring->mmio_base);
|
|
|
+- goto err_stop_drop;
|
|
|
+- }
|
|
|
++ if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
|
|
|
++ goto err_dma_head;
|
|
|
+
|
|
|
+- ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
|
|
|
+- if (ring->end == ring->num_slots - 1)
|
|
|
+- ctl0 |= BGMAC_DESC_CTL0_EOT;
|
|
|
+- ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
|
|
|
++ flags = BGMAC_DESC_CTL0_SOF;
|
|
|
++ if (!nr_frags)
|
|
|
++ flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
|
|
|
++
|
|
|
++ bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
|
|
|
++ flags = 0;
|
|
|
++
|
|
|
++ for (i = 0; i < nr_frags; i++) {
|
|
|
++ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
|
|
|
++ int len = skb_frag_size(frag);
|
|
|
++
|
|
|
++ index = (index + 1) % BGMAC_TX_RING_SLOTS;
|
|
|
++ slot = &ring->slots[index];
|
|
|
++ slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
|
|
|
++ len, DMA_TO_DEVICE);
|
|
|
++ if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
|
|
|
++ goto err_dma;
|
|
|
+
|
|
|
+- dma_desc = ring->cpu_base;
|
|
|
+- dma_desc += ring->end;
|
|
|
+- dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
|
|
|
+- dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
|
|
|
+- dma_desc->ctl0 = cpu_to_le32(ctl0);
|
|
|
+- dma_desc->ctl1 = cpu_to_le32(ctl1);
|
|
|
++ if (i == nr_frags - 1)
|
|
|
++ flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
|
|
|
++
|
|
|
++ bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
|
|
|
++ }
|
|
|
++
|
|
|
++ slot->skb = skb;
|
|
|
+
|
|
|
+ netdev_sent_queue(net_dev, skb->len);
|
|
|
+
|
|
|
+@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(stru
|
|
|
+ /* Increase ring->end to point empty slot. We tell hardware the first
|
|
|
+ * slot it should *not* read.
|
|
|
+ */
|
|
|
+- if (++ring->end >= BGMAC_TX_RING_SLOTS)
|
|
|
+- ring->end = 0;
|
|
|
++ ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
|
|
|
+ bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
|
|
|
+ ring->index_base +
|
|
|
+ ring->end * sizeof(struct bgmac_dma_desc));
|
|
|
+
|
|
|
+- /* Always keep one slot free to allow detecting bugged calls. */
|
|
|
+- if (--free_slots == 1)
|
|
|
++ free_slots -= nr_frags + 1;
|
|
|
++ if (free_slots < 8)
|
|
|
+ netif_stop_queue(net_dev);
|
|
|
+
|
|
|
+ return NETDEV_TX_OK;
|
|
|
+
|
|
|
+-err_stop_drop:
|
|
|
+- netif_stop_queue(net_dev);
|
|
|
++err_dma:
|
|
|
++ dma_unmap_single(dma_dev, ring->slots[ring->end].dma_addr,
|
|
|
++ skb_headlen(skb), DMA_TO_DEVICE); /* head is in the first slot; 'slot' is the failed frag */
|
|
|
++
|
|
|
++ for (; i > 0; i--) { /* mapped frags 0..i-1 occupy slots end+1..end+i */
|
|
|
++ int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
|
|
|
++ struct bgmac_slot_info *slot = &ring->slots[index];
|
|
|
++ u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
|
|
|
++ int len = ctl1 & BGMAC_DESC_CTL1_LEN;
|
|
|
++
|
|
|
++ dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
|
|
|
++ }
|
|
|
++
|
|
|
++err_dma_head:
|
|
|
++ bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
|
|
|
++ ring->mmio_base);
|
|
|
++
|
|
|
++err_drop:
|
|
|
+ dev_kfree_skb(skb);
|
|
|
+ return NETDEV_TX_OK;
|
|
|
+ }
|
|
|
+@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgm
|
|
|
+
|
|
|
+ while (ring->start != empty_slot) {
|
|
|
+ struct bgmac_slot_info *slot = &ring->slots[ring->start];
|
|
|
++ u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
|
|
|
++ int len = ctl1 & BGMAC_DESC_CTL1_LEN;
|
|
|
+
|
|
|
+- if (slot->skb) {
|
|
|
++ if (!slot->dma_addr) {
|
|
|
++ bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
|
|
|
++ ring->start, ring->end);
|
|
|
++ goto next;
|
|
|
++ }
|
|
|
++
|
|
|
++ if (le32_to_cpu(ring->cpu_base[ring->start].ctl0) & BGMAC_DESC_CTL0_SOF)
|
|
|
+ /* Unmap no longer used buffer */
|
|
|
+- dma_unmap_single(dma_dev, slot->dma_addr,
|
|
|
+- slot->skb->len, DMA_TO_DEVICE);
|
|
|
+- slot->dma_addr = 0;
|
|
|
++ dma_unmap_single(dma_dev, slot->dma_addr, len,
|
|
|
++ DMA_TO_DEVICE);
|
|
|
++ else
|
|
|
++ dma_unmap_page(dma_dev, slot->dma_addr, len,
|
|
|
++ DMA_TO_DEVICE);
|
|
|
+
|
|
|
++ if (slot->skb) {
|
|
|
+ bytes_compl += slot->skb->len;
|
|
|
+ pkts_compl++;
|
|
|
+
|
|
|
+ /* Free memory! :) */
|
|
|
+ dev_kfree_skb(slot->skb);
|
|
|
+ slot->skb = NULL;
|
|
|
+- } else {
|
|
|
+- bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
|
|
|
+- ring->start, ring->end);
|
|
|
+ }
|
|
|
+
|
|
|
++next:
|
|
|
++ slot->dma_addr = 0;
|
|
|
+ if (++ring->start >= BGMAC_TX_RING_SLOTS)
|
|
|
+ ring->start = 0;
|
|
|
+ freed = true;
|
|
|
+ }
|
|
|
+
|
|
|
++ if (!pkts_compl)
|
|
|
++ return;
|
|
|
++
|
|
|
+ netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
|
|
|
+
|
|
|
+- if (freed && netif_queue_stopped(bgmac->net_dev))
|
|
|
++ if (netif_queue_stopped(bgmac->net_dev))
|
|
|
+ netif_wake_queue(bgmac->net_dev);
|
|
|
+ }
|
|
|
+
|
|
|
+@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struc
|
|
|
+ struct bgmac_dma_ring *ring)
|
|
|
+ {
|
|
|
+ struct device *dma_dev = bgmac->core->dma_dev;
|
|
|
++ struct bgmac_dma_desc *dma_desc = ring->cpu_base;
|
|
|
+ struct bgmac_slot_info *slot;
|
|
|
+ int i;
|
|
|
+
|
|
|
+ for (i = 0; i < ring->num_slots; i++) {
|
|
|
++ int len = le32_to_cpu(dma_desc[i].ctl1) & BGMAC_DESC_CTL1_LEN;
|
|
|
++
|
|
|
+ slot = &ring->slots[i];
|
|
|
+- if (slot->skb) {
|
|
|
+- if (slot->dma_addr)
|
|
|
+- dma_unmap_single(dma_dev, slot->dma_addr,
|
|
|
+- slot->skb->len, DMA_TO_DEVICE);
|
|
|
+- dev_kfree_skb(slot->skb);
|
|
|
+- }
|
|
|
++ dev_kfree_skb(slot->skb);
|
|
|
++
|
|
|
++ if (!slot->dma_addr)
|
|
|
++ continue;
|
|
|
++
|
|
|
++ if (le32_to_cpu(dma_desc[i].ctl0) & BGMAC_DESC_CTL0_SOF) /* head buffer was mapped with dma_map_single */
|
|
|
++ dma_unmap_single(dma_dev, slot->dma_addr,
|
|
|
++ len, DMA_TO_DEVICE);
|
|
|
++ else
|
|
|
++ dma_unmap_page(dma_dev, slot->dma_addr,
|
|
|
++ len, DMA_TO_DEVICE);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_devic
|
|
|
+ goto err_dma_free;
|
|
|
+ }
|
|
|
+
|
|
|
++ net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
|
|
|
++ net_dev->hw_features = net_dev->features;
|
|
|
++ net_dev->vlan_features = net_dev->features;
|
|
|
++
|
|
|
+ err = register_netdev(bgmac->net_dev);
|
|
|
+ if (err) {
|
|
|
+ bgmac_err(bgmac, "Cannot register net device\n");
|