|
@@ -0,0 +1,78 @@
|
|
|
|
+From: Alexander Lobakin <[email protected]>
|
|
|
|
+Date: Fri, 15 Nov 2019 12:11:35 +0300
|
|
|
|
+Subject: [PATCH] net: core: allow fast GRO for skbs with Ethernet header in
|
|
|
|
+ head
|
|
|
|
+
|
|
|
|
+Commit 78d3fd0b7de8 ("gro: Only use skb_gro_header for completely
|
|
|
|
+non-linear packets") back in May'09 (v2.6.31-rc1) has changed the
|
|
|
|
+original condition '!skb_headlen(skb)' to
|
|
|
|
+'skb->mac_header == skb->tail' in gro_reset_offset() saying: "Since
|
|
|
|
+the drivers that need this optimisation all provide completely
|
|
|
|
+non-linear packets" (note that this condition has become the current
|
|
|
|
+'skb_mac_header(skb) == skb_tail_pointer(skb)' later with commit
|
|
|
|
+ced14f6804a9 ("net: Correct comparisons and calculations using
|
|
|
|
+skb->tail and skb-transport_header") without any functional changes).
|
|
|
|
+
|
|
|
|
+For now, we have the following rough statistics for v5.4-rc7:
|
|
|
|
+1) napi_gro_frags: 14
|
|
|
|
+2) napi_gro_receive with skb->head containing (most of) payload: 83
|
|
|
|
+3) napi_gro_receive with skb->head containing all the headers: 20
|
|
|
|
+4) napi_gro_receive with skb->head containing only Ethernet header: 2
|
|
|
|
+
|
|
|
|
+With the current condition, fast GRO with the usage of
|
|
|
|
+NAPI_GRO_CB(skb)->frag0 is available only in the [1] case.
|
|
|
|
+Packets pushed by [2] and [3] go through the 'slow' path, but
|
|
|
|
+it's not a problem for them as they already contain all the needed
|
|
|
|
+headers in skb->head, so pskb_may_pull() only moves skb->data.
|
|
|
|
+
|
|
|
|
+The layout of skbs in the fourth [4] case at the moment of
|
|
|
|
+dev_gro_receive() is identical to skbs that have come through [1],
|
|
|
|
+as napi_frags_skb() pulls Ethernet header to skb->head. The only
|
|
|
|
+difference is that the mentioned condition is always false for them,
|
|
|
|
+because skb_put() and friends irreversibly alter the tail pointer.
|
|
|
|
+They also go through the 'slow' path, but now every single
|
|
|
|
+pskb_may_pull() in every single .gro_receive() will call the *really*
|
|
|
|
+slow __pskb_pull_tail() to pull headers to head. This significantly
|
|
|
|
+decreases the overall performance for no visible reasons.
|
|
|
|
+
|
|
|
|
+The only two users of method [4] are:
|
|
|
|
+* drivers/staging/qlge
|
|
|
|
+* drivers/net/wireless/iwlwifi (all three variants: dvm, mvm, mvm-mq)
|
|
|
|
+
|
|
|
|
+Note that in case with wireless drivers we can't use [1]
|
|
|
|
+(napi_gro_frags()) at least for now and mac80211 stack always
|
|
|
|
+performs pushes and pulls anyways, so performance hit is unavoidable.
|
|
|
|
+
|
|
|
|
+At the moment of v2.6.31 the mentioned change was necessary (that's
|
|
|
|
+why I don't add the "Fixes:" tag), but it became obsolete since
|
|
|
|
+skb_gro_mac_header() has gone in commit a50e233c50db ("net-gro:
|
|
|
|
+restore frag0 optimization"), so we can simply revert the condition
|
|
|
|
+in gro_reset_offset() to allow skbs from [4] to go through the 'fast'
|
|
|
|
+path just like in case [1].
|
|
|
|
+
|
|
|
|
+This was tested on a 600 MHz MIPS CPU and a custom driver and this
|
|
|
|
+patch gave boosts up to 40 Mbps to method [4] in both directions
|
|
|
|
+compared to net-next, which made overall performance relatively
|
|
|
|
+close to [1] (without it, [4] is the slowest).
|
|
|
|
+
|
|
|
|
+v2:
|
|
|
|
+- Add more references and explanations to commit message
|
|
|
|
+- Fix some typos ibid
|
|
|
|
+- No functional changes
|
|
|
|
+
|
|
|
|
+Signed-off-by: Alexander Lobakin <[email protected]>
|
|
|
|
+Signed-off-by: David S. Miller <[email protected]>
|
|
|
|
+---
|
|
|
|
+
|
|
|
|
+--- a/net/core/dev.c
|
|
|
|
++++ b/net/core/dev.c
|
|
|
|
+@@ -5403,8 +5403,7 @@ static void skb_gro_reset_offset(struct
|
|
|
|
+ NAPI_GRO_CB(skb)->frag0 = NULL;
|
|
|
|
+ NAPI_GRO_CB(skb)->frag0_len = 0;
|
|
|
|
+
|
|
|
|
+- if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
|
|
|
|
+- pinfo->nr_frags &&
|
|
|
|
++ if (!skb_headlen(skb) && pinfo->nr_frags &&
|
|
|
|
+ !PageHighMem(skb_frag_page(frag0))) {
|
|
|
|
+ NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
|
|
|
|
+ NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
|