|
|
@@ -0,0 +1,80 @@
|
|
|
+From 98e09386c0ef4dfd48af7ba60ff908f0d525cdee Mon Sep 17 00:00:00 2001
|
|
|
+From: Eric Dumazet <[email protected]>
|
|
|
+Date: Wed, 13 Nov 2013 14:32:54 +0000
|
|
|
+Subject: tcp: tsq: restore minimal amount of queueing
|
|
|
+
|
|
|
+After commit c9eeec26e32e ("tcp: TSQ can use a dynamic limit"), several
|
|
|
+users reported throughput regressions, notably on mvneta and wifi
|
|
|
+adapters.
|
|
|
+
|
|
|
+802.11 AMPDU requires a fair amount of queueing to be effective.
|
|
|
+
|
|
|
+This patch partially reverts the change done in tcp_write_xmit()
|
|
|
+so that the minimal amount is sysctl_tcp_limit_output_bytes.
|
|
|
+
|
|
|
+It also removes the use of this sysctl while building skb stored
|
|
|
+in write queue, as TSO autosizing does the right thing anyway.
|
|
|
+
|
|
|
+Users with well-behaved NICs and a correct qdisc (like sch_fq)
|
|
|
+can then lower the default sysctl_tcp_limit_output_bytes value from
|
|
|
+128KB to 8KB.
|
|
|
+
|
|
|
+This new usage of sysctl_tcp_limit_output_bytes permits driver
|
|
|
+authors to check how their driver performs when/if the value is set
|
|
|
+to a minimum of 4KB.
|
|
|
+
|
|
|
+Normally, line rate for a single TCP flow should be possible,
|
|
|
+but some drivers rely on timers to perform TX completion and
|
|
|
+too long TX completion delays prevent reaching full throughput.
|
|
|
+
|
|
|
+Fixes: c9eeec26e32e ("tcp: TSQ can use a dynamic limit")
|
|
|
+Signed-off-by: Eric Dumazet <[email protected]>
|
|
|
+Reported-by: Sujith Manoharan <[email protected]>
|
|
|
+Reported-by: Arnaud Ebalard <[email protected]>
|
|
|
+Tested-by: Sujith Manoharan <[email protected]>
|
|
|
+Cc: Felix Fietkau <[email protected]>
|
|
|
+Signed-off-by: David S. Miller <[email protected]>
|
|
|
+---
|
|
|
+--- a/Documentation/networking/ip-sysctl.txt
|
|
|
++++ b/Documentation/networking/ip-sysctl.txt
|
|
|
+@@ -571,9 +571,6 @@ tcp_limit_output_bytes - INTEGER
|
|
|
+ typical pfifo_fast qdiscs.
|
|
|
+ tcp_limit_output_bytes limits the number of bytes on qdisc
|
|
|
+ or device to reduce artificial RTT/cwnd and reduce bufferbloat.
|
|
|
+- Note: For GSO/TSO enabled flows, we try to have at least two
|
|
|
+- packets in flight. Reducing tcp_limit_output_bytes might also
|
|
|
+- reduce the size of individual GSO packet (64KB being the max)
|
|
|
+ Default: 131072
|
|
|
+
|
|
|
+ tcp_challenge_ack_limit - INTEGER
|
|
|
+--- a/net/ipv4/tcp.c
|
|
|
++++ b/net/ipv4/tcp.c
|
|
|
+@@ -807,12 +807,6 @@ static unsigned int tcp_xmit_size_goal(s
|
|
|
+ xmit_size_goal = min_t(u32, gso_size,
|
|
|
+ sk->sk_gso_max_size - 1 - hlen);
|
|
|
+
|
|
|
+- /* TSQ : try to have at least two segments in flight
|
|
|
+- * (one in NIC TX ring, another in Qdisc)
|
|
|
+- */
|
|
|
+- xmit_size_goal = min_t(u32, xmit_size_goal,
|
|
|
+- sysctl_tcp_limit_output_bytes >> 1);
|
|
|
+-
|
|
|
+ xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
|
|
|
+
|
|
|
+ /* We try hard to avoid divides here */
|
|
|
+--- a/net/ipv4/tcp_output.c
|
|
|
++++ b/net/ipv4/tcp_output.c
|
|
|
+@@ -1866,8 +1866,12 @@ static bool tcp_write_xmit(struct sock *
|
|
|
+ * - better RTT estimation and ACK scheduling
|
|
|
+ * - faster recovery
|
|
|
+ * - high rates
|
|
|
++ * Alas, some drivers / subsystems require a fair amount
|
|
|
++ * of queued bytes to ensure line rate.
|
|
|
++ * One example is wifi aggregation (802.11 AMPDU)
|
|
|
+ */
|
|
|
+- limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
|
|
|
++ limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
|
|
|
++ sk->sk_pacing_rate >> 10);
|
|
|
+
|
|
|
+ if (atomic_read(&sk->sk_wmem_alloc) > limit) {
|
|
|
+ set_bit(TSQ_THROTTLED, &tp->tsq_flags);
|