Add IPv6 dhcp.lan.max_preferred_lifetime and dhcp.lan.max_valid_lifetime LuCI options

Refresh LRNG and BBRv3 patches
Refresh IPv6 fixed patches

Signed-off-by: Nicholas Sun <nicholas-sun@outlook.com>
nicholas-opensource committed Dec 27, 2023
1 parent 9ffb75b commit 85f4456
Showing 64 changed files with 794 additions and 558 deletions.
@@ -0,0 +1,95 @@
From 0384efbd7b715430d9321a9645565a097e450a27 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Tue, 27 Mar 2018 18:33:29 -0700
Subject: [PATCH 02/21] net-tcp_rate: consolidate inflight tracking approaches
in TCP

In order to track CE marks per rate sample (one round trip), we'll
need to snap the starting tcp delivered_ce count in the packet
meta header (tcp_skb_cb). But there's not enough space.

The good news is that "last_in_flight" in the header, used by the
NV congestion control, is almost equivalent to "delivered". In
fact "delivered" is more accurate because it additionally accounts
for out-of-order packets. Therefore we can remove it to make room
for the CE tracking.

This would make delayed ACK detection slightly less accurate but the
impact is negligible since it's not used for any critical control.

Effort: net-tcp_rate
Origin-9xx-SHA1: ddcd46ec85d5f1c4454258af0c54b3254c0d64a7
Change-Id: I1a184aad6d101c981ac7f2f275aa9417ff856910
---
include/net/tcp.h | 5 ++---
net/ipv4/tcp_input.c | 11 +++++------
net/ipv4/tcp_output.c | 2 --
3 files changed, 7 insertions(+), 11 deletions(-)

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -872,9 +872,8 @@ struct tcp_skb_cb {
union {
struct {
/* There is space for up to 24 bytes */
- __u32 in_flight:30,/* Bytes in flight at transmit */
- is_app_limited:1, /* cwnd not fully used? */
- unused:1;
+ __u32 is_app_limited:1, /* cwnd not fully used? */
+ unused:31;
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3255,7 +3255,6 @@ static int tcp_clean_rtx_queue(struct so
long seq_rtt_us = -1L;
long ca_rtt_us = -1L;
u32 pkts_acked = 0;
- u32 last_in_flight = 0;
bool rtt_update;
int flag = 0;

@@ -3291,7 +3290,6 @@ static int tcp_clean_rtx_queue(struct so
if (!first_ackt)
first_ackt = last_ackt;

- last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
if (before(start_seq, reord))
reord = start_seq;
if (!after(scb->end_seq, tp->high_seq))
@@ -3357,8 +3355,8 @@ static int tcp_clean_rtx_queue(struct so
seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);

- if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
- last_in_flight && !prior_sacked && fully_acked &&
+ if (pkts_acked == 1 && fully_acked && !prior_sacked &&
+ (tp->snd_una - prior_snd_una) < tp->mss_cache &&
sack->rate->prior_delivered + 1 == tp->delivered &&
!(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
/* Conservatively mark a delayed ACK. It's typically
@@ -3415,9 +3413,10 @@ static int tcp_clean_rtx_queue(struct so

if (icsk->icsk_ca_ops->pkts_acked) {
struct ack_sample sample = { .pkts_acked = pkts_acked,
- .rtt_us = sack->rate->rtt_us,
- .in_flight = last_in_flight };
+ .rtt_us = sack->rate->rtt_us };

+ sample.in_flight = tp->mss_cache *
+ (tp->delivered - sack->rate->prior_delivered);
icsk->icsk_ca_ops->pkts_acked(sk, &sample);
}

--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1253,8 +1253,6 @@ static int __tcp_transmit_skb(struct soc
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
if (clone_it) {
- TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- - tp->snd_una;
oskb = skb;

tcp_skb_tsorted_save(oskb) {
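
For illustration, here is a minimal standalone C sketch (a userspace mock of the layout, not the kernel's actual headers) showing why dropping the 30-bit in_flight field frees room in the tcp_skb_cb scratch area without changing its size; the follow-up patch reuses the freed bits for CE tracking:

#include <stdint.h>
#include <stdio.h>

/* Mock of the tx scratch area in tcp_skb_cb, before and after the
 * patch. Field names follow the diff above; this is illustrative
 * userspace code, not the kernel definition. */
struct tx_before {
	uint32_t in_flight:30,     /* bytes in flight at transmit */
	         is_app_limited:1, /* cwnd not fully used? */
	         unused:1;
	uint32_t delivered;        /* pkts S/ACKed upon tx, incl retrans */
};

struct tx_after {
	uint32_t is_app_limited:1, /* cwnd not fully used? */
	         unused:31;        /* freed bits, reused for delivered_ce */
	uint32_t delivered;
};

int main(void)
{
	/* Same footprint: only the bit allocation inside the first
	 * 32-bit word changes. */
	printf("before: %zu bytes, after: %zu bytes\n",
	       sizeof(struct tx_before), sizeof(struct tx_after));
	return 0;
}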
@@ -0,0 +1,70 @@
From d97bc46a9d0a65cb72a226ef79f5c815d6b2659c Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Tue, 27 Mar 2018 18:01:46 -0700
Subject: [PATCH 03/21] net-tcp_rate: account for CE marks in rate sample

This patch counts the number of delivered packets that carry a CE
mark in the rate sample, using an approach similar to delivery accounting.

Effort: net-tcp_rate
Origin-9xx-SHA1: 710644db434c3da335a7c8b72207a671ccbb5cf8
Change-Id: I0968fb33fe19b5c774e8c3afd2685558a6ec8710
---
include/net/tcp.h | 6 +++++-
net/ipv4/tcp_rate.c | 6 ++++++
2 files changed, 11 insertions(+), 1 deletion(-)

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -871,9 +871,11 @@ struct tcp_skb_cb {
__u32 ack_seq; /* Sequence number ACK'd */
union {
struct {
+#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
/* There is space for up to 24 bytes */
__u32 is_app_limited:1, /* cwnd not fully used? */
- unused:31;
+ delivered_ce:20,
+ unused:11;
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
@@ -1025,7 +1027,9 @@ struct ack_sample {
struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
+ u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
s32 delivered; /* number of packets delivered over interval */
+ s32 delivered_ce; /* number of packets delivered w/ CE marks*/
long interval_us; /* time for tp->delivered to incr "delivered" */
u32 snd_interval_us; /* snd interval for delivered packets */
u32 rcv_interval_us; /* rcv interval for delivered packets */
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -65,6 +65,7 @@ void tcp_rate_skb_sent(struct sock *sk,
TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp;
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
+ TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
}

@@ -90,6 +91,7 @@ void tcp_rate_skb_delivered(struct sock
if (!rs->prior_delivered ||
tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
scb->end_seq, rs->last_end_seq)) {
+ rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
@@ -143,6 +145,10 @@ void tcp_rate_gen(struct sock *sk, u32 d
}
rs->delivered = tp->delivered - rs->prior_delivered;

+ rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
+ /* delivered_ce occupies less than 32 bits in the skb control block */
+ rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK;
+
/* Model sending data and receiving ACKs as separate pipeline phases
* for a window. Usually the ACK phase is longer, but with ACK
* compression the send phase can be longer. To be safe we use the
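
The masking in tcp_rate_gen() is the subtle part: tx.delivered_ce was snapshotted into a 20-bit field, so the subtraction must be reduced modulo 2^20 to survive the snapshot's truncation. A small userspace sketch of the arithmetic (assuming fewer than 2^20 CE marks per rate interval, which the mask requires):

#include <assert.h>
#include <stdint.h>

#define TCPCB_DELIVERED_CE_MASK ((1U << 20) - 1)

/* Per-interval CE delta where the prior snapshot was truncated to
 * 20 bits at transmit time; modular arithmetic makes the truncation
 * harmless as long as the true delta fits in 20 bits. */
static uint32_t delivered_ce_delta(uint32_t now, uint32_t prior20)
{
	return (now - prior20) & TCPCB_DELIVERED_CE_MASK;
}

int main(void)
{
	uint32_t full_at_snap = 0x1FFFFEu;  /* real 32-bit counter */
	uint32_t snap = full_at_snap & TCPCB_DELIVERED_CE_MASK;
	uint32_t now = full_at_snap + 5;    /* five CE marks later */

	assert(delivered_ce_delta(now, snap) == 5);
	return 0;
}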
@@ -0,0 +1,93 @@
From 9fffe7e05bb0da5aef700cc246a71fc33672c81e Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Fri, 13 May 2022 11:33:57 -0700
Subject: [PATCH 16/21] net: allow gso_max_size to exceed 65536

The code for gso_max_size was originally added to allow debugging and
working around buggy devices that couldn't support TSO with blocks 64K
in size. The original reason for limiting it to 64K was that this was
the existing limit of the IPv4 and non-jumbogram IPv6 length fields.

With the addition of Big TCP we can remove this limit and allow the value
to potentially go up to UINT_MAX and instead be limited by the tso_max_size
value.

So in order to support this we need to go through and clean up the
remaining users of the gso_max_size value so that the values will cap at
64K for non-TCPv6 flows. In addition we can clean up the GSO_MAX_SIZE value
so that 64K becomes GSO_LEGACY_MAX_SIZE and UINT_MAX will now be the upper
limit for GSO_MAX_SIZE.

v6: (edumazet) fixed a compile error if CONFIG_IPV6=n,
in a new sk_trim_gso_size() helper.
netif_set_tso_max_size() caps the requested TSO size
with GSO_MAX_SIZE.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/linux/netdevice.h | 4 +++-
net/bpf/test_run.c | 2 +-
net/core/dev.c | 2 +-
net/ipv4/tcp_output.c | 2 +-
10 files changed, 16 insertions(+), 10 deletions(-)

--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2251,7 +2251,9 @@ struct net_device {
const struct rtnl_link_ops *rtnl_link_ops;

/* for setting kernel sock attribute on TCP connection setup */
-#define GSO_MAX_SIZE 65536
+#define GSO_LEGACY_MAX_SIZE 65536u
+#define GSO_MAX_SIZE UINT_MAX
+
unsigned int gso_max_size;
#define GSO_MAX_SEGS 65535
u16 gso_max_segs;
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -524,7 +524,7 @@ static int convert___skb_to_skb(struct s
cb->pkt_len = skb->len;
} else {
if (__skb->wire_len < skb->len ||
- __skb->wire_len > GSO_MAX_SIZE)
+ __skb->wire_len > GSO_LEGACY_MAX_SIZE)
return -EINVAL;
cb->pkt_len = __skb->wire_len;
}
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10858,7 +10858,7 @@ struct net_device *alloc_netdev_mqs(int

dev_net_set(dev, &init_net);

- dev->gso_max_size = GSO_MAX_SIZE;
+ dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->upper_level = 1;
dev->lower_level = 1;
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1548,7 +1548,7 @@ int tcp_fragment(struct sock *sk, enum t
* SO_SNDBUF values.
* Also allow first and last skb in retransmit queue to be split.
*/
- limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_LEGACY_MAX_SIZE);
if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
skb != tcp_rtx_queue_head(sk) &&
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -134,7 +134,8 @@ void sctp_packet_config(struct sctp_pack
dst_hold(tp->dst);
sk_setup_caps(sk, tp->dst);
}
- packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size
+ packet->max_size = sk_can_gso(sk) ? min(READ_ONCE(tp->dst->dev->gso_max_size),
+ GSO_LEGACY_MAX_SIZE)
: asoc->pathmtu;
rcu_read_unlock();
}
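
The sctp hunk shows the general pattern of this patch: once a device's gso_max_size may exceed 64K, every legacy consumer clamps it back to GSO_LEGACY_MAX_SIZE. A short sketch of the idea (effective_gso_size is a hypothetical helper for illustration, not a kernel function):

#include <limits.h>
#include <stdint.h>

#define GSO_LEGACY_MAX_SIZE 65536u
#define GSO_MAX_SIZE UINT_MAX

/* Hypothetical helper mirroring the per-call-site clamping: only
 * Big TCP (IPv6 jumbogram-capable) flows may use the full device
 * limit; all other protocols stay within the legacy 64K bound. */
static uint32_t effective_gso_size(uint32_t dev_gso_max_size,
				   int big_tcp_capable_flow)
{
	if (big_tcp_capable_flow)
		return dev_gso_max_size;
	return dev_gso_max_size < GSO_LEGACY_MAX_SIZE ?
	       dev_gso_max_size : GSO_LEGACY_MAX_SIZE;
}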
@@ -1,7 +1,7 @@
From 67a575e5256a44bef46a8db3e1f0b9c18398467e Mon Sep 17 00:00:00 2001
From 945918a4e7fdac837f3589e51296683857909403 Mon Sep 17 00:00:00 2001
From: Mubashir Adnan Qureshi <mubashirq@google.com>
Date: Wed, 26 Oct 2022 13:51:11 +0000
Subject: [PATCH 17/21] tcp: add sysctls for TCP PLB parameters
Subject: [PATCH 1/2] tcp: add sysctls for TCP PLB parameters

PLB (Protective Load Balancing) is a host-based mechanism for load
balancing across switch links. It leverages congestion signals (e.g. ECN)
@@ -24,10 +24,10 @@ Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
Documentation/networking/ip-sysctl.rst | 75 ++++++++++++++++++++++++++
include/net/netns/ipv4.h | 6 +++
net/ipv4/sysctl_net_ipv4.c | 44 +++++++++++++++
include/net/netns/ipv4.h | 5 ++
net/ipv4/sysctl_net_ipv4.c | 43 +++++++++++++++
net/ipv4/tcp_ipv4.c | 8 +++
4 files changed, 133 insertions(+)
4 files changed, 131 insertions(+)

--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -115,32 +115,30 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>

--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -177,6 +177,12 @@ struct netns_ipv4 {
@@ -176,6 +176,11 @@ struct netns_ipv4 {
unsigned int sysctl_tcp_fastopen_blackhole_timeout;
atomic_t tfo_active_disable_times;
unsigned long tfo_active_disable_stamp;

+ u8 sysctl_tcp_plb_enabled;
+ u8 sysctl_tcp_plb_idle_rehash_rounds;
+ u8 sysctl_tcp_plb_rehash_rounds;
+ u8 sysctl_tcp_plb_suspend_rto_sec;
+ int sysctl_tcp_plb_cong_thresh;
+
int sysctl_udp_wmem_min;
int sysctl_udp_rmem_min;

--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -54,6 +54,9 @@ static int one_day_secs = 24 * 3600;
@@ -53,6 +53,8 @@ static u32 u32_max_div_HZ = UINT_MAX / H
static int one_day_secs = 24 * 3600;
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;

+static int tcp_plb_max_rounds = 31;
+static int tcp_plb_max_cong_thresh = 256;
+
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;

@@ -1362,6 +1365,47 @@ static struct ctl_table ipv4_net_table[]
@@ -1362,6 +1364,47 @@ static struct ctl_table ipv4_net_table[]
.extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
@@ -190,7 +188,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>

--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3205,6 +3205,14 @@ static int __net_init tcp_sk_init(struct
@@ -3206,6 +3206,14 @@ static int __net_init tcp_sk_init(struct
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);

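
The collapsed tcp_sk_init() hunk sets the per-netns PLB defaults. Based on the visible lines here and the upstream PLB submission, they look approximately like the following model (the plb_enabled and plb_idle_rehash_rounds values come from upstream and are not visible in this excerpt):

#include <stdint.h>

#define TCP_PLB_SCALE 8 /* cong_thresh is a fixed-point fraction of 256 */

/* Userspace model of the per-netns defaults set in tcp_sk_init();
 * illustrative only, not the kernel structure. */
struct plb_sysctl_defaults {
	uint8_t enabled;
	uint8_t idle_rehash_rounds;
	uint8_t rehash_rounds;
	uint8_t suspend_rto_sec;
	int     cong_thresh;
};

static const struct plb_sysctl_defaults plb_defaults = {
	.enabled            = 0,  /* off unless the admin opts in */
	.idle_rehash_rounds = 3,
	.rehash_rounds      = 12,
	.suspend_rto_sec    = 60,
	/* Default congestion threshold to mark a round is 50% */
	.cong_thresh        = (1 << TCP_PLB_SCALE) / 2, /* 128 of 256 */
};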
@@ -1,7 +1,7 @@
From 45c18820a190e8f028cbbe76ef99cbe08b913e2e Mon Sep 17 00:00:00 2001
From ad14b4ef5ffb6597e6b69b986badd182c98d1497 Mon Sep 17 00:00:00 2001
From: Mubashir Adnan Qureshi <mubashirq@google.com>
Date: Wed, 26 Oct 2022 13:51:12 +0000
Subject: [PATCH 18/21] tcp: add PLB functionality for TCP
Subject: [PATCH 2/2] tcp: add PLB functionality for TCP

Congestion control algorithms track PLB state and cause the connection
to trigger a path change when either of two conditions is satisfied
(see the simplified sketch after this patch):
@@ -34,7 +34,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2173,6 +2173,34 @@ extern void tcp_rack_advance(struct tcp_
@@ -2120,6 +2120,34 @@ extern void tcp_rack_advance(struct tcp_
extern void tcp_rack_reo_timeout(struct sock *sk);
extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);

@@ -82,7 +82,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3211,7 +3211,7 @@ static int __net_init tcp_sk_init(struct
@@ -3212,7 +3212,7 @@ static int __net_init tcp_sk_init(struct
net->ipv4.sysctl_tcp_plb_rehash_rounds = 12;
net->ipv4.sysctl_tcp_plb_suspend_rto_sec = 60;
/* Default congestion threshold for PLB to mark a round is 50% */
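
A simplified userspace model of the two rehash triggers the message describes (patterned on upstream tcp_plb.c but heavily reduced; not the exact kernel logic):

#include <stdbool.h>
#include <stdint.h>

struct plb_state_model {
	uint8_t consec_cong_rounds; /* rounds with ECN ratio >= threshold */
};

/* Trigger 1: enough consecutive congested rounds force a rehash.
 * Trigger 2: a sender that has gone idle (nothing in flight) rehashes
 * after fewer congested rounds, since repathing it is cheap. */
static bool plb_should_rehash(const struct plb_state_model *plb,
			      bool nothing_in_flight,
			      uint8_t rehash_rounds,
			      uint8_t idle_rehash_rounds)
{
	if (plb->consec_cong_rounds >= rehash_rounds)
		return true;
	if (idle_rehash_rounds && nothing_in_flight &&
	    plb->consec_cong_rounds >= idle_rehash_rounds)
		return true;
	return false;
}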
@@ -1,7 +1,7 @@
From ee5316263c5ba74198729ad83aeef714a11b767b Mon Sep 17 00:00:00 2001
From 247736e1b64c6acb6a0e2040f630bb9e8dd8312c Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 11 Jun 2019 12:26:55 -0400
Subject: [PATCH 01/21] net-tcp_bbr: broaden app-limited rate sample detection
Subject: [PATCH 01/18] net-tcp_bbr: broaden app-limited rate sample detection

This commit is a bug fix for the Linux TCP app-limited
(application-limited) logic that is used for collecting rate
@@ -24,14 +24,15 @@ can't know whether inflight was fully using the old cwnd.

Origin-9xx-SHA1: 3fe9b53291e018407780fb8c356adb5666722cbc
Change-Id: I37221506f5166877c2b110753d39bb0757985e68
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
net/ipv4/tcp_input.c | 1 +
net/ipv4/tcp_timer.c | 1 +
2 files changed, 2 insertions(+)

--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3811,6 +3811,7 @@ static int tcp_ack(struct sock *sk, cons
@@ -3822,6 +3822,7 @@ static int tcp_ack(struct sock *sk, cons

prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
rs.prior_in_flight = tcp_packets_in_flight(tp);
Expand All @@ -41,7 +42,7 @@ Change-Id: I37221506f5166877c2b110753d39bb0757985e68
* is in window.
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -611,6 +611,7 @@ void tcp_write_timer_handler(struct sock
@@ -629,6 +629,7 @@ void tcp_write_timer_handler(struct sock
goto out;
}

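
The one-line additions in both hunks call tcp_rate_check_app_limited(). For reference, a simplified userspace model of that helper's checks (field names mirror struct tcp_sock, but this is an illustrative mock, not the kernel source):

#include <stdint.h>

/* Illustrative mock of the fields tcp_rate_check_app_limited() reads. */
struct tcp_model {
	uint32_t write_seq, snd_nxt;   /* bytes queued vs. bytes sent */
	uint32_t mss_cache;
	uint32_t lower_queue_bytes;    /* stand-in for qdisc/NIC backlog */
	uint32_t packets_in_flight, snd_cwnd;
	uint32_t lost_out, retrans_out;
	uint32_t delivered;
	uint32_t app_limited;          /* 0 means not app-limited */
};

/* The flow is marked application-limited only when it is not cwnd-
 * limited, has less than one packet left to send, nothing queued
 * below the stack, and no un-retransmitted losses. */
static void rate_check_app_limited(struct tcp_model *tp)
{
	if (tp->write_seq - tp->snd_nxt < tp->mss_cache &&
	    tp->lower_queue_bytes == 0 &&
	    tp->packets_in_flight < tp->snd_cwnd &&
	    tp->lost_out <= tp->retrans_out) {
		uint32_t mark = tp->delivered + tp->packets_in_flight;

		tp->app_limited = mark ? mark : 1;
	}
}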