Add IPv6 dhcp.lan.max_preferred_lifetime and dhcp.lan.max_valid_lifetime LuCI options

Refresh LRNG and BBRv3 patches
Refresh IPv6 fixed patches

Signed-off-by: Nicholas Sun <nicholas-sun@outlook.com>
nicholas-opensource committed Dec 27, 2023
1 parent 9ffb75b commit 85f4456
Showing 64 changed files with 794 additions and 558 deletions.
@@ -0,0 +1,95 @@
From 0384efbd7b715430d9321a9645565a097e450a27 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Tue, 27 Mar 2018 18:33:29 -0700
Subject: [PATCH 02/21] net-tcp_rate: consolidate inflight tracking approaches
in TCP

In order to track CE marks per rate sample (one round trip), we'll
need to snap the starting tcp delivered_ce count in the packet
meta header (tcp_skb_cb). But there's not enough space.

The good news is that "last_in_flight" in the header, used by the
NV congestion control, is almost equivalent to "delivered". In
fact "delivered" is more accurate because it additionally accounts
for out-of-order packets. Therefore we can remove it to make room
for the CE tracking.

This would make delayed ACK detection slightly less accurate but the
impact is negligible since it's not used for any critical control.

Effort: net-tcp_rate
Origin-9xx-SHA1: ddcd46ec85d5f1c4454258af0c54b3254c0d64a7
Change-Id: I1a184aad6d101c981ac7f2f275aa9417ff856910
---
include/net/tcp.h | 5 ++---
net/ipv4/tcp_input.c | 11 +++++------
net/ipv4/tcp_output.c | 2 --
3 files changed, 7 insertions(+), 11 deletions(-)

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -872,9 +872,8 @@ struct tcp_skb_cb {
union {
struct {
/* There is space for up to 24 bytes */
- __u32 in_flight:30,/* Bytes in flight at transmit */
- is_app_limited:1, /* cwnd not fully used? */
- unused:1;
+ __u32 is_app_limited:1, /* cwnd not fully used? */
+ unused:31;
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3255,7 +3255,6 @@ static int tcp_clean_rtx_queue(struct so
long seq_rtt_us = -1L;
long ca_rtt_us = -1L;
u32 pkts_acked = 0;
- u32 last_in_flight = 0;
bool rtt_update;
int flag = 0;

@@ -3291,7 +3290,6 @@ static int tcp_clean_rtx_queue(struct so
if (!first_ackt)
first_ackt = last_ackt;

- last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
if (before(start_seq, reord))
reord = start_seq;
if (!after(scb->end_seq, tp->high_seq))
@@ -3357,8 +3355,8 @@ static int tcp_clean_rtx_queue(struct so
seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);

- if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
- last_in_flight && !prior_sacked && fully_acked &&
+ if (pkts_acked == 1 && fully_acked && !prior_sacked &&
+ (tp->snd_una - prior_snd_una) < tp->mss_cache &&
sack->rate->prior_delivered + 1 == tp->delivered &&
!(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
/* Conservatively mark a delayed ACK. It's typically
@@ -3415,9 +3413,10 @@ static int tcp_clean_rtx_queue(struct so

if (icsk->icsk_ca_ops->pkts_acked) {
struct ack_sample sample = { .pkts_acked = pkts_acked,
- .rtt_us = sack->rate->rtt_us,
- .in_flight = last_in_flight };
+ .rtt_us = sack->rate->rtt_us };

+ sample.in_flight = tp->mss_cache *
+ (tp->delivered - sack->rate->prior_delivered);
icsk->icsk_ca_ops->pkts_acked(sk, &sample);
}

--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1253,8 +1253,6 @@ static int __tcp_transmit_skb(struct soc
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
if (clone_it) {
- TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- - tp->snd_una;
oskb = skb;

tcp_skb_tsorted_save(oskb) {
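
For illustration, here is a minimal standalone C sketch (a userspace mock of the layout, not the kernel's actual headers) showing why dropping the 30-bit in_flight field frees room in the tcp_skb_cb scratch area without changing its size; the follow-up patch reuses the freed bits for CE tracking:

#include <stdint.h>
#include <stdio.h>

/* Mock of the tx scratch area in tcp_skb_cb, before and after the
 * patch. Field names follow the diff above; this is illustrative
 * userspace code, not the kernel definition. */
struct tx_before {
	uint32_t in_flight:30,     /* bytes in flight at transmit */
	         is_app_limited:1, /* cwnd not fully used? */
	         unused:1;
	uint32_t delivered;        /* pkts S/ACKed upon tx, incl retrans */
};

struct tx_after {
	uint32_t is_app_limited:1, /* cwnd not fully used? */
	         unused:31;        /* freed bits, reused for delivered_ce */
	uint32_t delivered;
};

int main(void)
{
	/* Same footprint: only the bit allocation inside the first
	 * 32-bit word changes. */
	printf("before: %zu bytes, after: %zu bytes\n",
	       sizeof(struct tx_before), sizeof(struct tx_after));
	return 0;
}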
@@ -0,0 +1,70 @@
From d97bc46a9d0a65cb72a226ef79f5c815d6b2659c Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Tue, 27 Mar 2018 18:01:46 -0700
Subject: [PATCH 03/21] net-tcp_rate: account for CE marks in rate sample

This patch counts the number of delivered packets that carry a CE
mark in the rate sample, using an approach similar to delivery accounting.

Effort: net-tcp_rate
Origin-9xx-SHA1: 710644db434c3da335a7c8b72207a671ccbb5cf8
Change-Id: I0968fb33fe19b5c774e8c3afd2685558a6ec8710
---
include/net/tcp.h | 6 +++++-
net/ipv4/tcp_rate.c | 6 ++++++
2 files changed, 11 insertions(+), 1 deletion(-)

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -871,9 +871,11 @@ struct tcp_skb_cb {
__u32 ack_seq; /* Sequence number ACK'd */
union {
struct {
+#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
/* There is space for up to 24 bytes */
__u32 is_app_limited:1, /* cwnd not fully used? */
- unused:31;
+ delivered_ce:20,
+ unused:11;
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
@@ -1025,7 +1027,9 @@ struct ack_sample {
struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
+ u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
s32 delivered; /* number of packets delivered over interval */
+ s32 delivered_ce; /* number of packets delivered w/ CE marks*/
long interval_us; /* time for tp->delivered to incr "delivered" */
u32 snd_interval_us; /* snd interval for delivered packets */
u32 rcv_interval_us; /* rcv interval for delivered packets */
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -65,6 +65,7 @@ void tcp_rate_skb_sent(struct sock *sk,
TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp;
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
+ TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
}

@@ -90,6 +91,7 @@ void tcp_rate_skb_delivered(struct sock
if (!rs->prior_delivered ||
tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
scb->end_seq, rs->last_end_seq)) {
+ rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
@@ -143,6 +145,10 @@ void tcp_rate_gen(struct sock *sk, u32 d
}
rs->delivered = tp->delivered - rs->prior_delivered;

+ rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
+ /* delivered_ce occupies less than 32 bits in the skb control block */
+ rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK;
+
/* Model sending data and receiving ACKs as separate pipeline phases
* for a window. Usually the ACK phase is longer, but with ACK
* compression the send phase can be longer. To be safe we use the
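
The masking in tcp_rate_gen() is the subtle part: tx.delivered_ce was snapshotted into a 20-bit field, so the subtraction must be reduced modulo 2^20 to survive the snapshot's truncation. A small userspace sketch of the arithmetic (assuming fewer than 2^20 CE marks per rate interval, which the mask requires):

#include <assert.h>
#include <stdint.h>

#define TCPCB_DELIVERED_CE_MASK ((1U << 20) - 1)

/* Per-interval CE delta where the prior snapshot was truncated to
 * 20 bits at transmit time; modular arithmetic makes the truncation
 * harmless as long as the true delta fits in 20 bits. */
static uint32_t delivered_ce_delta(uint32_t now, uint32_t prior20)
{
	return (now - prior20) & TCPCB_DELIVERED_CE_MASK;
}

int main(void)
{
	uint32_t full_at_snap = 0x1FFFFEu;  /* real 32-bit counter */
	uint32_t snap = full_at_snap & TCPCB_DELIVERED_CE_MASK;
	uint32_t now = full_at_snap + 5;    /* five CE marks later */

	assert(delivered_ce_delta(now, snap) == 5);
	return 0;
}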
@@ -0,0 +1,93 @@
From 9fffe7e05bb0da5aef700cc246a71fc33672c81e Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Fri, 13 May 2022 11:33:57 -0700
Subject: [PATCH 16/21] net: allow gso_max_size to exceed 65536

The code for gso_max_size was originally added to allow debugging and
working around buggy devices that couldn't support TSO with blocks 64K
in size. The original reason for limiting it to 64K was that this was
the existing limit of the IPv4 and non-jumbogram IPv6 length fields.

With the addition of Big TCP we can remove this limit and allow the value
to potentially go up to UINT_MAX and instead be limited by the tso_max_size
value.

So in order to support this we need to go through and clean up the
remaining users of the gso_max_size value so that the values will cap at
64K for non-TCPv6 flows. In addition we can clean up the GSO_MAX_SIZE value
so that 64K becomes GSO_LEGACY_MAX_SIZE and UINT_MAX will now be the upper
limit for GSO_MAX_SIZE.

v6: (edumazet) fixed a compile error if CONFIG_IPV6=n,
in a new sk_trim_gso_size() helper.
netif_set_tso_max_size() caps the requested TSO size
with GSO_MAX_SIZE.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/linux/netdevice.h | 4 +++-
net/bpf/test_run.c | 2 +-
net/core/dev.c | 2 +-
net/ipv4/tcp_output.c | 2 +-
10 files changed, 16 insertions(+), 10 deletions(-)

--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2251,7 +2251,9 @@ struct net_device {
const struct rtnl_link_ops *rtnl_link_ops;

/* for setting kernel sock attribute on TCP connection setup */
-#define GSO_MAX_SIZE 65536
+#define GSO_LEGACY_MAX_SIZE 65536u
+#define GSO_MAX_SIZE UINT_MAX
+
unsigned int gso_max_size;
#define GSO_MAX_SEGS 65535
u16 gso_max_segs;
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -524,7 +524,7 @@ static int convert___skb_to_skb(struct s
cb->pkt_len = skb->len;
} else {
if (__skb->wire_len < skb->len ||
- __skb->wire_len > GSO_MAX_SIZE)
+ __skb->wire_len > GSO_LEGACY_MAX_SIZE)
return -EINVAL;
cb->pkt_len = __skb->wire_len;
}
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10858,7 +10858,7 @@ struct net_device *alloc_netdev_mqs(int

dev_net_set(dev, &init_net);

- dev->gso_max_size = GSO_MAX_SIZE;
+ dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->upper_level = 1;
dev->lower_level = 1;
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1548,7 +1548,7 @@ int tcp_fragment(struct sock *sk, enum t
* SO_SNDBUF values.
* Also allow first and last skb in retransmit queue to be split.
*/
- limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_LEGACY_MAX_SIZE);
if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
skb != tcp_rtx_queue_head(sk) &&
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -134,7 +134,8 @@ void sctp_packet_config(struct sctp_pack
dst_hold(tp->dst);
sk_setup_caps(sk, tp->dst);
}
- packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size
+ packet->max_size = sk_can_gso(sk) ? min(READ_ONCE(tp->dst->dev->gso_max_size),
+ GSO_LEGACY_MAX_SIZE)
: asoc->pathmtu;
rcu_read_unlock();
}
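
The sctp hunk shows the general pattern of this patch: once a device's gso_max_size may exceed 64K, every legacy consumer clamps it back to GSO_LEGACY_MAX_SIZE. A short sketch of the idea (effective_gso_size is a hypothetical helper for illustration, not a kernel function):

#include <limits.h>
#include <stdint.h>

#define GSO_LEGACY_MAX_SIZE 65536u
#define GSO_MAX_SIZE UINT_MAX

/* Hypothetical helper mirroring the per-call-site clamping: only
 * Big TCP (IPv6 jumbogram-capable) flows may use the full device
 * limit; all other protocols stay within the legacy 64K bound. */
static uint32_t effective_gso_size(uint32_t dev_gso_max_size,
				   int big_tcp_capable_flow)
{
	if (big_tcp_capable_flow)
		return dev_gso_max_size;
	return dev_gso_max_size < GSO_LEGACY_MAX_SIZE ?
	       dev_gso_max_size : GSO_LEGACY_MAX_SIZE;
}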
@@ -1,7 +1,7 @@
From 67a575e5256a44bef46a8db3e1f0b9c18398467e Mon Sep 17 00:00:00 2001
From 945918a4e7fdac837f3589e51296683857909403 Mon Sep 17 00:00:00 2001
From: Mubashir Adnan Qureshi <mubashirq@google.com>
Date: Wed, 26 Oct 2022 13:51:11 +0000
Subject: [PATCH 17/21] tcp: add sysctls for TCP PLB parameters
Subject: [PATCH 1/2] tcp: add sysctls for TCP PLB parameters

PLB (Protective Load Balancing) is a host-based mechanism for load
balancing across switch links. It leverages congestion signals (e.g. ECN)
@@ -24,10 +24,10 @@ Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
Documentation/networking/ip-sysctl.rst | 75 ++++++++++++++++++++++++++
include/net/netns/ipv4.h | 6 +++
net/ipv4/sysctl_net_ipv4.c | 44 +++++++++++++++
include/net/netns/ipv4.h | 5 ++
net/ipv4/sysctl_net_ipv4.c | 43 +++++++++++++++
net/ipv4/tcp_ipv4.c | 8 +++
4 files changed, 133 insertions(+)
4 files changed, 131 insertions(+)

--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -115,32 +115,30 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>

--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -177,6 +177,12 @@ struct netns_ipv4 {
@@ -176,6 +176,11 @@ struct netns_ipv4 {
unsigned int sysctl_tcp_fastopen_blackhole_timeout;
atomic_t tfo_active_disable_times;
unsigned long tfo_active_disable_stamp;

+ u8 sysctl_tcp_plb_enabled;
+ u8 sysctl_tcp_plb_idle_rehash_rounds;
+ u8 sysctl_tcp_plb_rehash_rounds;
+ u8 sysctl_tcp_plb_suspend_rto_sec;
+ int sysctl_tcp_plb_cong_thresh;
+
int sysctl_udp_wmem_min;
int sysctl_udp_rmem_min;

--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -54,6 +54,9 @@ static int one_day_secs = 24 * 3600;
@@ -53,6 +53,8 @@ static u32 u32_max_div_HZ = UINT_MAX / H
static int one_day_secs = 24 * 3600;
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;

+static int tcp_plb_max_rounds = 31;
+static int tcp_plb_max_cong_thresh = 256;
+
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;

@@ -1362,6 +1365,47 @@ static struct ctl_table ipv4_net_table[]
@@ -1362,6 +1364,47 @@ static struct ctl_table ipv4_net_table[]
.extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
@@ -190,7 +188,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>

--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3205,6 +3205,14 @@ static int __net_init tcp_sk_init(struct
@@ -3206,6 +3206,14 @@ static int __net_init tcp_sk_init(struct
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);

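
The collapsed tcp_sk_init() hunk sets the per-netns PLB defaults. Based on the visible lines here and the upstream PLB submission, they look approximately like the following model (the plb_enabled and plb_idle_rehash_rounds values come from upstream and are not visible in this excerpt):

#include <stdint.h>

#define TCP_PLB_SCALE 8 /* cong_thresh is a fixed-point fraction of 256 */

/* Userspace model of the per-netns defaults set in tcp_sk_init();
 * illustrative only, not the kernel structure. */
struct plb_sysctl_defaults {
	uint8_t enabled;
	uint8_t idle_rehash_rounds;
	uint8_t rehash_rounds;
	uint8_t suspend_rto_sec;
	int     cong_thresh;
};

static const struct plb_sysctl_defaults plb_defaults = {
	.enabled            = 0,  /* off unless the admin opts in */
	.idle_rehash_rounds = 3,
	.rehash_rounds      = 12,
	.suspend_rto_sec    = 60,
	/* Default congestion threshold to mark a round is 50% */
	.cong_thresh        = (1 << TCP_PLB_SCALE) / 2, /* 128 of 256 */
};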
@@ -1,7 +1,7 @@
From 45c18820a190e8f028cbbe76ef99cbe08b913e2e Mon Sep 17 00:00:00 2001
From ad14b4ef5ffb6597e6b69b986badd182c98d1497 Mon Sep 17 00:00:00 2001
From: Mubashir Adnan Qureshi <mubashirq@google.com>
Date: Wed, 26 Oct 2022 13:51:12 +0000
Subject: [PATCH 18/21] tcp: add PLB functionality for TCP
Subject: [PATCH 2/2] tcp: add PLB functionality for TCP

Congestion control algorithms track PLB state and cause the connection
to trigger a path change when either of two conditions is satisfied
(see the simplified sketch after this patch):
@@ -34,7 +34,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2173,6 +2173,34 @@ extern void tcp_rack_advance(struct tcp_
@@ -2120,6 +2120,34 @@ extern void tcp_rack_advance(struct tcp_
extern void tcp_rack_reo_timeout(struct sock *sk);
extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);

@@ -82,7 +82,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3211,7 +3211,7 @@ static int __net_init tcp_sk_init(struct
@@ -3212,7 +3212,7 @@ static int __net_init tcp_sk_init(struct
net->ipv4.sysctl_tcp_plb_rehash_rounds = 12;
net->ipv4.sysctl_tcp_plb_suspend_rto_sec = 60;
/* Default congestion threshold for PLB to mark a round is 50% */
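
A simplified userspace model of the two rehash triggers the message describes (patterned on upstream tcp_plb.c but heavily reduced; not the exact kernel logic):

#include <stdbool.h>
#include <stdint.h>

struct plb_state_model {
	uint8_t consec_cong_rounds; /* rounds with ECN ratio >= threshold */
};

/* Trigger 1: enough consecutive congested rounds force a rehash.
 * Trigger 2: a sender that has gone idle (nothing in flight) rehashes
 * after fewer congested rounds, since repathing it is cheap. */
static bool plb_should_rehash(const struct plb_state_model *plb,
			      bool nothing_in_flight,
			      uint8_t rehash_rounds,
			      uint8_t idle_rehash_rounds)
{
	if (plb->consec_cong_rounds >= rehash_rounds)
		return true;
	if (idle_rehash_rounds && nothing_in_flight &&
	    plb->consec_cong_rounds >= idle_rehash_rounds)
		return true;
	return false;
}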
@@ -1,7 +1,7 @@
From ee5316263c5ba74198729ad83aeef714a11b767b Mon Sep 17 00:00:00 2001
From 247736e1b64c6acb6a0e2040f630bb9e8dd8312c Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 11 Jun 2019 12:26:55 -0400
Subject: [PATCH 01/21] net-tcp_bbr: broaden app-limited rate sample detection
Subject: [PATCH 01/18] net-tcp_bbr: broaden app-limited rate sample detection

This commit is a bug fix for the Linux TCP app-limited
(application-limited) logic that is used for collecting rate
@@ -24,14 +24,15 @@ can't know whether inflight was fully using the old cwnd.

Origin-9xx-SHA1: 3fe9b53291e018407780fb8c356adb5666722cbc
Change-Id: I37221506f5166877c2b110753d39bb0757985e68
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
net/ipv4/tcp_input.c | 1 +
net/ipv4/tcp_timer.c | 1 +
2 files changed, 2 insertions(+)

--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3811,6 +3811,7 @@ static int tcp_ack(struct sock *sk, cons
@@ -3822,6 +3822,7 @@ static int tcp_ack(struct sock *sk, cons

prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
rs.prior_in_flight = tcp_packets_in_flight(tp);
Expand All @@ -41,7 +42,7 @@ Change-Id: I37221506f5166877c2b110753d39bb0757985e68
* is in window.
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -611,6 +611,7 @@ void tcp_write_timer_handler(struct sock
@@ -629,6 +629,7 @@ void tcp_write_timer_handler(struct sock
goto out;
}

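
The one-line additions in both hunks call tcp_rate_check_app_limited(). For reference, a simplified userspace model of that helper's checks (field names mirror struct tcp_sock, but this is an illustrative mock, not the kernel source):

#include <stdint.h>

/* Illustrative mock of the fields tcp_rate_check_app_limited() reads. */
struct tcp_model {
	uint32_t write_seq, snd_nxt;   /* bytes queued vs. bytes sent */
	uint32_t mss_cache;
	uint32_t lower_queue_bytes;    /* stand-in for qdisc/NIC backlog */
	uint32_t packets_in_flight, snd_cwnd;
	uint32_t lost_out, retrans_out;
	uint32_t delivered;
	uint32_t app_limited;          /* 0 means not app-limited */
};

/* The flow is marked application-limited only when it is not cwnd-
 * limited, has less than one packet left to send, nothing queued
 * below the stack, and no un-retransmitted losses. */
static void rate_check_app_limited(struct tcp_model *tp)
{
	if (tp->write_seq - tp->snd_nxt < tp->mss_cache &&
	    tp->lower_queue_bytes == 0 &&
	    tp->packets_in_flight < tp->snd_cwnd &&
	    tp->lost_out <= tp->retrans_out) {
		uint32_t mark = tp->delivered + tp->packets_in_flight;

		tp->app_limited = mark ? mark : 1;
	}
}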