
Commit ea0f3dc

Revert "mptcp: correctly ensure to not overfill subflows"
This reverts commit 088ed7d. It has been reported that this new commit is causing issues with kernels v4.14 and older. Probably more adaptions are needed to work on these old kernels. Best to revert this if it is causing more issues than what it solves on these old kernels. Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
1 parent 7aab281 · commit ea0f3dc

File tree

3 files changed: +36, −49 lines


include/net/tcp.h

Lines changed: 0 additions & 1 deletion
@@ -380,7 +380,6 @@ int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib);
 void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb);
 int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
		     gfp_t gfp_mask);
-u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now);
 unsigned int tcp_mss_split_point(const struct sock *sk,
				 const struct sk_buff *skb,
				 unsigned int mss_now,

net/ipv4/tcp_output.c

Lines changed: 1 addition & 1 deletion
@@ -1756,7 +1756,7 @@ EXPORT_SYMBOL(tcp_tso_autosize);
 /* Return the number of segments we want in the skb we are transmitting.
  * See if congestion control module wants to decide; otherwise, autosize.
  */
-u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 {
	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
	u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
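For context, the visible lines of this hunk show the usual "hook decides, otherwise autosize" dispatch: tcp_tso_segs() asks the congestion-control module via its optional tso_segs_goal hook and, per the comment above, autosizes when the module declines. Below is a minimal user-space sketch of that pattern; all names and types here are hypothetical stand-ins, not the kernel's.

/* Hypothetical stand-in for the ca_ops->tso_segs_goal dispatch shown above:
 * use the module's answer when it provides one, otherwise autosize.
 */
#include <stdint.h>

struct cc_ops {
	uint32_t (*tso_segs_goal)(void *sk);	/* optional hook, may be NULL */
};

static uint32_t autosize_fallback(void *sk, unsigned int mss_now)
{
	(void)sk;
	(void)mss_now;
	return 2;	/* placeholder: the kernel derives this elsewhere */
}

static uint32_t tso_segs(const struct cc_ops *ops, void *sk,
			 unsigned int mss_now)
{
	uint32_t goal = ops->tso_segs_goal ? ops->tso_segs_goal(sk) : 0;

	/* A zero goal means the module did not decide: fall back. */
	return goal ? goal : autosize_fallback(sk, mss_now);
}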

net/mptcp/mptcp_sched.c

Lines changed: 35 additions & 47 deletions
@@ -1,6 +1,5 @@
 /* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */
 
-#include <linux/bug.h>
 #include <linux/module.h>
 #include <net/mptcp.h>
 
@@ -37,38 +36,12 @@ bool mptcp_is_def_unavailable(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(mptcp_is_def_unavailable);
 
-/* estimate number of segments currently in flight + unsent in
- * the subflow socket.
- */
-static int mptcp_subflow_queued(struct sock *sk, u32 max_tso_segs)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int queued;
-
-	/* estimate the max number of segments in the write queue
-	 * this is an overestimation, avoiding to iterate over the queue
-	 * to make a better estimation.
-	 * Having only one skb in the queue however might trigger tso deferral,
-	 * delaying the sending of a tso segment in the hope that skb_entail
-	 * will append more data to the skb soon.
-	 * Therefore, in the case only one skb is in the queue, we choose to
-	 * potentially underestimate, risking to schedule one skb too many onto
-	 * the subflow rather than not enough.
-	 */
-	if (sk->sk_write_queue.qlen > 1)
-		queued = sk->sk_write_queue.qlen * max_tso_segs;
-	else
-		queued = sk->sk_write_queue.qlen;
-
-	return queued + tcp_packets_in_flight(tp);
-}
-
 static bool mptcp_is_temp_unavailable(struct sock *sk,
				       const struct sk_buff *skb,
				       bool zero_wnd_test)
 {
	const struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int mss_now;
+	unsigned int mss_now, space, in_flight;
 
	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
		/* If SACK is disabled, and we got a loss, TCP does not exit
@@ -92,11 +65,19 @@ static bool mptcp_is_temp_unavailable(struct sock *sk,
		return true;
	}
 
+	in_flight = tcp_packets_in_flight(tp);
+	/* Not even a single spot in the cwnd */
+	if (in_flight >= tp->snd_cwnd)
+		return true;
+
	mss_now = tcp_current_mss(sk);
 
-	/* Not even a single spot in the cwnd */
-	if (mptcp_subflow_queued(sk, tcp_tso_segs(sk, tcp_current_mss(sk)))
-	    >= tp->snd_cwnd)
+	/* Now, check if what is queued in the subflow's send-queue
+	 * already fills the cwnd.
+	 */
+	space = (tp->snd_cwnd - in_flight) * mss_now;
+
+	if (tp->write_seq - tp->snd_nxt >= space)
		return true;
 
	if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
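The restored check reasons in bytes rather than in estimated segments: the free space in the congestion window is (snd_cwnd − in_flight) × mss_now, and the subflow counts as temporarily unavailable once its queued-but-unsent bytes (write_seq − snd_nxt) already fill that space. Below is a minimal user-space sketch of the same arithmetic, with a simplified stand-in struct instead of the kernel's struct tcp_sock.

#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-in for the subflow state used by the restored check. */
struct subflow_state {
	uint32_t snd_cwnd;	/* congestion window, in segments */
	uint32_t in_flight;	/* packets currently in flight */
	uint32_t mss_now;	/* current MSS, in bytes */
	uint32_t write_seq;	/* next sequence number to be queued */
	uint32_t snd_nxt;	/* next sequence number to be sent */
};

/* Mirrors the restored logic in mptcp_is_temp_unavailable(). */
static bool cwnd_is_full(const struct subflow_state *s)
{
	uint32_t space;

	/* Not even a single spot in the cwnd. */
	if (s->in_flight >= s->snd_cwnd)
		return true;

	/* Bytes that still fit into the cwnd ... */
	space = (s->snd_cwnd - s->in_flight) * s->mss_now;

	/* ... versus bytes already queued but not yet sent. */
	return s->write_seq - s->snd_nxt >= space;
}

Because all fields are unsigned, the in_flight >= snd_cwnd test must come first, as it does in the kernel code, so the subtraction in the space computation cannot wrap.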
@@ -416,10 +397,11 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
					   unsigned int *limit)
 {
	struct sk_buff *skb = __mptcp_next_segment(meta_sk, reinject);
-	unsigned int mss_now;
-	u32 max_len, gso_max_segs, max_segs, max_tso_segs, window;
+	unsigned int mss_now, in_flight_space;
+	int remaining_in_flight_space;
+	u32 max_len, max_segs, window;
	struct tcp_sock *subtp;
-	int queued;
+	u16 gso_max_segs;
 
	/* As we set it, we have to reset it as well. */
	*limit = 0;
@@ -457,30 +439,36 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
	if (skb->len <= mss_now)
		return skb;
 
-	max_tso_segs = tcp_tso_segs(*subsk, tcp_current_mss(*subsk));
-	queued = mptcp_subflow_queued(*subsk, max_tso_segs);
-
-	/* this condition should already have been established in
-	 * mptcp_is_temp_unavailable when selecting available flows
+	/* The following is similar to tcp_mss_split_point, but
+	 * we do not care about nagle, because we will anyways
+	 * use TCP_NAGLE_PUSH, which overrides this.
	 */
-	WARN_ONCE(subtp->snd_cwnd <= queued, "Selected subflow no cwnd room");
 
	gso_max_segs = (*subsk)->sk_gso_max_segs;
	if (!gso_max_segs) /* No gso supported on the subflow's NIC */
		gso_max_segs = 1;
-
-	max_segs = min_t(unsigned int, subtp->snd_cwnd - queued, gso_max_segs);
+	max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
	if (!max_segs)
		return NULL;
 
-	/* if there is room for a segment, schedule up to a complete TSO
-	 * segment to avoid TSO splitting. Even if it is more than allowed by
-	 * the congestion window.
+	/* max_len is what would fit in the cwnd (respecting the 2GSO-limit of
+	 * tcp_cwnd_test), but ignoring whatever was already queued.
	 */
-	max_segs = max_t(unsigned int, max_tso_segs, max_segs);
-
	max_len = min(mss_now * max_segs, skb->len);
 
+	in_flight_space = (subtp->snd_cwnd - tcp_packets_in_flight(subtp)) * mss_now;
+	remaining_in_flight_space = (int)in_flight_space - (subtp->write_seq - subtp->snd_nxt);
+
+	if (remaining_in_flight_space <= 0)
+		WARN_ONCE(1, "in_flight %u cwnd %u wseq %u snxt %u mss_now %u cache %u",
+			  tcp_packets_in_flight(subtp), subtp->snd_cwnd,
+			  subtp->write_seq, subtp->snd_nxt, mss_now, subtp->mss_cache);
+	else
+		/* max_len now fits exactly in the write-queue, taking into
+		 * account what was already queued.
+		 */
+		max_len = min_t(u32, max_len, remaining_in_flight_space);
+
	window = tcp_wnd_end(subtp) - subtp->write_seq;
 
	/* max_len now also respects the announced receive-window */
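The second half of this hunk applies the same byte-based view when sizing the next segment: max_len, already bounded by mss_now * max_segs and skb->len, is further capped by the cwnd space that remains after subtracting what is queued but unsent. A condensed sketch of that capping step follows, with hypothetical stand-in parameters rather than the kernel's struct tcp_sock.

#include <stdint.h>

/* Hypothetical helper condensing the restored capping logic above.
 * max_len is assumed to already respect mss_now * max_segs and skb->len.
 */
static uint32_t cap_to_in_flight_space(uint32_t max_len,
				       uint32_t snd_cwnd, uint32_t in_flight,
				       uint32_t mss_now,
				       uint32_t write_seq, uint32_t snd_nxt)
{
	uint32_t in_flight_space = (snd_cwnd - in_flight) * mss_now;
	/* Signed: queued-but-unsent bytes may exceed the free cwnd space. */
	int32_t remaining = (int32_t)in_flight_space -
			    (int32_t)(write_seq - snd_nxt);

	if (remaining <= 0)
		return max_len;	/* the kernel warns here via WARN_ONCE */

	/* max_len now also accounts for what was already queued. */
	return max_len < (uint32_t)remaining ? max_len : (uint32_t)remaining;
}

The signed intermediate matters: as the WARN_ONCE branch in the kernel code shows, queued-but-unsent data can exceed the free cwnd space, and an unsigned subtraction would silently wrap instead of flagging that case.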
