 /* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */

-#include <linux/bug.h>
 #include <linux/module.h>
 #include <net/mptcp.h>

@@ -37,38 +36,12 @@ bool mptcp_is_def_unavailable(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(mptcp_is_def_unavailable);

-/* estimate number of segments currently in flight + unsent in
- * the subflow socket.
- */
-static int mptcp_subflow_queued(struct sock *sk, u32 max_tso_segs)
-{
-        const struct tcp_sock *tp = tcp_sk(sk);
-        unsigned int queued;
-
-        /* estimate the max number of segments in the write queue
-         * this is an overestimation, avoiding to iterate over the queue
-         * to make a better estimation.
-         * Having only one skb in the queue however might trigger tso deferral,
-         * delaying the sending of a tso segment in the hope that skb_entail
-         * will append more data to the skb soon.
-         * Therefore, in the case only one skb is in the queue, we choose to
-         * potentially underestimate, risking to schedule one skb too many onto
-         * the subflow rather than not enough.
-         */
-        if (sk->sk_write_queue.qlen > 1)
-                queued = sk->sk_write_queue.qlen * max_tso_segs;
-        else
-                queued = sk->sk_write_queue.qlen;
-
-        return queued + tcp_packets_in_flight(tp);
-}
-
 static bool mptcp_is_temp_unavailable(struct sock *sk,
                                       const struct sk_buff *skb,
                                       bool zero_wnd_test)
 {
         const struct tcp_sock *tp = tcp_sk(sk);
-        unsigned int mss_now;
+        unsigned int mss_now, space, in_flight;

         if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
                 /* If SACK is disabled, and we got a loss, TCP does not exit
@@ -92,11 +65,19 @@ static bool mptcp_is_temp_unavailable(struct sock *sk,
                 return true;
         }

+        in_flight = tcp_packets_in_flight(tp);
+        /* Not even a single spot in the cwnd */
+        if (in_flight >= tp->snd_cwnd)
+                return true;
+
         mss_now = tcp_current_mss(sk);

-        /* Not even a single spot in the cwnd */
-        if (mptcp_subflow_queued(sk, tcp_tso_segs(sk, tcp_current_mss(sk)))
-            >= tp->snd_cwnd)
+        /* Now, check if what is queued in the subflow's send-queue
+         * already fills the cwnd.
+         */
+        space = (tp->snd_cwnd - in_flight) * mss_now;
+
+        if (tp->write_seq - tp->snd_nxt >= space)
                 return true;

         if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
@@ -416,10 +397,11 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
                                           unsigned int *limit)
 {
         struct sk_buff *skb = __mptcp_next_segment(meta_sk, reinject);
-        unsigned int mss_now;
-        u32 max_len, gso_max_segs, max_segs, max_tso_segs, window;
+        unsigned int mss_now, in_flight_space;
+        int remaining_in_flight_space;
+        u32 max_len, max_segs, window;
         struct tcp_sock *subtp;
-        int queued;
+        u16 gso_max_segs;

         /* As we set it, we have to reset it as well. */
         *limit = 0;
@@ -457,30 +439,36 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
         if (skb->len <= mss_now)
                 return skb;

-        max_tso_segs = tcp_tso_segs(*subsk, tcp_current_mss(*subsk));
-        queued = mptcp_subflow_queued(*subsk, max_tso_segs);
-
-        /* this condition should already have been established in
-         * mptcp_is_temp_unavailable when selecting available flows
+        /* The following is similar to tcp_mss_split_point, but
+         * we do not care about nagle, because we will anyways
+         * use TCP_NAGLE_PUSH, which overrides this.
          */
-        WARN_ONCE(subtp->snd_cwnd <= queued, "Selected subflow no cwnd room");

         gso_max_segs = (*subsk)->sk_gso_max_segs;
         if (!gso_max_segs) /* No gso supported on the subflow's NIC */
                 gso_max_segs = 1;
-
-        max_segs = min_t(unsigned int, subtp->snd_cwnd - queued, gso_max_segs);
+        max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
         if (!max_segs)
                 return NULL;

-        /* if there is room for a segment, schedule up to a complete TSO
-         * segment to avoid TSO splitting. Even if it is more than allowed by
-         * the congestion window.
+        /* max_len is what would fit in the cwnd (respecting the 2GSO-limit of
+         * tcp_cwnd_test), but ignoring whatever was already queued.
          */
-        max_segs = max_t(unsigned int, max_tso_segs, max_segs);
-
         max_len = min(mss_now * max_segs, skb->len);

+        in_flight_space = (subtp->snd_cwnd - tcp_packets_in_flight(subtp)) * mss_now;
+        remaining_in_flight_space = (int)in_flight_space - (subtp->write_seq - subtp->snd_nxt);
+
+        if (remaining_in_flight_space <= 0)
+                WARN_ONCE(1, "in_flight %u cwnd %u wseq %u snxt %u mss_now %u cache %u",
+                          tcp_packets_in_flight(subtp), subtp->snd_cwnd,
+                          subtp->write_seq, subtp->snd_nxt, mss_now, subtp->mss_cache);
+        else
+                /* max_len now fits exactly in the write-queue, taking into
+                 * account what was already queued.
+                 */
+                max_len = min_t(u32, max_len, remaining_in_flight_space);
+
         window = tcp_wnd_end(subtp) - subtp->write_seq;

         /* max_len now also respects the announced receive-window */
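For readers following the logic rather than the patch mechanics: the rewritten availability check in mptcp_is_temp_unavailable() can be exercised outside the kernel. The following is a minimal standalone userspace sketch, not the kernel code; struct subflow_state and subflow_cwnd_full() are hypothetical names standing in for the struct tcp_sock fields (snd_cwnd, snd_nxt, write_seq) and helpers (tcp_packets_in_flight(), tcp_current_mss()) that the patch actually uses.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical snapshot of the subflow state consulted by the check. */
struct subflow_state {
        uint32_t snd_cwnd;   /* congestion window, in segments */
        uint32_t in_flight;  /* packets currently in flight */
        uint32_t mss_now;    /* current MSS, in bytes */
        uint32_t write_seq;  /* one past the last byte queued by the sender */
        uint32_t snd_nxt;    /* next byte to be sent on the wire */
};

/* Mirrors the new test: the subflow is temporarily unavailable if the cwnd
 * is already full, or if the data queued but not yet sent on this subflow
 * already covers the remaining cwnd space in bytes.
 */
static bool subflow_cwnd_full(const struct subflow_state *s)
{
        uint32_t space;

        if (s->in_flight >= s->snd_cwnd)
                return true; /* not even a single spot in the cwnd */

        space = (s->snd_cwnd - s->in_flight) * s->mss_now;

        return s->write_seq - s->snd_nxt >= space;
}

int main(void)
{
        struct subflow_state s = {
                .snd_cwnd = 10, .in_flight = 8, .mss_now = 1448,
                .write_seq = 105000, .snd_nxt = 100000,
        };

        /* Two free cwnd slots give 2896 bytes of space, but 5000 bytes are
         * already queued unsent, so the subflow is reported unavailable.
         */
        printf("temporarily unavailable: %s\n",
               subflow_cwnd_full(&s) ? "yes" : "no");
        return 0;
}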
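The new clamping of max_len in mptcp_next_segment() follows the same idea: start from what the cwnd allows for this skb, then subtract whatever is already sitting unsent in the subflow's send queue. Below is a second standalone sketch under the same assumptions; clamp_to_in_flight_space() and its argument struct are illustrative only, and unlike this sketch the kernel code emits a WARN_ONCE() when the remaining space is non-positive.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the struct tcp_sock fields read by the patch. */
struct subflow_state {
        uint32_t snd_cwnd;
        uint32_t in_flight;
        uint32_t mss_now;
        uint32_t write_seq;
        uint32_t snd_nxt;
};

static uint32_t min_u32(uint32_t a, uint32_t b)
{
        return a < b ? a : b;
}

/* Mirrors the clamping: max_len is first limited by the cwnd-derived segment
 * count and the skb length, then reduced by the bytes already queued but
 * unsent, so the scheduler does not overfill the subflow's send queue.
 */
static uint32_t clamp_to_in_flight_space(const struct subflow_state *s,
                                         uint32_t max_segs, uint32_t skb_len)
{
        uint32_t max_len = min_u32(s->mss_now * max_segs, skb_len);
        uint32_t in_flight_space = (s->snd_cwnd - s->in_flight) * s->mss_now;
        int32_t remaining = (int32_t)in_flight_space -
                            (int32_t)(s->write_seq - s->snd_nxt);

        if (remaining <= 0)
                return max_len; /* the kernel only warns in this case */

        return min_u32(max_len, (uint32_t)remaining);
}

int main(void)
{
        struct subflow_state s = {
                .snd_cwnd = 10, .in_flight = 4, .mss_now = 1448,
                .write_seq = 103000, .snd_nxt = 100000,
        };

        /* Six free slots give 8688 bytes; minus 3000 bytes already queued
         * leaves 5688 bytes, which caps the 7240-byte (5-segment) candidate.
         */
        printf("max_len = %u\n",
               (unsigned)clamp_to_in_flight_space(&s, 5, 20000));
        return 0;
}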