Skip to content

Commit 164891a

Browse files
Stephen Hemminger, David S. Miller
Stephen Hemminger
authored and
David S. Miller
committed
[TCP]: Congestion control API update.
Do some simple changes to make the congestion control API faster and cleaner:
* use ktime_t rather than timeval
* merge RTT sampling into the existing ack callback; this means one indirect call per ack instead of two
* use flag bits to store options/settings

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 65d1b4a commit 164891a

15 files changed

+65
-55
lines changed

include/linux/skbuff.h

+5
Original file line numberDiff line numberDiff line change
@@ -1569,6 +1569,11 @@ static inline void __net_timestamp(struct sk_buff *skb)
15691569
skb->tstamp = ktime_get_real();
15701570
}
15711571

1572+
static inline ktime_t net_timedelta(ktime_t t)
1573+
{
1574+
return ktime_sub(ktime_get_real(), t);
1575+
}
1576+
15721577

15731578
extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
15741579
extern __sum16 __skb_checksum_complete(struct sk_buff *skb);

include/net/tcp.h

+5-4
Original file line numberDiff line numberDiff line change
@@ -629,9 +629,12 @@ enum tcp_ca_event {
629629
#define TCP_CA_MAX 128
630630
#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX)
631631

632+
#define TCP_CONG_NON_RESTRICTED 0x1
633+
#define TCP_CONG_RTT_STAMP 0x2
634+
632635
struct tcp_congestion_ops {
633636
struct list_head list;
634-
int non_restricted;
637+
unsigned long flags;
635638

636639
/* initialize private data (optional) */
637640
void (*init)(struct sock *sk);
@@ -645,16 +648,14 @@ struct tcp_congestion_ops {
645648
/* do new cwnd calculation (required) */
646649
void (*cong_avoid)(struct sock *sk, u32 ack,
647650
u32 rtt, u32 in_flight, int good_ack);
648-
/* round trip time sample per acked packet (optional) */
649-
void (*rtt_sample)(struct sock *sk, u32 usrtt);
650651
/* call before changing ca_state (optional) */
651652
void (*set_state)(struct sock *sk, u8 new_state);
652653
/* call when cwnd event occurs (optional) */
653654
void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
654655
/* new value of cwnd after loss (optional) */
655656
u32 (*undo_cwnd)(struct sock *sk);
656657
/* hook for packet ack accounting (optional) */
657-
void (*pkts_acked)(struct sock *sk, u32 num_acked);
658+
void (*pkts_acked)(struct sock *sk, u32 num_acked, ktime_t last);
658659
/* get info for inet_diag (optional) */
659660
void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
660661

net/ipv4/tcp_bic.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
206206
/* Track delayed acknowledgment ratio using sliding window
207207
* ratio = (15*ratio + sample) / 16
208208
*/
209-
static void bictcp_acked(struct sock *sk, u32 cnt)
209+
static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
210210
{
211211
const struct inet_connection_sock *icsk = inet_csk(sk);
212212

net/ipv4/tcp_cong.c

+7-7
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name)
126126
#endif
127127

128128
if (ca) {
129-
ca->non_restricted = 1; /* default is always allowed */
129+
ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
130130
list_move(&ca->list, &tcp_cong_list);
131131
ret = 0;
132132
}
@@ -181,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
181181
*buf = '\0';
182182
rcu_read_lock();
183183
list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
184-
if (!ca->non_restricted)
184+
if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
185185
continue;
186186
offs += snprintf(buf + offs, maxlen - offs,
187187
"%s%s",
@@ -212,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val)
212212
}
213213
}
214214

215-
/* pass 2 clear */
215+
/* pass 2 clear old values */
216216
list_for_each_entry_rcu(ca, &tcp_cong_list, list)
217-
ca->non_restricted = 0;
217+
ca->flags &= ~TCP_CONG_NON_RESTRICTED;
218218

219219
/* pass 3 mark as allowed */
220220
while ((name = strsep(&val, " ")) && *name) {
221221
ca = tcp_ca_find(name);
222222
WARN_ON(!ca);
223223
if (ca)
224-
ca->non_restricted = 1;
224+
ca->flags |= TCP_CONG_NON_RESTRICTED;
225225
}
226226
out:
227227
spin_unlock(&tcp_cong_list_lock);
@@ -256,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
256256
if (!ca)
257257
err = -ENOENT;
258258

259-
else if (!(ca->non_restricted || capable(CAP_NET_ADMIN)))
259+
else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
260260
err = -EPERM;
261261

262262
else if (!try_module_get(ca->owner))
@@ -371,8 +371,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk)
371371
EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
372372

373373
struct tcp_congestion_ops tcp_reno = {
374+
.flags = TCP_CONG_NON_RESTRICTED,
374375
.name = "reno",
375-
.non_restricted = 1,
376376
.owner = THIS_MODULE,
377377
.ssthresh = tcp_reno_ssthresh,
378378
.cong_avoid = tcp_reno_cong_avoid,

net/ipv4/tcp_cubic.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
334334
/* Track delayed acknowledgment ratio using sliding window
335335
* ratio = (15*ratio + sample) / 16
336336
*/
337-
static void bictcp_acked(struct sock *sk, u32 cnt)
337+
static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
338338
{
339339
const struct inet_connection_sock *icsk = inet_csk(sk);
340340

net/ipv4/tcp_htcp.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk)
9898
}
9999
}
100100

101-
static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
101+
static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last)
102102
{
103103
const struct inet_connection_sock *icsk = inet_csk(sk);
104104
const struct tcp_sock *tp = tcp_sk(sk);

net/ipv4/tcp_illinois.c

+7-9
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,14 @@ static void tcp_illinois_init(struct sock *sk)
8383
}
8484

8585
/* Measure RTT for each ack. */
86-
static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt)
86+
static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
8787
{
8888
struct illinois *ca = inet_csk_ca(sk);
89+
u32 rtt;
90+
91+
ca->acked = pkts_acked;
92+
93+
rtt = ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC;
8994

9095
/* ignore bogus values, this prevents wraparound in alpha math */
9196
if (rtt > RTT_MAX)
@@ -103,13 +108,6 @@ static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt)
103108
ca->sum_rtt += rtt;
104109
}
105110

106-
/* Capture count of packets covered by ack, to adjust for delayed acks */
107-
static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked)
108-
{
109-
struct illinois *ca = inet_csk_ca(sk);
110-
ca->acked = pkts_acked;
111-
}
112-
113111
/* Maximum queuing delay */
114112
static inline u32 max_delay(const struct illinois *ca)
115113
{
@@ -325,12 +323,12 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
325323
}
326324

327325
static struct tcp_congestion_ops tcp_illinois = {
326+
.flags = TCP_CONG_RTT_STAMP,
328327
.init = tcp_illinois_init,
329328
.ssthresh = tcp_illinois_ssthresh,
330329
.min_cwnd = tcp_reno_min_cwnd,
331330
.cong_avoid = tcp_illinois_cong_avoid,
332331
.set_state = tcp_illinois_state,
333-
.rtt_sample = tcp_illinois_rtt_sample,
334332
.get_info = tcp_illinois_info,
335333
.pkts_acked = tcp_illinois_acked,
336334

net/ipv4/tcp_input.c

+8-17
Original file line numberDiff line numberDiff line change
@@ -2402,14 +2402,6 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
24022402
return acked;
24032403
}
24042404

2405-
static u32 tcp_usrtt(struct timeval *tv)
2406-
{
2407-
struct timeval now;
2408-
2409-
do_gettimeofday(&now);
2410-
return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec);
2411-
}
2412-
24132405
/* Remove acknowledged frames from the retransmission queue. */
24142406
static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
24152407
{
@@ -2420,9 +2412,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
24202412
int acked = 0;
24212413
__s32 seq_rtt = -1;
24222414
u32 pkts_acked = 0;
2423-
void (*rtt_sample)(struct sock *sk, u32 usrtt)
2424-
= icsk->icsk_ca_ops->rtt_sample;
2425-
struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
2415+
ktime_t last_ackt = ktime_set(0,0);
24262416

24272417
while ((skb = tcp_write_queue_head(sk)) &&
24282418
skb != tcp_send_head(sk)) {
@@ -2471,7 +2461,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
24712461
seq_rtt = -1;
24722462
} else if (seq_rtt < 0) {
24732463
seq_rtt = now - scb->when;
2474-
skb_get_timestamp(skb, &tv);
2464+
last_ackt = skb->tstamp;
24752465
}
24762466
if (sacked & TCPCB_SACKED_ACKED)
24772467
tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2484,7 +2474,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
24842474
}
24852475
} else if (seq_rtt < 0) {
24862476
seq_rtt = now - scb->when;
2487-
skb_get_timestamp(skb, &tv);
2477+
last_ackt = skb->tstamp;
24882478
}
24892479
tcp_dec_pcount_approx(&tp->fackets_out, skb);
24902480
tcp_packets_out_dec(tp, skb);
@@ -2494,13 +2484,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
24942484
}
24952485

24962486
if (acked&FLAG_ACKED) {
2487+
const struct tcp_congestion_ops *ca_ops
2488+
= inet_csk(sk)->icsk_ca_ops;
2489+
24972490
tcp_ack_update_rtt(sk, acked, seq_rtt);
24982491
tcp_ack_packets_out(sk);
2499-
if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
2500-
(*rtt_sample)(sk, tcp_usrtt(&tv));
25012492

2502-
if (icsk->icsk_ca_ops->pkts_acked)
2503-
icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
2493+
if (ca_ops->pkts_acked)
2494+
ca_ops->pkts_acked(sk, pkts_acked, last_ackt);
25042495
}
25052496

25062497
#if FASTRETRANS_DEBUG > 0

net/ipv4/tcp_lp.c

+5-3
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
218218
* 3. calc smoothed OWD (SOWD).
219219
* Most ideas come from the original TCP-LP implementation.
220220
*/
221-
static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
221+
static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
222222
{
223223
struct lp *lp = inet_csk_ca(sk);
224224
s64 mowd = tcp_lp_owd_calculator(sk);
@@ -261,11 +261,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
261261
* newReno in increase case.
262262
* We work it out by following the idea from TCP-LP's paper directly
263263
*/
264-
static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
264+
static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last)
265265
{
266266
struct tcp_sock *tp = tcp_sk(sk);
267267
struct lp *lp = inet_csk_ca(sk);
268268

269+
tcp_lp_rtt_sample(sk, ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC);
270+
269271
/* calc inference */
270272
if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
271273
lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
@@ -312,11 +314,11 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
312314
}
313315

314316
static struct tcp_congestion_ops tcp_lp = {
317+
.flags = TCP_CONG_RTT_STAMP,
315318
.init = tcp_lp_init,
316319
.ssthresh = tcp_reno_ssthresh,
317320
.cong_avoid = tcp_lp_cong_avoid,
318321
.min_cwnd = tcp_reno_min_cwnd,
319-
.rtt_sample = tcp_lp_rtt_sample,
320322
.pkts_acked = tcp_lp_pkts_acked,
321323

322324
.owner = THIS_MODULE,

net/ipv4/tcp_output.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
409409
/* If congestion control is doing timestamping, we must
410410
* take such a timestamp before we potentially clone/copy.
411411
*/
412-
if (icsk->icsk_ca_ops->rtt_sample)
412+
if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
413413
__net_timestamp(skb);
414414

415415
if (likely(clone_it)) {

net/ipv4/tcp_vegas.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,13 @@ static void tcp_vegas_init(struct sock *sk)
120120
* o min-filter RTT samples from a much longer window (forever for now)
121121
* to find the propagation delay (baseRTT)
122122
*/
123-
static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
123+
static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
124124
{
125125
struct vegas *vegas = inet_csk_ca(sk);
126-
u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
126+
u32 vrtt;
127+
128+
/* Never allow zero rtt or baseRTT */
129+
vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
127130

128131
/* Filter to find propagation delay: */
129132
if (vrtt < vegas->baseRTT)
@@ -353,11 +356,12 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext,
353356
}
354357

355358
static struct tcp_congestion_ops tcp_vegas = {
359+
.flags = TCP_CONG_RTT_STAMP,
356360
.init = tcp_vegas_init,
357361
.ssthresh = tcp_reno_ssthresh,
358362
.cong_avoid = tcp_vegas_cong_avoid,
359363
.min_cwnd = tcp_reno_min_cwnd,
360-
.rtt_sample = tcp_vegas_rtt_calc,
364+
.pkts_acked = tcp_vegas_pkts_acked,
361365
.set_state = tcp_vegas_state,
362366
.cwnd_event = tcp_vegas_cwnd_event,
363367
.get_info = tcp_vegas_get_info,

net/ipv4/tcp_veno.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,13 @@ static void tcp_veno_init(struct sock *sk)
6969
}
7070

7171
/* Do rtt sampling needed for Veno. */
72-
static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt)
72+
static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
7373
{
7474
struct veno *veno = inet_csk_ca(sk);
75-
u32 vrtt = usrtt + 1; /* Never allow zero rtt or basertt */
75+
u32 vrtt;
76+
77+
/* Never allow zero rtt or baseRTT */
78+
vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
7679

7780
/* Filter to find propagation delay: */
7881
if (vrtt < veno->basertt)
@@ -199,10 +202,11 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
199202
}
200203

201204
static struct tcp_congestion_ops tcp_veno = {
205+
.flags = TCP_CONG_RTT_STAMP,
202206
.init = tcp_veno_init,
203207
.ssthresh = tcp_veno_ssthresh,
204208
.cong_avoid = tcp_veno_cong_avoid,
205-
.rtt_sample = tcp_veno_rtt_calc,
209+
.pkts_acked = tcp_veno_pkts_acked,
206210
.set_state = tcp_veno_state,
207211
.cwnd_event = tcp_veno_cwnd_event,
208212

net/ipv4/tcp_westwood.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ static void westwood_filter(struct westwood *w, u32 delta)
100100
* Called after processing group of packets.
101101
* but all westwood needs is the last sample of srtt.
102102
*/
103-
static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
103+
static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
104104
{
105105
struct westwood *w = inet_csk_ca(sk);
106106
if (cnt > 0)

net/ipv4/tcp_yeah.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,15 @@ static void tcp_yeah_init(struct sock *sk)
6464
}
6565

6666

67-
static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked)
67+
static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
6868
{
6969
const struct inet_connection_sock *icsk = inet_csk(sk);
7070
struct yeah *yeah = inet_csk_ca(sk);
7171

7272
if (icsk->icsk_ca_state == TCP_CA_Open)
7373
yeah->pkts_acked = pkts_acked;
74+
75+
tcp_vegas_pkts_acked(sk, pkts_acked, last);
7476
}
7577

7678
static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
@@ -237,11 +239,11 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
237239
}
238240

239241
static struct tcp_congestion_ops tcp_yeah = {
242+
.flags = TCP_CONG_RTT_STAMP,
240243
.init = tcp_yeah_init,
241244
.ssthresh = tcp_yeah_ssthresh,
242245
.cong_avoid = tcp_yeah_cong_avoid,
243246
.min_cwnd = tcp_reno_min_cwnd,
244-
.rtt_sample = tcp_vegas_rtt_calc,
245247
.set_state = tcp_vegas_state,
246248
.cwnd_event = tcp_vegas_cwnd_event,
247249
.get_info = tcp_vegas_get_info,

net/ipv4/tcp_yeah.h

+5-2
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
8181
* o min-filter RTT samples from a much longer window (forever for now)
8282
* to find the propagation delay (baseRTT)
8383
*/
84-
static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
84+
static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
8585
{
8686
struct vegas *vegas = inet_csk_ca(sk);
87-
u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
87+
u32 vrtt;
88+
89+
/* Never allow zero rtt or baseRTT */
90+
vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
8891

8992
/* Filter to find propagation delay: */
9093
if (vrtt < vegas->baseRTT)

0 commit comments

Comments
 (0)