Skip to content
This repository has been archived by the owner on Apr 18, 2024. It is now read-only.

Commit

Permalink
mptcp: Avoid last mptcp_sock_def_error_report warning
Browse files Browse the repository at this point in the history
There is a packet-sequence that can end up closing an MPTCP-socket at
the metae-level while in infinite-mapping mode without closing the
subflow. This happens in the following sequence:

+0      socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0      setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0      bind(3, {sa_family = AF_INET, sin_port = htons(13000), sin_addr = inet_addr("192.168.0.1")}, ...) = 0
+0      listen(3, 1) = 0

+0      socket(..., SOCK_STREAM, IPPROTO_TCP) = 5
+0      setsockopt(5, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0      bind(5, {sa_family = AF_INET, sin_port = htons(13001), sin_addr = inet_addr("192.168.0.1")}, ...) = 0
+0      listen(5,1) = 0

+0      < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7,mp_capable key_a> sock(3)
+0      > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7,mp_capable key_b> sock(3)
+0.1    < . 1:1(0) ack 1 win 257 <mp_capable key_a key_b> sock(3)
+0      accept(3, ..., ...) = 4

+0      < P. 1:11(10) ack 1 win 257 <dss dack4 dsn4> sock(4)
+0      > . 1:1(0) ack 11 <dss dack4=11> sock(4)

+0      write(4, ..., 100) = 100
+0      > P. 1:101(100) ack 11 <dss dack4=11 dsn4> sock(4)

+0      < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7,mp_join_syn address_id=0 token=sha1_32(key_b)> sock(5)
+0      > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7, mp_join_syn_ack backup=0 address_id=0 sender_hmac=trunc_l64_hmac(key_b key_a) > sock(5)
+0      < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7,mp_join_syn address_id=0 token=sha1_32(key_b)> sock(5)
+0      > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7, mp_join_syn_ack backup=0 address_id=0 sender_hmac=trunc_l64_hmac(key_b key_a) > sock(5)
+0      < . 1:1(0) ack 1 win 32792 <mp_join_ack sender_hmac=full_160_hmac(key_a key_b)> sock(5)
+0      mp_join_accept(5) = 6

+0      > . 1:1(0) ack 1 <...> sock(6) // reliably mp_join_ack

+0      < F. 1:1(0) ack 1 win 257 <dss dack4 dsn4 FIN> sock(6)
+0      > . 1:1(0) ack 2 <dss dack4=12> sock(6)

+0.1    shutdown(4, SHUT_WR) = 0
+0      > F. 1:1(0) ack 2 <dss dack4 dsn4 FIN> sock(6)

/* This triggers a fallback as there is no DATA_ACK. */
+0      < . 11:11(0) ack 101 win 257 sock(4)

/* Incoming RST on a connection that has fallen back and meta has been close */
+0.1    < R. 11:11(0) ack 102 win 257 sock(4)

+1      close(4) = 0

Problem is that the outstanding data on sock(4) was DATA_ACK'ed on
sock(6). Thus, sequence-numbers our no more in-sync.

We need to handle this properly. We know that at the beginning of a
subflow all data sent on it will be in-order, tracked by
last_end_data_seq. Thus, if last_end_data_seq is less then
meta->snd_una, it means that we are out-of-sync and need to drain the
subflow before incoming ACKs will allow to move the meta->snd_una
forward.

Fixes: Zero-day bug
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
(cherry picked from commit ce507d8)
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
(cherry picked from commit edf6791)
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
  • Loading branch information
cpaasch authored and matttbe committed Mar 8, 2022
1 parent 9b78db2 commit 95d34c6
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 59 deletions.
53 changes: 8 additions & 45 deletions include/net/mptcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,9 @@ struct mptcp_cb {
server_side:1,
infinite_mapping_rcv:1,
infinite_mapping_snd:1,
infinite_send_una_ahead:1, /* While falling back, the snd_una
*on meta is ahead of the subflow.
*/
dfin_combined:1, /* Was the DFIN combined with subflow-fin? */
passive_close:1,
snd_hiseq_index:1, /* Index in snd_high_order of snd_nxt */
Expand Down Expand Up @@ -782,6 +785,8 @@ bool mptcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
void tcp_parse_mptcp_options(const struct sk_buff *skb,
struct mptcp_options_received *mopt);
bool mptcp_handle_ack_in_infinite(struct sock *sk, const struct sk_buff *skb,
int flag);
void mptcp_parse_options(const uint8_t *ptr, int opsize,
struct mptcp_options_received *mopt,
const struct sk_buff *skb,
Expand Down Expand Up @@ -818,7 +823,6 @@ unsigned int mptcp_current_mss(struct sock *meta_sk);
int mptcp_select_size(const struct sock *meta_sk, bool first_skb, bool zc);
void mptcp_hmac_sha1(const u8 *key_1, const u8 *key_2, u32 *hash_out,
int arg_num, ...);
void mptcp_clean_rtx_infinite(const struct sk_buff *skb, struct sock *sk);
void mptcp_fin(struct sock *meta_sk);
void mptcp_meta_retransmit_timer(struct sock *meta_sk);
void mptcp_sub_retransmit_timer(struct sock *sk);
Expand Down Expand Up @@ -1229,47 +1233,6 @@ static inline void mptcp_fallback_close(struct mptcp_cb *mpcb,
mpcb->pm_ops->close_session(mptcp_meta_sk(except));
}

static inline bool mptcp_fallback_infinite(struct sock *sk, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
struct mptcp_cb *mpcb = tp->mpcb;

/* If data has been acknowleged on the meta-level, fully_established
* will have been set before and thus we will not fall back to infinite
* mapping.
*/
if (likely(tp->mptcp->fully_established))
return false;

if (!(flag & MPTCP_FLAG_DATA_ACKED))
return false;

/* Don't fallback twice ;) */
if (mpcb->infinite_mapping_snd)
return false;

pr_debug("%s %#x will fallback - pi %d, src %pI4:%u dst %pI4:%u rcv_nxt %u from %pS\n",
__func__, mpcb->mptcp_loc_token, tp->mptcp->path_index,
&inet_sk(sk)->inet_saddr, ntohs(inet_sk(sk)->inet_sport),
&inet_sk(sk)->inet_daddr, ntohs(inet_sk(sk)->inet_dport),
tp->rcv_nxt, __builtin_return_address(0));
if (!is_master_tp(tp)) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_FBACKSUB);
return true;
}

mpcb->infinite_mapping_snd = 1;
mpcb->infinite_mapping_rcv = 1;
mpcb->infinite_rcv_seq = mptcp_get_rcv_nxt_64(mptcp_meta_tp(tp));
tp->mptcp->fully_established = 1;

mptcp_fallback_close(mpcb, sk);

MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_FBACKINIT);

return false;
}

static inline bool mptcp_v6_is_v4_mapped(const struct sock *sk)
{
return sk->sk_family == AF_INET6 &&
Expand Down Expand Up @@ -1359,8 +1322,6 @@ static inline void mptcp_del_sock(const struct sock *sk) {}
static inline void mptcp_update_metasocket(const struct sock *meta_sk) {}
static inline void mptcp_reinject_data(struct sock *orig_sk, int clone_it) {}
static inline void mptcp_update_sndbuf(const struct tcp_sock *tp) {}
static inline void mptcp_clean_rtx_infinite(const struct sk_buff *skb,
const struct sock *sk) {}
static inline void mptcp_sub_close(struct sock *sk, unsigned long delay) {}
static inline void mptcp_set_rto(const struct sock *sk) {}
static inline void mptcp_send_fin(const struct sock *meta_sk) {}
Expand Down Expand Up @@ -1414,7 +1375,9 @@ static inline unsigned int mptcp_current_mss(struct sock *meta_sk)
return 0;
}
static inline void mptcp_sub_close_passive(struct sock *sk) {}
static inline bool mptcp_fallback_infinite(const struct sock *sk, int flag)
static inline bool mptcp_handle_ack_in_infinite(const struct sock *sk,
const struct sk_buff *skb,
int flag)
{
return false;
}
Expand Down
4 changes: 1 addition & 3 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -3705,13 +3705,11 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
tcp_rack_update_reo_wnd(sk, &rs);

if (mptcp(tp)) {
if (mptcp_fallback_infinite(sk, flag)) {
if (mptcp_handle_ack_in_infinite(sk, skb, flag)) {
pr_debug("%s resetting flow\n", __func__);
mptcp_send_reset(sk);
goto invalid_ack;
}

mptcp_clean_rtx_infinite(skb, sk);
}

if (tp->tlp_high_seq)
Expand Down
85 changes: 74 additions & 11 deletions net/mptcp/mptcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -1439,7 +1439,7 @@ static bool mptcp_process_data_ack(struct sock *sk, const struct sk_buff *skb)
}

/* If we are in infinite mapping mode, rx_opt.data_ack has been
* set by mptcp_clean_rtx_infinite.
* set by mptcp_handle_ack_in_infinite.
*/
if (!(tcb->mptcp_flags & MPTCPHDR_ACK) && !tp->mpcb->infinite_mapping_snd)
return false;
Expand Down Expand Up @@ -1567,23 +1567,86 @@ static bool mptcp_process_data_ack(struct sock *sk, const struct sk_buff *skb)
return false;
}

void mptcp_clean_rtx_infinite(const struct sk_buff *skb, struct sock *sk)
bool mptcp_handle_ack_in_infinite(struct sock *sk, const struct sk_buff *skb,
int flag)
{
struct tcp_sock *tp = tcp_sk(sk), *meta_tp = tcp_sk(mptcp_meta_sk(sk));
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_sock *meta_tp = mptcp_meta_tp(tp);
struct mptcp_cb *mpcb = tp->mpcb;

if (!tp->mpcb->infinite_mapping_snd)
return;
/* We are already in fallback-mode. Data is in-sequence and we know
* exactly what is being sent on this subflow belongs to the current
* meta-level sequence number space.
*/
if (mpcb->infinite_mapping_snd) {
if (mpcb->infinite_send_una_ahead &&
!before(meta_tp->snd_una, tp->mptcp->last_end_data_seq - (tp->snd_nxt - tp->snd_una))) {
tp->mptcp->rx_opt.data_ack = meta_tp->snd_una;
} else {
/* Remember that meta snd_una is no more ahead of the game */
mpcb->infinite_send_una_ahead = 0;

/* The difference between both write_seq's represents the offset between
* data-sequence and subflow-sequence. As we are infinite, this must
* match.
*
* Thus, from this difference we can infer the meta snd_una.
*/
tp->mptcp->rx_opt.data_ack = meta_tp->snd_nxt -
(tp->snd_nxt - tp->snd_una);
}

goto exit;
}

/* If data has been acknowleged on the meta-level, fully_established
* will have been set before and thus we will not fall back to infinite
* mapping.
*/
if (likely(tp->mptcp->fully_established))
return false;

/* The difference between both write_seq's represents the offset between
* data-sequence and subflow-sequence. As we are infinite, this must
* match.
if (!(flag & MPTCP_FLAG_DATA_ACKED))
return false;

pr_debug("%s %#x will fallback - pi %d, src %pI4:%u dst %pI4:%u rcv_nxt %u from %pS\n",
__func__, mpcb->mptcp_loc_token, tp->mptcp->path_index,
&inet_sk(sk)->inet_saddr, ntohs(inet_sk(sk)->inet_sport),
&inet_sk(sk)->inet_daddr, ntohs(inet_sk(sk)->inet_dport),
tp->rcv_nxt, __builtin_return_address(0));
if (!is_master_tp(tp)) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_FBACKSUB);
return true;
}

mpcb->infinite_mapping_snd = 1;
mpcb->infinite_mapping_rcv = 1;
mpcb->infinite_rcv_seq = mptcp_get_rcv_nxt_64(mptcp_meta_tp(tp));
tp->mptcp->fully_established = 1;

mptcp_fallback_close(mpcb, sk);

MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_FBACKINIT);

/* The acknowledged data-seq at the subflow-level is:
* last_end_data_seq - (tp->snd_nxt - tp->snd_una)
*
* Thus, from this difference we can infer the meta snd_una.
* If this is less than meta->snd_una, then we ignore it. Otherwise,
* this becomes our data_ack.
*/
tp->mptcp->rx_opt.data_ack = meta_tp->snd_nxt - tp->snd_nxt +
tp->snd_una;
if (after(meta_tp->snd_una, tp->mptcp->last_end_data_seq - (tp->snd_nxt - tp->snd_una))) {
/* Remmeber that meta snd_una is ahead of the game */
mpcb->infinite_send_una_ahead = 1;
tp->mptcp->rx_opt.data_ack = meta_tp->snd_una;
} else {
tp->mptcp->rx_opt.data_ack = tp->mptcp->last_end_data_seq -
(tp->snd_nxt - tp->snd_una);
}

exit:
mptcp_process_data_ack(sk, skb);

return false;
}

/**** static functions used by mptcp_parse_options */
Expand Down

0 comments on commit 95d34c6

Please sign in to comment.