diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 6b7b4b67f18c8..b63f26bf348f8 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -797,7 +797,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
 	mptcp_subflow_fully_established(subflow, mp_opt);
 
 fully_established:
-	if (likely(subflow->pm_notified))
+	/* if the subflow is not already linked into the conn_list, we can't
+	 * notify the PM: this subflow is still on the listener queue
+	 * and the PM possibly acquiring the subflow lock could race with
+	 * the listener close
+	 */
+	if (likely(subflow->pm_notified) || list_empty(&subflow->node))
 		return true;
 
 	subflow->pm_notified = 1;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 75c5040e8d5d7..74ccc76a11cd9 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -111,8 +111,14 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk)
 
 	spin_lock_bh(&pm->lock);
 
-	if (READ_ONCE(pm->work_pending))
+	/* mptcp_pm_fully_established() can be invoked by multiple
+	 * racing paths - accept() and check_fully_established()
+	 * be sure to serve this event only once.
+	 */
+	if (READ_ONCE(pm->work_pending) &&
+	    !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
 		mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
+	msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
 
 	spin_unlock_bh(&pm->lock);
 }
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 57213ff60f784..4e29dcf17ecda 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3208,6 +3208,17 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 		bool slowpath;
 
 		slowpath = lock_sock_fast(newsk);
+
+		/* PM/worker can now acquire the first subflow socket
+		 * lock without racing with listener queue cleanup,
+		 * we can notify it, if needed.
+		 */
+		subflow = mptcp_subflow_ctx(msk->first);
+		list_add(&subflow->node, &msk->conn_list);
+		sock_hold(msk->first);
+		if (mptcp_is_fully_established(newsk))
+			mptcp_pm_fully_established(msk);
+
 		mptcp_copy_inaddrs(newsk, msk->first);
 		mptcp_rcv_space_init(msk, msk->first);
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index fc56e730fb35c..4db8c905b0dbc 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -165,6 +165,7 @@ enum mptcp_pm_status {
 	MPTCP_PM_ADD_ADDR_SEND_ACK,
 	MPTCP_PM_RM_ADDR_RECEIVED,
 	MPTCP_PM_ESTABLISHED,
+	MPTCP_PM_ALREADY_ESTABLISHED,	/* persistent status, set after ESTABLISHED event */
 	MPTCP_PM_SUBFLOW_ESTABLISHED,
 };
 
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 5f5815a1665f1..9b5a966b0041c 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -614,8 +614,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 			 */
 			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
 
-			/* link the newly created socket to the msk */
-			mptcp_add_pending_subflow(mptcp_sk(new_msk), ctx);
+			/* record the newly created socket as the first msk
+			 * subflow, but don't link it yet into conn_list
+			 */
 			WRITE_ONCE(mptcp_sk(new_msk)->first, child);
 
 			/* new mpc subflow takes ownership of the newly
@@ -1148,13 +1149,18 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
 	subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
 	mptcp_info2sockaddr(remote, &addr);
 
+	mptcp_add_pending_subflow(msk, subflow);
 	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
 	if (err && err != -EINPROGRESS)
-		goto failed;
+		goto failed_unlink;
 
-	mptcp_add_pending_subflow(msk, subflow);
 	return err;
 
+failed_unlink:
+	spin_lock_bh(&msk->join_list_lock);
+	list_del(&subflow->node);
+	spin_unlock_bh(&msk->join_list_lock);
+
 failed:
 	subflow->disposable = 1;
 	sock_release(sf);
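
Illustration only, not part of the patch above: a minimal userspace sketch of the "serve this event only once" pattern that the pm.c hunk introduces, with a pthread mutex standing in for pm->lock and a plain bitmask for msk->pm.status. All names, the threading setup, and the counter are made up for the example.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define PM_ALREADY_ESTABLISHED	(1u << 0)

static pthread_mutex_t pm_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int pm_status;
static bool work_pending = true;
static int events_served;

/* called by both racing paths, the way accept() and option processing
 * both end up in mptcp_pm_fully_established()
 */
static void pm_fully_established(void)
{
	pthread_mutex_lock(&pm_lock);
	/* schedule the ESTABLISHED event only if nobody did it before */
	if (work_pending && !(pm_status & PM_ALREADY_ESTABLISHED))
		events_served++;	/* stands in for scheduling the worker */
	pm_status |= PM_ALREADY_ESTABLISHED;	/* persistent: later calls are no-ops */
	pthread_mutex_unlock(&pm_lock);
}

static void *racer(void *arg)
{
	(void)arg;
	pm_fully_established();
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, racer, NULL);
	pthread_create(&t2, NULL, racer, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);

	/* prints 1: exactly one event regardless of which caller won the race */
	printf("events served: %d\n", events_served);
	return 0;
}

Whichever path wins the race sets the flag under the lock, so the second caller sees it and skips scheduling; this mirrors how the persistent MPTCP_PM_ALREADY_ESTABLISHED bit makes the kernel-side check idempotent.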