Skip to content

Commit 6374044

Browse files
Paolo Abenidavem330
authored andcommitted
mptcp: fix accept vs worker race
The mptcp worker and mptcp_accept() can race, as reported by Christoph: refcount_t: addition on 0; use-after-free. WARNING: CPU: 1 PID: 14351 at lib/refcount.c:25 refcount_warn_saturate+0x105/0x1b0 lib/refcount.c:25 Modules linked in: CPU: 1 PID: 14351 Comm: syz-executor.2 Not tainted 6.3.0-rc1-gde5e8fd0123c #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 RIP: 0010:refcount_warn_saturate+0x105/0x1b0 lib/refcount.c:25 Code: 02 31 ff 89 de e8 1b f0 a7 ff 84 db 0f 85 6e ff ff ff e8 3e f5 a7 ff 48 c7 c7 d8 c7 34 83 c6 05 6d 2d 0f 02 01 e8 cb 3d 90 ff <0f> 0b e9 4f ff ff ff e8 1f f5 a7 ff 0f b6 1d 54 2d 0f 02 31 ff 89 RSP: 0018:ffffc90000a47bf8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88802eae98c0 RSI: ffffffff81097d4f RDI: 0000000000000001 RBP: ffff88802e712180 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: ffff88802eaea148 R12: ffff88802e712100 R13: ffff88802e712a88 R14: ffff888005cb93a8 R15: ffff88802e712a88 FS: 0000000000000000(0000) GS:ffff88803ed00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f277fd89120 CR3: 0000000035486002 CR4: 0000000000370ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> __refcount_add include/linux/refcount.h:199 [inline] __refcount_inc include/linux/refcount.h:250 [inline] refcount_inc include/linux/refcount.h:267 [inline] sock_hold include/net/sock.h:775 [inline] __mptcp_close+0x4c6/0x4d0 net/mptcp/protocol.c:3051 mptcp_close+0x24/0xe0 net/mptcp/protocol.c:3072 inet_release+0x56/0xa0 net/ipv4/af_inet.c:429 __sock_release+0x51/0xf0 net/socket.c:653 sock_close+0x18/0x20 net/socket.c:1395 __fput+0x113/0x430 fs/file_table.c:321 task_work_run+0x96/0x100 kernel/task_work.c:179 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0x4fc/0x10c0 kernel/exit.c:869 do_group_exit+0x51/0xf0 kernel/exit.c:1019 get_signal+0x12b0/0x1390 kernel/signal.c:2859 arch_do_signal_or_restart+0x25/0x260 arch/x86/kernel/signal.c:306 exit_to_user_mode_loop kernel/entry/common.c:168 [inline] exit_to_user_mode_prepare+0x131/0x1a0 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x19/0x40 kernel/entry/common.c:296 do_syscall_64+0x46/0x90 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fec4b4926a9 Code: Unable to access opcode bytes at 0x7fec4b49267f. RSP: 002b:00007fec49f9dd78 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca RAX: fffffffffffffe00 RBX: 00000000006bc058 RCX: 00007fec4b4926a9 RDX: 0000000000000000 RSI: 0000000000000080 RDI: 00000000006bc058 RBP: 00000000006bc050 R08: 00000000007df998 R09: 00000000007df998 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006bc05c R13: fffffffffffffea8 R14: 000000000000000b R15: 000000000001fe40 </TASK> The root cause is that the worker can force fallback to TCP the first mptcp subflow, actually deleting the unaccepted msk socket. We can explicitly prevent the race delaying the unaccepted msk deletion at listener shutdown time. In case the closed subflow is later accepted, just drop the mptcp context and let the user-space deal with the paired mptcp socket. Fixes: b6985b9 ("mptcp: use the workqueue to destroy unaccepted sockets") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch <cpaasch@apple.com> Link: multipath-tcp/mptcp_net-next#375 Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net> Tested-by: Christoph Paasch <cpaasch@apple.com> Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 2a6a870 commit 6374044

File tree

3 files changed

+58
-33
lines changed

3 files changed

+58
-33
lines changed

net/mptcp/protocol.c

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2315,7 +2315,26 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
23152315
unsigned int flags)
23162316
{
23172317
struct mptcp_sock *msk = mptcp_sk(sk);
2318-
bool need_push, dispose_it;
2318+
bool dispose_it, need_push = false;
2319+
2320+
/* If the first subflow moved to a close state before accept, e.g. due
2321+
* to an incoming reset, mptcp either:
2322+
* - if either the subflow or the msk are dead, destroy the context
2323+
* (the subflow socket is deleted by inet_child_forget) and the msk
2324+
* - otherwise do nothing at the moment and take action at accept and/or
2325+
* listener shutdown - user-space must be able to accept() the closed
2326+
* socket.
2327+
*/
2328+
if (msk->in_accept_queue && msk->first == ssk) {
2329+
if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
2330+
return;
2331+
2332+
/* ensure later check in mptcp_worker() will dispose the msk */
2333+
sock_set_flag(sk, SOCK_DEAD);
2334+
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
2335+
mptcp_subflow_drop_ctx(ssk);
2336+
goto out_release;
2337+
}
23192338

23202339
dispose_it = !msk->subflow || ssk != msk->subflow->sk;
23212340
if (dispose_it)
@@ -2351,18 +2370,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
23512370
if (!inet_csk(ssk)->icsk_ulp_ops) {
23522371
WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
23532372
kfree_rcu(subflow, rcu);
2354-
} else if (msk->in_accept_queue && msk->first == ssk) {
2355-
/* if the first subflow moved to a close state, e.g. due to
2356-
* incoming reset and we reach here before inet_child_forget()
2357-
* the TCP stack could later try to close it via
2358-
* inet_csk_listen_stop(), or deliver it to the user space via
2359-
* accept().
2360-
* We can't delete the subflow - or risk a double free - nor let
2361-
* the msk survive - or will be leaked in the non accept scenario:
2362-
* fallback and let TCP cope with the subflow cleanup.
2363-
*/
2364-
WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
2365-
mptcp_subflow_drop_ctx(ssk);
23662373
} else {
23672374
/* otherwise tcp will dispose of the ssk and subflow ctx */
23682375
if (ssk->sk_state == TCP_LISTEN) {
@@ -2377,6 +2384,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
23772384
/* close acquired an extra ref */
23782385
__sock_put(ssk);
23792386
}
2387+
2388+
out_release:
23802389
release_sock(ssk);
23812390

23822391
sock_put(ssk);
@@ -2431,21 +2440,14 @@ static void __mptcp_close_subflow(struct sock *sk)
24312440
mptcp_close_ssk(sk, ssk, subflow);
24322441
}
24332442

2434-
/* if the MPC subflow has been closed before the msk is accepted,
2435-
* msk will never be accept-ed, close it now
2436-
*/
2437-
if (!msk->first && msk->in_accept_queue) {
2438-
sock_set_flag(sk, SOCK_DEAD);
2439-
inet_sk_state_store(sk, TCP_CLOSE);
2440-
}
24412443
}
24422444

2443-
static bool mptcp_check_close_timeout(const struct sock *sk)
2445+
static bool mptcp_should_close(const struct sock *sk)
24442446
{
24452447
s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
24462448
struct mptcp_subflow_context *subflow;
24472449

2448-
if (delta >= TCP_TIMEWAIT_LEN)
2450+
if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
24492451
return true;
24502452

24512453
/* if all subflows are in closed status don't bother with additional
@@ -2653,7 +2655,7 @@ static void mptcp_worker(struct work_struct *work)
26532655
* even if it is orphaned and in FIN_WAIT2 state
26542656
*/
26552657
if (sock_flag(sk, SOCK_DEAD)) {
2656-
if (mptcp_check_close_timeout(sk)) {
2658+
if (mptcp_should_close(sk)) {
26572659
inet_sk_state_store(sk, TCP_CLOSE);
26582660
mptcp_do_fastclose(sk);
26592661
}
@@ -2899,6 +2901,14 @@ static void __mptcp_destroy_sock(struct sock *sk)
28992901
sock_put(sk);
29002902
}
29012903

2904+
void __mptcp_unaccepted_force_close(struct sock *sk)
2905+
{
2906+
sock_set_flag(sk, SOCK_DEAD);
2907+
inet_sk_state_store(sk, TCP_CLOSE);
2908+
mptcp_do_fastclose(sk);
2909+
__mptcp_destroy_sock(sk);
2910+
}
2911+
29022912
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
29032913
{
29042914
/* Concurrent splices from sk_receive_queue into receive_queue will
@@ -3737,6 +3747,18 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
37373747
if (!ssk->sk_socket)
37383748
mptcp_sock_graft(ssk, newsock);
37393749
}
3750+
3751+
/* Do late cleanup for the first subflow as necessary. Also
3752+
* deal with bad peers not doing a complete shutdown.
3753+
*/
3754+
if (msk->first &&
3755+
unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
3756+
__mptcp_close_ssk(newsk, msk->first,
3757+
mptcp_subflow_ctx(msk->first), 0);
3758+
if (unlikely(list_empty(&msk->conn_list)))
3759+
inet_sk_state_store(newsk, TCP_CLOSE);
3760+
}
3761+
37403762
release_sock(newsk);
37413763
}
37423764

net/mptcp/protocol.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent);
634634
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
635635
bool __mptcp_close(struct sock *sk, long timeout);
636636
void mptcp_cancel_work(struct sock *sk);
637+
void __mptcp_unaccepted_force_close(struct sock *sk);
637638
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
638639

639640
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,

net/mptcp/subflow.c

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -723,9 +723,12 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
723723
if (!ctx)
724724
return;
725725

726-
subflow_ulp_fallback(ssk, ctx);
727-
if (ctx->conn)
728-
sock_put(ctx->conn);
726+
list_del(&mptcp_subflow_ctx(ssk)->node);
727+
if (inet_csk(ssk)->icsk_ulp_ops) {
728+
subflow_ulp_fallback(ssk, ctx);
729+
if (ctx->conn)
730+
sock_put(ctx->conn);
731+
}
729732

730733
kfree_rcu(ctx, rcu);
731734
}
@@ -1824,6 +1827,7 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
18241827
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
18251828
struct mptcp_sock *msk, *next, *head = NULL;
18261829
struct request_sock *req;
1830+
struct sock *sk;
18271831

18281832
/* build a list of all unaccepted mptcp sockets */
18291833
spin_lock_bh(&queue->rskq_lock);
@@ -1839,11 +1843,12 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
18391843
continue;
18401844

18411845
/* skip if already in list */
1842-
msk = mptcp_sk(subflow->conn);
1846+
sk = subflow->conn;
1847+
msk = mptcp_sk(sk);
18431848
if (msk->dl_next || msk == head)
18441849
continue;
18451850

1846-
sock_hold(subflow->conn);
1851+
sock_hold(sk);
18471852
msk->dl_next = head;
18481853
head = msk;
18491854
}
@@ -1857,16 +1862,13 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
18571862
release_sock(listener_ssk);
18581863

18591864
for (msk = head; msk; msk = next) {
1860-
struct sock *sk = (struct sock *)msk;
1865+
sk = (struct sock *)msk;
18611866

18621867
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
18631868
next = msk->dl_next;
18641869
msk->dl_next = NULL;
18651870

1866-
/* prevent the stack from later re-schedule the worker for
1867-
* this socket
1868-
*/
1869-
inet_sk_state_store(sk, TCP_CLOSE);
1871+
__mptcp_unaccepted_force_close(sk);
18701872
release_sock(sk);
18711873

18721874
/* lockdep will report a false positive ABBA deadlock

0 commit comments

Comments
 (0)