3535#include <linux/in.h>
3636#include <linux/module.h>
3737#include <net/tcp.h>
38+ #include <net/net_namespace.h>
39+ #include <net/netns/generic.h>
40+ #include <net/tcp.h>
3841
3942#include "rds.h"
4043#include "tcp.h"
@@ -250,16 +253,7 @@ static void rds_tcp_destroy_conns(void)
250253 }
251254}
252255
253- static void rds_tcp_exit (void )
254- {
255- rds_info_deregister_func (RDS_INFO_TCP_SOCKETS , rds_tcp_tc_info );
256- rds_tcp_listen_stop ();
257- rds_tcp_destroy_conns ();
258- rds_trans_unregister (& rds_tcp_transport );
259- rds_tcp_recv_exit ();
260- kmem_cache_destroy (rds_tcp_conn_slab );
261- }
262- module_exit (rds_tcp_exit );
256+ static void rds_tcp_exit (void );
263257
264258struct rds_transport rds_tcp_transport = {
265259 .laddr_check = rds_tcp_laddr_check ,
@@ -281,6 +275,136 @@ struct rds_transport rds_tcp_transport = {
281275 .t_prefer_loopback = 1 ,
282276};
283277
278+ static int rds_tcp_netid ;
279+
280+ /* per-network namespace private data for this module */
281+ struct rds_tcp_net {
282+ struct socket * rds_tcp_listen_sock ;
283+ struct work_struct rds_tcp_accept_w ;
284+ };
285+
286+ static void rds_tcp_accept_worker (struct work_struct * work )
287+ {
288+ struct rds_tcp_net * rtn = container_of (work ,
289+ struct rds_tcp_net ,
290+ rds_tcp_accept_w );
291+
292+ while (rds_tcp_accept_one (rtn -> rds_tcp_listen_sock ) == 0 )
293+ cond_resched ();
294+ }
295+
296+ void rds_tcp_accept_work (struct sock * sk )
297+ {
298+ struct net * net = sock_net (sk );
299+ struct rds_tcp_net * rtn = net_generic (net , rds_tcp_netid );
300+
301+ queue_work (rds_wq , & rtn -> rds_tcp_accept_w );
302+ }
303+
304+ static __net_init int rds_tcp_init_net (struct net * net )
305+ {
306+ struct rds_tcp_net * rtn = net_generic (net , rds_tcp_netid );
307+
308+ rtn -> rds_tcp_listen_sock = rds_tcp_listen_init (net );
309+ if (!rtn -> rds_tcp_listen_sock ) {
310+ pr_warn ("could not set up listen sock\n" );
311+ return - EAFNOSUPPORT ;
312+ }
313+ INIT_WORK (& rtn -> rds_tcp_accept_w , rds_tcp_accept_worker );
314+ return 0 ;
315+ }
316+
317+ static void __net_exit rds_tcp_exit_net (struct net * net )
318+ {
319+ struct rds_tcp_net * rtn = net_generic (net , rds_tcp_netid );
320+
321+ /* If rds_tcp_exit_net() is called as a result of netns deletion,
322+ * the rds_tcp_kill_sock() device notifier would already have cleaned
323+ * up the listen socket, thus there is no work to do in this function.
324+ *
325+ * If rds_tcp_exit_net() is called as a result of module unload,
326+ * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
327+ * we do need to clean up the listen socket here.
328+ */
329+ if (rtn -> rds_tcp_listen_sock ) {
330+ rds_tcp_listen_stop (rtn -> rds_tcp_listen_sock );
331+ rtn -> rds_tcp_listen_sock = NULL ;
332+ flush_work (& rtn -> rds_tcp_accept_w );
333+ }
334+ }
335+
336+ static struct pernet_operations rds_tcp_net_ops = {
337+ .init = rds_tcp_init_net ,
338+ .exit = rds_tcp_exit_net ,
339+ .id = & rds_tcp_netid ,
340+ .size = sizeof (struct rds_tcp_net ),
341+ };
342+
343+ static void rds_tcp_kill_sock (struct net * net )
344+ {
345+ struct rds_tcp_connection * tc , * _tc ;
346+ struct sock * sk ;
347+ LIST_HEAD (tmp_list );
348+ struct rds_tcp_net * rtn = net_generic (net , rds_tcp_netid );
349+
350+ rds_tcp_listen_stop (rtn -> rds_tcp_listen_sock );
351+ rtn -> rds_tcp_listen_sock = NULL ;
352+ flush_work (& rtn -> rds_tcp_accept_w );
353+ spin_lock_irq (& rds_tcp_conn_lock );
354+ list_for_each_entry_safe (tc , _tc , & rds_tcp_conn_list , t_tcp_node ) {
355+ struct net * c_net = read_pnet (& tc -> conn -> c_net );
356+
357+ if (net != c_net || !tc -> t_sock )
358+ continue ;
359+ list_move_tail (& tc -> t_tcp_node , & tmp_list );
360+ }
361+ spin_unlock_irq (& rds_tcp_conn_lock );
362+ list_for_each_entry_safe (tc , _tc , & tmp_list , t_tcp_node ) {
363+ sk = tc -> t_sock -> sk ;
364+ sk -> sk_prot -> disconnect (sk , 0 );
365+ tcp_done (sk );
366+ if (tc -> conn -> c_passive )
367+ rds_conn_destroy (tc -> conn -> c_passive );
368+ rds_conn_destroy (tc -> conn );
369+ }
370+ }
371+
372+ static int rds_tcp_dev_event (struct notifier_block * this ,
373+ unsigned long event , void * ptr )
374+ {
375+ struct net_device * dev = netdev_notifier_info_to_dev (ptr );
376+
377+ /* rds-tcp registers as a pernet subys, so the ->exit will only
378+ * get invoked after network acitivity has quiesced. We need to
379+ * clean up all sockets to quiesce network activity, and use
380+ * the unregistration of the per-net loopback device as a trigger
381+ * to start that cleanup.
382+ */
383+ if (event == NETDEV_UNREGISTER_FINAL &&
384+ dev -> ifindex == LOOPBACK_IFINDEX )
385+ rds_tcp_kill_sock (dev_net (dev ));
386+
387+ return NOTIFY_DONE ;
388+ }
389+
390+ static struct notifier_block rds_tcp_dev_notifier = {
391+ .notifier_call = rds_tcp_dev_event ,
392+ .priority = -10 , /* must be called after other network notifiers */
393+ };
394+
395+ static void rds_tcp_exit (void )
396+ {
397+ rds_info_deregister_func (RDS_INFO_TCP_SOCKETS , rds_tcp_tc_info );
398+ unregister_pernet_subsys (& rds_tcp_net_ops );
399+ if (unregister_netdevice_notifier (& rds_tcp_dev_notifier ))
400+ pr_warn ("could not unregister rds_tcp_dev_notifier\n" );
401+ rds_tcp_destroy_conns ();
402+ rds_trans_unregister (& rds_tcp_transport );
403+ rds_tcp_recv_exit ();
404+ kmem_cache_destroy (rds_tcp_conn_slab );
405+ }
406+ module_exit (rds_tcp_exit );
407+
284408static int rds_tcp_init (void )
285409{
286410 int ret ;
@@ -293,6 +417,16 @@ static int rds_tcp_init(void)
293417 goto out ;
294418 }
295419
420+ ret = register_netdevice_notifier (& rds_tcp_dev_notifier );
421+ if (ret ) {
422+ pr_warn ("could not register rds_tcp_dev_notifier\n" );
423+ goto out ;
424+ }
425+
426+ ret = register_pernet_subsys (& rds_tcp_net_ops );
427+ if (ret )
428+ goto out_slab ;
429+
296430 ret = rds_tcp_recv_init ();
297431 if (ret )
298432 goto out_slab ;
@@ -301,19 +435,14 @@ static int rds_tcp_init(void)
301435 if (ret )
302436 goto out_recv ;
303437
304- ret = rds_tcp_listen_init ();
305- if (ret )
306- goto out_register ;
307-
308438 rds_info_register_func (RDS_INFO_TCP_SOCKETS , rds_tcp_tc_info );
309439
310440 goto out ;
311441
312- out_register :
313- rds_trans_unregister (& rds_tcp_transport );
314442out_recv :
315443 rds_tcp_recv_exit ();
316444out_slab :
445+ unregister_pernet_subsys (& rds_tcp_net_ops );
317446 kmem_cache_destroy (rds_tcp_conn_slab );
318447out :
319448 return ret ;
0 commit comments