99#include <linux/rtnetlink.h>
1010#include <linux/slab.h>
1111#include <net/nexthop.h>
12+ #include <net/route.h>
1213#include <net/sock.h>
1314
/* log2 of the number of buckets in the per-netns device hash table */
#define NH_DEV_HASHBITS 8
/* number of buckets; nh_dev_hashfn() masks its result with SIZE - 1 */
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
17+
1418static const struct nla_policy rtm_nh_policy [NHA_MAX + 1 ] = {
1519 [NHA_UNSPEC ] = { .strict_start_type = NHA_UNSPEC + 1 },
1620 [NHA_ID ] = { .type = NLA_U32 },
@@ -25,12 +29,39 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
2529 [NHA_MASTER ] = { .type = NLA_U32 },
2630};
2731
32+ static unsigned int nh_dev_hashfn (unsigned int val )
33+ {
34+ unsigned int mask = NH_DEV_HASHSIZE - 1 ;
35+
36+ return (val ^
37+ (val >> NH_DEV_HASHBITS ) ^
38+ (val >> (NH_DEV_HASHBITS * 2 ))) & mask ;
39+ }
40+
41+ static void nexthop_devhash_add (struct net * net , struct nh_info * nhi )
42+ {
43+ struct net_device * dev = nhi -> fib_nhc .nhc_dev ;
44+ struct hlist_head * head ;
45+ unsigned int hash ;
46+
47+ WARN_ON (!dev );
48+
49+ hash = nh_dev_hashfn (dev -> ifindex );
50+ head = & net -> nexthop .devhash [hash ];
51+ hlist_add_head (& nhi -> dev_hash , head );
52+ }
53+
2854void nexthop_free_rcu (struct rcu_head * head )
2955{
3056 struct nexthop * nh = container_of (head , struct nexthop , rcu );
3157 struct nh_info * nhi ;
3258
3359 nhi = rcu_dereference_raw (nh -> nh_info );
60+ switch (nhi -> family ) {
61+ case AF_INET :
62+ fib_nh_release (nh -> net , & nhi -> fib_nh );
63+ break ;
64+ }
3465 kfree (nhi );
3566
3667 kfree (nh );
@@ -96,6 +127,7 @@ static u32 nh_find_unused_id(struct net *net)
96127static int nh_fill_node (struct sk_buff * skb , struct nexthop * nh ,
97128 int event , u32 portid , u32 seq , unsigned int nlflags )
98129{
130+ struct fib_nh * fib_nh ;
99131 struct nlmsghdr * nlh ;
100132 struct nh_info * nhi ;
101133 struct nhmsg * nhm ;
@@ -120,6 +152,22 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
120152 if (nla_put_flag (skb , NHA_BLACKHOLE ))
121153 goto nla_put_failure ;
122154 goto out ;
155+ } else {
156+ const struct net_device * dev ;
157+
158+ dev = nhi -> fib_nhc .nhc_dev ;
159+ if (dev && nla_put_u32 (skb , NHA_OIF , dev -> ifindex ))
160+ goto nla_put_failure ;
161+ }
162+
163+ nhm -> nh_scope = nhi -> fib_nhc .nhc_scope ;
164+ switch (nhi -> family ) {
165+ case AF_INET :
166+ fib_nh = & nhi -> fib_nh ;
167+ if (fib_nh -> fib_nh_gw_family &&
168+ nla_put_u32 (skb , NHA_GATEWAY , fib_nh -> fib_nh_gw4 ))
169+ goto nla_put_failure ;
170+ break ;
123171 }
124172
125173out :
@@ -132,13 +180,21 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
132180
133181static size_t nh_nlmsg_size (struct nexthop * nh )
134182{
183+ struct nh_info * nhi = rtnl_dereference (nh -> nh_info );
135184 size_t sz = nla_total_size (4 ); /* NHA_ID */
136185
137186 /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
138187 * are mutually exclusive
139188 */
140189 sz += nla_total_size (4 ); /* NHA_OIF */
141190
191+ switch (nhi -> family ) {
192+ case AF_INET :
193+ if (nhi -> fib_nh .fib_nh_gw_family )
194+ sz += nla_total_size (4 ); /* NHA_GATEWAY */
195+ break ;
196+ }
197+
142198 return sz ;
143199}
144200
@@ -169,6 +225,15 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
169225 rtnl_set_sk_err (info -> nl_net , RTNLGRP_NEXTHOP , err );
170226}
171227
228+ static void __remove_nexthop (struct net * net , struct nexthop * nh )
229+ {
230+ struct nh_info * nhi ;
231+
232+ nhi = rtnl_dereference (nh -> nh_info );
233+ if (nhi -> fib_nhc .nhc_dev )
234+ hlist_del (& nhi -> dev_hash );
235+ }
236+
172237static void remove_nexthop (struct net * net , struct nexthop * nh ,
173238 bool skip_fib , struct nl_info * nlinfo )
174239{
@@ -178,6 +243,7 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
178243 if (nlinfo )
179244 nexthop_notify (RTM_DELNEXTHOP , nh , nlinfo );
180245
246+ __remove_nexthop (net , nh );
181247 nh_base_seq_inc (net );
182248
183249 nexthop_put (nh );
@@ -244,6 +310,24 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
244310 return rc ;
245311}
246312
313+ /* rtnl */
314+ /* remove all nexthops tied to a device being deleted */
315+ static void nexthop_flush_dev (struct net_device * dev )
316+ {
317+ unsigned int hash = nh_dev_hashfn (dev -> ifindex );
318+ struct net * net = dev_net (dev );
319+ struct hlist_head * head = & net -> nexthop .devhash [hash ];
320+ struct hlist_node * n ;
321+ struct nh_info * nhi ;
322+
323+ hlist_for_each_entry_safe (nhi , n , head , dev_hash ) {
324+ if (nhi -> fib_nhc .nhc_dev != dev )
325+ continue ;
326+
327+ remove_nexthop (net , nhi -> nh_parent , false, NULL );
328+ }
329+ }
330+
247331/* rtnl; called when net namespace is deleted */
248332static void flush_all_nexthops (struct net * net )
249333{
@@ -258,6 +342,38 @@ static void flush_all_nexthops(struct net *net)
258342 }
259343}
260344
345+ static int nh_create_ipv4 (struct net * net , struct nexthop * nh ,
346+ struct nh_info * nhi , struct nh_config * cfg ,
347+ struct netlink_ext_ack * extack )
348+ {
349+ struct fib_nh * fib_nh = & nhi -> fib_nh ;
350+ struct fib_config fib_cfg = {
351+ .fc_oif = cfg -> nh_ifindex ,
352+ .fc_gw4 = cfg -> gw .ipv4 ,
353+ .fc_gw_family = cfg -> gw .ipv4 ? AF_INET : 0 ,
354+ .fc_flags = cfg -> nh_flags ,
355+ };
356+ u32 tb_id = l3mdev_fib_table (cfg -> dev );
357+ int err = - EINVAL ;
358+
359+ err = fib_nh_init (net , fib_nh , & fib_cfg , 1 , extack );
360+ if (err ) {
361+ fib_nh_release (net , fib_nh );
362+ goto out ;
363+ }
364+
365+ /* sets nh_dev if successful */
366+ err = fib_check_nh (net , fib_nh , tb_id , 0 , extack );
367+ if (!err ) {
368+ nh -> nh_flags = fib_nh -> fib_nh_flags ;
369+ fib_info_update_nh_saddr (net , fib_nh , fib_nh -> fib_nh_scope );
370+ } else {
371+ fib_nh_release (net , fib_nh );
372+ }
373+ out :
374+ return err ;
375+ }
376+
261377static struct nexthop * nexthop_create (struct net * net , struct nh_config * cfg ,
262378 struct netlink_ext_ack * extack )
263379{
@@ -287,12 +403,21 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
287403 cfg -> nh_ifindex = net -> loopback_dev -> ifindex ;
288404 }
289405
406+ switch (cfg -> nh_family ) {
407+ case AF_INET :
408+ err = nh_create_ipv4 (net , nh , nhi , cfg , extack );
409+ break ;
410+ }
411+
290412 if (err ) {
291413 kfree (nhi );
292414 kfree (nh );
293415 return ERR_PTR (err );
294416 }
295417
418+ /* add the entry to the device based hash */
419+ nexthop_devhash_add (net , nhi );
420+
296421 rcu_assign_pointer (nh -> nh_info , nhi );
297422
298423 return nh ;
@@ -329,6 +454,7 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
329454
330455 err = insert_nexthop (net , nh , cfg , extack );
331456 if (err ) {
457+ __remove_nexthop (net , nh );
332458 nexthop_put (nh );
333459 nh = ERR_PTR (err );
334460 }
@@ -360,6 +486,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
360486 }
361487
362488 switch (nhm -> nh_family ) {
489+ case AF_INET :
490+ break ;
363491 default :
364492 NL_SET_ERR_MSG (extack , "Invalid address family" );
365493 goto out ;
@@ -416,6 +544,32 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
416544 goto out ;
417545 }
418546
547+ err = - EINVAL ;
548+ if (tb [NHA_GATEWAY ]) {
549+ struct nlattr * gwa = tb [NHA_GATEWAY ];
550+
551+ switch (cfg -> nh_family ) {
552+ case AF_INET :
553+ if (nla_len (gwa ) != sizeof (u32 )) {
554+ NL_SET_ERR_MSG (extack , "Invalid gateway" );
555+ goto out ;
556+ }
557+ cfg -> gw .ipv4 = nla_get_be32 (gwa );
558+ break ;
559+ default :
560+ NL_SET_ERR_MSG (extack ,
561+ "Unknown address family for gateway" );
562+ goto out ;
563+ }
564+ } else {
565+ /* device only nexthop (no gateway) */
566+ if (cfg -> nh_flags & RTNH_F_ONLINK ) {
567+ NL_SET_ERR_MSG (extack ,
568+ "ONLINK flag can not be set for nexthop without a gateway" );
569+ goto out ;
570+ }
571+ }
572+
419573 err = 0 ;
420574out :
421575 return err ;
@@ -683,16 +837,68 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
683837 return err ;
684838}
685839
840+ static void nexthop_sync_mtu (struct net_device * dev , u32 orig_mtu )
841+ {
842+ unsigned int hash = nh_dev_hashfn (dev -> ifindex );
843+ struct net * net = dev_net (dev );
844+ struct hlist_head * head = & net -> nexthop .devhash [hash ];
845+ struct hlist_node * n ;
846+ struct nh_info * nhi ;
847+
848+ hlist_for_each_entry_safe (nhi , n , head , dev_hash ) {
849+ if (nhi -> fib_nhc .nhc_dev == dev ) {
850+ if (nhi -> family == AF_INET )
851+ fib_nhc_update_mtu (& nhi -> fib_nhc , dev -> mtu ,
852+ orig_mtu );
853+ }
854+ }
855+ }
856+
857+ /* rtnl */
858+ static int nh_netdev_event (struct notifier_block * this ,
859+ unsigned long event , void * ptr )
860+ {
861+ struct net_device * dev = netdev_notifier_info_to_dev (ptr );
862+ struct netdev_notifier_info_ext * info_ext ;
863+
864+ switch (event ) {
865+ case NETDEV_DOWN :
866+ case NETDEV_UNREGISTER :
867+ nexthop_flush_dev (dev );
868+ break ;
869+ case NETDEV_CHANGE :
870+ if (!(dev_get_flags (dev ) & (IFF_RUNNING | IFF_LOWER_UP )))
871+ nexthop_flush_dev (dev );
872+ break ;
873+ case NETDEV_CHANGEMTU :
874+ info_ext = ptr ;
875+ nexthop_sync_mtu (dev , info_ext -> ext .mtu );
876+ rt_cache_flush (dev_net (dev ));
877+ break ;
878+ }
879+ return NOTIFY_DONE ;
880+ }
881+
/* Notifier block that routes netdevice events to nh_netdev_event();
 * registered from nexthop_init() via register_netdevice_notifier().
 */
static struct notifier_block nh_netdev_notifier = {
	.notifier_call = nh_netdev_event,
};
885+
686886static void __net_exit nexthop_net_exit (struct net * net )
687887{
688888 rtnl_lock ();
689889 flush_all_nexthops (net );
690890 rtnl_unlock ();
891+ kfree (net -> nexthop .devhash );
691892}
692893
693894static int __net_init nexthop_net_init (struct net * net )
694895{
896+ size_t sz = sizeof (struct hlist_head ) * NH_DEV_HASHSIZE ;
897+
695898 net -> nexthop .rb_root = RB_ROOT ;
899+ net -> nexthop .devhash = kzalloc (sz , GFP_KERNEL );
900+ if (!net -> nexthop .devhash )
901+ return - ENOMEM ;
696902
697903 return 0 ;
698904}
@@ -706,6 +912,8 @@ static int __init nexthop_init(void)
706912{
707913 register_pernet_subsys (& nexthop_net_ops );
708914
915+ register_netdevice_notifier (& nh_netdev_notifier );
916+
709917 rtnl_register (PF_UNSPEC , RTM_NEWNEXTHOP , rtm_new_nexthop , NULL , 0 );
710918 rtnl_register (PF_UNSPEC , RTM_DELNEXTHOP , rtm_del_nexthop , NULL , 0 );
711919 rtnl_register (PF_UNSPEC , RTM_GETNEXTHOP , rtm_get_nexthop ,
0 commit comments