Skip to content

Commit 38428d6

Browse files
roopa-prabhu authored and davem330 committed
nexthop: support for fdb ecmp nexthops
This patch introduces ecmp nexthops and nexthop groups for mac fdb entries. In subsequent patches these are used by the vxlan driver's fdb entries. The use case is E-VPN multihoming [1,2,3], which requires bridged vxlan traffic to be load balanced to remote switches (vteps) belonging to the same multi-homed ethernet segment (this is analogous to a multi-homed LAG, but over vxlan).

Changes include a new nexthop flag attribute, NHA_FDB, for nexthops referenced by fdb entries. These nexthops only have an ip address (gateway). This patch includes appropriate checks to prevent routes from referencing such nexthops.

example:
$ ip nexthop add id 12 via 172.16.1.2 fdb
$ ip nexthop add id 13 via 172.16.1.3 fdb
$ ip nexthop add id 102 group 12/13 fdb
$ bridge fdb add 02:02:00:00:00:13 dev vxlan1000 nhid 102 self

[1] E-VPN: https://tools.ietf.org/html/rfc7432
[2] E-VPN VxLAN: https://tools.ietf.org/html/rfc8365
[3] LPC talk with mention of nexthop groups for L2 ecmp: http://vger.kernel.org/lpc_net2018_talks/scaling_bridge_fdb_database_slidesV3.pdf

v4 - fixed uninitialized variable reported by kernel test robot

Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 7b1b843 commit 38428d6

File tree

5 files changed

+148
-25
lines changed

5 files changed

+148
-25
lines changed

include/net/ip6_fib.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ struct fib6_config {
6565
struct nl_info fc_nlinfo;
6666
struct nlattr *fc_encap;
6767
u16 fc_encap_type;
68+
bool fc_is_fdb;
6869
};
6970

7071
struct fib6_node {

include/net/nexthop.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ struct nh_config {
2626
u8 nh_family;
2727
u8 nh_protocol;
2828
u8 nh_blackhole;
29+
u8 nh_fdb;
2930
u32 nh_flags;
3031

3132
int nh_ifindex;
@@ -52,6 +53,7 @@ struct nh_info {
5253

5354
u8 family;
5455
bool reject_nh;
56+
bool fdb_nh;
5557

5658
union {
5759
struct fib_nh_common fib_nhc;
@@ -80,6 +82,7 @@ struct nexthop {
8082
struct rb_node rb_node; /* entry on netns rbtree */
8183
struct list_head fi_list; /* v4 entries using nh */
8284
struct list_head f6i_list; /* v6 entries using nh */
85+
struct list_head fdb_list; /* fdb entries using this nh */
8386
struct list_head grp_list; /* nh group entries using this nh */
8487
struct net *net;
8588

@@ -88,6 +91,7 @@ struct nexthop {
8891
u8 protocol; /* app managing this nh */
8992
u8 nh_flags;
9093
bool is_group;
94+
bool is_fdb_nh;
9195

9296
refcount_t refcnt;
9397
struct rcu_head rcu;
@@ -304,4 +308,32 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
304308
int nexthop_for_each_fib6_nh(struct nexthop *nh,
305309
int (*cb)(struct fib6_nh *nh, void *arg),
306310
void *arg);
311+
312+
/* Return the address family stored in the nh_info of @nh.
 *
 * nh->nh_info is read with rcu_dereference_rtnl().
 *
 * NOTE(review): nh->is_group is not checked here; presumably callers only
 * pass single (non-group) nexthops - confirm against callers.
 */
static inline int nexthop_get_family(struct nexthop *nh)
313+
{
314+
struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
315+
316+
return nhi->family;
317+
}
318+
319+
/* Return a pointer to the fib_nh_common (fib_nhc) member of the nh_info
 * of @nh.
 *
 * nh->nh_info is read with rcu_dereference_rtnl().
 *
 * NOTE(review): neither nh->is_group nor nh->is_fdb_nh is checked here;
 * presumably intended for single fdb nexthops only - confirm.
 */
static inline
320+
struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
321+
{
322+
struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
323+
324+
return &nhi->fib_nhc;
325+
}
326+
327+
/* Pick a path of @nh for @hash via nexthop_select_path() and return the
 * fib_nh_common (fib_nhc) member of the selected nexthop's nh_info.
 *
 * Returns NULL when nexthop_select_path() returns no nexthop.
 *
 * NOTE(review): nh_info is read with plain rcu_dereference() here, unlike
 * nexthop_get_family() and nexthop_fdb_nhc(), which use
 * rcu_dereference_rtnl(); presumably this helper is only called under
 * rcu_read_lock() - confirm.
 */
static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
328+
int hash)
329+
{
330+
struct nh_info *nhi;
331+
struct nexthop *nhp;
332+
333+
nhp = nexthop_select_path(nh, hash);
334+
if (unlikely(!nhp))
335+
return NULL;
336+
nhi = rcu_dereference(nhp->nh_info);
337+
return &nhi->fib_nhc;
338+
}
307339
#endif

include/uapi/linux/nexthop.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ enum {
4949
NHA_GROUPS, /* flag; only return nexthop groups in dump */
5050
NHA_MASTER, /* u32; only return nexthops with given master dev */
5151

52+
NHA_FDB, /* flag; nexthop belongs to a bridge fdb */
53+
/* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */
54+
5255
__NHA_MAX,
5356
};
5457

net/ipv4/nexthop.c

Lines changed: 107 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
3333
[NHA_ENCAP] = { .type = NLA_NESTED },
3434
[NHA_GROUPS] = { .type = NLA_FLAG },
3535
[NHA_MASTER] = { .type = NLA_U32 },
36+
[NHA_FDB] = { .type = NLA_FLAG },
3637
};
3738

3839
static unsigned int nh_dev_hashfn(unsigned int val)
@@ -107,6 +108,7 @@ static struct nexthop *nexthop_alloc(void)
107108
INIT_LIST_HEAD(&nh->fi_list);
108109
INIT_LIST_HEAD(&nh->f6i_list);
109110
INIT_LIST_HEAD(&nh->grp_list);
111+
INIT_LIST_HEAD(&nh->fdb_list);
110112
}
111113
return nh;
112114
}
@@ -227,6 +229,9 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
227229
if (nla_put_u32(skb, NHA_ID, nh->id))
228230
goto nla_put_failure;
229231

232+
if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB))
233+
goto nla_put_failure;
234+
230235
if (nh->is_group) {
231236
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
232237

@@ -241,7 +246,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
241246
if (nla_put_flag(skb, NHA_BLACKHOLE))
242247
goto nla_put_failure;
243248
goto out;
244-
} else {
249+
} else if (!nh->is_fdb_nh) {
245250
const struct net_device *dev;
246251

247252
dev = nhi->fib_nhc.nhc_dev;
@@ -387,12 +392,35 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
387392
return true;
388393
}
389394

395+
/* Validate one member nexthop of an fdb nexthop group.
 *
 * @nh:        candidate group member
 * @nh_family: in/out; family of the members accepted so far. AF_UNSPEC
 *             means none recorded yet, in which case it is set to the
 *             family of @nh.
 * @extack:    receives an error message on failure
 *
 * Returns 0 if @nh is an fdb nexthop whose family matches *nh_family,
 * -EINVAL otherwise.
 */
static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
396+
struct netlink_ext_ack *extack)
397+
{
398+
struct nh_info *nhi;
399+
400+
if (!nh->is_fdb_nh) {
401+
NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
402+
return -EINVAL;
403+
}
404+
405+
nhi = rtnl_dereference(nh->nh_info);
406+
/* first member fixes the group's family; later members must match it */
if (*nh_family == AF_UNSPEC) {
407+
*nh_family = nhi->family;
408+
} else if (*nh_family != nhi->family) {
409+
NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops");
410+
return -EINVAL;
411+
}
412+
413+
return 0;
414+
}
415+
390416
static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
391417
struct netlink_ext_ack *extack)
392418
{
393419
unsigned int len = nla_len(tb[NHA_GROUP]);
420+
u8 nh_family = AF_UNSPEC;
394421
struct nexthop_grp *nhg;
395422
unsigned int i, j;
423+
u8 nhg_fdb = 0;
396424

397425
if (len & (sizeof(struct nexthop_grp) - 1)) {
398426
NL_SET_ERR_MSG(extack,
@@ -421,6 +449,8 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
421449
}
422450
}
423451

452+
if (tb[NHA_FDB])
453+
nhg_fdb = 1;
424454
nhg = nla_data(tb[NHA_GROUP]);
425455
for (i = 0; i < len; ++i) {
426456
struct nexthop *nh;
@@ -432,11 +462,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
432462
}
433463
if (!valid_group_nh(nh, len, extack))
434464
return -EINVAL;
465+
466+
if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
467+
return -EINVAL;
468+
469+
if (!nhg_fdb && nh->is_fdb_nh) {
470+
NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
471+
return -EINVAL;
472+
}
435473
}
436474
for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) {
437475
if (!tb[i])
438476
continue;
439-
477+
if (tb[NHA_FDB])
478+
continue;
440479
NL_SET_ERR_MSG(extack,
441480
"No other attributes can be set in nexthop groups");
442481
return -EINVAL;
@@ -495,6 +534,9 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
495534
if (hash > atomic_read(&nhge->upper_bound))
496535
continue;
497536

537+
if (nhge->nh->is_fdb_nh)
538+
return nhge->nh;
539+
498540
/* nexthops always check if it is good and does
499541
* not rely on a sysctl for this behavior
500542
*/
@@ -564,6 +606,11 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
564606
{
565607
struct nh_info *nhi;
566608

609+
if (nh->is_fdb_nh) {
610+
NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
611+
return -EINVAL;
612+
}
613+
567614
/* fib6_src is unique to a fib6_info and limits the ability to cache
568615
* routes in fib6_nh within a nexthop that is potentially shared
569616
* across multiple fib entries. If the config wants to use source
@@ -640,6 +687,12 @@ int fib_check_nexthop(struct nexthop *nh, u8 scope,
640687
{
641688
int err = 0;
642689

690+
if (nh->is_fdb_nh) {
691+
NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
692+
err = -EINVAL;
693+
goto out;
694+
}
695+
643696
if (nh->is_group) {
644697
struct nh_group *nhg;
645698

@@ -1125,6 +1178,9 @@ static struct nexthop *nexthop_create_group(struct net *net,
11251178
nh_group_rebalance(nhg);
11261179
}
11271180

1181+
if (cfg->nh_fdb)
1182+
nh->is_fdb_nh = 1;
1183+
11281184
rcu_assign_pointer(nh->nh_grp, nhg);
11291185

11301186
return nh;
@@ -1152,7 +1208,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
11521208
.fc_encap = cfg->nh_encap,
11531209
.fc_encap_type = cfg->nh_encap_type,
11541210
};
1155-
u32 tb_id = l3mdev_fib_table(cfg->dev);
1211+
u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
11561212
int err;
11571213

11581214
err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
@@ -1161,6 +1217,9 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
11611217
goto out;
11621218
}
11631219

1220+
if (nh->is_fdb_nh)
1221+
goto out;
1222+
11641223
/* sets nh_dev if successful */
11651224
err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
11661225
if (!err) {
@@ -1186,6 +1245,7 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh,
11861245
.fc_flags = cfg->nh_flags,
11871246
.fc_encap = cfg->nh_encap,
11881247
.fc_encap_type = cfg->nh_encap_type,
1248+
.fc_is_fdb = cfg->nh_fdb,
11891249
};
11901250
int err;
11911251

@@ -1227,6 +1287,9 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
12271287
nhi->family = cfg->nh_family;
12281288
nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
12291289

1290+
if (cfg->nh_fdb)
1291+
nh->is_fdb_nh = 1;
1292+
12301293
if (cfg->nh_blackhole) {
12311294
nhi->reject_nh = 1;
12321295
cfg->nh_ifindex = net->loopback_dev->ifindex;
@@ -1248,7 +1311,8 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
12481311
}
12491312

12501313
/* add the entry to the device based hash */
1251-
nexthop_devhash_add(net, nhi);
1314+
if (!nh->is_fdb_nh)
1315+
nexthop_devhash_add(net, nhi);
12521316

12531317
rcu_assign_pointer(nh->nh_info, nhi);
12541318

@@ -1352,6 +1416,19 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
13521416
if (tb[NHA_ID])
13531417
cfg->nh_id = nla_get_u32(tb[NHA_ID]);
13541418

1419+
if (tb[NHA_FDB]) {
1420+
if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
1421+
tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
1422+
NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
1423+
goto out;
1424+
}
1425+
if (nhm->nh_flags) {
1426+
NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
1427+
goto out;
1428+
}
1429+
cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
1430+
}
1431+
13551432
if (tb[NHA_GROUP]) {
13561433
if (nhm->nh_family != AF_UNSPEC) {
13571434
NL_SET_ERR_MSG(extack, "Invalid family for group");
@@ -1375,8 +1452,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
13751452

13761453
if (tb[NHA_BLACKHOLE]) {
13771454
if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
1378-
tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
1379-
NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif");
1455+
tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
1456+
NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
13801457
goto out;
13811458
}
13821459

@@ -1385,26 +1462,28 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
13851462
goto out;
13861463
}
13871464

1388-
if (!tb[NHA_OIF]) {
1389-
NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops");
1465+
if (!cfg->nh_fdb && !tb[NHA_OIF]) {
1466+
NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
13901467
goto out;
13911468
}
13921469

1393-
cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
1394-
if (cfg->nh_ifindex)
1395-
cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
1470+
if (!cfg->nh_fdb && tb[NHA_OIF]) {
1471+
cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
1472+
if (cfg->nh_ifindex)
1473+
cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
13961474

1397-
if (!cfg->dev) {
1398-
NL_SET_ERR_MSG(extack, "Invalid device index");
1399-
goto out;
1400-
} else if (!(cfg->dev->flags & IFF_UP)) {
1401-
NL_SET_ERR_MSG(extack, "Nexthop device is not up");
1402-
err = -ENETDOWN;
1403-
goto out;
1404-
} else if (!netif_carrier_ok(cfg->dev)) {
1405-
NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
1406-
err = -ENETDOWN;
1407-
goto out;
1475+
if (!cfg->dev) {
1476+
NL_SET_ERR_MSG(extack, "Invalid device index");
1477+
goto out;
1478+
} else if (!(cfg->dev->flags & IFF_UP)) {
1479+
NL_SET_ERR_MSG(extack, "Nexthop device is not up");
1480+
err = -ENETDOWN;
1481+
goto out;
1482+
} else if (!netif_carrier_ok(cfg->dev)) {
1483+
NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
1484+
err = -ENETDOWN;
1485+
goto out;
1486+
}
14081487
}
14091488

14101489
err = -EINVAL;
@@ -1633,7 +1712,7 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
16331712

16341713
static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
16351714
int *master_idx, bool *group_filter,
1636-
struct netlink_callback *cb)
1715+
bool *fdb_filter, struct netlink_callback *cb)
16371716
{
16381717
struct netlink_ext_ack *extack = cb->extack;
16391718
struct nlattr *tb[NHA_MAX + 1];
@@ -1670,6 +1749,9 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
16701749
case NHA_GROUPS:
16711750
*group_filter = true;
16721751
break;
1752+
case NHA_FDB:
1753+
*fdb_filter = true;
1754+
break;
16731755
default:
16741756
NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
16751757
return -EINVAL;
@@ -1688,17 +1770,17 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
16881770
/* rtnl */
16891771
static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
16901772
{
1773+
bool group_filter = false, fdb_filter = false;
16911774
struct nhmsg *nhm = nlmsg_data(cb->nlh);
16921775
int dev_filter_idx = 0, master_idx = 0;
16931776
struct net *net = sock_net(skb->sk);
16941777
struct rb_root *root = &net->nexthop.rb_root;
1695-
bool group_filter = false;
16961778
struct rb_node *node;
16971779
int idx = 0, s_idx;
16981780
int err;
16991781

17001782
err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
1701-
&group_filter, cb);
1783+
&group_filter, &fdb_filter, cb);
17021784
if (err < 0)
17031785
return err;
17041786

0 commit comments

Comments
 (0)