Skip to content

Commit 4d5c32e

Browse files
committed
Merge branch 'nexthop-group-fixes'
David Ahern says: ==================== nexthops: Fix 2 fundamental flaws with nexthop groups Nik's torture tests have exposed 2 fundamental mistakes with the initial nexthop code for groups. First, the nexthop entries and num_nh in the nh_grp struct should not be modified once the struct is set under rcu. Doing so has major effects on the datapath seeing valid nexthop entries. Second, the helpers in the header file were convenient for not repeating code, but they cause datapath walks to potentially see 2 different group structs after an rcu replace, disrupting a walk of the path objects. This second problem applies solely to IPv4 as I re-used too much of the existing code in walking legs of a multipath route. Patch 1 is a refactoring change to simplify the overhead of reviewing and understanding the change in patch 2, which fixes the update of nexthop groups when a component leg is removed. Patches 3-5 address the second problem. Patch 3 inlines the multipath check such that the mpath lookup and subsequent calls all use the same nh_grp struct. Patches 4 and 5 fix datapath uses of fib_info_num_path with iterative calls to fib_info_nhc. fib_info_num_path can be used in the control plane path in a 'for loop' with subsequent fib_info_nhc calls to get each leg since the nh_grp struct is only changed while holding the rtnl; the combination cannot be used in the data plane with external nexthops as it involves repeated dereferences of the nh_grp struct, which can change between calls. Similarly, nexthop_is_multipath can be used for branching decisions in the datapath since the nexthop type cannot be changed (a group cannot be converted to standalone and vice versa). Patch set developed in coordination with Nikolay Aleksandrov. He did a lot of work creating a good reproducer, discussing options to fix it and testing iterations. I have adapted Nik's commands into additional tests in the nexthops selftest script which I will send against -next. 
v2 - fixed whitespace errors ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 963bdfc + 1fd1c76 commit 4d5c32e

File tree

5 files changed

+205
-79
lines changed

5 files changed

+205
-79
lines changed

include/net/ip_fib.h

+12
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,16 @@ static inline int fib_num_tclassid_users(struct net *net)
447447
#endif
448448
int fib_unmerge(struct net *net);
449449

450+
static inline bool nhc_l3mdev_matches_dev(const struct fib_nh_common *nhc,
451+
const struct net_device *dev)
452+
{
453+
if (nhc->nhc_dev == dev ||
454+
l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex)
455+
return true;
456+
457+
return false;
458+
}
459+
450460
/* Exported by fib_semantics.c */
451461
int ip_fib_check_default(__be32 gw, struct net_device *dev);
452462
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
@@ -479,6 +489,8 @@ void fib_nh_common_release(struct fib_nh_common *nhc);
479489
void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri);
480490
void fib_trie_init(void);
481491
struct fib_table *fib_trie_table(u32 id, struct fib_table *alias);
492+
bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
493+
const struct flowi4 *flp);
482494

483495
static inline void fib_combine_itag(u32 *itag, const struct fib_result *res)
484496
{

include/net/nexthop.h

+84-16
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ struct nh_grp_entry {
7070
};
7171

7272
struct nh_group {
73+
struct nh_group *spare; /* spare group for removals */
7374
u16 num_nh;
7475
bool mpath;
7576
bool has_v4;
@@ -136,21 +137,20 @@ static inline unsigned int nexthop_num_path(const struct nexthop *nh)
136137
{
137138
unsigned int rc = 1;
138139

139-
if (nexthop_is_multipath(nh)) {
140+
if (nh->is_group) {
140141
struct nh_group *nh_grp;
141142

142143
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
143-
rc = nh_grp->num_nh;
144+
if (nh_grp->mpath)
145+
rc = nh_grp->num_nh;
144146
}
145147

146148
return rc;
147149
}
148150

149151
static inline
150-
struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel)
152+
struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
151153
{
152-
const struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
153-
154154
/* for_nexthops macros in fib_semantics.c grabs a pointer to
155155
* the nexthop before checking nhsel
156156
*/
@@ -185,12 +185,14 @@ static inline bool nexthop_is_blackhole(const struct nexthop *nh)
185185
{
186186
const struct nh_info *nhi;
187187

188-
if (nexthop_is_multipath(nh)) {
189-
if (nexthop_num_path(nh) > 1)
190-
return false;
191-
nh = nexthop_mpath_select(nh, 0);
192-
if (!nh)
188+
if (nh->is_group) {
189+
struct nh_group *nh_grp;
190+
191+
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
192+
if (nh_grp->num_nh > 1)
193193
return false;
194+
195+
nh = nh_grp->nh_entries[0].nh;
194196
}
195197

196198
nhi = rcu_dereference_rtnl(nh->nh_info);
@@ -216,16 +218,79 @@ struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
216218
BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
217219
BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
218220

219-
if (nexthop_is_multipath(nh)) {
220-
nh = nexthop_mpath_select(nh, nhsel);
221-
if (!nh)
222-
return NULL;
221+
if (nh->is_group) {
222+
struct nh_group *nh_grp;
223+
224+
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
225+
if (nh_grp->mpath) {
226+
nh = nexthop_mpath_select(nh_grp, nhsel);
227+
if (!nh)
228+
return NULL;
229+
}
223230
}
224231

225232
nhi = rcu_dereference_rtnl(nh->nh_info);
226233
return &nhi->fib_nhc;
227234
}
228235

236+
/* called from fib_table_lookup with rcu_lock */
237+
static inline
238+
struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
239+
int fib_flags,
240+
const struct flowi4 *flp,
241+
int *nhsel)
242+
{
243+
struct nh_info *nhi;
244+
245+
if (nh->is_group) {
246+
struct nh_group *nhg = rcu_dereference(nh->nh_grp);
247+
int i;
248+
249+
for (i = 0; i < nhg->num_nh; i++) {
250+
struct nexthop *nhe = nhg->nh_entries[i].nh;
251+
252+
nhi = rcu_dereference(nhe->nh_info);
253+
if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
254+
*nhsel = i;
255+
return &nhi->fib_nhc;
256+
}
257+
}
258+
} else {
259+
nhi = rcu_dereference(nh->nh_info);
260+
if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
261+
*nhsel = 0;
262+
return &nhi->fib_nhc;
263+
}
264+
}
265+
266+
return NULL;
267+
}
268+
269+
static inline bool nexthop_uses_dev(const struct nexthop *nh,
270+
const struct net_device *dev)
271+
{
272+
struct nh_info *nhi;
273+
274+
if (nh->is_group) {
275+
struct nh_group *nhg = rcu_dereference(nh->nh_grp);
276+
int i;
277+
278+
for (i = 0; i < nhg->num_nh; i++) {
279+
struct nexthop *nhe = nhg->nh_entries[i].nh;
280+
281+
nhi = rcu_dereference(nhe->nh_info);
282+
if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
283+
return true;
284+
}
285+
} else {
286+
nhi = rcu_dereference(nh->nh_info);
287+
if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
288+
return true;
289+
}
290+
291+
return false;
292+
}
293+
229294
static inline unsigned int fib_info_num_path(const struct fib_info *fi)
230295
{
231296
if (unlikely(fi->nh))
@@ -263,8 +328,11 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
263328
{
264329
struct nh_info *nhi;
265330

266-
if (nexthop_is_multipath(nh)) {
267-
nh = nexthop_mpath_select(nh, 0);
331+
if (nh->is_group) {
332+
struct nh_group *nh_grp;
333+
334+
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
335+
nh = nexthop_mpath_select(nh_grp, 0);
268336
if (!nh)
269337
return NULL;
270338
}

net/ipv4/fib_frontend.c

+10-9
Original file line numberDiff line numberDiff line change
@@ -309,17 +309,18 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
309309
{
310310
bool dev_match = false;
311311
#ifdef CONFIG_IP_ROUTE_MULTIPATH
312-
int ret;
312+
if (unlikely(fi->nh)) {
313+
dev_match = nexthop_uses_dev(fi->nh, dev);
314+
} else {
315+
int ret;
313316

314-
for (ret = 0; ret < fib_info_num_path(fi); ret++) {
315-
const struct fib_nh_common *nhc = fib_info_nhc(fi, ret);
317+
for (ret = 0; ret < fib_info_num_path(fi); ret++) {
318+
const struct fib_nh_common *nhc = fib_info_nhc(fi, ret);
316319

317-
if (nhc->nhc_dev == dev) {
318-
dev_match = true;
319-
break;
320-
} else if (l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) {
321-
dev_match = true;
322-
break;
320+
if (nhc_l3mdev_matches_dev(nhc, dev)) {
321+
dev_match = true;
322+
break;
323+
}
323324
}
324325
}
325326
#else

net/ipv4/fib_trie.c

+36-15
Original file line numberDiff line numberDiff line change
@@ -1371,6 +1371,26 @@ static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
13711371
return (key ^ prefix) & (prefix | -prefix);
13721372
}
13731373

1374+
bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
1375+
const struct flowi4 *flp)
1376+
{
1377+
if (nhc->nhc_flags & RTNH_F_DEAD)
1378+
return false;
1379+
1380+
if (ip_ignore_linkdown(nhc->nhc_dev) &&
1381+
nhc->nhc_flags & RTNH_F_LINKDOWN &&
1382+
!(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1383+
return false;
1384+
1385+
if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
1386+
if (flp->flowi4_oif &&
1387+
flp->flowi4_oif != nhc->nhc_oif)
1388+
return false;
1389+
}
1390+
1391+
return true;
1392+
}
1393+
13741394
/* should be called with rcu_read_lock */
13751395
int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
13761396
struct fib_result *res, int fib_flags)
@@ -1503,6 +1523,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
15031523
/* Step 3: Process the leaf, if that fails fall back to backtracing */
15041524
hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
15051525
struct fib_info *fi = fa->fa_info;
1526+
struct fib_nh_common *nhc;
15061527
int nhsel, err;
15071528

15081529
if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
@@ -1528,26 +1549,25 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
15281549
if (fi->fib_flags & RTNH_F_DEAD)
15291550
continue;
15301551

1531-
if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) {
1532-
err = fib_props[RTN_BLACKHOLE].error;
1533-
goto out_reject;
1552+
if (unlikely(fi->nh)) {
1553+
if (nexthop_is_blackhole(fi->nh)) {
1554+
err = fib_props[RTN_BLACKHOLE].error;
1555+
goto out_reject;
1556+
}
1557+
1558+
nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp,
1559+
&nhsel);
1560+
if (nhc)
1561+
goto set_result;
1562+
goto miss;
15341563
}
15351564

15361565
for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
1537-
struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
1566+
nhc = fib_info_nhc(fi, nhsel);
15381567

1539-
if (nhc->nhc_flags & RTNH_F_DEAD)
1568+
if (!fib_lookup_good_nhc(nhc, fib_flags, flp))
15401569
continue;
1541-
if (ip_ignore_linkdown(nhc->nhc_dev) &&
1542-
nhc->nhc_flags & RTNH_F_LINKDOWN &&
1543-
!(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1544-
continue;
1545-
if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
1546-
if (flp->flowi4_oif &&
1547-
flp->flowi4_oif != nhc->nhc_oif)
1548-
continue;
1549-
}
1550-
1570+
set_result:
15511571
if (!(fib_flags & FIB_LOOKUP_NOREF))
15521572
refcount_inc(&fi->fib_clntref);
15531573

@@ -1568,6 +1588,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
15681588
return err;
15691589
}
15701590
}
1591+
miss:
15711592
#ifdef CONFIG_IP_FIB_TRIE_STATS
15721593
this_cpu_inc(stats->semantic_match_miss);
15731594
#endif

0 commit comments

Comments
 (0)