Skip to content

Commit 5eb902b

Browse files
ThinkerYzu1 authored and davem330 committed
net/ipv6: Remove expired routes with a separated list of routes.
FIB6 GC walks the trees of fib6_tables to remove expired routes. Walking a tree can be expensive if the number of routes in a table is large, even if most of them are permanent. Checking only the routes kept on a separate list of routes that have an expiration time avoids this potential issue.

Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 60df43d commit 5eb902b

File tree

5 files changed

+154
-16
lines changed

5 files changed

+154
-16
lines changed

include/net/ip6_fib.h

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,9 @@ struct fib6_info {
173173

174174
refcount_t fib6_ref;
175175
unsigned long expires;
176+
177+
struct hlist_node gc_link;
178+
176179
struct dst_metrics *fib6_metrics;
177180
#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1]
178181

@@ -241,12 +244,18 @@ static inline bool fib6_requires_src(const struct fib6_info *rt)
241244
return rt->fib6_src.plen > 0;
242245
}
243246

247+
/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
248+
* been added to a table before.
249+
*/
244250
static inline void fib6_clean_expires(struct fib6_info *f6i)
245251
{
246252
f6i->fib6_flags &= ~RTF_EXPIRES;
247253
f6i->expires = 0;
248254
}
249255

256+
/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
257+
* been added to a table before.
258+
*/
250259
static inline void fib6_set_expires(struct fib6_info *f6i,
251260
unsigned long expires)
252261
{
@@ -327,8 +336,10 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i)
327336

328337
static inline void fib6_info_release(struct fib6_info *f6i)
329338
{
330-
if (f6i && refcount_dec_and_test(&f6i->fib6_ref))
339+
if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) {
340+
DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link));
331341
call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
342+
}
332343
}
333344

334345
enum fib6_walk_state {
@@ -382,6 +393,7 @@ struct fib6_table {
382393
struct inet_peer_base tb6_peers;
383394
unsigned int flags;
384395
unsigned int fib_seq;
396+
struct hlist_head tb6_gc_hlist; /* GC candidates */
385397
#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0)
386398
};
387399

@@ -498,6 +510,38 @@ void fib6_gc_cleanup(void);
498510

499511
int fib6_init(void);
500512

513+
/* Add the route to the gc list if it is not already there
514+
*
515+
* The callers should hold f6i->fib6_table->tb6_lock.
516+
*/
517+
static inline void fib6_add_gc_list(struct fib6_info *f6i)
518+
{
519+
/* If fib6_node is null, the f6i is not in (or removed from) the
520+
* table.
521+
*
522+
* There is a gap between finding the f6i from the table and
523+
* calling this function without the protection of the tb6_lock.
524+
* This check makes sure the f6i is not added to the gc list when
525+
* it is not on the table.
526+
*/
527+
if (!rcu_dereference_protected(f6i->fib6_node,
528+
lockdep_is_held(&f6i->fib6_table->tb6_lock)))
529+
return;
530+
531+
/* Only link the route when gc_link is currently unhashed, so calling
 * this again for a route that is already on the gc list is a no-op.
 */
if (hlist_unhashed(&f6i->gc_link))
532+
hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist);
533+
}
534+
535+
/* Remove the route from the gc list if it is on the list.
536+
*
537+
* The callers should hold f6i->fib6_table->tb6_lock.
538+
*/
539+
static inline void fib6_remove_gc_list(struct fib6_info *f6i)
540+
{
541+
/* Routes whose gc_link is unhashed are not on any gc list, so there is
 * nothing to unlink; this makes the call safe for routes that never
 * had an expiration.
 */
if (!hlist_unhashed(&f6i->gc_link))
542+
hlist_del_init(&f6i->gc_link);
543+
}
544+
501545
struct ipv6_route_iter {
502546
struct seq_net_private p;
503547
struct fib6_walker w;

net/ipv6/addrconf.c

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,6 +1255,7 @@ static void
12551255
cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
12561256
bool del_rt, bool del_peer)
12571257
{
1258+
struct fib6_table *table;
12581259
struct fib6_info *f6i;
12591260

12601261
f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
@@ -1264,8 +1265,15 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
12641265
if (del_rt)
12651266
ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
12661267
else {
1267-
if (!(f6i->fib6_flags & RTF_EXPIRES))
1268+
if (!(f6i->fib6_flags & RTF_EXPIRES)) {
1269+
table = f6i->fib6_table;
1270+
spin_lock_bh(&table->tb6_lock);
1271+
12681272
fib6_set_expires(f6i, expires);
1273+
fib6_add_gc_list(f6i);
1274+
1275+
spin_unlock_bh(&table->tb6_lock);
1276+
}
12691277
fib6_info_release(f6i);
12701278
}
12711279
}
@@ -2706,6 +2714,7 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
27062714
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
27072715
{
27082716
struct prefix_info *pinfo;
2717+
struct fib6_table *table;
27092718
__u32 valid_lft;
27102719
__u32 prefered_lft;
27112720
int addr_type, err;
@@ -2782,11 +2791,20 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
27822791
if (valid_lft == 0) {
27832792
ip6_del_rt(net, rt, false);
27842793
rt = NULL;
2785-
} else if (addrconf_finite_timeout(rt_expires)) {
2786-
/* not infinity */
2787-
fib6_set_expires(rt, jiffies + rt_expires);
27882794
} else {
2789-
fib6_clean_expires(rt);
2795+
table = rt->fib6_table;
2796+
spin_lock_bh(&table->tb6_lock);
2797+
2798+
if (addrconf_finite_timeout(rt_expires)) {
2799+
/* not infinity */
2800+
fib6_set_expires(rt, jiffies + rt_expires);
2801+
fib6_add_gc_list(rt);
2802+
} else {
2803+
fib6_clean_expires(rt);
2804+
fib6_remove_gc_list(rt);
2805+
}
2806+
2807+
spin_unlock_bh(&table->tb6_lock);
27902808
}
27912809
} else if (valid_lft) {
27922810
clock_t expires = 0;
@@ -4741,6 +4759,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
47414759
unsigned long expires, u32 flags,
47424760
bool modify_peer)
47434761
{
4762+
struct fib6_table *table;
47444763
struct fib6_info *f6i;
47454764
u32 prio;
47464765

@@ -4761,10 +4780,18 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
47614780
ifp->rt_priority, ifp->idev->dev,
47624781
expires, flags, GFP_KERNEL);
47634782
} else {
4764-
if (!expires)
4783+
table = f6i->fib6_table;
4784+
spin_lock_bh(&table->tb6_lock);
4785+
4786+
if (!expires) {
47654787
fib6_clean_expires(f6i);
4766-
else
4788+
fib6_remove_gc_list(f6i);
4789+
} else {
47674790
fib6_set_expires(f6i, expires);
4791+
fib6_add_gc_list(f6i);
4792+
}
4793+
4794+
spin_unlock_bh(&table->tb6_lock);
47684795

47694796
fib6_info_release(f6i);
47704797
}

net/ipv6/ip6_fib.c

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
160160
INIT_LIST_HEAD(&f6i->fib6_siblings);
161161
refcount_set(&f6i->fib6_ref, 1);
162162

163+
INIT_HLIST_NODE(&f6i->gc_link);
164+
163165
return f6i;
164166
}
165167

@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
246248
net->ipv6.fib6_null_entry);
247249
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
248250
inet_peer_base_init(&table->tb6_peers);
251+
INIT_HLIST_HEAD(&table->tb6_gc_hlist);
249252
}
250253

251254
return table;
@@ -1055,6 +1058,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
10551058
lockdep_is_held(&table->tb6_lock));
10561059
}
10571060
}
1061+
1062+
fib6_clean_expires(rt);
1063+
fib6_remove_gc_list(rt);
10581064
}
10591065

10601066
/*
@@ -1115,10 +1121,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
11151121
rt->fib6_nsiblings = 0;
11161122
if (!(iter->fib6_flags & RTF_EXPIRES))
11171123
return -EEXIST;
1118-
if (!(rt->fib6_flags & RTF_EXPIRES))
1124+
if (!(rt->fib6_flags & RTF_EXPIRES)) {
11191125
fib6_clean_expires(iter);
1120-
else
1126+
fib6_remove_gc_list(iter);
1127+
} else {
11211128
fib6_set_expires(iter, rt->expires);
1129+
fib6_add_gc_list(iter);
1130+
}
11221131

11231132
if (rt->fib6_pmtu)
11241133
fib6_metric_set(iter, RTAX_MTU,
@@ -1477,6 +1486,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
14771486
if (rt->nh)
14781487
list_add(&rt->nh_list, &rt->nh->f6i_list);
14791488
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
1489+
1490+
if (rt->fib6_flags & RTF_EXPIRES)
1491+
fib6_add_gc_list(rt);
1492+
14801493
fib6_start_gc(info->nl_net, rt);
14811494
}
14821495

@@ -2280,9 +2293,8 @@ static void fib6_flush_trees(struct net *net)
22802293
* Garbage collection
22812294
*/
22822295

2283-
static int fib6_age(struct fib6_info *rt, void *arg)
2296+
static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
22842297
{
2285-
struct fib6_gc_args *gc_args = arg;
22862298
unsigned long now = jiffies;
22872299

22882300
/*
@@ -2307,6 +2319,42 @@ static int fib6_age(struct fib6_info *rt, void *arg)
23072319
return 0;
23082320
}
23092321

2322+
/* Run garbage collection over one table's list of GC candidates.
 *
 * Walks tb6->tb6_gc_hlist, calls fib6_age() on every route linked there
 * and deletes, via fib6_del(), each route for which fib6_age() returns -1.
 * Deletions are done with an nl_info that names @net and leaves
 * skip_notify false.
 *
 * @net:     network namespace stored in the nl_info passed to fib6_del()
 * @tb6:     table whose tb6_gc_hlist is scanned
 * @gc_args: GC state forwarded unchanged to fib6_age()
 *
 * The only caller visible here, fib6_gc_all(), holds tb6->tb6_lock around
 * this call.
 */
static void fib6_gc_table(struct net *net,
2323+
struct fib6_table *tb6,
2324+
struct fib6_gc_args *gc_args)
2325+
{
2326+
struct fib6_info *rt;
2327+
struct hlist_node *n;
2328+
struct nl_info info = {
2329+
.nl_net = net,
2330+
.skip_notify = false,
2331+
};
2332+
2333+
/* The _safe iterator keeps the next node in 'n' before the loop body
 * runs. NOTE(review): presumably needed because fib6_del() ends up
 * unlinking rt from this list (fib6_purge_rt() calls
 * fib6_remove_gc_list()) - confirm the fib6_del() call path.
 */
hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
2334+
if (fib6_age(rt, gc_args) == -1)
2335+
fib6_del(rt, &info);
2336+
}
2337+
2338+
/* Run garbage collection over the GC candidate list of every table in @net.
 *
 * Iterates all FIB6_TABLE_HASHSZ buckets of net->ipv6.fib_table_hash inside
 * an RCU read-side section and, for each table found, calls
 * fib6_gc_table() with that table's tb6_lock held (taken with
 * spin_lock_bh()).
 *
 * @net:     network namespace whose tables are scanned
 * @gc_args: GC state passed through to fib6_gc_table()
 */
static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
2339+
{
2340+
struct fib6_table *table;
2341+
struct hlist_head *head;
2342+
unsigned int h;
2343+
2344+
rcu_read_lock();
2345+
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2346+
head = &net->ipv6.fib_table_hash[h];
2347+
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2348+
/* Hold the per-table lock for the whole scan of this table:
 * the gc list helpers document that callers should hold
 * tb6_lock.
 */
spin_lock_bh(&table->tb6_lock);
2349+
2350+
fib6_gc_table(net, table, gc_args);
2351+
2352+
spin_unlock_bh(&table->tb6_lock);
2353+
}
2354+
}
2355+
rcu_read_unlock();
2356+
}
2357+
23102358
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
23112359
{
23122360
struct fib6_gc_args gc_args;
@@ -2322,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
23222370
net->ipv6.sysctl.ip6_rt_gc_interval;
23232371
gc_args.more = 0;
23242372

2325-
fib6_clean_all(net, fib6_age, &gc_args);
2373+
fib6_gc_all(net, &gc_args);
23262374
now = jiffies;
23272375
net->ipv6.ip6_rt_last_gc = now;
23282376

@@ -2382,6 +2430,7 @@ static int __net_init fib6_net_init(struct net *net)
23822430
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
23832431
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
23842432
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
2433+
INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist);
23852434

23862435
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
23872436
net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -2394,6 +2443,7 @@ static int __net_init fib6_net_init(struct net *net)
23942443
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
23952444
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
23962445
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
2446+
INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist);
23972447
#endif
23982448
fib6_tables_init(net);
23992449

net/ipv6/ndisc.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1237,6 +1237,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
12371237
struct ndisc_options ndopts;
12381238
struct fib6_info *rt = NULL;
12391239
struct inet6_dev *in6_dev;
1240+
struct fib6_table *table;
12401241
u32 defrtr_usr_metric;
12411242
unsigned int pref = 0;
12421243
__u32 old_if_flags;
@@ -1410,8 +1411,15 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
14101411
inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
14111412
}
14121413

1413-
if (rt)
1414+
if (rt) {
1415+
table = rt->fib6_table;
1416+
spin_lock_bh(&table->tb6_lock);
1417+
14141418
fib6_set_expires(rt, jiffies + (HZ * lifetime));
1419+
fib6_add_gc_list(rt);
1420+
1421+
spin_unlock_bh(&table->tb6_lock);
1422+
}
14151423
if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
14161424
ra_msg->icmph.icmp6_hop_limit) {
14171425
if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {

net/ipv6/route.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
931931
struct net *net = dev_net(dev);
932932
struct route_info *rinfo = (struct route_info *) opt;
933933
struct in6_addr prefix_buf, *prefix;
934+
struct fib6_table *table;
934935
unsigned int pref;
935936
unsigned long lifetime;
936937
struct fib6_info *rt;
@@ -989,10 +990,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
989990
(rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
990991

991992
if (rt) {
992-
if (!addrconf_finite_timeout(lifetime))
993+
table = rt->fib6_table;
994+
spin_lock_bh(&table->tb6_lock);
995+
996+
if (!addrconf_finite_timeout(lifetime)) {
993997
fib6_clean_expires(rt);
994-
else
998+
fib6_remove_gc_list(rt);
999+
} else {
9951000
fib6_set_expires(rt, jiffies + HZ * lifetime);
1001+
fib6_add_gc_list(rt);
1002+
}
1003+
1004+
spin_unlock_bh(&table->tb6_lock);
9961005

9971006
fib6_info_release(rt);
9981007
}

0 commit comments

Comments
 (0)