Skip to content
This repository was archived by the owner on Dec 20, 2023. It is now read-only.

Commit 9a76db0

Browse files
author
Nathan Taylor
committed
fib_select_multipath() to choose nexthop via naive but lockless round robin
1 parent 0f02bdb commit 9a76db0

File tree

3 files changed

+32
-61
lines changed

3 files changed

+32
-61
lines changed

include/net/ip_fib.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,10 @@ struct fib_nh {
7070
struct net_device *nh_dev;
7171
struct hlist_node nh_hash;
7272
struct fib_info *nh_parent;
73-
unsigned int nh_flags;
73+
unsigned long nh_flags;
7474
unsigned char nh_scope;
7575
#ifdef CONFIG_IP_ROUTE_MULTIPATH
7676
int nh_weight;
77-
int nh_power;
7877
#endif
7978
#ifdef CONFIG_IP_ROUTE_CLASSID
8079
__u32 nh_tclassid;
@@ -111,9 +110,6 @@ struct fib_info {
111110
#define fib_rtt fib_metrics[RTAX_RTT-1]
112111
#define fib_advmss fib_metrics[RTAX_ADVMSS-1]
113112
int fib_nhs;
114-
#ifdef CONFIG_IP_ROUTE_MULTIPATH
115-
int fib_power;
116-
#endif
117113
struct rcu_head rcu;
118114
struct fib_nh fib_nh[0];
119115
#define fib_dev fib_nh[0].nh_dev

include/uapi/linux/rtnetlink.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -323,9 +323,14 @@ struct rtnexthop {
323323

324324
/* rtnh_flags */
325325

326-
#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
327-
#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
328-
#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
326+
#define RTNH_F_DEAD_OFFSET 0
327+
#define RTNH_F_DEAD (1 << RTNH_F_DEAD_OFFSET) /* Nexthop is dead (used by multipath) */
328+
329+
#define RTNH_F_PERVASIVE_OFFSET 1
330+
#define RTNH_F_PERVASIVE (1 << RTNH_F_PERVASIVE_OFFSET) /* Do recursive gateway lookup */
331+
332+
#define RTNH_F_ONLINK_OFFSET 2
333+
#define RTNH_F_ONLINK (1 << RTNH_F_ONLINK_OFFSET) /* Gateway is forced on link */
329334

330335
/* Macros to handle nexthops */
331336

net/ipv4/fib_semantics.c

Lines changed: 23 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <linux/skbuff.h>
3333
#include <linux/init.h>
3434
#include <linux/slab.h>
35+
#include <linux/percpu.h>
3536

3637
#include <net/arp.h>
3738
#include <net/ip.h>
@@ -57,7 +58,7 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
5758

5859
#ifdef CONFIG_IP_ROUTE_MULTIPATH
5960

60-
static DEFINE_SPINLOCK(fib_multipath_lock);
61+
DEFINE_PER_CPU(int, fib_multipath_counter) = -1;
6162

6263
#define for_nexthops(fi) { \
6364
int nhsel; const struct fib_nh *nh; \
@@ -258,9 +259,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
258259
if (nh->nh_oif != onh->nh_oif ||
259260
nh->nh_gw != onh->nh_gw ||
260261
nh->nh_scope != onh->nh_scope ||
261-
#ifdef CONFIG_IP_ROUTE_MULTIPATH
262262
nh->nh_weight != onh->nh_weight ||
263-
#endif
264263
#ifdef CONFIG_IP_ROUTE_CLASSID
265264
nh->nh_tclassid != onh->nh_tclassid ||
266265
#endif
@@ -1137,12 +1136,6 @@ int fib_sync_down_dev(struct net_device *dev, int force)
11371136
else if (nexthop_nh->nh_dev == dev &&
11381137
nexthop_nh->nh_scope != scope) {
11391138
nexthop_nh->nh_flags |= RTNH_F_DEAD;
1140-
#ifdef CONFIG_IP_ROUTE_MULTIPATH
1141-
spin_lock_bh(&fib_multipath_lock);
1142-
fi->fib_power -= nexthop_nh->nh_power;
1143-
nexthop_nh->nh_power = 0;
1144-
spin_unlock_bh(&fib_multipath_lock);
1145-
#endif
11461139
dead++;
11471140
}
11481141
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1261,10 +1254,7 @@ int fib_sync_up(struct net_device *dev)
12611254
!__in_dev_get_rtnl(dev))
12621255
continue;
12631256
alive++;
1264-
spin_lock_bh(&fib_multipath_lock);
1265-
nexthop_nh->nh_power = 0;
1266-
nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
1267-
spin_unlock_bh(&fib_multipath_lock);
1257+
clear_bit(RTNH_F_DEAD_OFFSET, &nexthop_nh->nh_flags);
12681258
} endfor_nexthops(fi)
12691259

12701260
if (alive > 0) {
@@ -1277,55 +1267,35 @@ int fib_sync_up(struct net_device *dev)
12771267
}
12781268

12791269
/*
1280-
* The algorithm is suboptimal, but it provides really
1281-
* fair weighted route distribution.
1270+
* The algorithm is naive: it ignores nexthop weights, so it does not
1271+
* provide a fair weighted route distribution, but it is lock-free.
12821272
*/
12831273
void fib_select_multipath(struct fib_result *res)
12841274
{
12851275
struct fib_info *fi = res->fi;
1286-
int w;
1276+
struct fib_nh *nexthop_nh;
1277+
int nhsel, attempts;
12871278

1288-
spin_lock_bh(&fib_multipath_lock);
1289-
if (fi->fib_power <= 0) {
1290-
int power = 0;
1291-
change_nexthops(fi) {
1292-
if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
1293-
power += nexthop_nh->nh_weight;
1294-
nexthop_nh->nh_power = nexthop_nh->nh_weight;
1295-
}
1296-
} endfor_nexthops(fi);
1297-
fi->fib_power = power;
1298-
if (power <= 0) {
1299-
spin_unlock_bh(&fib_multipath_lock);
1300-
/* Race condition: route has just become dead. */
1301-
res->nh_sel = 0;
1302-
return;
1303-
}
1279+
/* Initialise the counter with the CPU ID if not already set. */
1280+
if (this_cpu_read(fib_multipath_counter) == -1) {
1281+
this_cpu_write(fib_multipath_counter, raw_smp_processor_id()); /* seed only; get_cpu() would disable preemption and require a matching put_cpu() */
13041282
}
13051283

1284+
/* Round-robin multipaths, choosing the next live one or
1285+
* an arbitrary dead one if none are live. */
1286+
attempts = fi->fib_nhs;
1287+
nhsel = 0;
1288+
while (--attempts >= 0) {
1289+
this_cpu_inc(fib_multipath_counter);
1290+
nhsel = (unsigned int)this_cpu_read(fib_multipath_counter) % (unsigned int)fi->fib_nhs; /* unsigned modulo: a wrapped (negative) counter must not yield a negative index */
13061291

1307-
/* w should be random number [0..fi->fib_power-1],
1308-
* it is pretty bad approximation.
1309-
*/
1310-
1311-
w = jiffies % fi->fib_power;
1312-
1313-
change_nexthops(fi) {
1314-
if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
1315-
nexthop_nh->nh_power) {
1316-
w -= nexthop_nh->nh_power;
1317-
if (w <= 0) {
1318-
nexthop_nh->nh_power--;
1319-
fi->fib_power--;
1320-
res->nh_sel = nhsel;
1321-
spin_unlock_bh(&fib_multipath_lock);
1322-
return;
1323-
}
1292+
nexthop_nh = &fi->fib_nh[nhsel];
1293+
if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { /* parenthesised: '!' binds tighter than '&' */
1294+
break;
13241295
}
1325-
} endfor_nexthops(fi);
1296+
}
13261297

1327-
/* Race condition: route has just become dead. */
1328-
res->nh_sel = 0;
1329-
spin_unlock_bh(&fib_multipath_lock);
1298+
/* Race condition: route may have just become dead. */
1299+
res->nh_sel = nhsel;
13301300
}
13311301
#endif

0 commit comments

Comments
 (0)