Skip to content

Commit 0287587

Browse files
Eric Dumazet, davem330
Eric Dumazet
authored and committed
net: better IFF_XMIT_DST_RELEASE support
Testing xmit_more support with netperf and connected UDP sockets, I found strange dst refcount false sharing. Current handling of IFF_XMIT_DST_RELEASE is not optimal. Dropping dst in validate_xmit_skb() is certainly too late in case packet was queued by cpu X but dequeued by cpu Y. The logical point to take care of drop/force is in __dev_queue_xmit() before even taking qdisc lock. As Julian Anastasov pointed out, need for skb_dst() might come from some packet schedulers or classifiers. This patch adds new helper to cleanly express needs of various drivers or qdiscs/classifiers. Drivers that need skb_dst() in their ndo_start_xmit() should call following helper in their setup instead of the prior: dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; -> netif_keep_dst(dev); Instead of using a single bit, we use two bits, one being eventually rebuilt in bonding/team drivers. The other one is permanent and blocks IFF_XMIT_DST_RELEASE being rebuilt in bonding/team. Eventually, we could add something smarter later. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Julian Anastasov <ja@ssi.bg> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent fe971b9 commit 0287587

27 files changed

+54
-39
lines changed

drivers/infiniband/ulp/ipoib/ipoib_main.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1364,7 +1364,7 @@ void ipoib_setup(struct net_device *dev)
13641364
dev->tx_queue_len = ipoib_sendq_size * 2;
13651365
dev->features = (NETIF_F_VLAN_CHALLENGED |
13661366
NETIF_F_HIGHDMA);
1367-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1367+
netif_keep_dst(dev);
13681368

13691369
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
13701370

drivers/net/appletalk/ipddp.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ static struct net_device * __init ipddp_init(void)
7474
if (!dev)
7575
return ERR_PTR(-ENOMEM);
7676

77-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
77+
netif_keep_dst(dev);
7878
strcpy(dev->name, "ipddp%d");
7979

8080
if (version_printed++ == 0)

drivers/net/bonding/bond_main.c

+6-3
Original file line numberDiff line numberDiff line change
@@ -1002,7 +1002,8 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
10021002

10031003
static void bond_compute_features(struct bonding *bond)
10041004
{
1005-
unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE;
1005+
unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
1006+
IFF_XMIT_DST_RELEASE_PERM;
10061007
netdev_features_t vlan_features = BOND_VLAN_FEATURES;
10071008
netdev_features_t enc_features = BOND_ENC_FEATURES;
10081009
struct net_device *bond_dev = bond->dev;
@@ -1038,8 +1039,10 @@ static void bond_compute_features(struct bonding *bond)
10381039
bond_dev->gso_max_segs = gso_max_segs;
10391040
netif_set_gso_max_size(bond_dev, gso_max_size);
10401041

1041-
flags = bond_dev->priv_flags & ~IFF_XMIT_DST_RELEASE;
1042-
bond_dev->priv_flags = flags | dst_release_flag;
1042+
bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1043+
if ((bond_dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) &&
1044+
dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
1045+
bond_dev->priv_flags |= IFF_XMIT_DST_RELEASE;
10431046

10441047
netdev_change_features(bond_dev);
10451048
}

drivers/net/eql.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ static void __init eql_setup(struct net_device *dev)
199199

200200
dev->type = ARPHRD_SLIP;
201201
dev->tx_queue_len = 5; /* Hands them off fast */
202-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
202+
netif_keep_dst(dev);
203203
}
204204

205205
static int eql_open(struct net_device *dev)

drivers/net/ifb.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,8 @@ static void ifb_setup(struct net_device *dev)
185185

186186
dev->flags |= IFF_NOARP;
187187
dev->flags &= ~IFF_MULTICAST;
188-
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
188+
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
189+
netif_keep_dst(dev);
189190
eth_hw_addr_random(dev);
190191
}
191192

drivers/net/loopback.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ static void loopback_setup(struct net_device *dev)
169169
dev->type = ARPHRD_LOOPBACK; /* 0x0001*/
170170
dev->flags = IFF_LOOPBACK;
171171
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
172-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
172+
netif_keep_dst(dev);
173173
dev->hw_features = NETIF_F_ALL_TSO | NETIF_F_UFO;
174174
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
175175
| NETIF_F_ALL_TSO

drivers/net/macvlan.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -1025,7 +1025,8 @@ void macvlan_common_setup(struct net_device *dev)
10251025
{
10261026
ether_setup(dev);
10271027

1028-
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
1028+
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1029+
netif_keep_dst(dev);
10291030
dev->priv_flags |= IFF_UNICAST_FLT;
10301031
dev->netdev_ops = &macvlan_netdev_ops;
10311032
dev->destructor = free_netdev;

drivers/net/ppp/ppp_generic.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1103,7 +1103,7 @@ static void ppp_setup(struct net_device *dev)
11031103
dev->type = ARPHRD_PPP;
11041104
dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
11051105
dev->features |= NETIF_F_NETNS_LOCAL;
1106-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1106+
netif_keep_dst(dev);
11071107
}
11081108

11091109
/*

drivers/net/team/team.c

+5-3
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,8 @@ static void __team_compute_features(struct team *team)
970970
struct team_port *port;
971971
u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL;
972972
unsigned short max_hard_header_len = ETH_HLEN;
973-
unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE;
973+
unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
974+
IFF_XMIT_DST_RELEASE_PERM;
974975

975976
list_for_each_entry(port, &team->port_list, list) {
976977
vlan_features = netdev_increment_features(vlan_features,
@@ -985,8 +986,9 @@ static void __team_compute_features(struct team *team)
985986
team->dev->vlan_features = vlan_features;
986987
team->dev->hard_header_len = max_hard_header_len;
987988

988-
flags = team->dev->priv_flags & ~IFF_XMIT_DST_RELEASE;
989-
team->dev->priv_flags = flags | dst_release_flag;
989+
team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
990+
if (dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
991+
team->dev->priv_flags |= IFF_XMIT_DST_RELEASE;
990992

991993
netdev_change_features(team->dev);
992994
}

drivers/net/vxlan.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -2193,7 +2193,7 @@ static void vxlan_setup(struct net_device *dev)
21932193
dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
21942194
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
21952195
dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
2196-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
2196+
netif_keep_dst(dev);
21972197
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
21982198

21992199
INIT_LIST_HEAD(&vxlan->next);

drivers/net/wan/hdlc_fr.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1047,7 +1047,7 @@ static void pvc_setup(struct net_device *dev)
10471047
dev->flags = IFF_POINTOPOINT;
10481048
dev->hard_header_len = 10;
10491049
dev->addr_len = 2;
1050-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1050+
netif_keep_dst(dev);
10511051
}
10521052

10531053
static const struct net_device_ops pvc_ops = {

drivers/s390/net/qeth_l3_main.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -3306,7 +3306,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
33063306
card->dev->features |= NETIF_F_HW_VLAN_CTAG_TX |
33073307
NETIF_F_HW_VLAN_CTAG_RX |
33083308
NETIF_F_HW_VLAN_CTAG_FILTER;
3309-
card->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
3309+
netif_keep_dst(card->dev);
33103310
card->dev->gso_max_size = 15 * PAGE_SIZE;
33113311

33123312
SET_NETDEV_DEV(card->dev, &card->gdev->dev);

include/linux/netdevice.h

+8
Original file line numberDiff line numberDiff line change
@@ -1206,6 +1206,7 @@ enum netdev_priv_flags {
12061206
IFF_SUPP_NOFCS = 1<<19,
12071207
IFF_LIVE_ADDR_CHANGE = 1<<20,
12081208
IFF_MACVLAN = 1<<21,
1209+
IFF_XMIT_DST_RELEASE_PERM = 1<<22,
12091210
};
12101211

12111212
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@ -1230,6 +1231,7 @@ enum netdev_priv_flags {
12301231
#define IFF_SUPP_NOFCS IFF_SUPP_NOFCS
12311232
#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE
12321233
#define IFF_MACVLAN IFF_MACVLAN
1234+
#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM
12331235

12341236
/**
12351237
* struct net_device - The DEVICE structure.
@@ -3588,6 +3590,12 @@ static inline bool netif_supports_nofcs(struct net_device *dev)
35883590
return dev->priv_flags & IFF_SUPP_NOFCS;
35893591
}
35903592

3593+
/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
3594+
static inline void netif_keep_dst(struct net_device *dev)
3595+
{
3596+
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM);
3597+
}
3598+
35913599
extern struct pernet_operations __net_initdata loopback_net_ops;
35923600

35933601
/* Logging, debugging and troubleshooting/diagnostic helpers. */

net/8021q/vlan_dev.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -799,7 +799,8 @@ void vlan_setup(struct net_device *dev)
799799
ether_setup(dev);
800800

801801
dev->priv_flags |= IFF_802_1Q_VLAN;
802-
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
802+
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
803+
netif_keep_dst(dev);
803804
dev->tx_queue_len = 0;
804805

805806
dev->netdev_ops = &vlan_netdev_ops;

net/atm/clip.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ static void clip_setup(struct net_device *dev)
501501
/* without any more elaborate queuing. 100 is a reasonable */
502502
/* compromise between decent burst-tolerance and protection */
503503
/* against memory hogs. */
504-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
504+
netif_keep_dst(dev);
505505
}
506506

507507
static int clip_create(int number)

net/core/dev.c

+9-10
Original file line numberDiff line numberDiff line change
@@ -2665,12 +2665,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
26652665
if (skb->next)
26662666
return skb;
26672667

2668-
/* If device doesn't need skb->dst, release it right now while
2669-
* its hot in this cpu cache
2670-
*/
2671-
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2672-
skb_dst_drop(skb);
2673-
26742668
features = netif_skb_features(skb);
26752669
skb = validate_xmit_vlan(skb, features);
26762670
if (unlikely(!skb))
@@ -2811,8 +2805,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
28112805
* waiting to be sent out; and the qdisc is not running -
28122806
* xmit the skb directly.
28132807
*/
2814-
if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2815-
skb_dst_force(skb);
28162808

28172809
qdisc_bstats_update(q, skb);
28182810

@@ -2827,7 +2819,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
28272819

28282820
rc = NET_XMIT_SUCCESS;
28292821
} else {
2830-
skb_dst_force(skb);
28312822
rc = q->enqueue(skb, q) & NET_XMIT_MASK;
28322823
if (qdisc_run_begin(q)) {
28332824
if (unlikely(contended)) {
@@ -2924,6 +2915,14 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
29242915

29252916
skb_update_prio(skb);
29262917

2918+
/* If device/qdisc don't need skb->dst, release it right now while
2919+
* its hot in this cpu cache.
2920+
*/
2921+
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2922+
skb_dst_drop(skb);
2923+
else
2924+
skb_dst_force(skb);
2925+
29272926
txq = netdev_pick_tx(dev, skb, accel_priv);
29282927
q = rcu_dereference_bh(txq->qdisc);
29292928

@@ -6674,7 +6673,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
66746673
INIT_LIST_HEAD(&dev->adj_list.lower);
66756674
INIT_LIST_HEAD(&dev->all_adj_list.upper);
66766675
INIT_LIST_HEAD(&dev->all_adj_list.lower);
6677-
dev->priv_flags = IFF_XMIT_DST_RELEASE;
6676+
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
66786677
setup(dev);
66796678

66806679
dev->num_tx_queues = txqs;

net/ipv4/ip_gre.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
510510
memcpy(dev->broadcast, &iph->daddr, 4);
511511

512512
dev->flags = IFF_NOARP;
513-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
513+
netif_keep_dst(dev);
514514
dev->addr_len = 4;
515515

516516
if (iph->daddr) {

net/ipv4/ip_vti.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ static int vti_tunnel_init(struct net_device *dev)
364364
dev->iflink = 0;
365365
dev->addr_len = 4;
366366
dev->features |= NETIF_F_LLTX;
367-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
367+
netif_keep_dst(dev);
368368

369369
return ip_tunnel_init(dev);
370370
}

net/ipv4/ipip.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
289289
dev->iflink = 0;
290290
dev->addr_len = 4;
291291
dev->features |= NETIF_F_LLTX;
292-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
292+
netif_keep_dst(dev);
293293

294294
dev->features |= IPIP_FEATURES;
295295
dev->hw_features |= IPIP_FEATURES;

net/ipv6/ip6_gre.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1242,7 +1242,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
12421242
dev->flags |= IFF_NOARP;
12431243
dev->iflink = 0;
12441244
dev->addr_len = sizeof(struct in6_addr);
1245-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1245+
netif_keep_dst(dev);
12461246
}
12471247

12481248
static int ip6gre_tunnel_init(struct net_device *dev)

net/ipv6/ip6_tunnel.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1493,7 +1493,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
14931493
dev->mtu -= 8;
14941494
dev->flags |= IFF_NOARP;
14951495
dev->addr_len = sizeof(struct in6_addr);
1496-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1496+
netif_keep_dst(dev);
14971497
/* This perm addr will be used as interface identifier by IPv6 */
14981498
dev->addr_assign_type = NET_ADDR_RANDOM;
14991499
eth_random_addr(dev->perm_addr);

net/ipv6/ip6_vti.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -807,7 +807,7 @@ static void vti6_dev_setup(struct net_device *dev)
807807
dev->mtu = ETH_DATA_LEN;
808808
dev->flags |= IFF_NOARP;
809809
dev->addr_len = sizeof(struct in6_addr);
810-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
810+
netif_keep_dst(dev);
811811
}
812812

813813
/**

net/ipv6/sit.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1364,7 +1364,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
13641364
dev->hard_header_len = LL_MAX_HEADER + t_hlen;
13651365
dev->mtu = ETH_DATA_LEN - t_hlen;
13661366
dev->flags = IFF_NOARP;
1367-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1367+
netif_keep_dst(dev);
13681368
dev->iflink = 0;
13691369
dev->addr_len = 4;
13701370
dev->features |= NETIF_F_LLTX;

net/sched/cls_flow.c

+2
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
493493
tcf_exts_change(tp, &fnew->exts, &e);
494494
tcf_em_tree_change(tp, &fnew->ematches, &t);
495495

496+
netif_keep_dst(qdisc_dev(tp->q));
497+
496498
if (tb[TCA_FLOW_KEYS]) {
497499
fnew->keymask = keymask;
498500
fnew->nkeys = nkeys;

net/sched/cls_route.c

+1
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
524524
if (f->handle < f1->handle)
525525
break;
526526

527+
netif_keep_dst(qdisc_dev(tp->q));
527528
rcu_assign_pointer(f->next, f1);
528529
rcu_assign_pointer(*fp, f);
529530

net/sched/sch_generic.c

-3
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ EXPORT_SYMBOL(default_qdisc_ops);
4747

4848
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
4949
{
50-
skb_dst_force(skb);
5150
q->gso_skb = skb;
5251
q->qstats.requeues++;
5352
q->q.qlen++; /* it's still part of the queue */
@@ -218,8 +217,6 @@ static inline int qdisc_restart(struct Qdisc *q)
218217
if (unlikely(!skb))
219218
return 0;
220219

221-
WARN_ON_ONCE(skb_dst_is_noref(skb));
222-
223220
root_lock = qdisc_lock(q);
224221
dev = qdisc_dev(q);
225222
txq = skb_get_tx_queue(dev, skb);

net/sched/sch_teql.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ static __init void teql_master_setup(struct net_device *dev)
470470
dev->tx_queue_len = 100;
471471
dev->flags = IFF_NOARP;
472472
dev->hard_header_len = LL_MAX_HEADER;
473-
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
473+
netif_keep_dst(dev);
474474
}
475475

476476
static LIST_HEAD(master_dev_list);

0 commit comments

Comments
 (0)