Skip to content

Commit a59cc02

Browse files
Huy Nguyen authored and
jfvogel committed
net/dcb: Add dcbnl buffer attribute
In this patch, we add the dcbnl buffer attribute to allow the user to change the NIC's buffer configuration, such as the priority-to-buffer mapping and the size of each individual buffer. This attribute, combined with the pfc attribute, allows an advanced user to fine-tune the QoS settings for a specific priority queue. For example, the user can give a dedicated buffer to one or more priorities, or give a large buffer to certain priorities. The dcb buffer configuration will be controlled by lldptool. "lldptool -T -i eth2 -V BUFFER prio 0,2,5,7,1,2,3,6" maps priorities 0,1,2,3,4,5,6,7 to receive buffers 0,2,5,7,1,2,3,6. "lldptool -T -i eth2 -V BUFFER size 87296,87296,0,87296,0,0,0,0" sets the receive buffer size for buffers 0,1,2,3,4,5,6,7 respectively. After discussion on the mailing list with Jakub, Jiri, Ido and John, we agreed to choose dcbnl over the devlink interface, since this feature is intended to set port attributes which are governed by the netdev instance of that port, whereas the devlink API is more suitable for global ASIC configurations. We present a use case scenario where the dcbnl buffer attribute, configured by an advanced user, helps reduce the latency of messages of different sizes. Scenario description: On ConnectX-5, we run latency-sensitive traffic with small/medium message sizes ranging from 64B to 256KB and bandwidth-sensitive traffic with large message sizes of 512KB and 1MB. We group small, medium, and large message sizes into their own PFC-enabled priorities as follows: Priorities 1 & 2 (64B, 256B and 1KB); Priorities 3 & 4 (4KB, 8KB, 16KB, 64KB, 128KB and 256KB); Priorities 5 & 6 (512KB and 1MB). By default, ConnectX-5 maps all PFC-enabled priorities to a single lossless buffer with a fixed size of 50% of the total available buffer space. The other 50% is assigned to the lossy buffer. Using the dcbnl buffer attribute, we create three equal-size lossless buffers. Each buffer has 25% of the total available buffer space. Thus, the lossy buffer size is reduced to 25%. The priority-to-lossless-buffer mappings are set as follows. 
Priorities 1 & 2 on lossless buffer #1; Priorities 3 & 4 on lossless buffer #2; Priorities 5 & 6 on lossless buffer #3. We observe improvements in latency for small and medium message sizes as follows. Please note that the bandwidth performance for large message sizes is reduced, but the total bandwidth remains the same. 256B message size (42% latency reduction); 4K message size (21% latency reduction); 64K message size (16% latency reduction). CC: Ido Schimmel <idosch@idosch.org> CC: Jakub Kicinski <jakub.kicinski@netronome.com> CC: Jiri Pirko <jiri@resnulli.us> CC: Or Gerlitz <gerlitz.or@gmail.com> CC: Parav Pandit <parav@mellanox.com> CC: Aron Silverton <aron.silverton@oracle.com> Signed-off-by: Huy Nguyen <huyn@mellanox.com> Reviewed-by: Parav Pandit <parav@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com> Orabug: 28102581 (cherry picked from commit e549f6f) cherry-pick-repo=git/davem/net-next.git To avoid breaking the kABI, the new operations in dcbnl_rtnl_ops have been split into a new struct, a pointer to which is placed in a reserved kABI slot at the end of the net_device struct. Change-Id: I942f0568b2db51376788326ab0c0fe0e24d25809 Signed-off-by: Dag Moxnes <dag.moxnes@oracle.com> Reviewed-by: Qing Huang <qing.huang@oracle.com> Tested-by: Gerald Gibson <gerald.gibson@oracle.com>
1 parent ad7d28a commit a59cc02

File tree

4 files changed

+46
-0
lines changed

4 files changed

+46
-0
lines changed

include/linux/netdevice.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1920,8 +1920,13 @@ struct net_device {
19201920
struct lock_class_key *qdisc_running_key;
19211921
bool proto_down;
19221922

1923+
#ifdef CONFIG_DCB
1924+
/* This has been split out from dcbnl_ops to avoid breaking kABI */
1925+
UEK_KABI_REPLACE(UEK_KABI_RESERVED_P(1), const struct dcbnl_rtnl_kabi_ops *dcbnl_kabi_ops)
1926+
#else
19231927
/* Space for future expansion without breaking kABI. */
19241928
UEK_KABI_RESERVED_P(1);
1929+
#endif
19251930
UEK_KABI_RESERVED_P(2);
19261931
UEK_KABI_RESERVED_P(3);
19271932
UEK_KABI_RESERVED_P(4);

include/net/dcbnl.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,14 @@ struct dcbnl_rtnl_ops {
101101
/* CEE peer */
102102
int (*cee_peer_getpg) (struct net_device *, struct cee_pg *);
103103
int (*cee_peer_getpfc) (struct net_device *, struct cee_pfc *);
104+
105+
};
106+
107+
/* This has been split out from dcbnl_ops to avoid breaking kABI */
108+
/*
 * Optional DCB buffer callbacks reached through netdev->dcbnl_kabi_ops.
 * The callers in net/dcb/dcbnl.c check both the ops pointer and the
 * individual callback for NULL before invoking it, so a driver may
 * provide either callback, both, or none.
 */
struct dcbnl_rtnl_kabi_ops {
109+
/* buffer settings */
110+
/*
 * Fill the caller-zeroed dcbnl_buffer with the device's configuration.
 * Return 0 on success; on a nonzero return the DCB_ATTR_DCB_BUFFER
 * attribute is simply omitted from the IEEE reply.
 */
int (*dcbnl_getbuffer)(struct net_device *, struct dcbnl_buffer *);
111+
/*
 * Apply the dcbnl_buffer taken from the DCB_ATTR_DCB_BUFFER attribute.
 * A nonzero return makes dcbnl_ieee_set() jump to its error path.
 */
int (*dcbnl_setbuffer)(struct net_device *, struct dcbnl_buffer *);
104112
};
105113

106114
#endif /* __NET_DCBNL_H__ */

include/uapi/linux/dcbnl.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,16 @@ struct ieee_pfc {
163163
__u64 indications[IEEE_8021QAZ_MAX_TCS];
164164
};
165165

166+
#define IEEE_8021Q_MAX_PRIORITIES 8
167+
#define DCBX_MAX_BUFFERS 8
168+
/*
 * Payload of the DCB_ATTR_DCB_BUFFER netlink attribute: the netlink
 * policy and nla_put() both use sizeof(struct dcbnl_buffer), so this
 * layout is part of the user-space ABI.
 */
struct dcbnl_buffer {
169+
/* priority to buffer mapping: prio2buffer[p] is the buffer used by priority p */
170+
__u8 prio2buffer[IEEE_8021Q_MAX_PRIORITIES];
171+
/* buffer size in Bytes, one entry per buffer (indexed by buffer number) */
172+
__u32 buffer_size[DCBX_MAX_BUFFERS];
173+
/* NOTE(review): presumably the total buffer space available on the port, in bytes; nothing in the visible code reads or writes it -- confirm against the driver */
__u32 total_size;
174+
};
175+
166176
/* CEE DCBX std supported values */
167177
#define CEE_DCBX_MAX_PGS 8
168178
#define CEE_DCBX_MAX_PRIO 8
@@ -406,6 +416,7 @@ enum ieee_attrs {
406416
DCB_ATTR_IEEE_MAXRATE,
407417
DCB_ATTR_IEEE_QCN,
408418
DCB_ATTR_IEEE_QCN_STATS,
419+
DCB_ATTR_DCB_BUFFER,
409420
__DCB_ATTR_IEEE_MAX
410421
};
411422
#define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1)

net/dcb/dcbnl.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = {
176176
[DCB_ATTR_IEEE_MAXRATE] = {.len = sizeof(struct ieee_maxrate)},
177177
[DCB_ATTR_IEEE_QCN] = {.len = sizeof(struct ieee_qcn)},
178178
[DCB_ATTR_IEEE_QCN_STATS] = {.len = sizeof(struct ieee_qcn_stats)},
179+
[DCB_ATTR_DCB_BUFFER] = {.len = sizeof(struct dcbnl_buffer)},
179180
};
180181

181182
/* DCB number of traffic classes nested attributes. */
@@ -1028,6 +1029,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
10281029
struct nlattr *ieee, *app;
10291030
struct dcb_app_type *itr;
10301031
const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
1032+
const struct dcbnl_rtnl_kabi_ops *kabi_ops = netdev->dcbnl_kabi_ops;
10311033
int dcbx;
10321034
int err;
10331035

@@ -1094,6 +1096,16 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
10941096
return -EMSGSIZE;
10951097
}
10961098

1099+
if (kabi_ops && kabi_ops->dcbnl_getbuffer) {
1100+
struct dcbnl_buffer buffer;
1101+
1102+
memset(&buffer, 0, sizeof(buffer));
1103+
err = kabi_ops->dcbnl_getbuffer(netdev, &buffer);
1104+
if (!err &&
1105+
nla_put(skb, DCB_ATTR_DCB_BUFFER, sizeof(buffer), &buffer))
1106+
return -EMSGSIZE;
1107+
}
1108+
10971109
app = nla_nest_start(skb, DCB_ATTR_IEEE_APP_TABLE);
10981110
if (!app)
10991111
return -EMSGSIZE;
@@ -1408,6 +1420,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
14081420
u32 seq, struct nlattr **tb, struct sk_buff *skb)
14091421
{
14101422
const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
1423+
const struct dcbnl_rtnl_kabi_ops *kabi_ops = netdev->dcbnl_kabi_ops;
14111424
struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1];
14121425
int err;
14131426

@@ -1453,6 +1466,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
14531466
goto err;
14541467
}
14551468

1469+
if (ieee[DCB_ATTR_DCB_BUFFER] && kabi_ops && kabi_ops->dcbnl_setbuffer) {
1470+
struct dcbnl_buffer *buffer =
1471+
nla_data(ieee[DCB_ATTR_DCB_BUFFER]);
1472+
1473+
err = kabi_ops->dcbnl_setbuffer(netdev, buffer);
1474+
if (err)
1475+
goto err;
1476+
}
1477+
14561478
if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
14571479
struct nlattr *attr;
14581480
int rem;

0 commit comments

Comments
 (0)