Skip to content

Commit 96619c4

Browse files
cjubran, kuba-moo
authored and committed
net/mlx5: Add support for setting tc-bw on nodes
Introduce support for enabling and disabling Traffic Class (TC) arbitration for existing devlink rate nodes. This patch adds support for a new scheduling node type, `SCHED_NODE_TYPE_TC_ARBITER_TSAR`.

Key changes include:

- New helper functions for transitioning existing rate nodes to TC arbiter nodes and vice versa. These functions handle the allocation of TC arbiter nodes, copying of child nodes, and restoring vport QoS settings when TC arbitration is disabled.
- Implementation of `mlx5_esw_devlink_rate_node_tc_bw_set()` to manage tc-bw configuration on nodes.
- Introduced stubs for `esw_qos_tc_arbiter_scheduling_setup()` and `esw_qos_tc_arbiter_scheduling_teardown()`, which will be extended in future patches to provide full support for tc-bw on devlink rate objects.
- Validation functions for tc-bw settings, allowing graceful handling of unsupported traffic class bandwidth configurations.
- Updated `__esw_qos_alloc_node()` to insert the new node into the parent’s children list only if the parent is not NULL. For the root TSAR, the new node is inserted directly after the allocation call.
- Don't allow `tc-bw` configuration for nodes containing non-leaf children.

This patch lays the groundwork for future support for configuring tc-bw on devlink rate nodes. Although the infrastructure is in place, full support for tc-bw is not yet implemented; attempts to set tc-bw on nodes will return `-EOPNOTSUPP`.

No functional changes are introduced at this stage.

Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20250629142138.361537-6-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 7109282 commit 96619c4

File tree

1 file changed

+304
-9
lines changed
  • drivers/net/ethernet/mellanox/mlx5/core/esw

1 file changed

+304
-9
lines changed

drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c

Lines changed: 304 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,13 @@ static void esw_qos_domain_release(struct mlx5_eswitch *esw)
6464
/* Kinds of scheduling node distinguished by the E-Switch QoS code. */
enum sched_node_type {
	SCHED_NODE_TYPE_VPORTS_TSAR,
	SCHED_NODE_TYPE_VPORT,
	/* Node performing Traffic Class (TC) arbitration. */
	SCHED_NODE_TYPE_TC_ARBITER_TSAR,
};
6869

6970
static const char * const sched_node_type_str[] = {
7071
[SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR",
7172
[SCHED_NODE_TYPE_VPORT] = "vport",
73+
[SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR",
7274
};
7375

7476
struct mlx5_esw_sched_node {
@@ -106,6 +108,13 @@ static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node)
106108
}
107109
}
108110

111+
static int esw_qos_num_tcs(struct mlx5_core_dev *dev)
112+
{
113+
int num_tcs = mlx5_max_tc(dev) + 1;
114+
115+
return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX;
116+
}
117+
109118
static void
110119
esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent)
111120
{
@@ -116,6 +125,27 @@ esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_
116125
esw_qos_node_attach_to_parent(node);
117126
}
118127

128+
static void esw_qos_nodes_set_parent(struct list_head *nodes,
129+
struct mlx5_esw_sched_node *parent)
130+
{
131+
struct mlx5_esw_sched_node *node, *tmp;
132+
133+
list_for_each_entry_safe(node, tmp, nodes, entry) {
134+
esw_qos_node_set_parent(node, parent);
135+
if (!list_empty(&node->children) &&
136+
parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
137+
struct mlx5_esw_sched_node *child;
138+
139+
list_for_each_entry(child, &node->children, entry) {
140+
struct mlx5_vport *vport = child->vport;
141+
142+
if (vport)
143+
vport->qos.sched_node->parent = parent;
144+
}
145+
}
146+
}
147+
}
148+
119149
void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport)
120150
{
121151
kfree(vport->qos.sched_node);
@@ -141,16 +171,24 @@ mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
141171

142172
static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op)
143173
{
144-
if (node->vport) {
174+
switch (node->type) {
175+
case SCHED_NODE_TYPE_VPORT:
145176
esw_warn(node->esw->dev,
146177
"E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n",
147178
op, sched_node_type_str[node->type], node->vport->vport, err);
148-
return;
179+
break;
180+
case SCHED_NODE_TYPE_TC_ARBITER_TSAR:
181+
case SCHED_NODE_TYPE_VPORTS_TSAR:
182+
esw_warn(node->esw->dev,
183+
"E-Switch %s %s scheduling element failed (err=%d)\n",
184+
op, sched_node_type_str[node->type], err);
185+
break;
186+
default:
187+
esw_warn(node->esw->dev,
188+
"E-Switch %s scheduling element failed (err=%d)\n",
189+
op, err);
190+
break;
149191
}
150-
151-
esw_warn(node->esw->dev,
152-
"E-Switch %s %s scheduling element failed (err=%d)\n",
153-
op, sched_node_type_str[node->type], err);
154192
}
155193

156194
static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx,
@@ -388,6 +426,14 @@ __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type
388426
node->parent = parent;
389427
INIT_LIST_HEAD(&node->children);
390428
esw_qos_node_attach_to_parent(node);
429+
if (!parent) {
430+
/* The caller is responsible for inserting the node into the
431+
* parent list if necessary. This function can also be used with
432+
* a NULL parent, which doesn't necessarily indicate that it
433+
* refers to the root scheduling element.
434+
*/
435+
list_del_init(&node->entry);
436+
}
391437

392438
return node;
393439
}
@@ -426,6 +472,7 @@ __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sch
426472
goto err_alloc_node;
427473
}
428474

475+
list_add_tail(&node->entry, &esw->qos.domain->nodes);
429476
esw_qos_normalize_min_rate(esw, NULL, extack);
430477
trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);
431478

@@ -498,6 +545,9 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
498545
SCHED_NODE_TYPE_VPORTS_TSAR,
499546
NULL))
500547
esw->qos.node0 = ERR_PTR(-ENOMEM);
548+
else
549+
list_add_tail(&esw->qos.node0->entry,
550+
&esw->qos.domain->nodes);
501551
}
502552
if (IS_ERR(esw->qos.node0)) {
503553
err = PTR_ERR(esw->qos.node0);
@@ -555,6 +605,18 @@ static void esw_qos_put(struct mlx5_eswitch *esw)
555605
esw_qos_destroy(esw);
556606
}
557607

608+
/*
 * Tear down the TC arbiter scheduling state of @node.
 *
 * Currently an empty stub: neither @node nor @extack is touched.
 */
static void
esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node,
				       struct netlink_ext_ack *extack)
{
}
612+
613+
static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node *node,
614+
struct netlink_ext_ack *extack)
615+
{
616+
NL_SET_ERR_MSG_MOD(extack, "TC arbiter elements are not supported.");
617+
return -EOPNOTSUPP;
618+
}
619+
558620
static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
559621
{
560622
struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
@@ -723,6 +785,195 @@ static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw
723785
return err;
724786
}
725787

788+
static void
789+
esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node *tc_arbiter_node,
790+
struct mlx5_esw_sched_node *node,
791+
struct netlink_ext_ack *extack)
792+
{
793+
struct mlx5_esw_sched_node *vports_tc_node, *vport_tc_node, *tmp;
794+
795+
vports_tc_node = list_first_entry(&tc_arbiter_node->children,
796+
struct mlx5_esw_sched_node,
797+
entry);
798+
799+
list_for_each_entry_safe(vport_tc_node, tmp, &vports_tc_node->children,
800+
entry)
801+
esw_qos_vport_update_parent(vport_tc_node->vport, node, extack);
802+
}
803+
804+
static int esw_qos_switch_tc_arbiter_node_to_vports(
805+
struct mlx5_esw_sched_node *tc_arbiter_node,
806+
struct mlx5_esw_sched_node *node,
807+
struct netlink_ext_ack *extack)
808+
{
809+
u32 parent_tsar_ix = node->parent ?
810+
node->parent->ix : node->esw->qos.root_tsar_ix;
811+
int err;
812+
813+
err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix,
814+
node->max_rate, node->bw_share,
815+
&node->ix);
816+
if (err) {
817+
NL_SET_ERR_MSG_MOD(extack,
818+
"Failed to create scheduling element for vports node when disabliing vports TC QoS");
819+
return err;
820+
}
821+
822+
node->type = SCHED_NODE_TYPE_VPORTS_TSAR;
823+
824+
/* Disable TC QoS for vports in the arbiter node. */
825+
esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, extack);
826+
827+
return 0;
828+
}
829+
830+
static int esw_qos_switch_vports_node_to_tc_arbiter(
831+
struct mlx5_esw_sched_node *node,
832+
struct mlx5_esw_sched_node *tc_arbiter_node,
833+
struct netlink_ext_ack *extack)
834+
{
835+
struct mlx5_esw_sched_node *vport_node, *tmp;
836+
struct mlx5_vport *vport;
837+
int err;
838+
839+
/* Enable TC QoS for each vport in the node. */
840+
list_for_each_entry_safe(vport_node, tmp, &node->children, entry) {
841+
vport = vport_node->vport;
842+
err = esw_qos_vport_update_parent(vport, tc_arbiter_node,
843+
extack);
844+
if (err)
845+
goto err_out;
846+
}
847+
848+
/* Destroy the current vports node TSAR. */
849+
err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
850+
SCHEDULING_HIERARCHY_E_SWITCH,
851+
node->ix);
852+
if (err)
853+
goto err_out;
854+
855+
return 0;
856+
err_out:
857+
/* Restore vports back into the node if an error occurs. */
858+
esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, NULL);
859+
860+
return err;
861+
}
862+
863+
static struct mlx5_esw_sched_node *
864+
esw_qos_move_node(struct mlx5_esw_sched_node *curr_node)
865+
{
866+
struct mlx5_esw_sched_node *new_node;
867+
868+
new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix,
869+
curr_node->type, NULL);
870+
if (!IS_ERR(new_node))
871+
esw_qos_nodes_set_parent(&curr_node->children, new_node);
872+
873+
return new_node;
874+
}
875+
876+
static int esw_qos_node_disable_tc_arbitration(struct mlx5_esw_sched_node *node,
877+
struct netlink_ext_ack *extack)
878+
{
879+
struct mlx5_esw_sched_node *curr_node;
880+
int err;
881+
882+
if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
883+
return 0;
884+
885+
/* Allocate a new rate node to hold the current state, which will allow
886+
* for restoring the vports back to this node after disabling TC
887+
* arbitration.
888+
*/
889+
curr_node = esw_qos_move_node(node);
890+
if (IS_ERR(curr_node)) {
891+
NL_SET_ERR_MSG_MOD(extack, "Failed setting up vports node");
892+
return PTR_ERR(curr_node);
893+
}
894+
895+
/* Disable TC QoS for all vports, and assign them back to the node. */
896+
err = esw_qos_switch_tc_arbiter_node_to_vports(curr_node, node, extack);
897+
if (err)
898+
goto err_out;
899+
900+
/* Clean up the TC arbiter node after disabling TC QoS for vports. */
901+
esw_qos_tc_arbiter_scheduling_teardown(curr_node, extack);
902+
goto out;
903+
err_out:
904+
esw_qos_nodes_set_parent(&curr_node->children, node);
905+
out:
906+
__esw_qos_free_node(curr_node);
907+
return err;
908+
}
909+
910+
static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node,
911+
struct netlink_ext_ack *extack)
912+
{
913+
struct mlx5_esw_sched_node *curr_node, *child;
914+
int err, new_level, max_level;
915+
916+
if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
917+
return 0;
918+
919+
/* Increase the hierarchy level by one to account for the additional
920+
* vports TC scheduling node, and verify that the new level does not
921+
* exceed the maximum allowed depth.
922+
*/
923+
new_level = node->level + 1;
924+
max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
925+
if (new_level > max_level) {
926+
NL_SET_ERR_MSG_MOD(extack,
927+
"TC arbitration on nodes is not supported beyond max scheduling depth");
928+
return -EOPNOTSUPP;
929+
}
930+
931+
/* Ensure the node does not contain non-leaf children before assigning
932+
* TC bandwidth.
933+
*/
934+
if (!list_empty(&node->children)) {
935+
list_for_each_entry(child, &node->children, entry) {
936+
if (!child->vport) {
937+
NL_SET_ERR_MSG_MOD(extack,
938+
"Cannot configure TC bandwidth on a node with non-leaf children");
939+
return -EOPNOTSUPP;
940+
}
941+
}
942+
}
943+
944+
/* Allocate a new node that will store the information of the current
945+
* node. This will be used later to restore the node if necessary.
946+
*/
947+
curr_node = esw_qos_move_node(node);
948+
if (IS_ERR(curr_node)) {
949+
NL_SET_ERR_MSG_MOD(extack, "Failed setting up node TC QoS");
950+
return PTR_ERR(curr_node);
951+
}
952+
953+
/* Initialize the TC arbiter node for QoS management.
954+
* This step prepares the node for handling Traffic Class arbitration.
955+
*/
956+
err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
957+
if (err)
958+
goto err_setup;
959+
960+
/* Enable TC QoS for each vport within the current node. */
961+
err = esw_qos_switch_vports_node_to_tc_arbiter(curr_node, node, extack);
962+
if (err)
963+
goto err_switch_vports;
964+
goto out;
965+
966+
err_switch_vports:
967+
esw_qos_tc_arbiter_scheduling_teardown(node, NULL);
968+
node->ix = curr_node->ix;
969+
node->type = curr_node->type;
970+
err_setup:
971+
esw_qos_nodes_set_parent(&curr_node->children, node);
972+
out:
973+
__esw_qos_free_node(curr_node);
974+
return err;
975+
}
976+
726977
static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
727978
{
728979
struct ethtool_link_ksettings lksettings;
@@ -848,6 +1099,31 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
8481099
return 0;
8491100
}
8501101

1102+
static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
1103+
u32 *tc_bw)
1104+
{
1105+
int i, num_tcs = esw_qos_num_tcs(esw->dev);
1106+
1107+
for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) {
1108+
if (tc_bw[i])
1109+
return false;
1110+
}
1111+
1112+
return true;
1113+
}
1114+
1115+
/*
 * Return true when all DEVLINK_RATE_TCS_MAX entries of @tc_bw are zero,
 * which the caller treats as a request to disable TC arbitration.
 */
static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
{
	int tc = 0;

	/* Advance past leading zero entries; stop at the first non-zero. */
	while (tc < DEVLINK_RATE_TCS_MAX && !tc_bw[tc])
		tc++;

	return tc == DEVLINK_RATE_TCS_MAX;
}
1126+
8511127
int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
8521128
{
8531129
if (esw->qos.domain)
@@ -921,9 +1197,28 @@ int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node,
9211197
u32 *tc_bw,
9221198
struct netlink_ext_ack *extack)
9231199
{
924-
NL_SET_ERR_MSG_MOD(extack,
925-
"TC bandwidth shares are not supported on nodes");
926-
return -EOPNOTSUPP;
1200+
struct mlx5_esw_sched_node *node = priv;
1201+
struct mlx5_eswitch *esw = node->esw;
1202+
bool disable;
1203+
int err;
1204+
1205+
if (!esw_qos_validate_unsupported_tc_bw(esw, tc_bw)) {
1206+
NL_SET_ERR_MSG_MOD(extack,
1207+
"E-Switch traffic classes number is not supported");
1208+
return -EOPNOTSUPP;
1209+
}
1210+
1211+
disable = esw_qos_tc_bw_disabled(tc_bw);
1212+
esw_qos_lock(esw);
1213+
if (disable) {
1214+
err = esw_qos_node_disable_tc_arbitration(node, extack);
1215+
goto unlock;
1216+
}
1217+
1218+
err = esw_qos_node_enable_tc_arbitration(node, extack);
1219+
unlock:
1220+
esw_qos_unlock(esw);
1221+
return err;
9271222
}
9281223

9291224
int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,

0 commit comments

Comments
 (0)