@@ -64,11 +64,13 @@ static void esw_qos_domain_release(struct mlx5_eswitch *esw)
 enum sched_node_type {
 	SCHED_NODE_TYPE_VPORTS_TSAR,
 	SCHED_NODE_TYPE_VPORT,
+	SCHED_NODE_TYPE_TC_ARBITER_TSAR,
 };
 
 static const char * const sched_node_type_str[] = {
 	[SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR",
 	[SCHED_NODE_TYPE_VPORT] = "vport",
+	[SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR",
 };
 
 struct mlx5_esw_sched_node {
@@ -106,6 +108,13 @@ static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node)
 	}
 }
 
+static int esw_qos_num_tcs(struct mlx5_core_dev *dev)
+{
+	int num_tcs = mlx5_max_tc(dev) + 1;
+
+	return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX;
+}
+
 static void
 esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent)
 {
@@ -116,6 +125,27 @@ esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_
 	esw_qos_node_attach_to_parent(node);
 }
 
+static void esw_qos_nodes_set_parent(struct list_head *nodes,
+				     struct mlx5_esw_sched_node *parent)
+{
+	struct mlx5_esw_sched_node *node, *tmp;
+
+	list_for_each_entry_safe(node, tmp, nodes, entry) {
+		esw_qos_node_set_parent(node, parent);
+		if (!list_empty(&node->children) &&
+		    parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
+			struct mlx5_esw_sched_node *child;
+
+			list_for_each_entry(child, &node->children, entry) {
+				struct mlx5_vport *vport = child->vport;
+
+				if (vport)
+					vport->qos.sched_node->parent = parent;
+			}
+		}
+	}
+}
+
 void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport)
 {
 	kfree(vport->qos.sched_node);
@@ -141,16 +171,24 @@ mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
 
 static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op)
 {
-	if (node->vport) {
+	switch (node->type) {
+	case SCHED_NODE_TYPE_VPORT:
 		esw_warn(node->esw->dev,
 			 "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n",
 			 op, sched_node_type_str[node->type], node->vport->vport, err);
-		return;
+		break;
+	case SCHED_NODE_TYPE_TC_ARBITER_TSAR:
+	case SCHED_NODE_TYPE_VPORTS_TSAR:
+		esw_warn(node->esw->dev,
+			 "E-Switch %s %s scheduling element failed (err=%d)\n",
+			 op, sched_node_type_str[node->type], err);
+		break;
+	default:
+		esw_warn(node->esw->dev,
+			 "E-Switch %s scheduling element failed (err=%d)\n",
+			 op, err);
+		break;
 	}
-
-	esw_warn(node->esw->dev,
-		 "E-Switch %s %s scheduling element failed (err=%d)\n",
-		 op, sched_node_type_str[node->type], err);
 }
 
 static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx,
@@ -388,6 +426,14 @@ __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type
 	node->parent = parent;
 	INIT_LIST_HEAD(&node->children);
 	esw_qos_node_attach_to_parent(node);
+	if (!parent) {
+		/* The caller is responsible for inserting the node into the
+		 * parent list if necessary. This function can also be used with
+		 * a NULL parent, which doesn't necessarily indicate that it
+		 * refers to the root scheduling element.
+		 */
+		list_del_init(&node->entry);
+	}
 
 	return node;
 }
@@ -426,6 +472,7 @@ __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sch
 		goto err_alloc_node;
 	}
 
+	list_add_tail(&node->entry, &esw->qos.domain->nodes);
 	esw_qos_normalize_min_rate(esw, NULL, extack);
 	trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);
 
@@ -498,6 +545,9 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
 					  SCHED_NODE_TYPE_VPORTS_TSAR,
 					  NULL))
 			esw->qos.node0 = ERR_PTR(-ENOMEM);
+		else
+			list_add_tail(&esw->qos.node0->entry,
+				      &esw->qos.domain->nodes);
 	}
 	if (IS_ERR(esw->qos.node0)) {
 		err = PTR_ERR(esw->qos.node0);
@@ -555,6 +605,18 @@ static void esw_qos_put(struct mlx5_eswitch *esw)
 		esw_qos_destroy(esw);
 }
 
+static void
+esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node,
+				       struct netlink_ext_ack *extack)
+{}
+
+static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node *node,
+					       struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG_MOD(extack, "TC arbiter elements are not supported.");
+	return -EOPNOTSUPP;
+}
+
 static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
 {
 	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
@@ -723,6 +785,195 @@ static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw
 	return err;
 }
 
+static void
+esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node *tc_arbiter_node,
+				  struct mlx5_esw_sched_node *node,
+				  struct netlink_ext_ack *extack)
+{
+	struct mlx5_esw_sched_node *vports_tc_node, *vport_tc_node, *tmp;
+
+	vports_tc_node = list_first_entry(&tc_arbiter_node->children,
+					  struct mlx5_esw_sched_node,
+					  entry);
+
+	list_for_each_entry_safe(vport_tc_node, tmp, &vports_tc_node->children,
+				 entry)
+		esw_qos_vport_update_parent(vport_tc_node->vport, node, extack);
+}
+
+static int esw_qos_switch_tc_arbiter_node_to_vports(
+	struct mlx5_esw_sched_node *tc_arbiter_node,
+	struct mlx5_esw_sched_node *node,
+	struct netlink_ext_ack *extack)
+{
+	u32 parent_tsar_ix = node->parent ?
+			     node->parent->ix : node->esw->qos.root_tsar_ix;
+	int err;
+
+	err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix,
+					     node->max_rate, node->bw_share,
+					     &node->ix);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Failed to create scheduling element for vports node when disabling vports TC QoS");
+		return err;
+	}
+
+	node->type = SCHED_NODE_TYPE_VPORTS_TSAR;
+
+	/* Disable TC QoS for vports in the arbiter node. */
+	esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, extack);
+
+	return 0;
+}
+
+static int esw_qos_switch_vports_node_to_tc_arbiter(
+	struct mlx5_esw_sched_node *node,
+	struct mlx5_esw_sched_node *tc_arbiter_node,
+	struct netlink_ext_ack *extack)
+{
+	struct mlx5_esw_sched_node *vport_node, *tmp;
+	struct mlx5_vport *vport;
+	int err;
+
+	/* Enable TC QoS for each vport in the node. */
+	list_for_each_entry_safe(vport_node, tmp, &node->children, entry) {
+		vport = vport_node->vport;
+		err = esw_qos_vport_update_parent(vport, tc_arbiter_node,
+						  extack);
+		if (err)
+			goto err_out;
+	}
+
+	/* Destroy the current vports node TSAR. */
+	err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
+						  SCHEDULING_HIERARCHY_E_SWITCH,
+						  node->ix);
+	if (err)
+		goto err_out;
+
+	return 0;
+err_out:
+	/* Restore vports back into the node if an error occurs. */
+	esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, NULL);
+
+	return err;
+}
+
+static struct mlx5_esw_sched_node *
+esw_qos_move_node(struct mlx5_esw_sched_node *curr_node)
+{
+	struct mlx5_esw_sched_node *new_node;
+
+	new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix,
+					curr_node->type, NULL);
+	if (!IS_ERR(new_node))
+		esw_qos_nodes_set_parent(&curr_node->children, new_node);
+
+	return new_node;
+}
+
+static int esw_qos_node_disable_tc_arbitration(struct mlx5_esw_sched_node *node,
+					       struct netlink_ext_ack *extack)
+{
+	struct mlx5_esw_sched_node *curr_node;
+	int err;
+
+	if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
+		return 0;
+
+	/* Allocate a new rate node to hold the current state, which will allow
+	 * for restoring the vports back to this node after disabling TC
+	 * arbitration.
+	 */
+	curr_node = esw_qos_move_node(node);
+	if (IS_ERR(curr_node)) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed setting up vports node");
+		return PTR_ERR(curr_node);
+	}
+
+	/* Disable TC QoS for all vports, and assign them back to the node. */
+	err = esw_qos_switch_tc_arbiter_node_to_vports(curr_node, node, extack);
+	if (err)
+		goto err_out;
+
+	/* Clean up the TC arbiter node after disabling TC QoS for vports. */
+	esw_qos_tc_arbiter_scheduling_teardown(curr_node, extack);
+	goto out;
+err_out:
+	esw_qos_nodes_set_parent(&curr_node->children, node);
+out:
+	__esw_qos_free_node(curr_node);
+	return err;
+}
+
+static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node,
+					      struct netlink_ext_ack *extack)
+{
+	struct mlx5_esw_sched_node *curr_node, *child;
+	int err, new_level, max_level;
+
+	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
+		return 0;
+
+	/* Increase the hierarchy level by one to account for the additional
+	 * vports TC scheduling node, and verify that the new level does not
+	 * exceed the maximum allowed depth.
+	 */
+	new_level = node->level + 1;
+	max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
+	if (new_level > max_level) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "TC arbitration on nodes is not supported beyond max scheduling depth");
+		return -EOPNOTSUPP;
+	}
+
+	/* Ensure the node does not contain non-leaf children before assigning
+	 * TC bandwidth.
+	 */
+	if (!list_empty(&node->children)) {
+		list_for_each_entry(child, &node->children, entry) {
+			if (!child->vport) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Cannot configure TC bandwidth on a node with non-leaf children");
+				return -EOPNOTSUPP;
+			}
+		}
+	}
+
+	/* Allocate a new node that will store the information of the current
+	 * node. This will be used later to restore the node if necessary.
+	 */
+	curr_node = esw_qos_move_node(node);
+	if (IS_ERR(curr_node)) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed setting up node TC QoS");
+		return PTR_ERR(curr_node);
+	}
+
+	/* Initialize the TC arbiter node for QoS management.
+	 * This step prepares the node for handling Traffic Class arbitration.
+	 */
+	err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
+	if (err)
+		goto err_setup;
+
+	/* Enable TC QoS for each vport within the current node. */
+	err = esw_qos_switch_vports_node_to_tc_arbiter(curr_node, node, extack);
+	if (err)
+		goto err_switch_vports;
+	goto out;
+
+err_switch_vports:
+	esw_qos_tc_arbiter_scheduling_teardown(node, NULL);
+	node->ix = curr_node->ix;
+	node->type = curr_node->type;
+err_setup:
+	esw_qos_nodes_set_parent(&curr_node->children, node);
+out:
+	__esw_qos_free_node(curr_node);
+	return err;
+}
+
 static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
 {
 	struct ethtool_link_ksettings lksettings;
@@ -848,6 +1099,31 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
 	return 0;
 }
 
+static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
+					       u32 *tc_bw)
+{
+	int i, num_tcs = esw_qos_num_tcs(esw->dev);
+
+	for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) {
+		if (tc_bw[i])
+			return false;
+	}
+
+	return true;
+}
+
+static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
+{
+	int i;
+
+	for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) {
+		if (tc_bw[i])
+			return false;
+	}
+
+	return true;
+}
+
 int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
 {
 	if (esw->qos.domain)
@@ -921,9 +1197,28 @@ int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node,
 					 u32 *tc_bw,
 					 struct netlink_ext_ack *extack)
 {
-	NL_SET_ERR_MSG_MOD(extack,
-			   "TC bandwidth shares are not supported on nodes");
-	return -EOPNOTSUPP;
+	struct mlx5_esw_sched_node *node = priv;
+	struct mlx5_eswitch *esw = node->esw;
+	bool disable;
+	int err;
+
+	if (!esw_qos_validate_unsupported_tc_bw(esw, tc_bw)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "E-Switch traffic classes number is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	disable = esw_qos_tc_bw_disabled(tc_bw);
+	esw_qos_lock(esw);
+	if (disable) {
+		err = esw_qos_node_disable_tc_arbitration(node, extack);
+		goto unlock;
+	}
+
+	err = esw_qos_node_enable_tc_arbitration(node, extack);
+unlock:
+	esw_qos_unlock(esw);
+	return err;
 }
 
 int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
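
The new devlink callback accepts a per-traffic-class bandwidth array and either enables TC arbitration on the rate node or tears it down when every entry is zero. The standalone sketch below is not part of the patch; it only mirrors the checks performed by esw_qos_validate_unsupported_tc_bw() and esw_qos_tc_bw_disabled(). The values DEVLINK_RATE_TCS_MAX == 8 and a device exposing 4 traffic classes (standing in for esw_qos_num_tcs()) are assumptions made for illustration.

/* Standalone illustration, not driver code: how a tc_bw request is
 * classified. DEVLINK_RATE_TCS_MAX == 8 and NUM_DEV_TCS == 4 are
 * placeholder assumptions for this sketch only.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEVLINK_RATE_TCS_MAX	8
#define NUM_DEV_TCS		4	/* placeholder for esw_qos_num_tcs() */

static bool validate_unsupported_tc_bw(const unsigned int *tc_bw)
{
	/* Any share requested for a TC the device does not expose is invalid. */
	for (int i = NUM_DEV_TCS; i < DEVLINK_RATE_TCS_MAX; i++)
		if (tc_bw[i])
			return false;
	return true;
}

static bool tc_bw_disabled(const unsigned int *tc_bw)
{
	/* An all-zero array means "disable TC arbitration on this node". */
	for (int i = 0; i < DEVLINK_RATE_TCS_MAX; i++)
		if (tc_bw[i])
			return false;
	return true;
}

int main(void)
{
	unsigned int enable[DEVLINK_RATE_TCS_MAX]  = { 50, 30, 10, 10 };
	unsigned int disable[DEVLINK_RATE_TCS_MAX] = { 0 };
	unsigned int invalid[DEVLINK_RATE_TCS_MAX] = { 0, 0, 0, 0, 25 };

	printf("enable:  valid=%d all-zero=%d\n",
	       validate_unsupported_tc_bw(enable), tc_bw_disabled(enable));
	printf("disable: valid=%d all-zero=%d\n",
	       validate_unsupported_tc_bw(disable), tc_bw_disabled(disable));
	printf("invalid: valid=%d all-zero=%d\n",
	       validate_unsupported_tc_bw(invalid), tc_bw_disabled(invalid));
	return 0;
}

Rejecting shares for classes the device does not expose, rather than silently ignoring them, keeps the devlink request consistent with the state that can actually be programmed into the hardware.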