Skip to content

Commit 04de7dd

Browse files
roidayanSaeed Mahameed
authored and
Saeed Mahameed
committed
net/mlx5e: Infrastructure for duplicated offloading of TC flows
Under uplink LAG or multipath schemes, traffic that matches one flow might arrive on both uplink ports and be transmitted through both as part of supporting aggregation and high availability. To cope with the fact that the SW model might use a logical SW port (e.g. an uplink team or bond) while we have two HW ports with an e-switch on each, there are cases where, in order to offload a SW TC rule, we need to duplicate it into two HW flows. Since each HW rule has its own counter, we also aggregate the counters of both rules when a flow stats query is executed from user-space. Introduce the changes for the different elements (add/delete/stats); currently nothing is duplicated. Signed-off-by: Roi Dayan <roid@mellanox.com> Signed-off-by: Aviv Heller <avivh@mellanox.com> Signed-off-by: Shahar Klein <shahark@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
1 parent ac004b8 commit 04de7dd

File tree

3 files changed

+176
-8
lines changed

3 files changed

+176
-8
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en_tc.c

Lines changed: 167 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "fs_core.h"
5353
#include "en/port.h"
5454
#include "en/tc_tun.h"
55+
#include "lib/devcom.h"
5556

5657
struct mlx5_nic_flow_attr {
5758
u32 action;
@@ -74,6 +75,7 @@ enum {
7475
MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
7576
MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
7677
MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5),
78+
MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 6),
7779
};
7880

7981
#define MLX5E_TC_MAX_SPLITS 1
@@ -111,8 +113,10 @@ struct mlx5e_tc_flow {
111113
* destinations.
112114
*/
113115
struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
116+
struct mlx5e_tc_flow *peer_flow;
114117
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
115118
struct list_head hairpin; /* flows sharing the same hairpin */
119+
struct list_head peer; /* flows with peer flow */
116120
union {
117121
struct mlx5_esw_flow_attr esw_attr[0];
118122
struct mlx5_nic_flow_attr nic_attr[0];
@@ -1249,13 +1253,48 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
12491253
}
12501254
}
12511255

1256+
/* Tear down the duplicated (peer) flow attached to @flow, if any.
 * Does nothing unless @flow has both MLX5E_TC_FLOW_ESWITCH and
 * MLX5E_TC_FLOW_DUP set. Otherwise unlinks @flow from the e-switch
 * peer_flows list, clears the DUP flag, deletes the peer FDB flow using
 * the peer flow's own priv, and frees the peer flow object.
 */
static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1257+
{
1258+
struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1259+
1260+
if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
1261+
!(flow->flags & MLX5E_TC_FLOW_DUP))
1262+
return;
1263+
1264+
/* peer_mutex guards only the list unlink; the teardown below runs unlocked */
mutex_lock(&esw->offloads.peer_mutex);
1265+
list_del(&flow->peer);
1266+
mutex_unlock(&esw->offloads.peer_mutex);
1267+
1268+
flow->flags &= ~MLX5E_TC_FLOW_DUP;
1269+
1270+
mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1271+
kvfree(flow->peer_flow);
1272+
/* drop the stale pointer so it cannot be reused after the free */
flow->peer_flow = NULL;
1273+
}
1274+
1275+
/* Delete the peer flow of @flow while holding the devcom peer data for
 * MLX5_DEVCOM_ESW_OFFLOADS. If no peer data is available the function
 * returns without touching @flow.
 */
static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1276+
{
1277+
struct mlx5_core_dev *dev = flow->priv->mdev;
1278+
struct mlx5_devcom *devcom = dev->priv.devcom;
1279+
struct mlx5_eswitch *peer_esw;
1280+
1281+
/* peer_esw is used only as an availability check here */
peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1282+
if (!peer_esw)
1283+
return;
1284+
1285+
__mlx5e_tc_del_fdb_peer_flow(flow);
1286+
mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1287+
}
1288+
12521289
/* Remove @flow from hardware. E-switch flows first drop their peer
 * duplicate (a no-op when none exists) and then the FDB flow itself;
 * all other flows take the NIC deletion path.
 */
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
12531290
struct mlx5e_tc_flow *flow)
12541291
{
1255-
if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
1292+
if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
1293+
mlx5e_tc_del_fdb_peer_flow(flow);
12561294
mlx5e_tc_del_fdb_flow(priv, flow);
1257-
else
1295+
} else {
12581296
mlx5e_tc_del_nic_flow(priv, flow);
1297+
}
12591298
}
12601299

12611300

@@ -2660,6 +2699,11 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
26602699
return &priv->fs.tc.ht;
26612700
}
26622701

2702+
/* Decide whether @flow must be duplicated onto the peer e-switch.
 * Currently a stub: it ignores @flow and always returns false, so no
 * flow is duplicated.
 */
static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
2703+
{
2704+
return false;
2705+
}
2706+
26632707
static int
26642708
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
26652709
struct tc_cls_flower_offload *f, u16 flow_flags,
@@ -2693,11 +2737,13 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
26932737
}
26942738

26952739
static int
2696-
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
2697-
struct tc_cls_flower_offload *f,
2698-
u16 flow_flags,
2699-
struct net_device *filter_dev,
2700-
struct mlx5e_tc_flow **__flow)
2740+
__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
2741+
struct tc_cls_flower_offload *f,
2742+
u16 flow_flags,
2743+
struct net_device *filter_dev,
2744+
struct mlx5_eswitch_rep *in_rep,
2745+
struct mlx5_core_dev *in_mdev,
2746+
struct mlx5e_tc_flow **__flow)
27012747
{
27022748
struct netlink_ext_ack *extack = f->common.extack;
27032749
struct mlx5e_tc_flow_parse_attr *parse_attr;
@@ -2723,6 +2769,8 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
27232769
if (err)
27242770
goto err_free;
27252771

2772+
flow->esw_attr->in_rep = in_rep;
2773+
flow->esw_attr->in_mdev = in_mdev;
27262774
err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack);
27272775
if (err)
27282776
goto err_free;
@@ -2738,6 +2786,87 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
27382786
return err;
27392787
}
27402788

2789+
/* Create a duplicate of @flow on the peer e-switch and link the two.
 * On success the new flow is stored in flow->peer_flow, @flow is marked
 * MLX5E_TC_FLOW_DUP and is appended to the local e-switch peer_flows list.
 * Returns 0 on success, -ENODEV when no devcom peer data is available, or
 * the error returned by __mlx5e_add_fdb_flow().
 */
static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f,
2790+
struct mlx5e_tc_flow *flow)
2791+
{
2792+
struct mlx5e_priv *priv = flow->priv, *peer_priv;
2793+
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
2794+
struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
2795+
struct mlx5e_tc_flow_parse_attr *parse_attr;
2796+
struct mlx5e_rep_priv *peer_urpriv;
2797+
struct mlx5e_tc_flow *peer_flow;
2798+
struct mlx5_core_dev *in_mdev;
2799+
int err = 0;
2800+
2801+
peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
2802+
if (!peer_esw)
2803+
return -ENODEV;
2804+
2805+
/* NOTE(review): peer_urpriv is dereferenced without a NULL check - confirm it cannot be NULL here */
peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
2806+
peer_priv = netdev_priv(peer_urpriv->netdev);
2807+
2808+
/* in_mdev is the device the packet originated from.
2809+
* Packets redirected to the uplink keep the mdev of the
2810+
* original flow, while packets redirected from the uplink
2811+
* use the peer mdev.
2812+
*/
2813+
if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT)
2814+
in_mdev = peer_priv->mdev;
2815+
else
2816+
in_mdev = priv->mdev;
2817+
2818+
/* the peer flow reuses the original flow's flags, filter_dev and in_rep */
parse_attr = flow->esw_attr->parse_attr;
2819+
err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags,
2820+
parse_attr->filter_dev,
2821+
flow->esw_attr->in_rep, in_mdev, &peer_flow);
2822+
if (err)
2823+
goto out;
2824+
2825+
flow->peer_flow = peer_flow;
2826+
flow->flags |= MLX5E_TC_FLOW_DUP;
2827+
mutex_lock(&esw->offloads.peer_mutex);
2828+
list_add_tail(&flow->peer, &esw->offloads.peer_flows);
2829+
mutex_unlock(&esw->offloads.peer_mutex);
2830+
2831+
/* the devcom peer data is released on both the success and error paths */
out:
2832+
mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
2833+
return err;
2834+
}
2835+
2836+
/* Add an FDB flow for @priv and, when is_peer_flow_needed() says so, a
 * duplicate on the peer e-switch. The ingress rep and mdev passed down
 * are taken from @priv itself. On success the new flow is returned through
 * @__flow and 0 is returned; on failure @__flow is left untouched and the
 * error code is returned.
 */
static int
2837+
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
2838+
struct tc_cls_flower_offload *f,
2839+
u16 flow_flags,
2840+
struct net_device *filter_dev,
2841+
struct mlx5e_tc_flow **__flow)
2842+
{
2843+
struct mlx5e_rep_priv *rpriv = priv->ppriv;
2844+
struct mlx5_eswitch_rep *in_rep = rpriv->rep;
2845+
struct mlx5_core_dev *in_mdev = priv->mdev;
2846+
struct mlx5e_tc_flow *flow;
2847+
int err;
2848+
2849+
err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
2850+
in_mdev, &flow);
2851+
if (err)
2852+
goto out;
2853+
2854+
if (is_peer_flow_needed(flow)) {
2855+
err = mlx5e_tc_add_fdb_peer_flow(f, flow);
2856+
if (err) {
2857+
/* NOTE(review): the local flow is only un-offloaded here; confirm that
 * mlx5e_tc_del_fdb_flow() or the caller also frees the flow object.
 */
mlx5e_tc_del_fdb_flow(priv, flow);
2858+
goto out;
2859+
}
2860+
}
2861+
2862+
*__flow = flow;
2863+
2864+
return 0;
2865+
2866+
out:
2867+
return err;
2868+
}
2869+
27412870
static int
27422871
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
27432872
struct tc_cls_flower_offload *f,
@@ -2882,7 +3011,9 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
28823011
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
28833012
struct tc_cls_flower_offload *f, int flags)
28843013
{
3014+
struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
28853015
struct rhashtable *tc_ht = get_tc_ht(priv);
3016+
struct mlx5_eswitch *peer_esw;
28863017
struct mlx5e_tc_flow *flow;
28873018
struct mlx5_fc *counter;
28883019
u64 bytes;
@@ -2902,6 +3033,27 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
29023033

29033034
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
29043035

3036+
peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
3037+
if (!peer_esw)
3038+
goto out;
3039+
3040+
if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
3041+
(flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
3042+
u64 bytes2;
3043+
u64 packets2;
3044+
u64 lastuse2;
3045+
3046+
counter = mlx5e_tc_get_counter(flow->peer_flow);
3047+
mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
3048+
3049+
bytes += bytes2;
3050+
packets += packets2;
3051+
lastuse = max_t(u64, lastuse, lastuse2);
3052+
}
3053+
3054+
mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
3055+
3056+
out:
29053057
tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
29063058

29073059
return 0;
@@ -3014,3 +3166,11 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
30143166

30153167
return atomic_read(&tc_ht->nelems);
30163168
}
3169+
3170+
/* Delete the peer flow of every flow linked on @esw's peer_flows list.
 * The _safe iterator is required because __mlx5e_tc_del_fdb_peer_flow()
 * unlinks the current entry from the list.
 * NOTE(review): the list walk itself is not done under peer_mutex (the
 * helper takes it only around list_del) - confirm callers serialize this.
 */
void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
3171+
{
3172+
struct mlx5e_tc_flow *flow, *tmp;
3173+
3174+
list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
3175+
__mlx5e_tc_del_fdb_peer_flow(flow);
3176+
}

drivers/net/ethernet/mellanox/mlx5/core/eswitch.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ struct mlx5_esw_offload {
167167
struct mlx5_flow_table *ft_offloads;
168168
struct mlx5_flow_group *vport_rx_group;
169169
struct mlx5_eswitch_rep *vport_reps;
170+
struct list_head peer_flows;
171+
struct mutex peer_mutex;
170172
DECLARE_HASHTABLE(encap_tbl, 8);
171173
DECLARE_HASHTABLE(mod_hdr_tbl, 8);
172174
u8 inline_mode;

drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
563563
dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
564564
dest->vport.num = 0;
565565
dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
566-
dest->vport.vhca_id_valid = 1;
566+
dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
567567
}
568568

569569
static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
@@ -1313,8 +1313,11 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
13131313
return 0;
13141314
}
13151315

1316+
void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
1317+
13161318
/* Undo e-switch pairing for @esw: first remove the duplicated TC peer
 * flows, then delete the FDB peer miss rules.
 */
static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
13171319
{
1320+
mlx5e_tc_clean_fdb_peer_flows(esw);
13181321
esw_del_fdb_peer_miss_rules(esw);
13191322
}
13201323

@@ -1365,6 +1368,9 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
13651368
{
13661369
struct mlx5_devcom *devcom = esw->dev->priv.devcom;
13671370

1371+
INIT_LIST_HEAD(&esw->offloads.peer_flows);
1372+
mutex_init(&esw->offloads.peer_mutex);
1373+
13681374
if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
13691375
return;
13701376

0 commit comments

Comments
 (0)