Skip to content

Commit

Permalink
net/mlx5: Lag, use hash when in roce lag on 4 ports
Browse files Browse the repository at this point in the history
Downstream patches will add support for lag over 4 ports.
In that mode we will only use hash as the uplink selection method.
Using hash instead of queue affinity (before this patch) offers key
advantages like:

- Align ports selection method with the method used by the bond device

- Better packets distribution where a single queue can transmit from
  multiple ports (with queue affinity a queue is bound to a single port
  regardless of the packet being sent).

- In case of failover we traffic is split between multiple ports and not
  a single one like in queue affinity.

Going forward it was decided that queue affinity will be deprecated
as using hash provides a better user experience which means on 4 ports
HCAs hash will always be used.

Future work will add hash support for 2 ports HCAs as well.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
  • Loading branch information
mark-bloch authored and Saeed Mahameed committed May 10, 2022
1 parent e2c4593 commit cdf611d
Showing 1 changed file with 36 additions and 9 deletions.
45 changes: 36 additions & 9 deletions drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,17 +310,41 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
mlx5_lag_drop_rule_setup(ldev, tracker);
}

static void mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
struct lag_tracker *tracker, u8 *flags)
#define MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED 4
static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
struct lag_tracker *tracker, u8 *flags)
{
bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE);
struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

if (roce_lag ||
!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) ||
tracker->tx_type != NETDEV_LAG_TX_TYPE_HASH)
return;
*flags |= MLX5_LAG_FLAG_HASH_BASED;
if (ldev->ports == MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED) {
/* Four ports are support only in hash mode */
if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table))
return -EINVAL;
*flags |= MLX5_LAG_FLAG_HASH_BASED;
}

return 0;
}

static int mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
struct lag_tracker *tracker, u8 *flags)
{
struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
*flags |= MLX5_LAG_FLAG_HASH_BASED;
return 0;
}

static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
struct lag_tracker *tracker, u8 *flags)
{
bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE);

if (roce_lag)
return mlx5_lag_set_port_sel_mode_roce(ldev, tracker, flags);
return mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, flags);
}

static char *get_str_port_sel_mode(u8 flags)
Expand Down Expand Up @@ -382,7 +406,10 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,

mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
&ldev->v2p_map[MLX5_LAG_P2]);
mlx5_lag_set_port_sel_mode(ldev, tracker, &flags);
err = mlx5_lag_set_port_sel_mode(ldev, tracker, &flags);
if (err)
return err;

if (flags & MLX5_LAG_FLAG_HASH_BASED) {
err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
ldev->v2p_map[MLX5_LAG_P1],
Expand Down

0 comments on commit cdf611d

Please sign in to comment.