Commit 061f5b2

Shay Drory authored and Saeed Mahameed committed
net/mlx5: SF, Use all available cpu for setting cpu affinity
Currently all SFs use the same CPUs. Spread the SFs over the available CPUs in a round-robin manner to achieve a better distribution of SFs across CPUs.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
1 parent 79b60ca commit 061f5b2
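To see the policy this commit implements, here is a small self-contained userspace model (illustrative only, not driver code): each new SF IRQ is pinned to the CPU currently carrying the fewest IRQs, which degenerates to a plain round-robin when loads are equal. The driver's real selector is cpu_get_least_loaded() in the irq_affinity.c diff below, which additionally honors a requested cpumask and the online-CPU mask.

#include <stdio.h>

#define NCPUS 4

/* Illustrative model of the commit's policy: pick the CPU with the fewest
 * IRQs bound to it (ties resolve to the lowest CPU id), then account for
 * the new binding. Equal loads degenerate to a round-robin spread.
 */
static int pick_least_loaded(unsigned int irqs_per_cpu[NCPUS])
{
        int best = 0;

        for (int cpu = 1; cpu < NCPUS; cpu++)
                if (irqs_per_cpu[cpu] < irqs_per_cpu[best])
                        best = cpu;
        irqs_per_cpu[best]++;   /* mirrors the pool's irqs_per_cpu accounting */
        return best;
}

int main(void)
{
        unsigned int irqs_per_cpu[NCPUS] = { 0 };

        for (int irq = 0; irq < 8; irq++)
                printf("SF IRQ %d -> CPU %d\n", irq, pick_least_loaded(irqs_per_cpu));
        return 0;       /* prints CPUs 0,1,2,3,0,1,2,3 */
}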

File tree

5 files changed: +201 -34 lines changed

drivers/net/ethernet/mellanox/mlx5/core/eq.c

Lines changed: 10 additions & 1 deletion
@@ -798,7 +798,10 @@ static void comp_irqs_release(struct mlx5_core_dev *dev)
 {
        struct mlx5_eq_table *table = dev->priv.eq_table;

-       mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
+       if (mlx5_core_is_sf(dev))
+               mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
+       else
+               mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
        kfree(table->comp_irqs);
 }

@@ -814,6 +817,12 @@ static int comp_irqs_request(struct mlx5_core_dev *dev)
        table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
        if (!table->comp_irqs)
                return -ENOMEM;
+       if (mlx5_core_is_sf(dev)) {
+               ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
+               if (ret < 0)
+                       goto free_irqs;
+               return ret;
+       }

        cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
        if (!cpus) {
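For orientation, a condensed sketch of the resulting request flow. This is not the verbatim upstream function: the PF/VF path is summarized in a comment, and the body of the pre-existing free_irqs label is an assumption based on the kfree() in the release path.

static int comp_irqs_request(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = dev->priv.eq_table;
        int ncomp_eqs = table->num_comp_eqs;
        int ret;

        table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
        if (!table->comp_irqs)
                return -ENOMEM;
        if (mlx5_core_is_sf(dev)) {
                /* SF: spread the IRQs over CPUs; ret is the number granted */
                ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
                if (ret < 0)
                        goto free_irqs;
                return ret;
        }
        /* ... unchanged PF/VF path: build a cpumask per vector, request each IRQ ... */

free_irqs:
        kfree(table->comp_irqs);        /* assumed body of the elided label */
        return ret;
}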

drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c

Lines changed: 133 additions & 6 deletions
@@ -5,21 +5,81 @@
 #include "mlx5_irq.h"
 #include "pci_irq.h"

+static void cpu_put(struct mlx5_irq_pool *pool, int cpu)
+{
+       pool->irqs_per_cpu[cpu]--;
+}
+
+static void cpu_get(struct mlx5_irq_pool *pool, int cpu)
+{
+       pool->irqs_per_cpu[cpu]++;
+}
+
+/* Gets the least loaded CPU, i.e. the CPU with the fewest IRQs bound to it */
+static int cpu_get_least_loaded(struct mlx5_irq_pool *pool,
+                               const struct cpumask *req_mask)
+{
+       int best_cpu = -1;
+       int cpu;
+
+       for_each_cpu_and(cpu, req_mask, cpu_online_mask) {
+               /* CPU has zero IRQs on it. No need to search any more CPUs. */
+               if (!pool->irqs_per_cpu[cpu]) {
+                       best_cpu = cpu;
+                       break;
+               }
+               if (best_cpu < 0)
+                       best_cpu = cpu;
+               if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu])
+                       best_cpu = cpu;
+       }
+       if (best_cpu == -1) {
+               /* There are no online CPUs in req_mask */
+               mlx5_core_err(pool->dev, "NO online CPUs in req_mask (%*pbl)\n",
+                             cpumask_pr_args(req_mask));
+               best_cpu = cpumask_first(cpu_online_mask);
+       }
+       pool->irqs_per_cpu[best_cpu]++;
+       return best_cpu;
+}
+
 /* Creating an IRQ from irq_pool */
 static struct mlx5_irq *
 irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
 {
+       cpumask_var_t auto_mask;
+       struct mlx5_irq *irq;
        u32 irq_index;
        int err;

-       err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs,
-                      GFP_KERNEL);
+       if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL))
+               return ERR_PTR(-ENOMEM);
+       err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL);
        if (err)
                return ERR_PTR(err);
-       return mlx5_irq_alloc(pool, irq_index, req_mask);
+       if (pool->irqs_per_cpu) {
+               if (cpumask_weight(req_mask) > 1)
+                       /* if req_mask contains more than one CPU, set the least
+                        * loaded CPU of req_mask
+                        */
+                       cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask);
+               else
+                       cpu_get(pool, cpumask_first(req_mask));
+       }
+       irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask);
+       free_cpumask_var(auto_mask);
+       return irq;
 }

-/* Looking for the IRQ with the smallest refcount and the same mask */
+/* Looking for the IRQ with the smallest refcount that fits req_mask.
+ * If the pool is sf_comp_pool, we are looking for an IRQ with any of the
+ * requested CPUs in req_mask.
+ * For example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask
+ * isn't a subset of req_mask, so we skip it; irq1_mask is a subset of
+ * req_mask, so we don't skip it.
+ * If the pool is sf_ctrl_pool, all IRQs have the same mask, so any IRQ will
+ * fit. And since a mask is a subset of itself, it will pass the first if below.
+ */
 static struct mlx5_irq *
 irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
 {
@@ -35,8 +95,8 @@ irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
                struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter);
                int iter_refcount = mlx5_irq_read_locked(iter);

-               if (!cpumask_equal(iter_mask, req_mask))
-                       /* If a user request a mask, skip IRQs that's aren't a match */
+               if (!cpumask_subset(iter_mask, req_mask))
+                       /* skip IRQs whose mask is not a subset of req_mask */
                        continue;
                if (iter_refcount < pool->min_threshold)
                        /* If we found an IRQ with less than min_thres, return it */
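The subset rule described in the comment above is easy to check with plain bitmasks. A small userspace illustration (unsigned longs standing in for struct cpumask; fits() plays the role of cpumask_subset()):

#include <stdio.h>

/* An IRQ "fits" req_mask when every CPU in its affinity mask is also in
 * req_mask, i.e. the IRQ's mask is a subset of the requested mask.
 */
static int fits(unsigned long irq_mask, unsigned long req_mask)
{
        return (irq_mask & ~req_mask) == 0;
}

int main(void)
{
        unsigned long req_mask = 0xf;                           /* CPUs 0-3 */

        printf("irq0 (0x10) fits: %d\n", fits(0x10, req_mask)); /* 0: CPU 4 is outside */
        printf("irq1 (0x01) fits: %d\n", fits(0x01, req_mask)); /* 1: CPU 0 is inside */
        return 0;
}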
@@ -97,3 +157,70 @@ mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
        mutex_unlock(&pool->lock);
        return least_loaded_irq;
 }
+
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+                                   int num_irqs)
+{
+       struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+       int i;
+
+       for (i = 0; i < num_irqs; i++) {
+               int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i]));
+
+               synchronize_irq(pci_irq_vector(pool->dev->pdev,
+                                              mlx5_irq_get_index(irqs[i])));
+               if (mlx5_irq_put(irqs[i]))
+                       if (pool->irqs_per_cpu)
+                               cpu_put(pool, cpu);
+       }
+}
+
+/**
+ * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQs.
+ * @nirqs: number of IRQs to request.
+ * @irqs: an output array of IRQ pointers.
+ *
+ * Each IRQ is bound to at most 1 CPU.
+ * This function requests IRQs according to the default assignment policy:
+ * - in each iteration, request the least loaded IRQ which is not bound to any
+ *   CPU of the previously requested IRQs.
+ *
+ * This function returns the number of IRQs requested (which might be smaller
+ * than @nirqs) on success, or a negative error code on failure.
+ */
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+                                       struct mlx5_irq **irqs)
+{
+       struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+       cpumask_var_t req_mask;
+       struct mlx5_irq *irq;
+       int i = 0;
+
+       if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+               return -ENOMEM;
+       cpumask_copy(req_mask, cpu_online_mask);
+       for (i = 0; i < nirqs; i++) {
+               if (mlx5_irq_pool_is_sf_pool(pool))
+                       irq = mlx5_irq_affinity_request(pool, req_mask);
+               else
+                       /* In case the SF pool doesn't exist, fall back to the
+                        * PF IRQs. The PF IRQs are already allocated and bound
+                        * to CPUs at this point. Hence, only an index is needed.
+                        */
+                       irq = mlx5_irq_request(dev, i, NULL);
+               if (IS_ERR(irq))
+                       break;
+               irqs[i] = irq;
+               cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask);
+               mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
+                             pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
+                             cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
+                             mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
+       }
+       free_cpumask_var(req_mask);
+       if (!i)
+               return PTR_ERR(irq);
+       return i;
+}
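Caller-side semantics in brief: the return value is a count rather than a plain success flag, so a consumer must be prepared to run with fewer vectors than requested and must pair the call with the matching release helper. A hedged sketch of such a caller (the name setup_sf_eqs and the want/got locals are hypothetical, not driver code):

/* Hypothetical caller: ask for "want" completion IRQs, accept fewer. */
static int setup_sf_eqs(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, int want)
{
        int got = mlx5_irq_affinity_irqs_request_auto(dev, want, irqs);

        if (got < 0)
                return got;     /* not even one IRQ could be obtained */
        /* got <= want: size the EQ set to what was actually granted */
        return got;
}

/* Teardown mirrors the request, as in the eq.c hunk above: */
/* mlx5_irq_affinity_irqs_release(dev, irqs, got); */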

drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h

Lines changed: 13 additions & 0 deletions
@@ -36,13 +36,26 @@ int mlx5_irq_get_index(struct mlx5_irq *irq);

 struct mlx5_irq_pool;
 #ifdef CONFIG_MLX5_SF
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+                                       struct mlx5_irq **irqs);
 struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
                                           const struct cpumask *req_mask);
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+                                   int num_irqs);
 #else
+static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+                                                     struct mlx5_irq **irqs)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline struct mlx5_irq *
 mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
 {
        return ERR_PTR(-EOPNOTSUPP);
 }
+
+static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev,
+                                                 struct mlx5_irq **irqs, int num_irqs) {}
 #endif
 #endif /* __MLX5_IRQ_H__ */

drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c

Lines changed: 37 additions & 27 deletions
@@ -139,15 +139,19 @@ static void irq_release(struct mlx5_irq *irq)
        kfree(irq);
 }

-static void irq_put(struct mlx5_irq *irq)
+int mlx5_irq_put(struct mlx5_irq *irq)
 {
        struct mlx5_irq_pool *pool = irq->pool;
+       int ret = 0;

        mutex_lock(&pool->lock);
        irq->refcount--;
-       if (!irq->refcount)
+       if (!irq->refcount) {
                irq_release(irq);
+               ret = 1;
+       }
        mutex_unlock(&pool->lock);
+       return ret;
 }

 int mlx5_irq_read_locked(struct mlx5_irq *irq)

@@ -202,11 +206,6 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
        snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
 }

-static bool irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
-{
-       return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
-}
-
 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
                                const struct cpumask *affinity)
 {

@@ -219,7 +218,7 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
        if (!irq)
                return ERR_PTR(-ENOMEM);
        irq->irqn = pci_irq_vector(dev->pdev, i);
-       if (!irq_pool_is_sf_pool(pool))
+       if (!mlx5_irq_pool_is_sf_pool(pool))
                irq_set_name(pool, name, i);
        else
                irq_sf_set_name(pool, name, i);

@@ -273,7 +272,7 @@ int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
                return -ENOENT;
        ret = atomic_notifier_chain_register(&irq->nh, nb);
        if (ret)
-               irq_put(irq);
+               mlx5_irq_put(irq);
        return ret;
 }

@@ -282,7 +281,7 @@ int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
        int err = 0;

        err = atomic_notifier_chain_unregister(&irq->nh, nb);
-       irq_put(irq);
+       mlx5_irq_put(irq);
        return err;
 }

@@ -327,6 +326,20 @@ static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
        return irq_table->sf_comp_pool;
 }

+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
+{
+       struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+       struct mlx5_irq_pool *pool = NULL;
+
+       if (mlx5_core_is_sf(dev))
+               pool = sf_irq_pool_get(irq_table);
+
+       /* In some configs, there won't be a pool of SF IRQs. Hence, return
+        * the PF IRQ pool in case the SF pool doesn't exist.
+        */
+       return pool ? pool : irq_table->pf_pool;
+}
+
 static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
 {
        struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);

@@ -352,7 +365,7 @@ static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)

        for (i = 0; i < nirqs; i++) {
                synchronize_irq(irqs[i]->irqn);
-               irq_put(irqs[i]);
+               mlx5_irq_put(irqs[i]);
        }
 }

@@ -380,7 +393,7 @@ struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
        if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
                return ERR_PTR(-ENOMEM);
        cpumask_copy(req_mask, cpu_online_mask);
-       if (!irq_pool_is_sf_pool(pool)) {
+       if (!mlx5_irq_pool_is_sf_pool(pool)) {
                /* In case we are allocating a control IRQ for PF/VF */
                if (!pool->xa_num_irqs.max) {
                        cpumask_clear(req_mask);

@@ -398,7 +411,7 @@ struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
 }

 /**
- * mlx5_irq_request - request an IRQ for mlx5 device.
+ * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
  * @dev: mlx5 device that is requesting the IRQ.
  * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
  *          provided.

@@ -413,22 +426,8 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
        struct mlx5_irq_pool *pool;
        struct mlx5_irq *irq;

-       if (mlx5_core_is_sf(dev)) {
-               pool = sf_irq_pool_get(irq_table);
-               if (!pool)
-                       /* we don't have IRQs for SFs, using the PF IRQs */
-                       goto pf_irq;
-               if (cpumask_empty(affinity) && !strcmp(pool->name, "mlx5_sf_comp"))
-                       /* In case an SF user request IRQ with vecidx */
-                       irq = irq_pool_request_vector(pool, vecidx, NULL);
-               else
-                       irq = mlx5_irq_affinity_request(pool, affinity);
-               goto out;
-       }
-pf_irq:
        pool = irq_table->pf_pool;
        irq = irq_pool_request_vector(pool, vecidx, affinity);
-out:
        if (IS_ERR(irq))
                return irq;
        mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",

@@ -518,6 +517,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool)
                irq_release(irq);
        xa_destroy(&pool->irqs);
        mutex_destroy(&pool->lock);
+       kfree(pool->irqs_per_cpu);
        kvfree(pool);
 }

@@ -565,7 +565,17 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
                err = PTR_ERR(table->sf_comp_pool);
                goto err_sf_ctrl;
        }
+
+       table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
+       if (!table->sf_comp_pool->irqs_per_cpu) {
+               err = -ENOMEM;
+               goto err_irqs_per_cpu;
+       }
+
        return 0;
+
+err_irqs_per_cpu:
+       irq_pool_free(table->sf_comp_pool);
 err_sf_ctrl:
        irq_pool_free(table->sf_ctrl_pool);
 err_pf:
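One subtlety worth noting: irq_put() became mlx5_irq_put() and now returns 1 only when the last reference is dropped, which is what lets mlx5_irq_affinity_irqs_release() decrement the per-CPU counter only when an IRQ is truly released. A minimal userspace model of that contract (illustrative; the real function takes the pool mutex, omitted here):

#include <stdio.h>

struct fake_irq {
        int refcount;
};

/* Models the new mlx5_irq_put() contract: returns 1 only when the last
 * reference is dropped and the IRQ is released.
 */
static int fake_irq_put(struct fake_irq *irq)
{
        if (--irq->refcount)
                return 0;
        /* resource release would happen here */
        return 1;
}

int main(void)
{
        struct fake_irq irq = { .refcount = 2 };
        unsigned int irqs_on_cpu0 = 1;

        if (fake_irq_put(&irq))         /* still referenced: no accounting change */
                irqs_on_cpu0--;
        if (fake_irq_put(&irq))         /* last ref dropped: CPU 0 loses this IRQ */
                irqs_on_cpu0--;
        printf("IRQs on CPU0: %u\n", irqs_on_cpu0);     /* prints 0 */
        return 0;
}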

drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h

Lines changed: 8 additions & 0 deletions
@@ -20,12 +20,20 @@ struct mlx5_irq_pool {
        struct xarray irqs;
        u32 max_threshold;
        u32 min_threshold;
+       u16 *irqs_per_cpu;
        struct mlx5_core_dev *dev;
 };

+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev);
+static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
+{
+       return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
+}
+
 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
                                const struct cpumask *affinity);
 int mlx5_irq_get_locked(struct mlx5_irq *irq);
 int mlx5_irq_read_locked(struct mlx5_irq *irq);
+int mlx5_irq_put(struct mlx5_irq *irq);

 #endif /* __PCI_IRQ_H__ */
