Skip to content

Commit d991666

Browse files
baileyforrest authored and kuba-moo committed
gve: make IRQ handlers and page allocation NUMA aware
All memory in GVE is currently allocated without regard for the NUMA node of the device. Because access to NUMA-local memory is significantly cheaper than access to a remote node, this change attempts to ensure that page frags used in the RX path, including page pool frags, are allocated on the NUMA node local to the gVNIC device. Note that this attempt is best-effort. If necessary, the driver will still allocate non-local memory, as __GFP_THISNODE is not passed. Descriptor ring allocations are not updated, as dma_alloc_coherent handles that.

This change also modifies the IRQ affinity setting to only select CPUs from the node local to the device, preserving the behavior that TX and RX queues of the same index share CPU affinity.

Signed-off-by: Bailey Forrest <bcf@google.com>
Signed-off-by: Joshua Washington <joshwash@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
Signed-off-by: Jeroen de Borst <jeroendb@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250707210107.2742029-1-jeroendb@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 11b5d56 commit d991666

File tree

5 files changed

+37
-17
lines changed

5 files changed

+37
-17
lines changed

drivers/net/ethernet/google/gve/gve.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,7 @@ struct gve_priv {
804804
struct gve_tx_queue_config tx_cfg;
805805
struct gve_rx_queue_config rx_cfg;
806806
u32 num_ntfy_blks; /* split between TX and RX so must be even */
807+
int numa_node;
807808

808809
struct gve_registers __iomem *reg_bar0; /* see gve_register.h */
809810
__be32 __iomem *db_bar2; /* "array" of doorbells */

drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv,
246246
.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
247247
.order = 0,
248248
.pool_size = GVE_PAGE_POOL_SIZE_MULTIPLIER * priv->rx_desc_cnt,
249+
.nid = priv->numa_node,
249250
.dev = &priv->pdev->dev,
250251
.netdev = priv->dev,
251252
.napi = &priv->ntfy_blocks[ntfy_id].napi,

drivers/net/ethernet/google/gve/gve_main.c

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -461,10 +461,19 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
461461
return work_done;
462462
}
463463

464+
static const struct cpumask *gve_get_node_mask(struct gve_priv *priv)
465+
{
466+
if (priv->numa_node == NUMA_NO_NODE)
467+
return cpu_all_mask;
468+
else
469+
return cpumask_of_node(priv->numa_node);
470+
}
471+
464472
static int gve_alloc_notify_blocks(struct gve_priv *priv)
465473
{
466474
int num_vecs_requested = priv->num_ntfy_blks + 1;
467-
unsigned int active_cpus;
475+
const struct cpumask *node_mask;
476+
unsigned int cur_cpu;
468477
int vecs_enabled;
469478
int i, j;
470479
int err;
@@ -503,8 +512,6 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
503512
if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
504513
priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
505514
}
506-
/* Half the notification blocks go to TX and half to RX */
507-
active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
508515

509516
/* Setup Management Vector - the last vector */
510517
snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
@@ -533,6 +540,8 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
533540
}
534541

535542
/* Setup the other blocks - the first n-1 vectors */
543+
node_mask = gve_get_node_mask(priv);
544+
cur_cpu = cpumask_first(node_mask);
536545
for (i = 0; i < priv->num_ntfy_blks; i++) {
537546
struct gve_notify_block *block = &priv->ntfy_blocks[i];
538547
int msix_idx = i;
@@ -549,9 +558,17 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
549558
goto abort_with_some_ntfy_blocks;
550559
}
551560
block->irq = priv->msix_vectors[msix_idx].vector;
552-
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
553-
get_cpu_mask(i % active_cpus));
561+
irq_set_affinity_and_hint(block->irq,
562+
cpumask_of(cur_cpu));
554563
block->irq_db_index = &priv->irq_db_indices[i].index;
564+
565+
cur_cpu = cpumask_next(cur_cpu, node_mask);
566+
/* Wrap once CPUs in the node have been exhausted, or when
567+
* starting RX queue affinities. TX and RX queues of the same
568+
* index share affinity.
569+
*/
570+
if (cur_cpu >= nr_cpu_ids || (i + 1) == priv->tx_cfg.max_queues)
571+
cur_cpu = cpumask_first(node_mask);
555572
}
556573
return 0;
557574
abort_with_some_ntfy_blocks:
@@ -1040,7 +1057,7 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev,
10401057
struct page **page, dma_addr_t *dma,
10411058
enum dma_data_direction dir, gfp_t gfp_flags)
10421059
{
1043-
*page = alloc_page(gfp_flags);
1060+
*page = alloc_pages_node(priv->numa_node, gfp_flags, 0);
10441061
if (!*page) {
10451062
priv->page_alloc_fail++;
10461063
return -ENOMEM;
@@ -2322,6 +2339,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
23222339
*/
23232340
priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
23242341
priv->mgmt_msix_idx = priv->num_ntfy_blks;
2342+
priv->numa_node = dev_to_node(&priv->pdev->dev);
23252343

23262344
priv->tx_cfg.max_queues =
23272345
min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);

drivers/net/ethernet/google/gve/gve_rx.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx,
192192
*/
193193
slots = rx->mask + 1;
194194

195-
rx->data.page_info = kvzalloc(slots *
196-
sizeof(*rx->data.page_info), GFP_KERNEL);
195+
rx->data.page_info = kvcalloc_node(slots, sizeof(*rx->data.page_info),
196+
GFP_KERNEL, priv->numa_node);
197197
if (!rx->data.page_info)
198198
return -ENOMEM;
199199

@@ -216,7 +216,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx,
216216

217217
if (!rx->data.raw_addressing) {
218218
for (j = 0; j < rx->qpl_copy_pool_mask + 1; j++) {
219-
struct page *page = alloc_page(GFP_KERNEL);
219+
struct page *page = alloc_pages_node(priv->numa_node,
220+
GFP_KERNEL, 0);
220221

221222
if (!page) {
222223
err = -ENOMEM;
@@ -303,10 +304,9 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
303304

304305
rx->qpl_copy_pool_mask = min_t(u32, U32_MAX, slots * 2) - 1;
305306
rx->qpl_copy_pool_head = 0;
306-
rx->qpl_copy_pool = kvcalloc(rx->qpl_copy_pool_mask + 1,
307-
sizeof(rx->qpl_copy_pool[0]),
308-
GFP_KERNEL);
309-
307+
rx->qpl_copy_pool = kvcalloc_node(rx->qpl_copy_pool_mask + 1,
308+
sizeof(rx->qpl_copy_pool[0]),
309+
GFP_KERNEL, priv->numa_node);
310310
if (!rx->qpl_copy_pool) {
311311
err = -ENOMEM;
312312
goto abort_with_slots;

drivers/net/ethernet/google/gve/gve_rx_dqo.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,9 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
237237

238238
rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
239239
gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
240-
rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
241-
sizeof(rx->dqo.buf_states[0]),
242-
GFP_KERNEL);
240+
rx->dqo.buf_states = kvcalloc_node(rx->dqo.num_buf_states,
241+
sizeof(rx->dqo.buf_states[0]),
242+
GFP_KERNEL, priv->numa_node);
243243
if (!rx->dqo.buf_states)
244244
return -ENOMEM;
245245

@@ -488,7 +488,7 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
488488
struct gve_rx_buf_state_dqo *buf_state,
489489
u16 buf_len)
490490
{
491-
struct page *page = alloc_page(GFP_ATOMIC);
491+
struct page *page = alloc_pages_node(rx->gve->numa_node, GFP_ATOMIC, 0);
492492
int num_frags;
493493

494494
if (!page)

0 commit comments

Comments (0)