Commit 989b09b

slab: skip percpu sheaves for remote object freeing
Since we don't control the NUMA locality of objects in percpu sheaves, allocations with node restrictions bypass them. Allocations without restrictions may, however, still expect to get local objects with high probability, and the introduction of sheaves can decrease it due to freed objects from a remote node ending up in percpu sheaves. The fraction of such remote frees seems low (5% on an 8-node machine), but it can be expected that some cache- or workload-specific corner cases exist.

We can either conclude that this is not a problem due to the low fraction, or we can make remote frees bypass percpu sheaves and go directly to their slabs. This will make the remote frees more expensive, but if it's only a small fraction, most frees will still benefit from the lower overhead of percpu sheaves.

This patch thus makes remote object freeing bypass percpu sheaves, including bulk freeing, and kfree_rcu() via the rcu_free sheaf. However, it's not intended to be a 100% guarantee that percpu sheaves will only contain local objects. The refill from slabs does not provide that guarantee in the first place, and there might be cpu migrations happening when we need to unlock the local_lock. Avoiding all that could be possible but complicated, so we can leave it for later investigation of whether it would be worth it. It can be expected that the more selective freeing will itself prevent accumulation of remote objects in percpu sheaves, so any such violations would have only short-term effects.

Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
1 parent 0829422 commit 989b09b
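
The core decision described in the commit message is a locality gate on the free path: an object whose backing slab lives on the local NUMA node may be cached in the per-CPU sheaf, while a remote-node object goes straight back to its slab. The following is a minimal userspace sketch of that idea, not kernel code; the names (struct obj, struct sheaf, local_node, free_to_slab) and the simplified handling of a full sheaf are assumptions made for illustration.

/*
 * Userspace model: local-node frees go to a small per-CPU "sheaf"
 * cache, remote-node frees bypass it and take the direct slab path.
 * In the kernel a full sheaf is swapped via the per-node barn; here
 * it simply falls back to the slow path to keep the sketch short.
 */
#include <stdio.h>

#define SHEAF_CAPACITY 4

struct obj {
	int node;	/* NUMA node the backing slab lives on */
	int id;
};

struct sheaf {
	struct obj *slots[SHEAF_CAPACITY];
	unsigned int size;
};

static int local_node = 0;		/* stand-in for numa_mem_id() */
static struct sheaf percpu_sheaf;	/* stand-in for the per-CPU sheaf */

/* Slow path: hand the object back to its slab directly. */
static void free_to_slab(struct obj *o)
{
	printf("obj %d (node %d): freed directly to slab\n", o->id, o->node);
}

static void free_obj(struct obj *o)
{
	/* Remote objects bypass the sheaf so it stays NUMA-local. */
	if (o->node != local_node || percpu_sheaf.size >= SHEAF_CAPACITY) {
		free_to_slab(o);
		return;
	}
	percpu_sheaf.slots[percpu_sheaf.size++] = o;
	printf("obj %d (node %d): cached in percpu sheaf (%u/%u)\n",
	       o->id, o->node, percpu_sheaf.size, SHEAF_CAPACITY);
}

int main(void)
{
	struct obj objs[] = {
		{ .node = 0, .id = 1 },	/* local: goes to the sheaf */
		{ .node = 1, .id = 2 },	/* remote: bypasses the sheaf */
		{ .node = 0, .id = 3 },
	};

	for (unsigned int i = 0; i < sizeof(objs) / sizeof(objs[0]); i++)
		free_obj(&objs[i]);

	return 0;
}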

2 files changed: +40 −8 lines changed

mm/slab_common.c

Lines changed: 5 additions & 2 deletions
@@ -1623,8 +1623,11 @@ static bool kfree_rcu_sheaf(void *obj)
 
 	slab = folio_slab(folio);
 	s = slab->slab_cache;
-	if (s->cpu_sheaves)
-		return __kfree_rcu_sheaf(s, obj);
+	if (s->cpu_sheaves) {
+		if (likely(!IS_ENABLED(CONFIG_NUMA) ||
+			   slab_nid(slab) == numa_mem_id()))
+			return __kfree_rcu_sheaf(s, obj);
+	}
 
 	return false;
 }

mm/slub.c

Lines changed: 35 additions & 6 deletions
@@ -472,6 +472,7 @@ struct slab_sheaf {
 	};
 	struct kmem_cache *cache;
 	unsigned int size;
+	int node; /* only used for rcu_sheaf */
 	void *objects[];
 };
 
@@ -5822,7 +5823,7 @@ static void rcu_free_sheaf(struct rcu_head *head)
 	 */
 	__rcu_free_sheaf_prepare(s, sheaf);
 
-	barn = get_node(s, numa_mem_id())->barn;
+	barn = get_node(s, sheaf->node)->barn;
 
 	/* due to slab_free_hook() */
 	if (unlikely(sheaf->size == 0))
@@ -5912,10 +5913,12 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
 	 */
 	rcu_sheaf->objects[rcu_sheaf->size++] = obj;
 
-	if (likely(rcu_sheaf->size < s->sheaf_capacity))
+	if (likely(rcu_sheaf->size < s->sheaf_capacity)) {
 		rcu_sheaf = NULL;
-	else
+	} else {
 		pcs->rcu_free = NULL;
+		rcu_sheaf->node = numa_mem_id();
+	}
 
 	/*
 	 * we flush before local_unlock to make sure a racing
@@ -5946,7 +5949,11 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
 	bool init = slab_want_init_on_free(s);
 	unsigned int batch, i = 0;
 	struct node_barn *barn;
+	void *remote_objects[PCS_BATCH_MAX];
+	unsigned int remote_nr = 0;
+	int node = numa_mem_id();
 
+next_remote_batch:
 	while (i < size) {
 		struct slab *slab = virt_to_slab(p[i]);
 
@@ -5956,7 +5963,15 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
 		if (unlikely(!slab_free_hook(s, p[i], init, false))) {
 			p[i] = p[--size];
 			if (!size)
-				return;
+				goto flush_remote;
+			continue;
+		}
+
+		if (unlikely(IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node)) {
+			remote_objects[remote_nr] = p[i];
+			p[i] = p[--size];
+			if (++remote_nr >= PCS_BATCH_MAX)
+				goto flush_remote;
 			continue;
 		}
 
@@ -6026,6 +6041,15 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
 	 */
 fallback:
 	__kmem_cache_free_bulk(s, size, p);
+
+flush_remote:
+	if (remote_nr) {
+		__kmem_cache_free_bulk(s, remote_nr, &remote_objects[0]);
+		if (i < size) {
+			remote_nr = 0;
+			goto next_remote_batch;
+		}
+	}
 }
 
 #ifndef CONFIG_SLUB_TINY
@@ -6117,8 +6141,13 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 	if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false)))
 		return;
 
-	if (!s->cpu_sheaves || !free_to_pcs(s, object))
-		do_slab_free(s, slab, object, object, 1, addr);
+	if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
+				     slab_nid(slab) == numa_mem_id())) {
+		if (likely(free_to_pcs(s, object)))
+			return;
+	}
+
+	do_slab_free(s, slab, object, object, 1, addr);
 }
 
 #ifdef CONFIG_MEMCG
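
The free_to_pcs_bulk() hunks above add a second pattern worth calling out: remote-node objects found during a bulk free are set aside into a small on-stack array and returned to their slabs with one bulk call once the array fills or the input is exhausted. Below is a simplified userspace sketch of that batching; PCS_BATCH_MAX, bulk_free_to_slabs() and the plain loop (the kernel instead compacts the input array in place and restarts via a goto) are assumptions for illustration only.

/*
 * Userspace model of the remote-object batching in free_to_pcs_bulk():
 * local objects take the sheaf fast path, remote objects accumulate in
 * a fixed-size buffer that is flushed with a single bulk call whenever
 * it fills up, and once more at the end if anything remains.
 */
#include <stdio.h>
#include <stddef.h>

#define PCS_BATCH_MAX 4

struct obj { int node; int id; };

static const int local_node = 0;

static void bulk_free_to_slabs(struct obj **objs, size_t nr)
{
	printf("bulk-freeing %zu remote object(s) directly to their slabs\n", nr);
}

static void free_local(struct obj *o)
{
	printf("obj %d: freed via local percpu sheaf\n", o->id);
}

static void free_bulk(struct obj **p, size_t size)
{
	struct obj *remote[PCS_BATCH_MAX];
	size_t remote_nr = 0;

	for (size_t i = 0; i < size; i++) {
		if (p[i]->node != local_node) {
			/* set remote objects aside instead of caching them */
			remote[remote_nr++] = p[i];
			if (remote_nr == PCS_BATCH_MAX) {
				bulk_free_to_slabs(remote, remote_nr);
				remote_nr = 0;
			}
			continue;
		}
		free_local(p[i]);
	}

	/* flush whatever remote objects remain */
	if (remote_nr)
		bulk_free_to_slabs(remote, remote_nr);
}

int main(void)
{
	struct obj a = {0, 1}, b = {1, 2}, c = {1, 3}, d = {0, 4};
	struct obj *batch[] = { &a, &b, &c, &d };

	free_bulk(batch, 4);
	return 0;
}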
