Skip to content

Commit b4ff815

Browse files
laoar authored and intel-lab-lkp committed
mm: thp: add bpf thp struct ops
A new bpf_thp struct ops is introduced to provide finer-grained control over THP allocation policy. The struct ops includes two APIs for determining the THP allocator and reclaimer behavior: - THP allocator int (*allocator)(unsigned long vm_flags, unsigned long tva_flags); The BPF program returns either THP_ALLOC_CURRENT or THP_ALLOC_KHUGEPAGED, indicating whether THP allocation should be performed synchronously (current task) or asynchronously (khugepaged). The decision is based on the current task context, VMA flags, and TVA flags. - THP reclaimer int (*reclaimer)(bool vma_madvised); The BPF program returns either RECLAIMER_CURRENT or RECLAIMER_KSWAPD, determining whether memory reclamation is handled by the current task or kswapd. The decision depends on the current task and VMA flags. Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
1 parent b5d9648 commit b4ff815

File tree

3 files changed

+190
-10
lines changed

3 files changed

+190
-10
lines changed

include/linux/huge_mm.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ enum transparent_hugepage_flag {
5454
TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
5555
TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
5656
TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
57+
TRANSPARENT_HUGEPAGE_BPF_ATTACHED, /* BPF prog is attached */
5758
};
5859

5960
struct kobject;
@@ -192,16 +193,8 @@ static inline bool hugepage_global_always(void)
192193

193194
#define THP_ALLOC_KHUGEPAGED (1 << 1)
194195
#define THP_ALLOC_CURRENT (1 << 2)
195-
static inline int bpf_thp_allocator(unsigned long vm_flags,
196-
unsigned long tva_flags)
197-
{
198-
return THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT;
199-
}
200-
201-
static inline gfp_t bpf_thp_gfp_mask(bool vma_madvised)
202-
{
203-
return 0;
204-
}
196+
int bpf_thp_allocator(unsigned long vm_flags, unsigned long tva_flags);
197+
gfp_t bpf_thp_gfp_mask(bool vma_madvised);
205198

206199
static inline int highest_order(unsigned long orders)
207200
{

mm/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ obj-$(CONFIG_MIGRATION) += migrate.o
9999
obj-$(CONFIG_NUMA) += memory-tiers.o
100100
obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o
101101
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
102+
ifdef CONFIG_BPF_SYSCALL
103+
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += bpf_thp.o
104+
endif
102105
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
103106
obj-$(CONFIG_MEMCG_V1) += memcontrol-v1.o
104107
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o

mm/bpf_thp.c

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include <linux/bpf.h>
4+
#include <linux/btf.h>
5+
#include <linux/huge_mm.h>
6+
#include <linux/khugepaged.h>
7+
8+
/*
 * Values a bpf_thp_ops::reclaimer program may return. bpf_thp_gfp_mask()
 * translates each of them into a GFP mask and maps any other value to 0.
 */
#define RECLAIMER_CURRENT (1 << 1)
9+
#define RECLAIMER_KSWAPD (1 << 2)
10+
#define RECLAIMER_BOTH (RECLAIMER_CURRENT | RECLAIMER_KSWAPD)
11+
12+
struct bpf_thp_ops {
13+
/**
14+
* @allocator: Decides whether a THP allocation may be performed
15+
* by the current task and/or by khugepaged.
16+
* @vm_flags: Flags for the VMA in the current allocation context
17+
* @tva_flags: Flags for the TVA in the current allocation context
18+
*
19+
* Return:
20+
* - THP_ALLOC_CURRENT: THP may be allocated synchronously in the calling
21+
* task's context.
22+
* - THP_ALLOC_KHUGEPAGED: THP may be allocated asynchronously by the
23+
* khugepaged kernel thread.
24+
* - 0: THP allocation is disallowed in the current context.
* Both flags may be OR-ed together. A value with any other bit set is
* treated as invalid by bpf_thp_allocator(), which then allows both.
25+
*/
26+
int (*allocator)(unsigned long vm_flags, unsigned long tva_flags);
27+
/**
28+
* @reclaimer: Specifies the entity performing page reclaim:
29+
* - current task context
30+
* - kswapd
31+
* - none (no reclaim)
32+
* @vma_madvised: Whether the VMA is madvised (presumably MADV_HUGEPAGE; confirm against the caller)
33+
*
34+
* Return:
35+
* - RECLAIMER_CURRENT: Direct reclaim by the current task if THP
36+
* allocation fails.
37+
* - RECLAIMER_KSWAPD: Wake kswapd to reclaim memory if THP allocation fails.
38+
* - RECLAIMER_BOTH: Both the current task and kswapd will perform the reclaim.
39+
* - 0: No reclaim will be attempted.
40+
*/
41+
int (*reclaimer)(bool vma_madvised);
42+
};
43+
44+
/*
 * Callbacks of the currently attached BPF program. Filled in by
 * bpf_thp_reg(), reset to NULL by bpf_thp_unreg(), and read by
 * bpf_thp_allocator() and bpf_thp_gfp_mask().
 */
static struct bpf_thp_ops bpf_thp;
45+
46+
int bpf_thp_allocator(unsigned long vm_flags, unsigned long tva_flags)
47+
{
48+
int allocator;
49+
50+
/* No BPF program is attached */
51+
if (!(transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_BPF_ATTACHED)))
52+
return THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT;
53+
54+
if (current_is_khugepaged())
55+
return THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT;
56+
if (!bpf_thp.allocator)
57+
return THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT;
58+
59+
allocator = bpf_thp.allocator(vm_flags, tva_flags);
60+
if (!allocator)
61+
return 0;
62+
/* invalid return value */
63+
if (allocator & ~(THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT))
64+
return THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT;
65+
return allocator;
66+
}
67+
68+
gfp_t bpf_thp_gfp_mask(bool vma_madvised)
69+
{
70+
int reclaimer;
71+
72+
if (!(transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_BPF_ATTACHED)))
73+
return 0;
74+
75+
if (!bpf_thp.reclaimer)
76+
return 0;
77+
78+
reclaimer = bpf_thp.reclaimer(vma_madvised);
79+
switch (reclaimer) {
80+
case RECLAIMER_CURRENT:
81+
return GFP_TRANSHUGE | __GFP_NORETRY;
82+
case RECLAIMER_KSWAPD:
83+
return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
84+
case RECLAIMER_BOTH:
85+
return GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM | __GFP_NORETRY;
86+
default:
87+
return 0;
88+
}
89+
}
90+
91+
/*
 * Verifier callback for context accesses made by bpf_thp_ops programs.
 * The decision is delegated unchanged to bpf_tracing_btf_ctx_access().
 */
static bool bpf_thp_ops_is_valid_access(int off, int size,
92+
enum bpf_access_type type,
93+
const struct bpf_prog *prog,
94+
struct bpf_insn_access_aux *info)
95+
{
96+
return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
97+
}
98+
99+
/*
 * Verifier callback resolving a helper id to its prototype. No helpers
 * specific to bpf_thp_ops are added here: the lookup is delegated to
 * bpf_base_func_proto().
 */
static const struct bpf_func_proto *
100+
bpf_thp_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
101+
{
102+
return bpf_base_func_proto(func_id, prog);
103+
}
104+
105+
/* Verifier hooks for bpf_thp_ops programs; referenced by bpf_bpf_thp_ops. */
static const struct bpf_verifier_ops thp_bpf_verifier_ops = {
106+
.get_func_proto = bpf_thp_get_func_proto,
107+
.is_valid_access = bpf_thp_ops_is_valid_access,
108+
};
109+
110+
/*
 * struct_ops .reg callback: install the callbacks found in @kdata.
 *
 * Only one attachment is supported. The TRANSPARENT_HUGEPAGE_BPF_ATTACHED
 * bit is claimed atomically, and -EOPNOTSUPP is returned when it was already
 * set. @link is unused. Returns 0 on success.
 */
static int bpf_thp_reg(void *kdata, struct bpf_link *link)
111+
{
112+
struct bpf_thp_ops *ops = kdata;
113+
114+
/* TODO: add support for multiple attaches */
115+
if (test_and_set_bit(TRANSPARENT_HUGEPAGE_BPF_ATTACHED,
116+
&transparent_hugepage_flags))
117+
return -EOPNOTSUPP;
/*
 * The flag is already visible at this point while the callbacks are
 * still being filled in; bpf_thp_allocator() and bpf_thp_gfp_mask()
 * cope with that by checking the pointers for NULL.
 */
118+
bpf_thp.allocator = ops->allocator;
119+
bpf_thp.reclaimer = ops->reclaimer;
120+
return 0;
121+
}
122+
123+
/*
 * struct_ops .unreg callback: clear the TRANSPARENT_HUGEPAGE_BPF_ATTACHED
 * bit first, then reset both callbacks to NULL. @kdata and @link are unused.
 */
static void bpf_thp_unreg(void *kdata, struct bpf_link *link)
124+
{
125+
clear_bit(TRANSPARENT_HUGEPAGE_BPF_ATTACHED, &transparent_hugepage_flags);
126+
bpf_thp.allocator = NULL;
127+
bpf_thp.reclaimer = NULL;
128+
}
129+
130+
/*
 * struct_ops .check_member callback. No per-member validation is done:
 * it always returns 0.
 */
static int bpf_thp_check_member(const struct btf_type *t,
131+
const struct btf_member *member,
132+
const struct bpf_prog *prog)
133+
{
134+
return 0;
135+
}
136+
137+
/*
 * struct_ops .init_member callback. It performs no member-specific
 * initialisation and always returns 0.
 */
static int bpf_thp_init_member(const struct btf_type *t,
138+
const struct btf_member *member,
139+
void *kdata, const void *udata)
140+
{
141+
return 0;
142+
}
143+
144+
/* struct_ops .init callback. Nothing to set up: always returns 0. */
static int bpf_thp_init(struct btf *btf)
145+
{
146+
return 0;
147+
}
148+
149+
/*
 * Stub with the signature of bpf_thp_ops::allocator; it always returns 0.
 * Referenced only from __bpf_thp_ops, the .cfi_stubs table.
 */
static int allocator(unsigned long vm_flags, unsigned long tva_flags)
150+
{
151+
return 0;
152+
}
153+
154+
/*
 * Stub with the signature of bpf_thp_ops::reclaimer; it always returns 0.
 * Referenced only from __bpf_thp_ops, the .cfi_stubs table.
 */
static int reclaimer(bool vma_madvised)
155+
{
156+
return 0;
157+
}
158+
159+
/* Table of stub callbacks handed to the struct_ops core as .cfi_stubs. */
static struct bpf_thp_ops __bpf_thp_ops = {
160+
.allocator = allocator,
161+
.reclaimer = reclaimer,
162+
};
163+
164+
/*
 * struct_ops descriptor for "bpf_thp_ops". It ties together the verifier
 * hooks, the init/check callbacks, the reg/unreg attach handlers and the
 * stub table; it is registered from bpf_thp_ops_init().
 */
static struct bpf_struct_ops bpf_bpf_thp_ops = {
165+
.verifier_ops = &thp_bpf_verifier_ops,
166+
.init = bpf_thp_init,
167+
.check_member = bpf_thp_check_member,
168+
.init_member = bpf_thp_init_member,
169+
.reg = bpf_thp_reg,
170+
.unreg = bpf_thp_unreg,
171+
.name = "bpf_thp_ops",
172+
.cfi_stubs = &__bpf_thp_ops,
173+
.owner = THIS_MODULE,
174+
};
175+
176+
/*
 * Register the bpf_thp_ops struct_ops type at late-init time.
 * Returns 0 on success; on failure the error is logged and passed on.
 */
static int __init bpf_thp_ops_init(void)
{
	int ret;

	ret = register_bpf_struct_ops(&bpf_bpf_thp_ops, bpf_thp_ops);
	if (!ret)
		return 0;

	pr_err("bpf_thp: Failed to register struct_ops (%d)\n", ret);
	return ret;
}
late_initcall(bpf_thp_ops_init);

0 commit comments

Comments
 (0)