|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
| 2 | + |
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/compiler.h>
#include <linux/huge_mm.h>
#include <linux/khugepaged.h>
| 7 | + |
/*
 * Values a BPF reclaimer callback may return. bpf_thp_gfp_mask() maps each
 * of them to a gfp mask; any other value makes it return 0.
 */
#define RECLAIMER_CURRENT (1 << 1)
#define RECLAIMER_KSWAPD (1 << 2)
#define RECLAIMER_BOTH (RECLAIMER_CURRENT | RECLAIMER_KSWAPD)
| 11 | + |
struct bpf_thp_ops {
	/**
	 * @allocator: Specifies whether the THP allocation is performed
	 * by the current task or by khugepaged.
	 * @vm_flags: Flags for the VMA in the current allocation context
	 * @tva_flags: Flags for the TVA in the current allocation context
	 *
	 * Return:
	 * - THP_ALLOC_CURRENT: THP is allocated synchronously in the calling
	 *   task's context.
	 * - THP_ALLOC_KHUGEPAGED: THP is allocated asynchronously by the
	 *   khugepaged kernel thread.
	 * - THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT: both are allowed.
	 * - 0: THP allocation is disallowed in the current context.
	 *
	 * A return value with any other bit set is treated as invalid by
	 * bpf_thp_allocator(), which then allows both allocators.
	 */
	int (*allocator)(unsigned long vm_flags, unsigned long tva_flags);
	/**
	 * @reclaimer: Specifies the entity performing page reclaim:
	 * - current task context
	 * - kswapd
	 * - none (no reclaim)
	 * @vma_madvised: Boolean describing the madvise state of this VMA
	 *   (presumably true when MADV_HUGEPAGE was applied; confirm with the
	 *   callers of bpf_thp_gfp_mask()).
	 *
	 * Return:
	 * - RECLAIMER_CURRENT: Direct reclaim by the current task if THP
	 *   allocation fails.
	 * - RECLAIMER_KSWAPD: Wake kswapd to reclaim memory if THP allocation fails.
	 * - RECLAIMER_BOTH: Both current and kswapd will perform the reclaim.
	 * - 0: No reclaim will be attempted.
	 */
	int (*reclaimer)(bool vma_madvised);
};
| 43 | + |
/*
 * Callbacks of the currently attached BPF program. Filled in by
 * bpf_thp_reg() and reset to NULL by bpf_thp_unreg(); both members are
 * NULL while nothing is attached.
 */
static struct bpf_thp_ops bpf_thp;
| 45 | + |
| 46 | +int bpf_thp_allocator(unsigned long vm_flags, unsigned long tva_flags) |
| 47 | +{ |
| 48 | + int allocator; |
| 49 | + |
| 50 | + /* No BPF program is attached */ |
| 51 | + if (!(transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_BPF_ATTACHED))) |
| 52 | + return THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT; |
| 53 | + |
| 54 | + if (current_is_khugepaged()) |
| 55 | + return THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT; |
| 56 | + if (!bpf_thp.allocator) |
| 57 | + return THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT; |
| 58 | + |
| 59 | + allocator = bpf_thp.allocator(vm_flags, tva_flags); |
| 60 | + if (!allocator) |
| 61 | + return 0; |
| 62 | + /* invalid return value */ |
| 63 | + if (allocator & ~(THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT)) |
| 64 | + return THP_ALLOC_KHUGEPAGED | THP_ALLOC_CURRENT; |
| 65 | + return allocator; |
| 66 | +} |
| 67 | + |
| 68 | +gfp_t bpf_thp_gfp_mask(bool vma_madvised) |
| 69 | +{ |
| 70 | + int reclaimer; |
| 71 | + |
| 72 | + if (!(transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_BPF_ATTACHED))) |
| 73 | + return 0; |
| 74 | + |
| 75 | + if (!bpf_thp.reclaimer) |
| 76 | + return 0; |
| 77 | + |
| 78 | + reclaimer = bpf_thp.reclaimer(vma_madvised); |
| 79 | + switch (reclaimer) { |
| 80 | + case RECLAIMER_CURRENT: |
| 81 | + return GFP_TRANSHUGE | __GFP_NORETRY; |
| 82 | + case RECLAIMER_KSWAPD: |
| 83 | + return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM; |
| 84 | + case RECLAIMER_BOTH: |
| 85 | + return GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM | __GFP_NORETRY; |
| 86 | + default: |
| 87 | + return 0; |
| 88 | + } |
| 89 | +} |
| 90 | + |
| 91 | +static bool bpf_thp_ops_is_valid_access(int off, int size, |
| 92 | + enum bpf_access_type type, |
| 93 | + const struct bpf_prog *prog, |
| 94 | + struct bpf_insn_access_aux *info) |
| 95 | +{ |
| 96 | + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); |
| 97 | +} |
| 98 | + |
| 99 | +static const struct bpf_func_proto * |
| 100 | +bpf_thp_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
| 101 | +{ |
| 102 | + return bpf_base_func_proto(func_id, prog); |
| 103 | +} |
| 104 | + |
/* Verifier callbacks used for bpf_thp_ops programs (see bpf_bpf_thp_ops). */
static const struct bpf_verifier_ops thp_bpf_verifier_ops = {
	.get_func_proto = bpf_thp_get_func_proto,
	.is_valid_access = bpf_thp_ops_is_valid_access,
};
| 109 | + |
| 110 | +static int bpf_thp_reg(void *kdata, struct bpf_link *link) |
| 111 | +{ |
| 112 | + struct bpf_thp_ops *ops = kdata; |
| 113 | + |
| 114 | + /* TODO: add support for multiple attaches */ |
| 115 | + if (test_and_set_bit(TRANSPARENT_HUGEPAGE_BPF_ATTACHED, |
| 116 | + &transparent_hugepage_flags)) |
| 117 | + return -EOPNOTSUPP; |
| 118 | + bpf_thp.allocator = ops->allocator; |
| 119 | + bpf_thp.reclaimer = ops->reclaimer; |
| 120 | + return 0; |
| 121 | +} |
| 122 | + |
| 123 | +static void bpf_thp_unreg(void *kdata, struct bpf_link *link) |
| 124 | +{ |
| 125 | + clear_bit(TRANSPARENT_HUGEPAGE_BPF_ATTACHED, &transparent_hugepage_flags); |
| 126 | + bpf_thp.allocator = NULL; |
| 127 | + bpf_thp.reclaimer = NULL; |
| 128 | +} |
| 129 | + |
/*
 * struct_ops ->check_member callback. Performs no per-member validation:
 * it returns 0 for every member and program.
 */
static int bpf_thp_check_member(const struct btf_type *t,
				const struct btf_member *member,
				const struct bpf_prog *prog)
{
	return 0;
}
| 136 | + |
/*
 * struct_ops ->init_member callback. Does no member-specific
 * initialisation: @kdata and @udata are left untouched and 0 is returned.
 */
static int bpf_thp_init_member(const struct btf_type *t,
			       const struct btf_member *member,
			       void *kdata, const void *udata)
{
	return 0;
}
| 143 | + |
/* struct_ops ->init callback. Nothing to set up from @btf; returns 0. */
static int bpf_thp_init(struct btf *btf)
{
	return 0;
}
| 148 | + |
/*
 * Stub with the signature of bpf_thp_ops::allocator. It is only referenced
 * from __bpf_thp_ops, which is registered as the cfi_stubs table.
 */
static int allocator(unsigned long vm_flags, unsigned long tva_flags)
{
	return 0;
}
| 153 | + |
/*
 * Stub with the signature of bpf_thp_ops::reclaimer. It is only referenced
 * from __bpf_thp_ops, which is registered as the cfi_stubs table.
 */
static int reclaimer(bool vma_madvised)
{
	return 0;
}
| 158 | + |
/* Table of stub callbacks, passed to the struct_ops core via .cfi_stubs. */
static struct bpf_thp_ops __bpf_thp_ops = {
	.allocator = allocator,
	.reclaimer = reclaimer,
};
| 163 | + |
/*
 * struct_ops descriptor for "bpf_thp_ops". It bundles the verifier ops,
 * the init/check/init_member callbacks, the attach (reg) and detach
 * (unreg) handlers and the CFI stub table; bpf_thp_ops_init() passes it
 * to register_bpf_struct_ops().
 */
static struct bpf_struct_ops bpf_bpf_thp_ops = {
	.verifier_ops = &thp_bpf_verifier_ops,
	.init = bpf_thp_init,
	.check_member = bpf_thp_check_member,
	.init_member = bpf_thp_init_member,
	.reg = bpf_thp_reg,
	.unreg = bpf_thp_unreg,
	.name = "bpf_thp_ops",
	.cfi_stubs = &__bpf_thp_ops,
	.owner = THIS_MODULE,
};
| 175 | + |
| 176 | +static int __init bpf_thp_ops_init(void) |
| 177 | +{ |
| 178 | + int err = register_bpf_struct_ops(&bpf_bpf_thp_ops, bpf_thp_ops); |
| 179 | + |
| 180 | + if (err) |
| 181 | + pr_err("bpf_thp: Failed to register struct_ops (%d)\n", err); |
| 182 | + return err; |
| 183 | +} |
| 184 | +late_initcall(bpf_thp_ops_init); |
0 commit comments