
Commit 0f9b685

surenbaghdasaryan authored and akpm00 committed
alloc_tag: populate memory for module tags as needed
The memory reserved for module tags does not need to be backed by physical pages until there are tags to store there. Change the way we reserve this memory to allocate only virtual area for the tags and populate it with physical pages as needed when we load a module.

[surenb@google.com: avoid execmem_vmap() when !MMU]
Link: https://lkml.kernel.org/r/20241031233611.3833002-1-surenb@google.com
Link: https://lkml.kernel.org/r/20241023170759.999909-5-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Minchan Kim <minchan@google.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Petr Pavlu <petr.pavlu@suse.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Sourav Panda <souravpanda@google.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Huth <thuth@redhat.com>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Xiongwei Song <xiongwei.song@windriver.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 0db6f8d commit 0f9b685
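To make the reserve-then-populate idea concrete, here is a loose userspace analogue in C. This is illustrative only and is not how the kernel side works: the diffs below use execmem_vmap(), alloc_pages_bulk_array_node() and vmap_pages_range() rather than mmap()/mprotect(). The pattern is the same, though: claim address space up front without committing memory, then make pages usable only on demand.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t reserve = 1 << 20;	/* reserve 1 MiB of address space... */
	size_t commit = 4096;		/* ...but back only one page for now */

	/* PROT_NONE anonymous mapping: a virtual reservation, no usable pages yet */
	char *base = mmap(NULL, reserve, PROT_NONE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Populate on demand, the moral equivalent of vm_module_tags_populate() */
	if (mprotect(base, commit, PROT_READ | PROT_WRITE)) {
		perror("mprotect");
		return 1;
	}
	memset(base, 0, commit);	/* only the committed page may be touched */

	munmap(base, reserve);
	return 0;
}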

7 files changed: +104 additions, -11 deletions

include/linux/execmem.h

Lines changed: 12 additions & 0 deletions

@@ -139,6 +139,18 @@ void *execmem_alloc(enum execmem_type type, size_t size);
  */
 void execmem_free(void *ptr);
 
+#ifdef CONFIG_MMU
+/**
+ * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory
+ * @size: size of the virtual mapping in bytes
+ *
+ * Maps virtually contiguous area in the range suitable for EXECMEM_MODULE_DATA.
+ *
+ * Return: the area descriptor on success or %NULL on failure.
+ */
+struct vm_struct *execmem_vmap(size_t size);
+#endif
+
 /**
  * execmem_update_copy - copy an update to executable memory
  * @dst: destination address to update
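For orientation, a minimal caller sketch for the new interface (a hypothetical snippet, not taken from this commit; SZ_1M is just an example size):

	struct vm_struct *area;

	area = execmem_vmap(SZ_1M);	/* reserves a virtual range; no physical pages yet */
	if (!area)
		return -ENOMEM;
	/*
	 * area->addr is a kernel virtual range suitable for EXECMEM_MODULE_DATA,
	 * but it must be backed with pages (e.g. via vmap_pages_range()) before
	 * it is touched.
	 */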

include/linux/vmalloc.h

Lines changed: 3 additions & 0 deletions

@@ -202,6 +202,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
 extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 							unsigned long pgoff);
 
+int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot,
+		     struct page **pages, unsigned int page_shift);
+
 /*
  * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
  * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()

lib/Kconfig.debug

Lines changed: 1 addition & 0 deletions

@@ -993,6 +993,7 @@ config CODE_TAGGING
 config MEM_ALLOC_PROFILING
 	bool "Enable memory allocation profiling"
 	default n
+	depends on MMU
 	depends on PROC_FS
 	depends on !DEBUG_FORCE_WEAK_PER_CPU
 	select CODE_TAGGING

lib/alloc_tag.c

Lines changed: 64 additions & 9 deletions

@@ -8,14 +8,15 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_buf.h>
 #include <linux/seq_file.h>
+#include <linux/vmalloc.h>
 
 #define ALLOCINFO_FILE_NAME "allocinfo"
 #define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag))
 
 #ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
-static bool mem_profiling_support __meminitdata = true;
+static bool mem_profiling_support = true;
 #else
-static bool mem_profiling_support __meminitdata;
+static bool mem_profiling_support;
 #endif
 
 static struct codetag_type *alloc_tag_cttype;

@@ -154,7 +155,7 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl
 	return nr;
 }
 
-static void __init shutdown_mem_profiling(void)
+static void shutdown_mem_profiling(void)
 {
 	if (mem_alloc_profiling_enabled())
 		static_branch_disable(&mem_alloc_profiling_key);

@@ -179,6 +180,7 @@ static void __init procfs_init(void)
 #ifdef CONFIG_MODULES
 
 static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE);
+static struct vm_struct *vm_module_tags;
 /* A dummy object used to indicate an unloaded module */
 static struct module unloaded_mod;
 /* A dummy object used to indicate a module prepended area */

@@ -252,6 +254,33 @@ static bool find_aligned_area(struct ma_state *mas, unsigned long section_size,
 	return false;
 }
 
+static int vm_module_tags_populate(void)
+{
+	unsigned long phys_size = vm_module_tags->nr_pages << PAGE_SHIFT;
+
+	if (phys_size < module_tags.size) {
+		struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages;
+		unsigned long addr = module_tags.start_addr + phys_size;
+		unsigned long more_pages;
+		unsigned long nr;
+
+		more_pages = ALIGN(module_tags.size - phys_size, PAGE_SIZE) >> PAGE_SHIFT;
+		nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN,
+						 NUMA_NO_NODE, more_pages, next_page);
+		if (nr < more_pages ||
+		    vmap_pages_range(addr, addr + (nr << PAGE_SHIFT), PAGE_KERNEL,
+				     next_page, PAGE_SHIFT) < 0) {
+			/* Clean up and error out */
+			for (int i = 0; i < nr; i++)
+				__free_page(next_page[i]);
+			return -ENOMEM;
+		}
+		vm_module_tags->nr_pages += nr;
+	}
+
+	return 0;
+}
+
 static void *reserve_module_tags(struct module *mod, unsigned long size,
 				 unsigned int prepend, unsigned long align)
 {

@@ -310,8 +339,18 @@ static void *reserve_module_tags(struct module *mod, unsigned long size,
 	if (IS_ERR(ret))
 		return ret;
 
-	if (module_tags.size < offset + size)
+	if (module_tags.size < offset + size) {
+		int grow_res;
+
 		module_tags.size = offset + size;
+		grow_res = vm_module_tags_populate();
+		if (grow_res) {
+			shutdown_mem_profiling();
+			pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n",
+			       mod->name);
+			return ERR_PTR(grow_res);
+		}
+	}
 
 	return (struct alloc_tag *)(module_tags.start_addr + offset);
 }

@@ -372,21 +411,37 @@ static void replace_module(struct module *mod, struct module *new_mod)
 
 static int __init alloc_mod_tags_mem(void)
 {
-	/* Allocate space to copy allocation tags */
-	module_tags.start_addr = (unsigned long)execmem_alloc(EXECMEM_MODULE_DATA,
-							      MODULE_ALLOC_TAG_VMAP_SIZE);
-	if (!module_tags.start_addr)
+	/* Map space to copy allocation tags */
+	vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE);
+	if (!vm_module_tags) {
+		pr_err("Failed to map %lu bytes for module allocation tags\n",
+		       MODULE_ALLOC_TAG_VMAP_SIZE);
+		module_tags.start_addr = 0;
 		return -ENOMEM;
+	}
 
+	vm_module_tags->pages = kmalloc_array(get_vm_area_size(vm_module_tags) >> PAGE_SHIFT,
+					      sizeof(struct page *), GFP_KERNEL | __GFP_ZERO);
+	if (!vm_module_tags->pages) {
+		free_vm_area(vm_module_tags);
+		return -ENOMEM;
+	}
+
+	module_tags.start_addr = (unsigned long)vm_module_tags->addr;
 	module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE;
 
 	return 0;
 }
 
 static void __init free_mod_tags_mem(void)
 {
-	execmem_free((void *)module_tags.start_addr);
+	int i;
+
 	module_tags.start_addr = 0;
+	for (i = 0; i < vm_module_tags->nr_pages; i++)
+		__free_page(vm_module_tags->pages[i]);
+	kfree(vm_module_tags->pages);
+	free_vm_area(vm_module_tags);
 }
 
 #else /* CONFIG_MODULES */
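To see the arithmetic in vm_module_tags_populate() with concrete numbers, here is a standalone userspace sketch of the same page-count computation (the values for nr_pages and module_tags_size are hypothetical):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	/* Hypothetical state: 3 pages already backed, tags now need 20000 bytes */
	unsigned long nr_pages = 3;
	unsigned long module_tags_size = 20000;

	unsigned long phys_size = nr_pages << PAGE_SHIFT;	/* 12288 bytes backed */

	if (phys_size < module_tags_size) {
		unsigned long more_pages =
			ALIGN(module_tags_size - phys_size, PAGE_SIZE) >> PAGE_SHIFT;

		/* 20000 - 12288 = 7712, rounded up to 8192 -> 2 pages */
		printf("need %lu more page(s)\n", more_pages);
	}
	return 0;
}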

mm/execmem.c

Lines changed: 16 additions & 0 deletions

@@ -64,6 +64,22 @@ static void *execmem_vmalloc(struct execmem_range *range, size_t size,
 
 	return p;
 }
+
+struct vm_struct *execmem_vmap(size_t size)
+{
+	struct execmem_range *range = &execmem_info->ranges[EXECMEM_MODULE_DATA];
+	struct vm_struct *area;
+
+	area = __get_vm_area_node(size, range->alignment, PAGE_SHIFT, VM_ALLOC,
+				  range->start, range->end, NUMA_NO_NODE,
+				  GFP_KERNEL, __builtin_return_address(0));
+	if (!area && range->fallback_start)
+		area = __get_vm_area_node(size, range->alignment, PAGE_SHIFT, VM_ALLOC,
+					  range->fallback_start, range->fallback_end,
+					  NUMA_NO_NODE, GFP_KERNEL, __builtin_return_address(0));
+
+	return area;
+}
 #else
 static void *execmem_vmalloc(struct execmem_range *range, size_t size,
 			     pgprot_t pgprot, unsigned long vm_flags)

mm/internal.h

Lines changed: 6 additions & 0 deletions

@@ -1263,6 +1263,12 @@ int numa_migrate_check(struct folio *folio, struct vm_fault *vmf,
 void free_zone_device_folio(struct folio *folio);
 int migrate_device_coherent_folio(struct folio *folio);
 
+struct vm_struct *__get_vm_area_node(unsigned long size,
+				     unsigned long align, unsigned long shift,
+				     unsigned long flags, unsigned long start,
+				     unsigned long end, int node, gfp_t gfp_mask,
+				     const void *caller);
+
 /*
  * mm/gup.c
  */

mm/vmalloc.c

Lines changed: 2 additions & 2 deletions

@@ -653,7 +653,7 @@ int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
 * RETURNS:
 * 0 on success, -errno on failure.
 */
-static int vmap_pages_range(unsigned long addr, unsigned long end,
+int vmap_pages_range(unsigned long addr, unsigned long end,
 		pgprot_t prot, struct page **pages, unsigned int page_shift)
 {
 	int err;

@@ -3106,7 +3106,7 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm)
 	vm->flags &= ~VM_UNINITIALIZED;
 }
 
-static struct vm_struct *__get_vm_area_node(unsigned long size,
+struct vm_struct *__get_vm_area_node(unsigned long size,
 		unsigned long align, unsigned long shift, unsigned long flags,
 		unsigned long start, unsigned long end, int node,
 		gfp_t gfp_mask, const void *caller)
