Skip to content

Commit

Permalink
x86, mm: introduce vmem_altmap to augment vmemmap_populate()
Browse files Browse the repository at this point in the history
In support of providing struct page for large persistent memory
capacities, use struct vmem_altmap to change the default policy for
allocating memory for the memmap array.  The default vmemmap_populate()
allocates page table storage area from the page allocator.  Given
persistent memory capacities relative to DRAM it may not be feasible to
store the memmap in 'System Memory'.  Instead vmem_altmap represents
pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf()
requests.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: kbuild test robot <lkp@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
djbw authored and torvalds committed Jan 16, 2016
1 parent 9476df7 commit 4b94ffd
Show file tree
Hide file tree
Showing 10 changed files with 282 additions and 42 deletions.
33 changes: 26 additions & 7 deletions arch/x86/mm/init_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/memremap.h>
#include <linux/nmi.h>
#include <linux/gfp.h>
#include <linux/kcore.h>
Expand Down Expand Up @@ -714,6 +715,12 @@ static void __meminit free_pagetable(struct page *page, int order)
{
unsigned long magic;
unsigned int nr_pages = 1 << order;
struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);

if (altmap) {
vmem_altmap_free(altmap, nr_pages);
return;
}

/* bootmem page has reserved flag */
if (PageReserved(page)) {
Expand Down Expand Up @@ -1017,13 +1024,19 @@ int __ref arch_remove_memory(u64 start, u64 size)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct page *page = pfn_to_page(start_pfn);
struct vmem_altmap *altmap;
struct zone *zone;
int ret;

zone = page_zone(pfn_to_page(start_pfn));
kernel_physical_mapping_remove(start, start + size);
/* With altmap the first mapped page is offset from @start */
altmap = to_vmem_altmap((unsigned long) page);
if (altmap)
page += vmem_altmap_offset(altmap);
zone = page_zone(page);
ret = __remove_pages(zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);
kernel_physical_mapping_remove(start, start + size);

return ret;
}
Expand Down Expand Up @@ -1235,7 +1248,7 @@ static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;

static int __meminit vmemmap_populate_hugepages(unsigned long start,
unsigned long end, int node)
unsigned long end, int node, struct vmem_altmap *altmap)
{
unsigned long addr;
unsigned long next;
Expand All @@ -1258,7 +1271,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
if (pmd_none(*pmd)) {
void *p;

p = vmemmap_alloc_block_buf(PMD_SIZE, node);
p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
if (p) {
pte_t entry;

Expand All @@ -1279,7 +1292,8 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
addr_end = addr + PMD_SIZE;
p_end = p + PMD_SIZE;
continue;
}
} else if (altmap)
return -ENOMEM; /* no fallback */
} else if (pmd_large(*pmd)) {
vmemmap_verify((pte_t *)pmd, node, addr, next);
continue;
Expand All @@ -1293,11 +1307,16 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,

int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
struct vmem_altmap *altmap = to_vmem_altmap(start);
int err;

if (cpu_has_pse)
err = vmemmap_populate_hugepages(start, end, node);
else
err = vmemmap_populate_hugepages(start, end, node, altmap);
else if (altmap) {
pr_err_once("%s: no cpu support for altmap allocations\n",
__func__);
err = -ENOMEM;
} else
err = vmemmap_populate_basepages(start, end, node);
if (!err)
sync_global_pgds(start, end - 1, 0);
Expand Down
6 changes: 4 additions & 2 deletions drivers/nvdimm/pmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,8 @@ static struct pmem_device *pmem_alloc(struct device *dev,

pmem->pfn_flags = PFN_DEV;
if (pmem_should_map_pages(dev)) {
pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res);
pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res,
NULL);
pmem->pfn_flags |= PFN_MAP;
} else
pmem->virt_addr = (void __pmem *) devm_memremap(dev,
Expand Down Expand Up @@ -387,7 +388,8 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
/* establish pfn range for lookup, and switch to direct map */
pmem = dev_get_drvdata(dev);
devm_memunmap(dev, (void __force *) pmem->virt_addr);
pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res);
pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res,
NULL);
pmem->pfn_flags |= PFN_MAP;
if (IS_ERR(pmem->virt_addr)) {
rc = PTR_ERR(pmem->virt_addr);
Expand Down
3 changes: 2 additions & 1 deletion include/linux/memory_hotplug.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
extern void remove_memory(int nid, u64 start, u64 size);
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn);
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
unsigned long map_offset);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);

Expand Down
39 changes: 35 additions & 4 deletions include/linux/memremap.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,53 @@

struct resource;
struct device;

/**
* struct vmem_altmap - pre-allocated storage for vmemmap_populate
* @base_pfn: base of the entire dev_pagemap mapping
* @reserve: pages mapped, but reserved for driver use (relative to @base)
* @free: free pages set aside in the mapping for memmap storage
* @align: pages reserved to meet allocation alignments
* @alloc: track pages consumed, private to vmemmap_populate()
*/
struct vmem_altmap {
const unsigned long base_pfn;
const unsigned long reserve;
unsigned long free;
unsigned long align;
unsigned long alloc;
};

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);

#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_ZONE_DEVICE)
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
#else
static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
{
return NULL;
}
#endif

/**
* struct dev_pagemap - metadata for ZONE_DEVICE mappings
* @altmap: pre-allocated/reserved memory for vmemmap allocations
* @dev: host device of the mapping for debug
*/
struct dev_pagemap {
/* TODO: vmem_altmap and percpu_ref count */
struct vmem_altmap *altmap;
const struct resource *res;
struct device *dev;
};

#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct resource *res);
void *devm_memremap_pages(struct device *dev, struct resource *res,
struct vmem_altmap *altmap);
struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
#else
static inline void *devm_memremap_pages(struct device *dev,
struct resource *res)
struct resource *res, struct vmem_altmap *altmap)
{
/*
* Fail attempts to call devm_memremap_pages() without
Expand All @@ -34,5 +66,4 @@ static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
return NULL;
}
#endif

#endif /* _LINUX_MEMREMAP_H_ */
9 changes: 8 additions & 1 deletion include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -2217,7 +2217,14 @@ pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
void *vmemmap_alloc_block(unsigned long size, int node);
void *vmemmap_alloc_block_buf(unsigned long size, int node);
struct vmem_altmap;
void *__vmemmap_alloc_block_buf(unsigned long size, int node,
struct vmem_altmap *altmap);
static inline void *vmemmap_alloc_block_buf(unsigned long size, int node)
{
return __vmemmap_alloc_block_buf(size, node, NULL);
}

void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node);
Expand Down
72 changes: 70 additions & 2 deletions kernel/memremap.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ struct page_map {
struct resource res;
struct percpu_ref *ref;
struct dev_pagemap pgmap;
struct vmem_altmap altmap;
};

static void pgmap_radix_release(struct resource *res)
Expand All @@ -183,13 +184,16 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
struct page_map *page_map = data;
struct resource *res = &page_map->res;
resource_size_t align_start, align_size;
struct dev_pagemap *pgmap = &page_map->pgmap;

pgmap_radix_release(res);

/* pages are dead and unused, undo the arch mapping */
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(resource_size(res), SECTION_SIZE);
arch_remove_memory(align_start, align_size);
dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
"%s: failed to free all reserved pages\n", __func__);
}

/* assumes rcu_read_lock() held at entry */
Expand All @@ -203,11 +207,23 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
return page_map ? &page_map->pgmap : NULL;
}

void *devm_memremap_pages(struct device *dev, struct resource *res)
/**
* devm_memremap_pages - remap and provide memmap backing for the given resource
* @dev: hosting device for @res
* @res: "host memory" address range
* @altmap: optional descriptor for allocating the memmap from @res
*
* Note, the expectation is that @res is a host memory range that could
* feasibly be treated as a "System RAM" range, i.e. not a device mmio
* range, but this is not enforced.
*/
void *devm_memremap_pages(struct device *dev, struct resource *res,
struct vmem_altmap *altmap)
{
int is_ram = region_intersects(res->start, resource_size(res),
"System RAM");
resource_size_t key, align_start, align_size;
struct dev_pagemap *pgmap;
struct page_map *page_map;
int error, nid;

Expand All @@ -220,14 +236,27 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
if (is_ram == REGION_INTERSECTS)
return __va(res->start);

if (altmap && !IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) {
dev_err(dev, "%s: altmap requires CONFIG_SPARSEMEM_VMEMMAP=y\n",
__func__);
return ERR_PTR(-ENXIO);
}

page_map = devres_alloc_node(devm_memremap_pages_release,
sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
if (!page_map)
return ERR_PTR(-ENOMEM);
pgmap = &page_map->pgmap;

memcpy(&page_map->res, res, sizeof(*res));

page_map->pgmap.dev = dev;
pgmap->dev = dev;
if (altmap) {
memcpy(&page_map->altmap, altmap, sizeof(*altmap));
pgmap->altmap = &page_map->altmap;
}
pgmap->res = &page_map->res;

mutex_lock(&pgmap_lock);
error = 0;
for (key = res->start; key <= res->end; key += SECTION_SIZE) {
Expand Down Expand Up @@ -273,4 +302,43 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
return ERR_PTR(error);
}
EXPORT_SYMBOL(devm_memremap_pages);

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
/* number of pfns from base where pfn_to_page() is valid */
return altmap->reserve + altmap->free;
}

void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
{
altmap->alloc -= nr_pfns;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
{
/*
* 'memmap_start' is the virtual address for the first "struct
* page" in this range of the vmemmap array. In the case of
* CONFIG_SPARSE_VMEMMAP a page_to_pfn conversion is simple
* pointer arithmetic, so we can perform this to_vmem_altmap()
* conversion without concern for the initialization state of
* the struct page fields.
*/
struct page *page = (struct page *) memmap_start;
struct dev_pagemap *pgmap;

/*
* Uncoditionally retrieve a dev_pagemap associated with the
* given physical address, this is only for use in the
* arch_{add|remove}_memory() for setting up and tearing down
* the memmap.
*/
rcu_read_lock();
pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page)));
rcu_read_unlock();

return pgmap ? pgmap->altmap : NULL;
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
#endif /* CONFIG_ZONE_DEVICE */
Loading

0 comments on commit 4b94ffd

Please sign in to comment.