
Commit ca460b3

dennisszhou authored and htejun committed
percpu: introduce bitmap metadata blocks
This patch introduces bitmap metadata blocks and adds the skeleton of the code that will be used to maintain these blocks. Each chunk's bitmap is made up of full metadata blocks. These blocks maintain basic metadata to help prevent scanning unnecessarily to update hints. Full scanning methods are used for the skeleton and will be replaced in the coming patches. A number of helper functions are added as well to convert pages to blocks and manage offsets. Comments will be updated as the final version of each function is added.

There exists a relationship between PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE, the region size, and unit_size. Every chunk's region (including offsets) is page aligned at the beginning to preserve alignment. The end is aligned to LCM(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE) so that the region both fits the populated page map, which is kept per page, and contains a whole number of metadata blocks. The unit_size is already page aligned, but must also be aligned with PCPU_BITMAP_BLOCK_SIZE to ensure full metadata blocks.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
1 parent 40064ae commit ca460b3
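
To make the end-of-region alignment described above concrete, here is a minimal userspace sketch (not part of the patch) of the computation pcpu_alloc_first_chunk() performs. The 4 KiB PAGE_SIZE and the start_offset/map_size values are assumptions for illustration; with PCPU_BITMAP_BLOCK_SIZE defined as PAGE_SIZE, as in this patch, the LCM degenerates to PAGE_SIZE.

/*
 * Minimal sketch of the end-of-region alignment. All numeric values
 * below are illustrative assumptions, not taken from the patch.
 */
#include <stdio.h>

#define PAGE_SIZE		4096UL	/* assumed 4 KiB pages */
#define PCPU_BITMAP_BLOCK_SIZE	PAGE_SIZE
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

static unsigned long gcd(unsigned long a, unsigned long b)
{
	while (b) {
		unsigned long t = a % b;
		a = b;
		b = t;
	}
	return a;
}

static unsigned long lcm(unsigned long a, unsigned long b)
{
	return a / gcd(a, b) * b;
}

int main(void)
{
	unsigned long start_offset = 512;	/* hypothetical offset */
	unsigned long map_size = 28672;		/* 7 pages of percpu data */
	unsigned long lcm_align = lcm(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE);
	unsigned long region_size = ALIGN(start_offset + map_size, lcm_align);

	/* 512 + 28672 = 29184 rounds up to 32768, i.e. 8 full pages/blocks */
	printf("region_size = %lu\n", region_size);
	return 0;
}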

File tree: 3 files changed, +257 -12 lines

include/linux/percpu.h (+12)

@@ -25,6 +25,18 @@
 #define PCPU_MIN_ALLOC_SHIFT		2
 #define PCPU_MIN_ALLOC_SIZE		(1 << PCPU_MIN_ALLOC_SHIFT)
 
+/*
+ * This determines the size of each metadata block.  There are several subtle
+ * constraints around this constant.  The reserved region must be a multiple of
+ * PCPU_BITMAP_BLOCK_SIZE.  Additionally, PCPU_BITMAP_BLOCK_SIZE must be a
+ * multiple of PAGE_SIZE or PAGE_SIZE must be a multiple of
+ * PCPU_BITMAP_BLOCK_SIZE to align with the populated page map.  The unit_size
+ * also has to be a multiple of PCPU_BITMAP_BLOCK_SIZE to ensure full blocks.
+ */
+#define PCPU_BITMAP_BLOCK_SIZE		PAGE_SIZE
+#define PCPU_BITMAP_BLOCK_BITS		(PCPU_BITMAP_BLOCK_SIZE >>	\
+					 PCPU_MIN_ALLOC_SHIFT)
+
 /*
  * Percpu allocator can serve percpu allocations before slab is
  * initialized which allows slab to depend on the percpu allocator.
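
A quick spot check of the new constants, as a sketch assuming 4 KiB pages (PAGE_SIZE is architecture-dependent, so this value is an assumption): each metadata block tracks one bit per PCPU_MIN_ALLOC_SIZE (4-byte) slot, so a page-sized block covers 1024 bits.

#include <assert.h>

#define PAGE_SIZE		4096	/* assumed; arch-dependent in reality */
#define PCPU_MIN_ALLOC_SHIFT	2
#define PCPU_BITMAP_BLOCK_SIZE	PAGE_SIZE
#define PCPU_BITMAP_BLOCK_BITS	(PCPU_BITMAP_BLOCK_SIZE >> PCPU_MIN_ALLOC_SHIFT)

int main(void)
{
	/* 4096 bytes per block / 4 bytes per bit = 1024 bits per block */
	assert(PCPU_BITMAP_BLOCK_BITS == 1024);
	return 0;
}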

mm/percpu-internal.h (+29)

@@ -4,6 +4,22 @@
 #include <linux/types.h>
 #include <linux/percpu.h>
 
+/*
+ * pcpu_block_md is the metadata block struct.
+ * Each chunk's bitmap is split into a number of full blocks.
+ * All units are in terms of bits.
+ */
+struct pcpu_block_md {
+	int	contig_hint;		/* contig hint for block */
+	int	contig_hint_start;	/* block relative starting
+					   position of the contig hint */
+	int	left_free;		/* size of free space along
+					   the left side of the block */
+	int	right_free;		/* size of free space along
+					   the right side of the block */
+	int	first_free;		/* block position of first free */
+};
+
 struct pcpu_chunk {
 #ifdef CONFIG_PERCPU_STATS
 	int			nr_alloc;	/* # of allocations */
@@ -17,6 +33,7 @@ struct pcpu_chunk {
 
 	unsigned long		*alloc_map;	/* allocation map */
 	unsigned long		*bound_map;	/* boundary map */
+	struct pcpu_block_md	*md_blocks;	/* metadata blocks */
 
 	void			*data;		/* chunk data */
 	int			first_free;	/* no free below this */
@@ -43,6 +60,18 @@ extern int pcpu_nr_empty_pop_pages;
 extern struct pcpu_chunk *pcpu_first_chunk;
 extern struct pcpu_chunk *pcpu_reserved_chunk;
 
+/**
+ * pcpu_chunk_nr_blocks - converts nr_pages to # of md_blocks
+ * @chunk: chunk of interest
+ *
+ * This conversion is from the number of physical pages that the chunk
+ * serves to the number of bitmap blocks used.
+ */
+static inline int pcpu_chunk_nr_blocks(struct pcpu_chunk *chunk)
+{
+	return chunk->nr_pages * PAGE_SIZE / PCPU_BITMAP_BLOCK_SIZE;
+}
+
 /**
  * pcpu_nr_pages_to_map_bits - converts the pages to size of bitmap
  * @pages: number of physical pages
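
An illustrative use of pcpu_chunk_nr_blocks(), with both the chunk size and the page size being assumptions of this sketch:

#include <stdio.h>

#define PAGE_SIZE		4096	/* assumed 4 KiB pages */
#define PCPU_BITMAP_BLOCK_SIZE	PAGE_SIZE

int main(void)
{
	int nr_pages = 8;	/* hypothetical chunk spanning 8 pages */

	/* one metadata block per page when block size equals page size */
	printf("md_blocks = %d\n",
	       nr_pages * PAGE_SIZE / PCPU_BITMAP_BLOCK_SIZE);
	return 0;
}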

mm/percpu.c (+216 -12)

@@ -63,6 +63,7 @@
 #include <linux/bitmap.h>
 #include <linux/bootmem.h>
 #include <linux/err.h>
+#include <linux/lcm.h>
 #include <linux/list.h>
 #include <linux/log2.h>
 #include <linux/mm.h>
@@ -279,6 +280,26 @@ static void pcpu_next_pop(unsigned long *bitmap, int *rs, int *re, int end)
 	     (rs) < (re);						\
 	     (rs) = (re) + 1, pcpu_next_pop((bitmap), &(rs), &(re), (end)))
 
+/*
+ * The following are helper functions to help access bitmaps and convert
+ * between bitmap offsets to address offsets.
+ */
+static unsigned long *pcpu_index_alloc_map(struct pcpu_chunk *chunk, int index)
+{
+	return chunk->alloc_map +
+	       (index * PCPU_BITMAP_BLOCK_BITS / BITS_PER_LONG);
+}
+
+static unsigned long pcpu_off_to_block_index(int off)
+{
+	return off / PCPU_BITMAP_BLOCK_BITS;
+}
+
+static unsigned long pcpu_off_to_block_off(int off)
+{
+	return off & (PCPU_BITMAP_BLOCK_BITS - 1);
+}
+
 /**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
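
A worked example of the two offset helpers above, assuming PCPU_BITMAP_BLOCK_BITS == 1024 (4 KiB pages with PCPU_MIN_ALLOC_SHIFT == 2); the chunk offset 2500 is a hypothetical value. Note that pcpu_off_to_block_off() can mask instead of taking a modulo because PCPU_BITMAP_BLOCK_BITS is a power of two.

#include <stdio.h>

#define PCPU_BITMAP_BLOCK_BITS	1024	/* assumed block size in bits */

int main(void)
{
	int off = 2500;	/* hypothetical chunk-relative bit offset */

	/* bit 2500 lands in block 2 (bits 2048..3071), 452 bits in */
	printf("index = %d, block_off = %d\n",
	       off / PCPU_BITMAP_BLOCK_BITS,
	       off & (PCPU_BITMAP_BLOCK_BITS - 1));
	return 0;
}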
@@ -430,6 +451,154 @@ static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk)
 	chunk->nr_empty_pop_pages = nr_empty_pop_pages;
 }
 
+/**
+ * pcpu_block_update - updates a block given a free area
+ * @block: block of interest
+ * @start: start offset in block
+ * @end: end offset in block
+ *
+ * Updates a block given a known free area.  The region [start, end) is
+ * expected to be the entirety of the free area within a block.
+ */
+static void pcpu_block_update(struct pcpu_block_md *block, int start, int end)
+{
+	int contig = end - start;
+
+	block->first_free = min(block->first_free, start);
+	if (start == 0)
+		block->left_free = contig;
+
+	if (end == PCPU_BITMAP_BLOCK_BITS)
+		block->right_free = contig;
+
+	if (contig > block->contig_hint) {
+		block->contig_hint_start = start;
+		block->contig_hint = contig;
+	}
+}
+
+/**
+ * pcpu_block_refresh_hint
+ * @chunk: chunk of interest
+ * @index: index of the metadata block
+ *
+ * Scans over the block beginning at first_free and updates the block
+ * metadata accordingly.
+ */
+static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
+{
+	struct pcpu_block_md *block = chunk->md_blocks + index;
+	unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index);
+	int rs, re;	/* region start, region end */
+
+	/* clear hints */
+	block->contig_hint = 0;
+	block->left_free = block->right_free = 0;
+
+	/* iterate over free areas and update the contig hints */
+	pcpu_for_each_unpop_region(alloc_map, rs, re, block->first_free,
+				   PCPU_BITMAP_BLOCK_BITS) {
+		pcpu_block_update(block, rs, re);
+	}
+}
+
+/**
+ * pcpu_block_update_hint_alloc - update hint on allocation path
+ * @chunk: chunk of interest
+ * @bit_off: chunk offset
+ * @bits: size of request
+ */
+static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
+					 int bits)
+{
+	struct pcpu_block_md *s_block, *e_block, *block;
+	int s_index, e_index;	/* block indexes of the allocation */
+	int s_off, e_off;	/* block offsets of the allocation */
+
+	/*
+	 * Calculate per block offsets.
+	 * The calculation uses an inclusive range, but the resulting offsets
+	 * are [start, end).  e_index always points to the last block in the
+	 * range.
+	 */
+	s_index = pcpu_off_to_block_index(bit_off);
+	e_index = pcpu_off_to_block_index(bit_off + bits - 1);
+	s_off = pcpu_off_to_block_off(bit_off);
+	e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1;
+
+	s_block = chunk->md_blocks + s_index;
+	e_block = chunk->md_blocks + e_index;
+
+	/*
+	 * Update s_block.
+	 */
+	pcpu_block_refresh_hint(chunk, s_index);
+
+	/*
+	 * Update e_block.
+	 */
+	if (s_index != e_index) {
+		pcpu_block_refresh_hint(chunk, e_index);
+
+		/* update in-between md_blocks */
+		for (block = s_block + 1; block < e_block; block++) {
+			block->contig_hint = 0;
+			block->left_free = 0;
+			block->right_free = 0;
+		}
+	}
+
+	pcpu_chunk_refresh_hint(chunk);
+}
+
+/**
+ * pcpu_block_update_hint_free - updates the block hints on the free path
+ * @chunk: chunk of interest
+ * @bit_off: chunk offset
+ * @bits: size of request
+ */
+static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
+					int bits)
+{
+	struct pcpu_block_md *s_block, *e_block, *block;
+	int s_index, e_index;	/* block indexes of the freed allocation */
+	int s_off, e_off;	/* block offsets of the freed allocation */
+
+	/*
+	 * Calculate per block offsets.
+	 * The calculation uses an inclusive range, but the resulting offsets
+	 * are [start, end).  e_index always points to the last block in the
+	 * range.
+	 */
+	s_index = pcpu_off_to_block_index(bit_off);
+	e_index = pcpu_off_to_block_index(bit_off + bits - 1);
+	s_off = pcpu_off_to_block_off(bit_off);
+	e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1;
+
+	s_block = chunk->md_blocks + s_index;
+	e_block = chunk->md_blocks + e_index;
+
+	/* update s_block */
+	pcpu_block_refresh_hint(chunk, s_index);
+
+	/* freeing spans more than one block */
+	if (s_index != e_index) {
+		/* update e_block */
+		pcpu_block_refresh_hint(chunk, e_index);
+
+		/* reset md_blocks in the middle */
+		for (block = s_block + 1; block < e_block; block++) {
+			block->first_free = 0;
+			block->contig_hint_start = 0;
+			block->contig_hint = PCPU_BITMAP_BLOCK_BITS;
+			block->left_free = PCPU_BITMAP_BLOCK_BITS;
+			block->right_free = PCPU_BITMAP_BLOCK_BITS;
+		}
+	}
+
+	pcpu_chunk_refresh_hint(chunk);
+}
+
 /**
  * pcpu_is_populated - determines if the region is populated
  * @chunk: chunk of interest
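
Both hint-update paths above share the same inclusive-range endpoint math, which is easy to check in isolation. A sketch under assumed values (PCPU_BITMAP_BLOCK_BITS == 1024; a hypothetical allocation of 100 bits at bit_off = 1000) showing how a request straddling two blocks resolves to per-block [start, end) ranges:

#include <stdio.h>

#define PCPU_BITMAP_BLOCK_BITS	1024	/* assumed block size in bits */

int main(void)
{
	int bit_off = 1000, bits = 100;	/* hypothetical allocation */

	int s_index = bit_off / PCPU_BITMAP_BLOCK_BITS;			/* 0 */
	int e_index = (bit_off + bits - 1) / PCPU_BITMAP_BLOCK_BITS;	/* 1 */
	int s_off = bit_off & (PCPU_BITMAP_BLOCK_BITS - 1);		/* 1000 */
	int e_off = ((bit_off + bits - 1) &
		     (PCPU_BITMAP_BLOCK_BITS - 1)) + 1;			/* 76 */

	/* the allocation straddles blocks 0 and 1: [1000, 1024) + [0, 76) */
	printf("s=%d/%d e=%d/%d\n", s_index, s_off, e_index, e_off);
	return 0;
}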
@@ -546,7 +715,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits,
 
 	chunk->free_bytes -= alloc_bits * PCPU_MIN_ALLOC_SIZE;
 
-	pcpu_chunk_refresh_hint(chunk);
+	pcpu_block_update_hint_alloc(chunk, bit_off, alloc_bits);
 
 	pcpu_chunk_relocate(chunk, oslot);
 
@@ -581,11 +750,24 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int off)
 	/* update metadata */
 	chunk->free_bytes += bits * PCPU_MIN_ALLOC_SIZE;
 
-	pcpu_chunk_refresh_hint(chunk);
+	pcpu_block_update_hint_free(chunk, bit_off, bits);
 
 	pcpu_chunk_relocate(chunk, oslot);
 }
 
+static void pcpu_init_md_blocks(struct pcpu_chunk *chunk)
+{
+	struct pcpu_block_md *md_block;
+
+	for (md_block = chunk->md_blocks;
+	     md_block != chunk->md_blocks + pcpu_chunk_nr_blocks(chunk);
+	     md_block++) {
+		md_block->contig_hint = PCPU_BITMAP_BLOCK_BITS;
+		md_block->left_free = PCPU_BITMAP_BLOCK_BITS;
+		md_block->right_free = PCPU_BITMAP_BLOCK_BITS;
+	}
+}
+
 /**
  * pcpu_alloc_first_chunk - creates chunks that serve the first chunk
  * @tmp_addr: the start of the region served
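
For reference, a minimal sketch of the fully-free state pcpu_init_md_blocks() establishes in each block (the struct mirrors mm/percpu-internal.h; the 1024-bit block size is an assumed example value). first_free and contig_hint_start stay 0, which is correct for an empty block since bit 0 is free:

#include <stdio.h>

#define PCPU_BITMAP_BLOCK_BITS	1024	/* assumed block size in bits */

struct pcpu_block_md {
	int contig_hint;
	int contig_hint_start;
	int left_free;
	int right_free;
	int first_free;
};

int main(void)
{
	/* a freshly initialized block: one contiguous free run covering
	 * the whole block, so every hint says "everything is free" */
	struct pcpu_block_md b = {
		.contig_hint	= PCPU_BITMAP_BLOCK_BITS,
		.left_free	= PCPU_BITMAP_BLOCK_BITS,
		.right_free	= PCPU_BITMAP_BLOCK_BITS,
	};

	printf("contig=%d left=%d right=%d first_free=%d\n",
	       b.contig_hint, b.left_free, b.right_free, b.first_free);
	return 0;
}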
@@ -603,15 +785,21 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 							 int map_size)
 {
 	struct pcpu_chunk *chunk;
-	unsigned long aligned_addr;
+	unsigned long aligned_addr, lcm_align;
 	int start_offset, offset_bits, region_size, region_bits;
 
 	/* region calculations */
 	aligned_addr = tmp_addr & PAGE_MASK;
 
 	start_offset = tmp_addr - aligned_addr;
 
-	region_size = PFN_ALIGN(start_offset + map_size);
+	/*
+	 * Align the end of the region with the LCM of PAGE_SIZE and
+	 * PCPU_BITMAP_BLOCK_SIZE.  One of these constants is a multiple of
+	 * the other.
+	 */
+	lcm_align = lcm(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE);
+	region_size = ALIGN(start_offset + map_size, lcm_align);
 
 	/* allocate chunk */
 	chunk = memblock_virt_alloc(sizeof(struct pcpu_chunk) +
@@ -627,12 +815,13 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 	chunk->nr_pages = region_size >> PAGE_SHIFT;
 	region_bits = pcpu_chunk_map_bits(chunk);
 
-	chunk->alloc_map = memblock_virt_alloc(
-				BITS_TO_LONGS(region_bits) *
-				sizeof(chunk->alloc_map[0]), 0);
-	chunk->bound_map = memblock_virt_alloc(
-				BITS_TO_LONGS(region_bits + 1) *
-				sizeof(chunk->bound_map[0]), 0);
+	chunk->alloc_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits) *
+					       sizeof(chunk->alloc_map[0]), 0);
+	chunk->bound_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits + 1) *
+					       sizeof(chunk->bound_map[0]), 0);
+	chunk->md_blocks = memblock_virt_alloc(pcpu_chunk_nr_blocks(chunk) *
+					       sizeof(chunk->md_blocks[0]), 0);
+	pcpu_init_md_blocks(chunk);
 
 	/* manage populated page bitmap */
 	chunk->immutable = true;
@@ -651,6 +840,8 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 		bitmap_set(chunk->alloc_map, 0, offset_bits);
 		set_bit(0, chunk->bound_map);
 		set_bit(offset_bits, chunk->bound_map);
+
+		pcpu_block_update_hint_alloc(chunk, 0, offset_bits);
 	}
 
 	if (chunk->end_offset) {
if (chunk->end_offset) {
@@ -662,9 +853,10 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
662853
set_bit((start_offset + map_size) / PCPU_MIN_ALLOC_SIZE,
663854
chunk->bound_map);
664855
set_bit(region_bits, chunk->bound_map);
665-
}
666856

667-
pcpu_chunk_refresh_hint(chunk);
857+
pcpu_block_update_hint_alloc(chunk, pcpu_chunk_map_bits(chunk)
858+
- offset_bits, offset_bits);
859+
}
668860

669861
return chunk;
670862
}
@@ -692,12 +884,21 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	if (!chunk->bound_map)
 		goto bound_map_fail;
 
+	chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
+					   sizeof(chunk->md_blocks[0]));
+	if (!chunk->md_blocks)
+		goto md_blocks_fail;
+
+	pcpu_init_md_blocks(chunk);
+
 	/* init metadata */
 	chunk->contig_bits = region_bits;
 	chunk->free_bytes = chunk->nr_pages * PAGE_SIZE;
 
 	return chunk;
 
+md_blocks_fail:
+	pcpu_mem_free(chunk->bound_map);
 bound_map_fail:
 	pcpu_mem_free(chunk->alloc_map);
 alloc_map_fail:
@@ -1535,9 +1736,12 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
 	PCPU_SETUP_BUG_ON(offset_in_page(ai->unit_size));
 	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
+	PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->unit_size, PCPU_BITMAP_BLOCK_SIZE));
 	PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
 	PCPU_SETUP_BUG_ON(!ai->dyn_size);
 	PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->reserved_size, PCPU_MIN_ALLOC_SIZE));
+	PCPU_SETUP_BUG_ON(!(IS_ALIGNED(PCPU_BITMAP_BLOCK_SIZE, PAGE_SIZE) ||
+			    IS_ALIGNED(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE)));
 	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
 	/* process group information and build config tables accordingly */
