4
4
* Copyright (C) 2009 SUSE Linux Products GmbH
5
5
* Copyright (C) 2009 Tejun Heo <tj@kernel.org>
6
6
*
7
- * This file is released under the GPLv2.
7
+ * This file is released under the GPLv2 license.
8
8
*
9
- * This is percpu allocator which can handle both static and dynamic
10
- * areas. Percpu areas are allocated in chunks. Each chunk is
11
- * consisted of boot-time determined number of units and the first
12
- * chunk is used for static percpu variables in the kernel image
13
- * (special boot time alloc/init handling necessary as these areas
14
- * need to be brought up before allocation services are running).
15
- * Unit grows as necessary and all units grow or shrink in unison.
16
- * When a chunk is filled up, another chunk is allocated.
9
+ * The percpu allocator handles both static and dynamic areas. Percpu
10
+ * areas are allocated in chunks which are divided into units. There is
11
+ * a 1-to-1 mapping for units to possible cpus. These units are grouped
12
+ * based on NUMA properties of the machine.
17
13
*
18
14
* c0 c1 c2
19
15
* ------------------- ------------------- ------------
20
16
* | u0 | u1 | u2 | u3 | | u0 | u1 | u2 | u3 | | u0 | u1 | u
21
17
* ------------------- ...... ------------------- .... ------------
22
18
*
23
- * Allocation is done in offset-size areas of single unit space. Ie,
24
- * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0,
25
- * c1:u1, c1:u2 and c1:u3. On UMA, units corresponds directly to
26
- * cpus. On NUMA, the mapping can be non-linear and even sparse.
27
- * Percpu access can be done by configuring percpu base registers
28
- * according to cpu to unit mapping and pcpu_unit_size.
29
- *
30
- * There are usually many small percpu allocations many of them being
31
- * as small as 4 bytes. The allocator organizes chunks into lists
32
- * according to free size and tries to allocate from the fullest one.
33
- * Each chunk keeps the maximum contiguous area size hint which is
34
- * guaranteed to be equal to or larger than the maximum contiguous
35
- * area in the chunk. This helps the allocator not to iterate the
36
- * chunk maps unnecessarily.
19
+ * Allocation is done by offsets into a unit's address space. I.e., an
20
+ * area of 512 bytes at 6k in c1 occupies 512 bytes at 6k in c1:u0,
21
+ * c1:u1, c1:u2, etc. On NUMA machines, the mapping may be non-linear
22
+ * and even sparse. Access is handled by configuring percpu base
23
+ * registers according to the cpu to unit mappings and offsetting the
24
+ * base address using pcpu_unit_size.
25
+ *
26
+ * There is special consideration for the first chunk which must handle
27
+ * the static percpu variables in the kernel image as allocation services
28
+ * are not online yet. In short, the first chunk is structured like so:
29
+ *
30
+ * <Static | [Reserved] | Dynamic>
31
+ *
32
+ * The static data is copied from the original section managed by the
33
+ * linker. The reserved section, if non-zero, primarily manages static
34
+ * percpu variables from kernel modules. Finally, the dynamic section
35
+ * takes care of normal allocations.
37
36
*
38
37
* Allocation state in each chunk is kept using an array of integers
39
38
* on chunk->map. A positive value in the map represents a free
43
42
* Chunks can be determined from the address using the index field
44
43
* in the page struct. The index field contains a pointer to the chunk.
45
44
*
45
+ * These chunks are organized into lists according to free_size and
46
+ * the allocator tries to allocate from the fullest chunk first. Each chunk maintains
47
+ * a maximum contiguous area size hint which is guaranteed to be equal
48
+ * to or larger than the maximum contiguous area in the chunk. This
49
+ * helps prevent the allocator from iterating over chunks unnecessarily.
50
+ *
46
51
* To use this allocator, arch code should do the following:
47
52
*
48
53
* - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
@@ -1842,6 +1847,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
1842
1847
*/
1843
1848
min_unit_size = max_t (size_t , size_sum , PCPU_MIN_UNIT_SIZE );
1844
1849
1850
+ /* determine the maximum # of units that can fit in an allocation */
1845
1851
alloc_size = roundup (min_unit_size , atom_size );
1846
1852
upa = alloc_size / min_unit_size ;
1847
1853
while (alloc_size % upa || (offset_in_page (alloc_size / upa )))
@@ -1868,9 +1874,9 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
1868
1874
}
1869
1875
1870
1876
/*
1871
- * Expand unit size until address space usage goes over 75%
1872
- * and then as much as possible without using more address
1873
- * space .
1877
+ * Wasted space is caused by a ratio imbalance of upa to group_cnt.
1878
+ * Expand the unit_size until we use >= 75% of the units allocated.
1879
+ * Related to atom_size, which could be much larger than the unit_size.
1874
1880
*/
1875
1881
last_allocs = INT_MAX ;
1876
1882
for (upa = max_upa ; upa ; upa -- ) {
0 commit comments