Skip to content

Commit

Permalink
percpu: align percpu readmostly subsection to cacheline
Browse files Browse the repository at this point in the history
Currently percpu readmostly subsection may share cachelines with other
percpu subsections which may result in unnecessary cacheline bounce
and performance degradation.

This patch adds @CacheLine parameter to PERCPU() and PERCPU_VADDR()
linker macros, makes each arch linker scripts specify its cacheline
size and use it to align percpu subsections.

This is based on Shaohua's x86 only patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shaohua.li@intel.com>
  • Loading branch information
htejun committed Jan 25, 2011
1 parent c723fda commit 19df0c2
Show file tree
Hide file tree
Showing 19 changed files with 41 additions and 32 deletions.
2 changes: 1 addition & 1 deletion arch/alpha/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ SECTIONS
__init_begin = ALIGN(PAGE_SIZE);
INIT_TEXT_SECTION(PAGE_SIZE)
INIT_DATA_SECTION(16)
PERCPU(PAGE_SIZE)
PERCPU(64, PAGE_SIZE)
/* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page
needed for the THREAD_SIZE aligned init_task gets freed after init */
. = ALIGN(THREAD_SIZE);
Expand Down
2 changes: 1 addition & 1 deletion arch/arm/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ SECTIONS
#endif
}

PERCPU(PAGE_SIZE)
PERCPU(32, PAGE_SIZE)

#ifndef CONFIG_XIP_KERNEL
. = ALIGN(PAGE_SIZE);
Expand Down
2 changes: 1 addition & 1 deletion arch/blackfin/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ SECTIONS

. = ALIGN(16);
INIT_DATA_SECTION(16)
PERCPU(4)
PERCPU(32, 4)

.exit.data :
{
Expand Down
2 changes: 1 addition & 1 deletion arch/cris/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ SECTIONS
#endif
__vmlinux_end = .; /* Last address of the physical file. */
#ifdef CONFIG_ETRAX_ARCH_V32
PERCPU(PAGE_SIZE)
PERCPU(32, PAGE_SIZE)

.init.ramfs : {
INIT_RAM_FS
Expand Down
2 changes: 1 addition & 1 deletion arch/frv/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ SECTIONS
_einittext = .;

INIT_DATA_SECTION(8)
PERCPU(4096)
PERCPU(L1_CACHE_BYTES, 4096)

. = ALIGN(PAGE_SIZE);
__init_end = .;
Expand Down
2 changes: 1 addition & 1 deletion arch/ia64/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ SECTIONS {

/* Per-cpu data: */
. = ALIGN(PERCPU_PAGE_SIZE);
PERCPU_VADDR(PERCPU_ADDR, :percpu)
PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
__phys_per_cpu_start = __per_cpu_load;
/*
* ensure percpu data fits
Expand Down
2 changes: 1 addition & 1 deletion arch/m32r/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ SECTIONS
__init_begin = .;
INIT_TEXT_SECTION(PAGE_SIZE)
INIT_DATA_SECTION(16)
PERCPU(PAGE_SIZE)
PERCPU(32, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
__init_end = .;
/* freed after init ends here */
Expand Down
2 changes: 1 addition & 1 deletion arch/mips/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ SECTIONS
EXIT_DATA
}

PERCPU(PAGE_SIZE)
PERCPU(1 << CONFIG_MIPS_L1_CACHE_SHIFT, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
__init_end = .;
/* freed after init ends here */
Expand Down
2 changes: 1 addition & 1 deletion arch/mn10300/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ SECTIONS
.exit.text : { EXIT_TEXT; }
.exit.data : { EXIT_DATA; }

PERCPU(PAGE_SIZE)
PERCPU(32, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
__init_end = .;
/* freed after init ends here */
Expand Down
2 changes: 1 addition & 1 deletion arch/parisc/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ SECTIONS
EXIT_DATA
}

PERCPU(PAGE_SIZE)
PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
__init_end = .;
/* freed after init ends here */
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ SECTIONS
INIT_RAM_FS
}

PERCPU(PAGE_SIZE)
PERCPU(L1_CACHE_BYTES, PAGE_SIZE)

. = ALIGN(8);
.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
Expand Down
2 changes: 1 addition & 1 deletion arch/s390/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)

PERCPU(PAGE_SIZE)
PERCPU(0x100, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
__init_end = .; /* freed after init ends here */

Expand Down
2 changes: 1 addition & 1 deletion arch/sh/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ SECTIONS
__machvec_end = .;
}

PERCPU(PAGE_SIZE)
PERCPU(L1_CACHE_BYTES, PAGE_SIZE)

/*
* .exit.text is discarded at runtime, not link time, to deal with
Expand Down
2 changes: 1 addition & 1 deletion arch/sparc/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ SECTIONS
__sun4v_2insn_patch_end = .;
}

PERCPU(PAGE_SIZE)
PERCPU(SMP_CACHE_BYTES, PAGE_SIZE)

. = ALIGN(PAGE_SIZE);
__init_end = .;
Expand Down
2 changes: 1 addition & 1 deletion arch/tile/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ SECTIONS
*(.init.page)
} :data =0
INIT_DATA_SECTION(16)
PERCPU(PAGE_SIZE)
PERCPU(L2_CACHE_BYTES, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
VMLINUX_SYMBOL(_einitdata) = .;

Expand Down
2 changes: 1 addition & 1 deletion arch/um/include/asm/common.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
INIT_SETUP(0)
}

PERCPU(32)
PERCPU(32, 32)

.initcall.init : {
INIT_CALLS
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ SECTIONS
* output PHDR, so the next output section - .init.text - should
* start another segment - init.
*/
PERCPU_VADDR(0, :percpu)
PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
#endif

INIT_TEXT_SECTION(PAGE_SIZE)
Expand Down Expand Up @@ -305,7 +305,7 @@ SECTIONS
}

#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
PERCPU(THREAD_SIZE)
PERCPU(INTERNODE_CACHE_BYTES, THREAD_SIZE)
#endif

. = ALIGN(PAGE_SIZE);
Expand Down
2 changes: 1 addition & 1 deletion arch/xtensa/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ SECTIONS
INIT_RAM_FS
}

PERCPU(PAGE_SIZE)
PERCPU(XCHAL_ICACHE_LINESIZE, PAGE_SIZE)

/* We need this dummy segment here */

Expand Down
35 changes: 22 additions & 13 deletions include/asm-generic/vmlinux.lds.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* HEAD_TEXT_SECTION
* INIT_TEXT_SECTION(PAGE_SIZE)
* INIT_DATA_SECTION(...)
* PERCPU(PAGE_SIZE)
* PERCPU(CACHELINE_SIZE, PAGE_SIZE)
* __init_end = .;
*
* _stext = .;
Expand Down Expand Up @@ -683,13 +683,18 @@

/**
* PERCPU_VADDR - define output section for percpu area
* @cacheline: cacheline size
* @vaddr: explicit base address (optional)
* @phdr: destination PHDR (optional)
*
* Macro which expands to output section for percpu area. If @vaddr
* is not blank, it specifies explicit base address and all percpu
* symbols will be offset from the given address. If blank, @vaddr
* always equals @laddr + LOAD_OFFSET.
* Macro which expands to output section for percpu area.
*
* @cacheline is used to align subsections to avoid false cacheline
* sharing between subsections for different purposes.
*
* If @vaddr is not blank, it specifies explicit base address and all
* percpu symbols will be offset from the given address. If blank,
* @vaddr always equals @laddr + LOAD_OFFSET.
*
* @phdr defines the output PHDR to use if not blank. Be warned that
* output PHDR is sticky. If @phdr is specified, the next output
Expand All @@ -700,15 +705,17 @@
* If there is no need to put the percpu section at a predetermined
* address, use PERCPU().
*/
#define PERCPU_VADDR(vaddr, phdr) \
#define PERCPU_VADDR(cacheline, vaddr, phdr) \
VMLINUX_SYMBOL(__per_cpu_load) = .; \
.data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
- LOAD_OFFSET) { \
VMLINUX_SYMBOL(__per_cpu_start) = .; \
*(.data..percpu..first) \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
. = ALIGN(cacheline); \
*(.data..percpu..readmostly) \
. = ALIGN(cacheline); \
*(.data..percpu) \
*(.data..percpu..shared_aligned) \
VMLINUX_SYMBOL(__per_cpu_end) = .; \
Expand All @@ -717,26 +724,28 @@

/**
* PERCPU - define output section for percpu area, simple version
* @cacheline: cacheline size
* @align: required alignment
*
* Align to @align and outputs output section for percpu area. This
* macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and
* Align to @align and outputs output section for percpu area. This macro
* doesn't manipulate @vaddr or @phdr and __per_cpu_load and
* __per_cpu_start will be identical.
*
* This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except
* that __per_cpu_load is defined as a relative symbol against
* .data..percpu which is required for relocatable x86_32
* configuration.
* This macro is equivalent to ALIGN(@align); PERCPU_VADDR(@cacheline,,)
* except that __per_cpu_load is defined as a relative symbol against
* .data..percpu which is required for relocatable x86_32 configuration.
*/
#define PERCPU(align) \
#define PERCPU(cacheline, align) \
. = ALIGN(align); \
.data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__per_cpu_load) = .; \
VMLINUX_SYMBOL(__per_cpu_start) = .; \
*(.data..percpu..first) \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
. = ALIGN(cacheline); \
*(.data..percpu..readmostly) \
. = ALIGN(cacheline); \
*(.data..percpu) \
*(.data..percpu..shared_aligned) \
VMLINUX_SYMBOL(__per_cpu_end) = .; \
Expand Down

0 comments on commit 19df0c2

Please sign in to comment.