perf bench: Copy kernel files needed to build mem{cpy,set} x86_64 benchmarks

commit 7d7d1bf1d1dabe435ef50efb051724b8664749cb upstream.

We can't access kernel files directly from tools/, so copy the required
bits, and make sure that we detect when the original files in the kernel
get modified.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-z7e76274ch5j4nugv048qacb@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
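The mechanism is two-fold: the needed kernel sources are duplicated under tools/, and the build warns when a copy drifts from its kernel original. perf implements the drift check in its build scripts; purely as an illustration of the idea (hypothetical file paths, not code from this commit), a standalone check could look like the following C sketch.

/* Illustration only: warn when a copied file no longer matches the kernel
 * original. perf does this from its build scripts; the paths here are
 * hypothetical placeholders, not files touched by this commit. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *slurp(const char *path, long *len)
{
        FILE *f = fopen(path, "rb");
        char *buf = NULL;

        if (!f)
                return NULL;
        fseek(f, 0, SEEK_END);
        *len = ftell(f);
        rewind(f);
        buf = malloc(*len);
        if (buf && fread(buf, 1, *len, f) != (size_t)*len) {
                free(buf);
                buf = NULL;
        }
        fclose(f);
        return buf;
}

int main(void)
{
        const char *copy = "tools/arch/x86/foo.h";       /* hypothetical copy */
        const char *orig = "arch/x86/include/asm/foo.h"; /* hypothetical original */
        long clen = 0, olen = 0;
        char *c = slurp(copy, &clen);
        char *o = slurp(orig, &olen);

        if (c && o && (clen != olen || memcmp(c, o, clen) != 0))
                fprintf(stderr, "Warning: %s differs from %s\n", copy, orig);

        free(c);
        free(o);
        return 0;
}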
Showing 10 changed files with 849 additions and 6 deletions.
Large diffs are not rendered by default.
@@ -0,0 +1,65 @@
#ifndef _ASM_X86_DISABLED_FEATURES_H
#define _ASM_X86_DISABLED_FEATURES_H

/* These features, although they might be available in a CPU
 * will not be used because the compile options to support
 * them are not present.
 *
 * This code allows them to be checked and disabled at
 * compile time without an explicit #ifdef. Use
 * cpu_feature_enabled().
 */

#ifdef CONFIG_X86_INTEL_MPX
# define DISABLE_MPX 0
#else
# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
#endif

#ifdef CONFIG_X86_64
# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
# define DISABLE_PCID 0
#else
# define DISABLE_VME 0
# define DISABLE_K6_MTRR 0
# define DISABLE_CYRIX_ARR 0
# define DISABLE_CENTAUR_MCR 0
# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
#endif /* CONFIG_X86_64 */

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
# define DISABLE_PKU 0
# define DISABLE_OSPKE 0
#else
# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */

/*
 * Make sure to add features to the correct mask
 */
#define DISABLED_MASK0 (DISABLE_VME)
#define DISABLED_MASK1 0
#define DISABLED_MASK2 0
#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
#define DISABLED_MASK4 (DISABLE_PCID)
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 0
#define DISABLED_MASK8 0
#define DISABLED_MASK9 (DISABLE_MPX)
#define DISABLED_MASK10 0
#define DISABLED_MASK11 0
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
#define DISABLED_MASK17 0
#define DISABLED_MASK18 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_DISABLED_FEATURES_H */
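The comment at the top of this header states the intent: a feature whose Kconfig support is compiled out gets its bit set in one of the DISABLED_MASKn words, so cpu_feature_enabled() can reject it at compile time without an #ifdef at every call site. A minimal user-space sketch of that pattern (hypothetical helper names and a standalone main(); not the kernel's real cpu_feature_enabled() implementation):

/* Minimal sketch, assuming the kernel's "32*word + bit" feature numbering
 * and pretending CONFIG_X86_INTEL_MPX is not set, so the #else branch from
 * the header above applies. Illustration only. */
#include <stdio.h>

#define X86_FEATURE_MPX   (9*32 + 14)                    /* word 9, for illustration */
#define DISABLE_MPX       (1u << (X86_FEATURE_MPX & 31))
#define DISABLED_MASK9    (DISABLE_MPX)

/* Hypothetical helper: is this word-9 feature statically disabled? */
#define feature9_disabled(feat) \
        (((feat) / 32) == 9 && (DISABLED_MASK9 & (1u << ((feat) & 31))))

int main(void)
{
        if (feature9_disabled(X86_FEATURE_MPX))
                printf("MPX compiled out: the guarded code can be dropped at build time\n");
        else
                printf("MPX may be used if the CPU reports it\n");
        return 0;
}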
@@ -0,0 +1,106 @@
#ifndef _ASM_X86_REQUIRED_FEATURES_H
#define _ASM_X86_REQUIRED_FEATURES_H

/* Define minimum CPUID feature set for kernel These bits are checked
   really early to actually display a visible error message before the
   kernel dies. Make sure to assign features to the proper mask!
   Some requirements that are not in CPUID yet are also in the
   CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.
   The real information is in arch/x86/Kconfig.cpu, this just converts
   the CONFIGs into a bitmask */

#ifndef CONFIG_MATH_EMULATION
# define NEED_FPU (1<<(X86_FEATURE_FPU & 31))
#else
# define NEED_FPU 0
#endif

#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
# define NEED_PAE (1<<(X86_FEATURE_PAE & 31))
#else
# define NEED_PAE 0
#endif

#ifdef CONFIG_X86_CMPXCHG64
# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
#else
# define NEED_CX8 0
#endif

#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31))
#else
# define NEED_CMOV 0
#endif

#ifdef CONFIG_X86_USE_3DNOW
# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31))
#else
# define NEED_3DNOW 0
#endif

#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
#else
# define NEED_NOPL 0
#endif

#ifdef CONFIG_MATOM
# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
#else
# define NEED_MOVBE 0
#endif

#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */
#define NEED_PSE 0
#define NEED_PGE 0
#else
#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31)
#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31)
#endif
#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
#define NEED_LM (1<<(X86_FEATURE_LM & 31))
#else
#define NEED_PSE 0
#define NEED_MSR 0
#define NEED_PGE 0
#define NEED_FXSR 0
#define NEED_XMM 0
#define NEED_XMM2 0
#define NEED_LM 0
#endif

#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
                        NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
                        NEED_XMM|NEED_XMM2)
#define SSE_MASK (NEED_XMM|NEED_XMM2)

#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW)

#define REQUIRED_MASK2 0
#define REQUIRED_MASK3 (NEED_NOPL)
#define REQUIRED_MASK4 (NEED_MOVBE)
#define REQUIRED_MASK5 0
#define REQUIRED_MASK6 0
#define REQUIRED_MASK7 0
#define REQUIRED_MASK8 0
#define REQUIRED_MASK9 0
#define REQUIRED_MASK10 0
#define REQUIRED_MASK11 0
#define REQUIRED_MASK12 0
#define REQUIRED_MASK13 0
#define REQUIRED_MASK14 0
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
#define REQUIRED_MASK18 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_REQUIRED_FEATURES_H */
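required-features.h is the mirror image of the previous header: bits in the REQUIRED_MASKn words name features the kernel cannot run without, and they are compared against the CPUID capability words very early in boot so a readable error can be printed before the kernel dies. A rough user-space analogue of that check (illustrative mask values and function names, not the kernel's actual early-boot code):

/* Sketch of the "required features" check under stated assumptions:
 * required_mask[] plays the role of the REQUIRED_MASKn words and
 * cpuid_words[] stands in for the capability words read via CPUID. */
#include <stdint.h>
#include <stdio.h>

#define NCAPINTS 19

/* Demonstration values only: word 0, low bits (FPU|VME|DE|PSE). */
static const uint32_t required_mask[NCAPINTS] = {
        [0] = 0x0f,
};

/* Would be filled from the CPUID instruction at boot. */
static uint32_t cpuid_words[NCAPINTS];

static int check_required_features(void)
{
        for (int w = 0; w < NCAPINTS; w++) {
                uint32_t missing = required_mask[w] & ~cpuid_words[w];

                if (missing) {
                        fprintf(stderr, "missing required features: word %d bits 0x%08x\n",
                                w, (unsigned)missing);
                        return -1;
                }
        }
        return 0;
}

int main(void)
{
        cpuid_words[0] = 0x0f;  /* pretend the CPU reports the required bits */
        return check_required_features() ? 1 : 0;
}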
@@ -0,0 +1,179 @@ | ||
/* Copyright 2002 Andi Kleen */ | ||
|
||
#include <linux/linkage.h> | ||
#include <asm/cpufeatures.h> | ||
#include <asm/alternative-asm.h> | ||
|
||
/* | ||
* We build a jump to memcpy_orig by default which gets NOPped out on | ||
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which | ||
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs | ||
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy. | ||
*/ | ||
|
||
.weak memcpy | ||
|
||
/* | ||
* memcpy - Copy a memory block. | ||
* | ||
* Input: | ||
* rdi destination | ||
* rsi source | ||
* rdx count | ||
* | ||
* Output: | ||
* rax original destination | ||
*/ | ||
ENTRY(__memcpy) | ||
ENTRY(memcpy) | ||
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ | ||
"jmp memcpy_erms", X86_FEATURE_ERMS | ||
|
||
movq %rdi, %rax | ||
movq %rdx, %rcx | ||
shrq $3, %rcx | ||
andl $7, %edx | ||
rep movsq | ||
movl %edx, %ecx | ||
rep movsb | ||
ret | ||
ENDPROC(memcpy) | ||
ENDPROC(__memcpy) | ||
|
||
/* | ||
* memcpy_erms() - enhanced fast string memcpy. This is faster and | ||
* simpler than memcpy. Use memcpy_erms when possible. | ||
*/ | ||
ENTRY(memcpy_erms) | ||
movq %rdi, %rax | ||
movq %rdx, %rcx | ||
rep movsb | ||
ret | ||
ENDPROC(memcpy_erms) | ||
|
||
ENTRY(memcpy_orig) | ||
movq %rdi, %rax | ||
|
||
cmpq $0x20, %rdx | ||
jb .Lhandle_tail | ||
|
||
/* | ||
* We check whether memory false dependence could occur, | ||
* then jump to corresponding copy mode. | ||
*/ | ||
cmp %dil, %sil | ||
jl .Lcopy_backward | ||
subq $0x20, %rdx | ||
.Lcopy_forward_loop: | ||
subq $0x20, %rdx | ||
|
||
/* | ||
* Move in blocks of 4x8 bytes: | ||
*/ | ||
movq 0*8(%rsi), %r8 | ||
movq 1*8(%rsi), %r9 | ||
movq 2*8(%rsi), %r10 | ||
movq 3*8(%rsi), %r11 | ||
leaq 4*8(%rsi), %rsi | ||
|
||
movq %r8, 0*8(%rdi) | ||
movq %r9, 1*8(%rdi) | ||
movq %r10, 2*8(%rdi) | ||
movq %r11, 3*8(%rdi) | ||
leaq 4*8(%rdi), %rdi | ||
jae .Lcopy_forward_loop | ||
addl $0x20, %edx | ||
jmp .Lhandle_tail | ||
|
||
.Lcopy_backward: | ||
/* | ||
* Calculate copy position to tail. | ||
*/ | ||
addq %rdx, %rsi | ||
addq %rdx, %rdi | ||
subq $0x20, %rdx | ||
/* | ||
* At most 3 ALU operations in one cycle, | ||
* so append NOPS in the same 16 bytes trunk. | ||
*/ | ||
.p2align 4 | ||
.Lcopy_backward_loop: | ||
subq $0x20, %rdx | ||
movq -1*8(%rsi), %r8 | ||
movq -2*8(%rsi), %r9 | ||
movq -3*8(%rsi), %r10 | ||
movq -4*8(%rsi), %r11 | ||
leaq -4*8(%rsi), %rsi | ||
movq %r8, -1*8(%rdi) | ||
movq %r9, -2*8(%rdi) | ||
movq %r10, -3*8(%rdi) | ||
movq %r11, -4*8(%rdi) | ||
leaq -4*8(%rdi), %rdi | ||
jae .Lcopy_backward_loop | ||
|
||
/* | ||
* Calculate copy position to head. | ||
*/ | ||
addl $0x20, %edx | ||
subq %rdx, %rsi | ||
subq %rdx, %rdi | ||
.Lhandle_tail: | ||
cmpl $16, %edx | ||
jb .Lless_16bytes | ||
|
||
/* | ||
* Move data from 16 bytes to 31 bytes. | ||
*/ | ||
movq 0*8(%rsi), %r8 | ||
movq 1*8(%rsi), %r9 | ||
movq -2*8(%rsi, %rdx), %r10 | ||
movq -1*8(%rsi, %rdx), %r11 | ||
movq %r8, 0*8(%rdi) | ||
movq %r9, 1*8(%rdi) | ||
movq %r10, -2*8(%rdi, %rdx) | ||
movq %r11, -1*8(%rdi, %rdx) | ||
retq | ||
.p2align 4 | ||
.Lless_16bytes: | ||
cmpl $8, %edx | ||
jb .Lless_8bytes | ||
/* | ||
* Move data from 8 bytes to 15 bytes. | ||
*/ | ||
movq 0*8(%rsi), %r8 | ||
movq -1*8(%rsi, %rdx), %r9 | ||
movq %r8, 0*8(%rdi) | ||
movq %r9, -1*8(%rdi, %rdx) | ||
retq | ||
.p2align 4 | ||
.Lless_8bytes: | ||
cmpl $4, %edx | ||
jb .Lless_3bytes | ||
|
||
/* | ||
* Move data from 4 bytes to 7 bytes. | ||
*/ | ||
movl (%rsi), %ecx | ||
movl -4(%rsi, %rdx), %r8d | ||
movl %ecx, (%rdi) | ||
movl %r8d, -4(%rdi, %rdx) | ||
retq | ||
.p2align 4 | ||
.Lless_3bytes: | ||
subl $1, %edx | ||
jb .Lend | ||
/* | ||
* Move data from 1 bytes to 3 bytes. | ||
*/ | ||
movzbl (%rsi), %ecx | ||
jz .Lstore_1byte | ||
movzbq 1(%rsi), %r8 | ||
movzbq (%rsi, %rdx), %r9 | ||
movb %r8b, 1(%rdi) | ||
movb %r9b, (%rdi, %rdx) | ||
.Lstore_1byte: | ||
movb %cl, (%rdi) | ||
|
||
.Lend: | ||
retq | ||
ENDPROC(memcpy_orig) |
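These routines are copied into tools/ so that perf bench can time the different memcpy implementations (the rep-movs variants versus memcpy_orig) against each other in user space. The sketch below shows the general shape of such a harness, a routine table plus a timing loop; it registers only libc memcpy so it stays self-contained, and it is not the actual perf bench code.

/* Hedged sketch of a memcpy benchmark harness: a table of routines and a
 * wall-clock timing loop. perf's real benchmark links the assembly variants
 * above into a similar table; plain memcpy stands in here. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

typedef void *(*memcpy_fn)(void *dst, const void *src, size_t n);

struct routine {
        const char *name;
        memcpy_fn   fn;
};

static const struct routine routines[] = {
        { "libc memcpy", memcpy },
};

static double time_routine(memcpy_fn fn, void *dst, const void *src,
                           size_t len, int iters)
{
        struct timespec t0, t1;

        clock_gettime(CLOCK_MONOTONIC, &t0);
        for (int i = 0; i < iters; i++)
                fn(dst, src, len);
        clock_gettime(CLOCK_MONOTONIC, &t1);

        return (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
}

int main(void)
{
        const size_t len = 1 << 20;     /* 1 MB per copy */
        const int iters = 1000;
        void *src = malloc(len), *dst = malloc(len);

        if (!src || !dst)
                return 1;
        memset(src, 0xa5, len);

        for (size_t i = 0; i < sizeof(routines) / sizeof(routines[0]); i++) {
                double s = time_routine(routines[i].fn, dst, src, len, iters);

                printf("%-12s %8.3f GB/s\n", routines[i].name,
                       (double)len * iters / s / 1e9);
        }
        free(src);
        free(dst);
        return 0;
}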