perf bench: Copy kernel files needed to build mem{cpy,set} x86_64 benchmarks

commit 7d7d1bf1d1dabe435ef50efb051724b8664749cb upstream.

We can't access kernel files directly from tools/, so copy the required
bits, and make sure that we detect when the original files in the
kernel get modified.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-z7e76274ch5j4nugv048qacb@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
acmel authored and gregkh committed Mar 23, 2019
1 parent fefcb29 commit 7bc0cb4
Showing 10 changed files with 849 additions and 6 deletions.
336 changes: 336 additions & 0 deletions tools/arch/x86/include/asm/cpufeatures.h

Large diffs are not rendered by default.

65 changes: 65 additions & 0 deletions tools/arch/x86/include/asm/disabled-features.h
@@ -0,0 +1,65 @@
#ifndef _ASM_X86_DISABLED_FEATURES_H
#define _ASM_X86_DISABLED_FEATURES_H

/* These features, although they might be available in a CPU
* will not be used because the compile options to support
* them are not present.
*
* This code allows them to be checked and disabled at
* compile time without an explicit #ifdef. Use
* cpu_feature_enabled().
*/

#ifdef CONFIG_X86_INTEL_MPX
# define DISABLE_MPX 0
#else
# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
#endif

#ifdef CONFIG_X86_64
# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
# define DISABLE_PCID 0
#else
# define DISABLE_VME 0
# define DISABLE_K6_MTRR 0
# define DISABLE_CYRIX_ARR 0
# define DISABLE_CENTAUR_MCR 0
# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
#endif /* CONFIG_X86_64 */

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
# define DISABLE_PKU 0
# define DISABLE_OSPKE 0
#else
# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */

/*
* Make sure to add features to the correct mask
*/
#define DISABLED_MASK0 (DISABLE_VME)
#define DISABLED_MASK1 0
#define DISABLED_MASK2 0
#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
#define DISABLED_MASK4 (DISABLE_PCID)
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 0
#define DISABLED_MASK8 0
#define DISABLED_MASK9 (DISABLE_MPX)
#define DISABLED_MASK10 0
#define DISABLED_MASK11 0
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
#define DISABLED_MASK17 0
#define DISABLED_MASK18 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_DISABLED_FEATURES_H */
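
The comment at the top of this header describes the scheme: each X86_FEATURE_* number encodes a 32-bit word index plus a bit position, and every DISABLED_MASKn collects the features compiled out of that word, so cpu_feature_enabled() can reject them at compile time. A minimal user-space sketch of the bit arithmetic follows; the feature values are taken from cpufeatures.h for illustration, and feature_disabled() is a made-up helper, not a kernel API.

#include <stdio.h>

/* Feature numbers use the kernel's "word * 32 + bit" encoding
 * (values shown here for illustration; see cpufeatures.h). */
#define X86_FEATURE_PCID  (4 * 32 + 17)
#define X86_FEATURE_MPX   (9 * 32 + 14)

/* Per-word masks of compiled-out features, mirroring the header above. */
#define DISABLE_PCID      (1u << (X86_FEATURE_PCID & 31))
#define DISABLED_MASK4    (DISABLE_PCID)
#define DISABLED_MASK9    0u   /* MPX assumed enabled in this sketch */

/* Hypothetical helper: non-zero if the feature's bit is set in its word's mask. */
static int feature_disabled(unsigned int feature, unsigned int word_mask)
{
        return !!(word_mask & (1u << (feature & 31)));
}

int main(void)
{
        printf("PCID disabled: %d\n", feature_disabled(X86_FEATURE_PCID, DISABLED_MASK4));
        printf("MPX  disabled: %d\n", feature_disabled(X86_FEATURE_MPX, DISABLED_MASK9));
        return 0;
}

Because the position is (feature & 31) and the word is (feature >> 5), the check needs no runtime table; the disabled set is fixed by Kconfig at build time.
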
106 changes: 106 additions & 0 deletions tools/arch/x86/include/asm/required-features.h
@@ -0,0 +1,106 @@
#ifndef _ASM_X86_REQUIRED_FEATURES_H
#define _ASM_X86_REQUIRED_FEATURES_H

/* Define minimum CPUID feature set for kernel. These bits are checked
really early to actually display a visible error message before the
kernel dies. Make sure to assign features to the proper mask!
Some requirements that are not in CPUID yet are also in the
CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.
The real information is in arch/x86/Kconfig.cpu, this just converts
the CONFIGs into a bitmask */

#ifndef CONFIG_MATH_EMULATION
# define NEED_FPU (1<<(X86_FEATURE_FPU & 31))
#else
# define NEED_FPU 0
#endif

#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
# define NEED_PAE (1<<(X86_FEATURE_PAE & 31))
#else
# define NEED_PAE 0
#endif

#ifdef CONFIG_X86_CMPXCHG64
# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
#else
# define NEED_CX8 0
#endif

#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31))
#else
# define NEED_CMOV 0
#endif

#ifdef CONFIG_X86_USE_3DNOW
# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31))
#else
# define NEED_3DNOW 0
#endif

#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
#else
# define NEED_NOPL 0
#endif

#ifdef CONFIG_MATOM
# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
#else
# define NEED_MOVBE 0
#endif

#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */
#define NEED_PSE 0
#define NEED_PGE 0
#else
#define NEED_PSE (1<<(X86_FEATURE_PSE & 31))
#define NEED_PGE (1<<(X86_FEATURE_PGE & 31))
#endif
#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
#define NEED_LM (1<<(X86_FEATURE_LM & 31))
#else
#define NEED_PSE 0
#define NEED_MSR 0
#define NEED_PGE 0
#define NEED_FXSR 0
#define NEED_XMM 0
#define NEED_XMM2 0
#define NEED_LM 0
#endif

#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
NEED_XMM|NEED_XMM2)
#define SSE_MASK (NEED_XMM|NEED_XMM2)

#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW)

#define REQUIRED_MASK2 0
#define REQUIRED_MASK3 (NEED_NOPL)
#define REQUIRED_MASK4 (NEED_MOVBE)
#define REQUIRED_MASK5 0
#define REQUIRED_MASK6 0
#define REQUIRED_MASK7 0
#define REQUIRED_MASK8 0
#define REQUIRED_MASK9 0
#define REQUIRED_MASK10 0
#define REQUIRED_MASK11 0
#define REQUIRED_MASK12 0
#define REQUIRED_MASK13 0
#define REQUIRED_MASK14 0
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
#define REQUIRED_MASK18 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

#endif /* _ASM_X86_REQUIRED_FEATURES_H */
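
REQUIRED_MASK0 above folds the Kconfig choices into a single bitmask over CPUID word 0 (the CPUID leaf 1 EDX bits), which early boot code compares against what the CPU actually reports before letting the kernel continue. A rough user-space illustration of that comparison, assuming a fabricated CPUID value; the BIT_* positions mirror the standard CPUID 1 EDX layout and report_missing() is a hypothetical helper:

#include <stdio.h>

/* Word-0 feature bits (CPUID 1:EDX layout), values for illustration. */
#define BIT_FPU   (1u << 0)
#define BIT_PSE   (1u << 3)
#define BIT_MSR   (1u << 5)
#define BIT_PAE   (1u << 6)
#define BIT_CX8   (1u << 8)
#define BIT_PGE   (1u << 13)
#define BIT_CMOV  (1u << 15)
#define BIT_FXSR  (1u << 24)
#define BIT_XMM   (1u << 25)
#define BIT_XMM2  (1u << 26)

/* What a 64-bit build would demand from word 0, mirroring REQUIRED_MASK0. */
#define REQUIRED_MASK0 (BIT_FPU | BIT_PSE | BIT_MSR | BIT_PAE | BIT_CX8 | \
                        BIT_PGE | BIT_FXSR | BIT_CMOV | BIT_XMM | BIT_XMM2)

/* Hypothetical check: report which required bits this CPU lacks. */
static unsigned int report_missing(unsigned int cpuid_word0)
{
        unsigned int missing = REQUIRED_MASK0 & ~cpuid_word0;

        if (missing)
                printf("missing required features in word 0: 0x%08x\n", missing);
        return missing;
}

int main(void)
{
        /* Pretend CPUID reported everything except SSE2 (XMM2). */
        unsigned int fake_cpuid_edx = REQUIRED_MASK0 & ~BIT_XMM2;

        return report_missing(fake_cpuid_edx) ? 1 : 0;
}
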
179 changes: 179 additions & 0 deletions tools/arch/x86/lib/memcpy_64.S
@@ -0,0 +1,179 @@
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>

/*
* We build a jump to memcpy_orig by default which gets NOPped out on
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
*/

.weak memcpy

/*
* memcpy - Copy a memory block.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* rax original destination
*/
ENTRY(__memcpy)
ENTRY(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS

movq %rdi, %rax
movq %rdx, %rcx
shrq $3, %rcx
andl $7, %edx
rep movsq
movl %edx, %ecx
rep movsb
ret
ENDPROC(memcpy)
ENDPROC(__memcpy)

/*
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
ENTRY(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
ENDPROC(memcpy_erms)

ENTRY(memcpy_orig)
movq %rdi, %rax

cmpq $0x20, %rdx
jb .Lhandle_tail

/*
* We check whether memory false dependence could occur,
* then jump to corresponding copy mode.
*/
cmp %dil, %sil
jl .Lcopy_backward
subq $0x20, %rdx
.Lcopy_forward_loop:
subq $0x20, %rdx

/*
* Move in blocks of 4x8 bytes:
*/
movq 0*8(%rsi), %r8
movq 1*8(%rsi), %r9
movq 2*8(%rsi), %r10
movq 3*8(%rsi), %r11
leaq 4*8(%rsi), %rsi

movq %r8, 0*8(%rdi)
movq %r9, 1*8(%rdi)
movq %r10, 2*8(%rdi)
movq %r11, 3*8(%rdi)
leaq 4*8(%rdi), %rdi
jae .Lcopy_forward_loop
addl $0x20, %edx
jmp .Lhandle_tail

.Lcopy_backward:
/*
* Calculate copy position to tail.
*/
addq %rdx, %rsi
addq %rdx, %rdi
subq $0x20, %rdx
/*
* At most 3 ALU operations in one cycle,
* so append NOPS in the same 16 bytes trunk.
*/
.p2align 4
.Lcopy_backward_loop:
subq $0x20, %rdx
movq -1*8(%rsi), %r8
movq -2*8(%rsi), %r9
movq -3*8(%rsi), %r10
movq -4*8(%rsi), %r11
leaq -4*8(%rsi), %rsi
movq %r8, -1*8(%rdi)
movq %r9, -2*8(%rdi)
movq %r10, -3*8(%rdi)
movq %r11, -4*8(%rdi)
leaq -4*8(%rdi), %rdi
jae .Lcopy_backward_loop

/*
* Calculate copy position to head.
*/
addl $0x20, %edx
subq %rdx, %rsi
subq %rdx, %rdi
.Lhandle_tail:
cmpl $16, %edx
jb .Lless_16bytes

/*
* Move data from 16 bytes to 31 bytes.
*/
movq 0*8(%rsi), %r8
movq 1*8(%rsi), %r9
movq -2*8(%rsi, %rdx), %r10
movq -1*8(%rsi, %rdx), %r11
movq %r8, 0*8(%rdi)
movq %r9, 1*8(%rdi)
movq %r10, -2*8(%rdi, %rdx)
movq %r11, -1*8(%rdi, %rdx)
retq
.p2align 4
.Lless_16bytes:
cmpl $8, %edx
jb .Lless_8bytes
/*
* Move data from 8 bytes to 15 bytes.
*/
movq 0*8(%rsi), %r8
movq -1*8(%rsi, %rdx), %r9
movq %r8, 0*8(%rdi)
movq %r9, -1*8(%rdi, %rdx)
retq
.p2align 4
.Lless_8bytes:
cmpl $4, %edx
jb .Lless_3bytes

/*
* Move data from 4 bytes to 7 bytes.
*/
movl (%rsi), %ecx
movl -4(%rsi, %rdx), %r8d
movl %ecx, (%rdi)
movl %r8d, -4(%rdi, %rdx)
retq
.p2align 4
.Lless_3bytes:
subl $1, %edx
jb .Lend
/*
* Move data from 1 bytes to 3 bytes.
*/
movzbl (%rsi), %ecx
jz .Lstore_1byte
movzbq 1(%rsi), %r8
movzbq (%rsi, %rdx), %r9
movb %r8b, 1(%rdi)
movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
movb %cl, (%rdi)

.Lend:
retq
ENDPROC(memcpy_orig)
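
The header comment of this file explains the dispatch: the default body jumps to memcpy_orig, that jump is NOPped out on CPUs with X86_FEATURE_REP_GOOD so execution falls through to the rep movsq path, and on X86_FEATURE_ERMS CPUs it becomes a jump to memcpy_erms. The kernel does this by patching instructions at boot via ALTERNATIVE_2; a plain C analogue that makes the same choice once through a function pointer is sketched below (feature flags and variant names are illustrative stand-ins, not kernel APIs):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Illustrative flags standing in for X86_FEATURE_REP_GOOD / X86_FEATURE_ERMS. */
#define FEAT_REP_GOOD  (1u << 0)
#define FEAT_ERMS      (1u << 1)

typedef void *(*memcpy_fn)(void *dst, const void *src, size_t len);

/* Stand-ins for the three variants; a real build would supply the asm bodies. */
static void *memcpy_orig_c(void *d, const void *s, size_t n) { return memcpy(d, s, n); }
static void *memcpy_rep_c(void *d, const void *s, size_t n)  { return memcpy(d, s, n); }
static void *memcpy_erms_c(void *d, const void *s, size_t n) { return memcpy(d, s, n); }

/* Same decision ALTERNATIVE_2 encodes: ERMS wins, then REP_GOOD, else the
 * unrolled fallback. The kernel patches a jmp instead of using a pointer. */
static memcpy_fn pick_memcpy(unsigned int features)
{
        if (features & FEAT_ERMS)
                return memcpy_erms_c;
        if (features & FEAT_REP_GOOD)
                return memcpy_rep_c;
        return memcpy_orig_c;
}

int main(void)
{
        char src[] = "perf bench mem memcpy";
        char dst[sizeof(src)];
        memcpy_fn do_copy = pick_memcpy(FEAT_REP_GOOD);

        do_copy(dst, src, sizeof(src));
        printf("%s\n", dst);
        return 0;
}

The ordering matters: ERMS is checked first, just as the second alternative in ALTERNATIVE_2 takes precedence when both features are set.
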