Commit

Merge branch 'tip/perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into perf/core
Ingo Molnar committed Sep 24, 2010
2 parents d0303d7 + 46eb3b6 commit a5a2bad
Showing 33 changed files with 843 additions and 209 deletions.
5 changes: 5 additions & 0 deletions Makefile
@@ -591,6 +591,11 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
# conserve stack if available
KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)

# check for 'asm goto'
ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
endif

# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
# But warn user when we do so
warn-assign = \
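The gcc-goto.sh probe above only needs to decide whether the compiler accepts the 'asm goto' extension that the jump label macros added below depend on. A minimal sketch of the kind of test program such a probe compiles (an illustration, not the verbatim contents of scripts/gcc-goto.sh):

/* Sketch: if the compiler lacks 'asm goto', this fails to build and
 * CC_HAVE_ASM_GOTO is simply not defined. */
int main(void)
{
entry:
	asm goto("" : : : : entry);	/* inline asm that may branch to 'entry' */
	return 0;
}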
3 changes: 3 additions & 0 deletions arch/Kconfig
@@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI
	  subsystem. Also has support for calculating CPU cycle events
	  to determine how many clock cycles in a given period.

config HAVE_ARCH_JUMP_LABEL
	bool

source "kernel/gcov/Kconfig"
1 change: 1 addition & 0 deletions arch/sparc/Kconfig
@@ -30,6 +30,7 @@ config SPARC
	select PERF_USE_VMALLOC
	select HAVE_DMA_ATTRS
	select HAVE_DMA_API_DEBUG
	select HAVE_ARCH_JUMP_LABEL

config SPARC32
	def_bool !64BIT
32 changes: 32 additions & 0 deletions arch/sparc/include/asm/jump_label.h
@@ -0,0 +1,32 @@
#ifndef _ASM_SPARC_JUMP_LABEL_H
#define _ASM_SPARC_JUMP_LABEL_H

#ifdef __KERNEL__

#include <linux/types.h>
#include <asm/system.h>

#define JUMP_LABEL_NOP_SIZE 4

#define JUMP_LABEL(key, label) \
	do { \
		asm goto("1:\n\t" \
			 "nop\n\t" \
			 "nop\n\t" \
			 ".pushsection __jump_table, \"a\"\n\t" \
			 ".word 1b, %l[" #label "], %c0\n\t" \
			 ".popsection \n\t" \
			 : : "i" (key) : : label); \
	} while (0)

#endif /* __KERNEL__ */

typedef u32 jump_label_t;

struct jump_entry {
	jump_label_t code;
	jump_label_t target;
	jump_label_t key;
};

#endif
2 changes: 2 additions & 0 deletions arch/sparc/kernel/Makefile
@@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT) += $(audit--y)

pc--$(CONFIG_PERF_EVENTS) := perf_event.o
obj-$(CONFIG_SPARC64) += $(pc--y)

obj-$(CONFIG_SPARC64) += jump_label.o
47 changes: 47 additions & 0 deletions arch/sparc/kernel/jump_label.c
@@ -0,0 +1,47 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/cpu.h>

#include <linux/jump_label.h>
#include <linux/memory.h>

#ifdef HAVE_JUMP_LABEL

void arch_jump_label_transform(struct jump_entry *entry,
			       enum jump_label_type type)
{
	u32 val;
	u32 *insn = (u32 *) (unsigned long) entry->code;

	if (type == JUMP_LABEL_ENABLE) {
		s32 off = (s32)entry->target - (s32)entry->code;

#ifdef CONFIG_SPARC64
		/* ba,pt %xcc, . + (off << 2) */
		val = 0x10680000 | ((u32) off >> 2);
#else
		/* ba . + (off << 2) */
		val = 0x10800000 | ((u32) off >> 2);
#endif
	} else {
		val = 0x01000000;
	}

	get_online_cpus();
	mutex_lock(&text_mutex);
	*insn = val;
	flushi(insn);
	mutex_unlock(&text_mutex);
	put_online_cpus();
}

void arch_jump_label_text_poke_early(jump_label_t addr)
{
	u32 *insn_p = (u32 *) (unsigned long) addr;

	*insn_p = 0x01000000;
	flushi(insn_p);
}

#endif
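To make the encoding above concrete, here is a small worked example with hypothetical addresses (not taken from this commit):

/* Suppose entry->code == 0x601000 and entry->target == 0x601020.
 * Then off = 0x20 bytes and off >> 2 = 0x8, so enabling the label on
 * sparc64 writes 0x10680000 | 0x8 = 0x10680008 ("ba,pt %xcc, . + 0x20"),
 * and disabling writes back the nop word 0x01000000.
 */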
6 changes: 6 additions & 0 deletions arch/sparc/kernel/module.c
@@ -18,6 +18,9 @@
#include <asm/spitfire.h>

#ifdef CONFIG_SPARC64

#include <linux/jump_label.h>

static void *module_map(unsigned long size)
{
struct vm_struct *area;
@@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
/* make jump label nops */
jump_label_apply_nops(me);

/* Cheetah's I-cache is fully coherent. */
if (tlb_type == spitfire) {
unsigned long va;
1 change: 1 addition & 0 deletions arch/x86/Kconfig
@@ -59,6 +59,7 @@ config X86
	select ANON_INODES
	select HAVE_ARCH_KMEMCHECK
	select HAVE_USER_RETURN_NOTIFIER
	select HAVE_ARCH_JUMP_LABEL

config INSTRUCTION_DECODER
	def_bool (KPROBES || PERF_EVENTS)
11 changes: 11 additions & 0 deletions arch/x86/include/asm/alternative.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/stringify.h>
#include <linux/jump_label.h>
#include <asm/asm.h>

/*
@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define __parainstructions_end NULL
#endif

extern void *text_poke_early(void *addr, const void *opcode, size_t len);

/*
* Clear and restore the kernel write-protection flag on the local CPU.
* Allows the kernel to edit read-only pages.
@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_smp(void *addr, const void *opcode, size_t len);

#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
#define IDEAL_NOP_SIZE_5 5
extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
extern void arch_init_ideal_nop5(void);
#else
static inline void arch_init_ideal_nop5(void) {}
#endif

#endif /* _ASM_X86_ALTERNATIVE_H */
37 changes: 37 additions & 0 deletions arch/x86/include/asm/jump_label.h
@@ -0,0 +1,37 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H

#ifdef __KERNEL__

#include <linux/types.h>
#include <asm/nops.h>

#define JUMP_LABEL_NOP_SIZE 5

# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"

# define JUMP_LABEL(key, label) \
	do { \
		asm goto("1:" \
			JUMP_LABEL_INITIAL_NOP \
			".pushsection __jump_table, \"a\" \n\t" \
			_ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
			".popsection \n\t" \
			: : "i" (key) : : label); \
	} while (0)

#endif /* __KERNEL__ */

#ifdef CONFIG_X86_64
typedef u64 jump_label_t;
#else
typedef u32 jump_label_t;
#endif

struct jump_entry {
	jump_label_t code;
	jump_label_t target;
	jump_label_t key;
};

#endif
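As a usage illustration for the macro above: the key variable and the do_expensive_tracing() helper below are hypothetical, not part of this commit. While the key is disabled, the five bytes emitted at the "1:" site stay the initial nop and the function falls straight through; enabling the key (via the generic jump label code) patches the site into a jump to the out-of-line label.

#include <linux/jump_label.h>			/* provides JUMP_LABEL() */

extern void do_expensive_tracing(void);	/* hypothetical slow path */
static int tracing_enabled;			/* illustrative key */

static void maybe_trace(void)
{
	JUMP_LABEL(&tracing_enabled, do_trace);	/* 5-byte nop unless enabled */
	return;
do_trace:
	do_expensive_tracing();
}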
2 changes: 1 addition & 1 deletion arch/x86/kernel/Makefile
@@ -32,7 +32,7 @@ GCOV_PROFILE_paravirt.o := n
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o
obj-y += setup.o x86_init.o i8259.o irqinit.o
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
68 changes: 66 additions & 2 deletions arch/x86/kernel/alternative.c
@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
static void *text_poke_early(void *addr, const void *opcode, size_t len);
void *text_poke_early(void *addr, const void *opcode, size_t len);

/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
* instructions. And on the local CPU you need to be protected again NMI or MCE
* handlers seeing an inconsistent instruction while you patch.
*/
static void *__init_or_module text_poke_early(void *addr, const void *opcode,
void *__init_or_module text_poke_early(void *addr, const void *opcode,
size_t len)
{
unsigned long flags;
@@ -641,3 +641,67 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
return addr;
}

#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)

unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];

void __init arch_init_ideal_nop5(void)
{
	extern const unsigned char ftrace_test_p6nop[];
	extern const unsigned char ftrace_test_nop5[];
	extern const unsigned char ftrace_test_jmp[];
	int faulted = 0;

	/*
	 * There is no good nop for all x86 archs.
	 * We will default to using the P6_NOP5, but first we
	 * will test to make sure that the nop will actually
	 * work on this CPU. If it faults, we will then
	 * go to a lesser efficient 5 byte nop. If that fails
	 * we then just use a jmp as our nop. This isn't the most
	 * efficient nop, but we can not use a multi part nop
	 * since we would then risk being preempted in the middle
	 * of that nop, and if we enabled tracing then, it might
	 * cause a system crash.
	 *
	 * TODO: check the cpuid to determine the best nop.
	 */
	asm volatile (
		"ftrace_test_jmp:"
		"jmp ftrace_test_p6nop\n"
		"nop\n"
		"nop\n"
		"nop\n" /* 2 byte jmp + 3 bytes */
		"ftrace_test_p6nop:"
		P6_NOP5
		"jmp 1f\n"
		"ftrace_test_nop5:"
		".byte 0x66,0x66,0x66,0x66,0x90\n"
		"1:"
		".section .fixup, \"ax\"\n"
		"2: movl $1, %0\n"
		" jmp ftrace_test_nop5\n"
		"3: movl $2, %0\n"
		" jmp 1b\n"
		".previous\n"
		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
		_ASM_EXTABLE(ftrace_test_nop5, 3b)
		: "=r"(faulted) : "0" (faulted));

	switch (faulted) {
	case 0:
		pr_info("converting mcount calls to 0f 1f 44 00 00\n");
		memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
		break;
	case 1:
		pr_info("converting mcount calls to 66 66 66 66 90\n");
		memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
		break;
	case 2:
		pr_info("converting mcount calls to jmp . + 5\n");
		memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
		break;
	}

}
#endif
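Hoisting this nop selection out of ftrace.c (see the ftrace.c hunk below) makes ideal_nop5 a shared resource: once arch_init_ideal_nop5() has run, any early patching site can stamp the best 5-byte nop over a call or jump. A minimal sketch of such a consumer, where the patch address is purely illustrative:

#include <asm/alternative.h>	/* ideal_nop5, IDEAL_NOP_SIZE_5, text_poke_early() */

/* Sketch: overwrite a hypothetical 5-byte call site with the ideal nop.
 * Assumes CONFIG_DYNAMIC_FTRACE or HAVE_JUMP_LABEL, so ideal_nop5 exists. */
static void __init nop_out_call_site(void *addr)
{
	text_poke_early(addr, ideal_nop5, IDEAL_NOP_SIZE_5);
}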
63 changes: 1 addition & 62 deletions arch/x86/kernel/ftrace.c
@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
return mod_code_status;
}




static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];

static unsigned char *ftrace_nop_replace(void)
{
return ftrace_nop;
return ideal_nop5;
}

static int
@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)

int __init ftrace_dyn_arch_init(void *data)
{
extern const unsigned char ftrace_test_p6nop[];
extern const unsigned char ftrace_test_nop5[];
extern const unsigned char ftrace_test_jmp[];
int faulted = 0;

/*
* There is no good nop for all x86 archs.
* We will default to using the P6_NOP5, but first we
* will test to make sure that the nop will actually
* work on this CPU. If it faults, we will then
* go to a lesser efficient 5 byte nop. If that fails
* we then just use a jmp as our nop. This isn't the most
* efficient nop, but we can not use a multi part nop
* since we would then risk being preempted in the middle
* of that nop, and if we enabled tracing then, it might
* cause a system crash.
*
* TODO: check the cpuid to determine the best nop.
*/
asm volatile (
"ftrace_test_jmp:"
"jmp ftrace_test_p6nop\n"
"nop\n"
"nop\n"
"nop\n" /* 2 byte jmp + 3 bytes */
"ftrace_test_p6nop:"
P6_NOP5
"jmp 1f\n"
"ftrace_test_nop5:"
".byte 0x66,0x66,0x66,0x66,0x90\n"
"1:"
".section .fixup, \"ax\"\n"
"2: movl $1, %0\n"
" jmp ftrace_test_nop5\n"
"3: movl $2, %0\n"
" jmp 1b\n"
".previous\n"
_ASM_EXTABLE(ftrace_test_p6nop, 2b)
_ASM_EXTABLE(ftrace_test_nop5, 3b)
: "=r"(faulted) : "0" (faulted));

switch (faulted) {
case 0:
pr_info("converting mcount calls to 0f 1f 44 00 00\n");
memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
break;
case 1:
pr_info("converting mcount calls to 66 66 66 66 90\n");
memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
break;
case 2:
pr_info("converting mcount calls to jmp . + 5\n");
memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
break;
}

/* The return code is retured via data */
*(unsigned long *)data = 0;

(Diffs for the remaining changed files are not shown here.)
