Merge branches 'perf-fixes-for-linus' and 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  jump label: Add work around to i386 gcc asm goto bug
  x86, ftrace: Use safe noops, drop trap test
  jump_label: Fix unaligned traps on sparc.
  jump label: Make arch_jump_label_text_poke_early() optional
  jump label: Fix error with preempt disable holding mutex
  oprofile: Remove deprecated use of flush_scheduled_work()
  oprofile: Fix the hang while taking the cpu offline
  jump label: Fix deadlock b/w jump_label_mutex vs. text_mutex
  jump label: Fix module __init section race

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86: Check irq_remapped instead of remapping_enabled in destroy_irq()
torvalds committed Oct 30, 2010
3 parents 925d169 + 169ed55 + 7b79462 commit f02a38d
Showing 12 changed files with 153 additions and 82 deletions.
14 changes: 14 additions & 0 deletions arch/Kconfig
@@ -42,6 +42,20 @@ config KPROBES
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".

+config JUMP_LABEL
+	bool "Optimize trace point call sites"
+	depends on HAVE_ARCH_JUMP_LABEL
+	help
+	 If it is detected that the compiler has support for "asm goto",
+	 the kernel will compile trace point locations with just a
+	 nop instruction. When trace points are enabled, the nop will
+	 be converted to a jump to the trace function. This technique
+	 lowers overhead and stress on the branch prediction of the
+	 processor.
+
+	 On i386, options added to the compiler flags may increase
+	 the size of the kernel slightly.
+
config OPTPROBES
def_bool y
depends on KPROBES && HAVE_OPTPROBES
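
The help text above describes the whole trick: a tracepoint site costs one 5-byte nop until a runtime patcher rewrites it into a jump. Below is a minimal userspace sketch of the same asm goto pattern (x86 and gcc >= 4.5 only; illustrative, not kernel code — the real implementation also records each site in the __jump_table section so it can be found and patched later):

#include <stdio.h>

static void trace_hit(void)
{
	puts("tracepoint hit");
}

static void traced_function(void)
{
	/* Fast path: a single P6 5-byte nop; no compare, no branch. */
	asm goto("1: .byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t"
		 : : : : do_trace);
	return;
do_trace:
	/* Reached only after a patcher rewrites the nop into a jump;
	 * never executed as compiled. */
	trace_hit();
}

int main(void)
{
	traced_function();   /* runs only the nop until patched */
	return 0;
}

Disassembling traced_function() shows just the nop on the fast path, which is exactly why this beats a load-compare-branch on the key.
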
1 change: 1 addition & 0 deletions arch/sparc/include/asm/jump_label.h
@@ -13,6 +13,7 @@
"nop\n\t" \
"nop\n\t" \
".pushsection __jump_table, \"a\"\n\t"\
+		".align 4\n\t"			\
".word 1b, %l[" #label "], %c0\n\t" \
".popsection \n\t" \
: : "i" (key) : : label);\
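
For context on the one-line sparc fix above: the patching code walks __jump_table as an array of fixed-size entries, so every entry must start on a word boundary — and unlike x86, sparc traps on misaligned word loads rather than fixing them up. A small sketch (the 32-bit entry layout below is an assumption for illustration, not the kernel's definition):

#include <stdio.h>
#include <stdint.h>

struct jump_entry32 {		/* hypothetical 32-bit entry layout */
	uint32_t code;		/* 1b: address of the nop	    */
	uint32_t target;	/* %l[label]: jump destination	    */
	uint32_t key;		/* %c0: enable/disable key	    */
};

int main(void)
{
	/* Iterating such an array only works if each entry starts on
	 * a 4-byte boundary, which the added ".align 4" guarantees. */
	printf("sizeof = %zu, alignof = %zu\n",
	       sizeof(struct jump_entry32),
	       _Alignof(struct jump_entry32));
	return 0;
}
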
13 changes: 12 additions & 1 deletion arch/x86/Makefile_32.cpu
@@ -51,7 +51,18 @@ cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
# tracer assumptions. For i686, generic, core2 this is set by the
# compiler anyway
-cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args)
+ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
+ADD_ACCUMULATE_OUTGOING_ARGS := y
+endif
+
+# Work around a bug in the first gcc implementations of asm goto that
+# makes gcc mess up the push and pop of the stack in some uses of
+# asm goto.
+ifeq ($(CONFIG_JUMP_LABEL), y)
+ADD_ACCUMULATE_OUTGOING_ARGS := y
+endif
+
+cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)

# Bug fix for binutils: this option is required in order to keep
# binutils from generating NOPL instructions against our will.
69 changes: 15 additions & 54 deletions arch/x86/kernel/alternative.c
@@ -644,65 +644,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)

#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)

-unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+#ifdef CONFIG_X86_64
+unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
+#else
+unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
+#endif

void __init arch_init_ideal_nop5(void)
{
-	extern const unsigned char ftrace_test_p6nop[];
-	extern const unsigned char ftrace_test_nop5[];
-	extern const unsigned char ftrace_test_jmp[];
-	int faulted = 0;
-
	/*
-	 * There is no good nop for all x86 archs.
-	 * We will default to using the P6_NOP5, but first we
-	 * will test to make sure that the nop will actually
-	 * work on this CPU. If it faults, we will then
-	 * go to a lesser efficient 5 byte nop. If that fails
-	 * we then just use a jmp as our nop. This isn't the most
-	 * efficient nop, but we can not use a multi part nop
-	 * since we would then risk being preempted in the middle
-	 * of that nop, and if we enabled tracing then, it might
-	 * cause a system crash.
+	 * There is no good nop for all x86 archs.  This selection
+	 * algorithm should be unified with the one in find_nop_table(),
+	 * but this should be good enough for now.
	 *
-	 * TODO: check the cpuid to determine the best nop.
+	 * For cases other than the ones below, use the safe (as in
+	 * always functional) defaults above.
	 */
-	asm volatile (
-		"ftrace_test_jmp:"
-		"jmp ftrace_test_p6nop\n"
-		"nop\n"
-		"nop\n"
-		"nop\n" /* 2 byte jmp + 3 bytes */
-		"ftrace_test_p6nop:"
-		P6_NOP5
-		"jmp 1f\n"
-		"ftrace_test_nop5:"
-		".byte 0x66,0x66,0x66,0x66,0x90\n"
-		"1:"
-		".section .fixup, \"ax\"\n"
-		"2: movl $1, %0\n"
-		"   jmp ftrace_test_nop5\n"
-		"3: movl $2, %0\n"
-		"   jmp 1b\n"
-		".previous\n"
-		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
-		_ASM_EXTABLE(ftrace_test_nop5, 3b)
-		: "=r"(faulted) : "0" (faulted));
-
-	switch (faulted) {
-	case 0:
-		pr_info("converting mcount calls to 0f 1f 44 00 00\n");
-		memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
-		break;
-	case 1:
-		pr_info("converting mcount calls to 66 66 66 66 90\n");
-		memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
-		break;
-	case 2:
-		pr_info("converting mcount calls to jmp . + 5\n");
-		memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
-		break;
-	}
-
+#ifdef CONFIG_X86_64
+	/* Don't use these on 32 bits due to broken virtualizers */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		memcpy(ideal_nop5, p6_nops[5], 5);
+#endif
}
#endif
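
The rewrite above drops the old runtime trap test in favor of compile-time defaults that are safe everywhere, upgraded only in the one case known to be better. A userspace sketch of the resulting selection logic (the bitness and vendor flags below stand in for the kernel's CONFIG_X86_64 and boot_cpu_data checks):

#include <stdio.h>
#include <string.h>

#define P6_NOP5  "\x0f\x1f\x44\x00\x00"  /* nopl 0x0(%rax,%rax,1) */
#define K8_NOP5  "\x66\x66\x66\x66\x90"  /* osp osp osp osp nop   */
#define GEN_NOP5 "\x3e\x8d\x74\x26\x00"  /* ds lea 0x0(%esi),%esi */

static unsigned char ideal_nop5[5];

static void init_ideal_nop5(int is_64bit, int is_intel)
{
	/* Safe defaults first (mirrors the #ifdef in the patch)... */
	memcpy(ideal_nop5, is_64bit ? K8_NOP5 : GEN_NOP5, 5);
	/* ...then the one known-better case: the P6 nop on 64-bit
	 * Intel.  On 32 bits it is avoided due to broken virtualizers. */
	if (is_64bit && is_intel)
		memcpy(ideal_nop5, P6_NOP5, 5);
}

int main(void)
{
	init_ideal_nop5(1, 1);
	for (int i = 0; i < 5; i++)
		printf("%02x ", ideal_nop5[i]);
	putchar('\n');
	return 0;
}
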
2 changes: 1 addition & 1 deletion arch/x86/kernel/apic/io_apic.c
@@ -3109,7 +3109,7 @@ void destroy_irq(unsigned int irq)

irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);

-	if (intr_remapping_enabled)
+	if (irq_remapped(cfg))
free_irte(irq);
raw_spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq, cfg);
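
The destroy_irq() fix above swaps a global "is remapping enabled" test for a per-IRQ one, so free_irte() only runs for IRQs that actually own a remapping entry. A toy illustration of the bug class (all names below are illustrative, not the kernel's):

#include <stdio.h>
#include <stdbool.h>

struct irq_cfg {
	bool remapped;		/* per-IRQ: was an entry allocated? */
};

static bool remapping_enabled = true;	/* global: HW support present */

static void free_irte(int irq)
{
	printf("freeing remap entry for irq %d\n", irq);
}

static void destroy_irq(int irq, struct irq_cfg *cfg)
{
	/* Buggy: if (remapping_enabled) frees entries that were
	 * never allocated.  Fixed: check this IRQ's own state. */
	if (cfg->remapped)
		free_irte(irq);
}

int main(void)
{
	struct irq_cfg plain = { .remapped = false };
	struct irq_cfg remap = { .remapped = true };

	(void)remapping_enabled;
	destroy_irq(1, &plain);	/* no entry: nothing freed */
	destroy_irq(2, &remap);	/* entry present: freed	   */
	return 0;
}
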
2 changes: 1 addition & 1 deletion drivers/oprofile/buffer_sync.c
@@ -190,7 +190,7 @@ void sync_stop(void)
profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
task_handoff_unregister(&task_free_nb);
mutex_unlock(&buffer_mutex);
-	flush_scheduled_work();
+	flush_cpu_work();

/* make sure we don't leak task structs */
process_task_mortuary();
10 changes: 7 additions & 3 deletions drivers/oprofile/cpu_buffer.c
@@ -111,14 +111,18 @@ void start_cpu_work(void)

void end_cpu_work(void)
{
-	int i;
-
	work_enabled = 0;
+}
+
+void flush_cpu_work(void)
+{
+	int i;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

-		cancel_delayed_work(&b->work);
+		/* these works are per-cpu, no need for flush_sync */
+		flush_delayed_work(&b->work);
}
}

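
Together with the buffer_sync.c change, this is the hang fix: end_cpu_work() no longer cancels the per-CPU work, and the new flush_cpu_work() waits for it to finish before the buffers go away. A toy pthread model of why flushing (waiting) differs from cancelling (an analogy only, not the workqueue API):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_t worker;
static int scheduled;

static void *work_fn(void *arg)
{
	(void)arg;
	sleep(1);		/* the work is already in flight */
	puts("work finished");
	return NULL;
}

static void schedule_work(void)
{
	pthread_create(&worker, NULL, work_fn, NULL);
	scheduled = 1;
}

static void cancel_work(void)
{
	/* a cancel only stops work that has not started yet; this
	 * one is already running, so there is nothing to do here */
}

static void flush_work(void)
{
	if (scheduled)
		pthread_join(worker, NULL);	/* wait for completion */
}

int main(void)
{
	schedule_work();
	cancel_work();	/* work may still be running after this */
	flush_work();	/* guaranteed finished after this	 */
	puts("now it is safe to free the buffers");
	return 0;
}
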
1 change: 1 addition & 0 deletions drivers/oprofile/cpu_buffer.h
@@ -25,6 +25,7 @@ void free_cpu_buffers(void);

void start_cpu_work(void);
void end_cpu_work(void);
+void flush_cpu_work(void);

/* CPU buffer is composed of such entries (which are
* also used for context switch notes)
13 changes: 13 additions & 0 deletions drivers/oprofile/timer_int.c
@@ -21,6 +21,7 @@
#include "oprof.h"

static DEFINE_PER_CPU(struct hrtimer, oprofile_hrtimer);
+static int ctr_running;

static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer *hrtimer)
{
@@ -33,6 +34,9 @@ static void __oprofile_hrtimer_start(void *unused)
{
struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer);

+	if (!ctr_running)
+		return;
+
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = oprofile_hrtimer_notify;

@@ -42,23 +46,32 @@ static void __oprofile_hrtimer_start(void *unused)

static int oprofile_hrtimer_start(void)
{
+	get_online_cpus();
+	ctr_running = 1;
on_each_cpu(__oprofile_hrtimer_start, NULL, 1);
+	put_online_cpus();
return 0;
}

static void __oprofile_hrtimer_stop(int cpu)
{
struct hrtimer *hrtimer = &per_cpu(oprofile_hrtimer, cpu);

+	if (!ctr_running)
+		return;
+
hrtimer_cancel(hrtimer);
}

static void oprofile_hrtimer_stop(void)
{
int cpu;

+	get_online_cpus();
for_each_online_cpu(cpu)
__oprofile_hrtimer_stop(cpu);
+	ctr_running = 0;
+	put_online_cpus();
}

static int __cpuinit oprofile_cpu_notify(struct notifier_block *self,
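
The timer_int.c changes pair a ctr_running flag with the CPU-hotplug lock: per-CPU start/stop callbacks that hotplug events replay do nothing unless profiling is actually active, and get_online_cpus()/put_online_cpus() keep CPUs from appearing or vanishing mid-update. A toy model of the pattern (names and the two-CPU loop are illustrative):

#include <stdio.h>
#include <stdbool.h>

static bool ctr_running;

static void __start_on_cpu(int cpu)
{
	if (!ctr_running)	/* hotplug may call this while stopped */
		return;
	printf("cpu %d: timer started\n", cpu);
}

static void __stop_on_cpu(int cpu)
{
	if (!ctr_running)	/* never cancel a timer that never ran */
		return;
	printf("cpu %d: timer cancelled\n", cpu);
}

static void start_all(void)
{
	/* the kernel brackets this with get_online_cpus() /
	 * put_online_cpus() so no CPU comes or goes in between */
	ctr_running = true;
	for (int cpu = 0; cpu < 2; cpu++)
		__start_on_cpu(cpu);
}

static void stop_all(void)
{
	for (int cpu = 0; cpu < 2; cpu++)
		__stop_on_cpu(cpu);
	ctr_running = false;
}

int main(void)
{
	__start_on_cpu(0);	/* simulated hotplug while stopped: no-op */
	start_all();
	stop_all();
	return 0;
}
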
7 changes: 6 additions & 1 deletion include/linux/jump_label.h
@@ -1,7 +1,7 @@
#ifndef _LINUX_JUMP_LABEL_H
#define _LINUX_JUMP_LABEL_H

-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL)
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
# include <asm/jump_label.h>
# define HAVE_JUMP_LABEL
#endif
@@ -18,6 +18,8 @@ struct module;
extern struct jump_entry __start___jump_table[];
extern struct jump_entry __stop___jump_table[];

+extern void jump_label_lock(void);
+extern void jump_label_unlock(void);
extern void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type);
extern void arch_jump_label_text_poke_early(jump_label_t addr);
@@ -59,6 +61,9 @@ static inline int jump_label_text_reserved(void *start, void *end)
return 0;
}

+static inline void jump_label_lock(void) {}
+static inline void jump_label_unlock(void) {}
+
#endif

#define COND_STMT(key, stmt) \
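
The header change uses a common kernel idiom worth noting: declare the real functions when the feature is compiled in, and provide empty static inlines otherwise, so call sites need no #ifdefs and the disabled case compiles to nothing. A standalone sketch (the feature name is made up):

#include <stdio.h>

#define HAVE_FEATURE 0	/* flip to 1 when real definitions exist */

#if HAVE_FEATURE
extern void feature_lock(void);
extern void feature_unlock(void);
#else
/* disabled case: empty inline stubs, optimized away entirely */
static inline void feature_lock(void)   {}
static inline void feature_unlock(void) {}
#endif

int main(void)
{
	feature_lock();		/* costs nothing when disabled */
	puts("critical section");
	feature_unlock();
	return 0;
}
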