forked from openbmc/linux
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/…
…linux/kernel/git/tip/tip Pull perf updates from Ingo Molnar: "This tree includes some late late perf items that missed the first round: tools: - Bash auto completion improvements, now we can auto complete the tools long options, tracepoint event names, etc, from Namhyung Kim. - Look up thread using tid instead of pid in 'perf sched'. - Move global variables into a perf_kvm struct, from David Ahern. - Hists refactorings, preparatory for improved 'diff' command, from Jiri Olsa. - Hists refactorings, preparatory for event group viewing work, from Namhyung Kim. - Remove double negation on optional feature macro definitions, from Namhyung Kim. - Remove several cases of needless global variables, on most builtins. - misc fixes kernel: - sysfs support for IBS on AMD CPUs, from Robert Richter. - Support for an upcoming Intel CPU, the Xeon-Phi / Knights Corner HPC blade PMU, from Vince Weaver. - misc fixes" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (46 commits) perf: Fix perf_cgroup_switch for sw-events perf: Clarify perf_cpu_context::active_pmu usage by renaming it to ::unique_pmu perf/AMD/IBS: Add sysfs support perf hists: Add more helpers for hist entry stat perf hists: Move he->stat.nr_events initialization to a template perf hists: Introduce struct he_stat perf diff: Removing the total_period argument from output code perf tool: Add hpp interface to enable/disable hpp column perf tools: Removing hists pair argument from output path perf hists: Separate overhead and baseline columns perf diff: Refactor diff displacement possition info perf hists: Add struct hists pointer to struct hist_entry perf tools: Complete tracepoint event names perf/x86: Add support for Intel Xeon-Phi Knights Corner PMU perf evlist: Remove some unused methods perf evlist: Introduce add_newtp method perf kvm: Move global variables into a perf_kvm struct perf tools: Convert to BACKTRACE_SUPPORT perf tools: Long option completion support for each 
subcommands perf tools: Complete long option names of perf command ...
- Loading branch information
Showing
55 changed files
with
1,517 additions
and
1,198 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,248 @@ | ||
/* Driver for Intel Xeon Phi "Knights Corner" PMU */ | ||
|
||
#include <linux/perf_event.h> | ||
#include <linux/types.h> | ||
|
||
#include "perf_event.h" | ||
|
||
static const u64 knc_perfmon_event_map[] = | ||
{ | ||
[PERF_COUNT_HW_CPU_CYCLES] = 0x002a, | ||
[PERF_COUNT_HW_INSTRUCTIONS] = 0x0016, | ||
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028, | ||
[PERF_COUNT_HW_CACHE_MISSES] = 0x0029, | ||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012, | ||
[PERF_COUNT_HW_BRANCH_MISSES] = 0x002b, | ||
}; | ||
|
||
/*
 * KNC encodings for the generic cache events, indexed by
 * [cache][operation][result]. knc_pmu_init() copies this table into
 * hw_cache_event_ids.
 *
 * Entries are either an event code, 0, or -1.
 * NOTE(review): 0 and -1 are presumably the perf core's "no event" and
 * "invalid combination" markers - confirm against the x86 perf core.
 */
static __initconst u64 knc_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			/*
			 * On Xeon Phi event "0" is a valid DATA_READ
			 * (L1 Data Cache Reads) instruction.  It is coded as
			 * ARCH_PERFMON_EVENTSEL_INT because that bit will
			 * always be set in x86_pmu_hw_config().
			 */
			[C(RESULT_ACCESS)] = ARCH_PERFMON_EVENTSEL_INT,	/* DATA_READ */
			[C(RESULT_MISS)]   = 0x0003,	/* DATA_READ_MISS */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x0001,	/* DATA_WRITE */
			[C(RESULT_MISS)]   = 0x0004,	/* DATA_WRITE_MISS */
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x0011,	/* L1_DATA_PF1 */
			[C(RESULT_MISS)]   = 0x001c,	/* L1_DATA_PF1_MISS */
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x000c,	/* CODE_READ */
			[C(RESULT_MISS)]   = 0x000e,	/* CODE_CACHE_MISS */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)]   = -1,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x0,
			[C(RESULT_MISS)]   = 0x0,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0,
			[C(RESULT_MISS)]   = 0x10cb,	/* L2_READ_MISS */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x10cc,	/* L2_WRITE_HIT */
			[C(RESULT_MISS)]   = 0,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x10fc,	/* L2_DATA_PF2 */
			[C(RESULT_MISS)]   = 0x10fe,	/* L2_DATA_PF2_MISS */
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			/* DATA_READ; same encoding trick as the L1D OP_READ entry */
			[C(RESULT_ACCESS)] = ARCH_PERFMON_EVENTSEL_INT,
			[C(RESULT_MISS)]   = 0x0002,	/* DATA_PAGE_WALK */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x0001,	/* DATA_WRITE */
			[C(RESULT_MISS)]   = 0x0002,	/* DATA_PAGE_WALK */
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x0,
			[C(RESULT_MISS)]   = 0x0,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x000c,	/* CODE_READ */
			[C(RESULT_MISS)]   = 0x000d,	/* CODE_PAGE_WALK */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)]   = -1,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)]   = -1,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x0012,	/* BRANCHES */
			[C(RESULT_MISS)]   = 0x002b,	/* BRANCHES_MISPREDICTED */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)]   = -1,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)]   = -1,
		},
	},
};
|
||
|
||
static u64 knc_pmu_event_map(int hw_event) | ||
{ | ||
return knc_perfmon_event_map[hw_event]; | ||
} | ||
|
||
static struct event_constraint knc_event_constraints[] = | ||
{ | ||
INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */ | ||
INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */ | ||
INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */ | ||
INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */ | ||
INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */ | ||
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */ | ||
INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */ | ||
INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */ | ||
INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */ | ||
INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */ | ||
INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */ | ||
INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */ | ||
INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */ | ||
INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */ | ||
INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */ | ||
INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */ | ||
INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */ | ||
INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */ | ||
INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */ | ||
INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */ | ||
INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */ | ||
EVENT_CONSTRAINT_END | ||
}; | ||
|
||
/* KNC-specific MSR numbers for the global PMU status/control registers. */
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x0000002d
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x0000002e
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x0000002f

/*
 * Bits of MSR_KNC_IA32_PERF_GLOBAL_CTRL toggled by knc_pmu_enable_all()
 * and knc_pmu_disable_all(), one per counter.
 */
#define KNC_ENABLE_COUNTER0			0x00000001
#define KNC_ENABLE_COUNTER1			0x00000002
|
||
static void knc_pmu_disable_all(void) | ||
{ | ||
u64 val; | ||
|
||
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); | ||
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
} | ||
|
||
static void knc_pmu_enable_all(int added) | ||
{ | ||
u64 val; | ||
|
||
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); | ||
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
} | ||
|
||
static inline void | ||
knc_pmu_disable_event(struct perf_event *event) | ||
{ | ||
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
struct hw_perf_event *hwc = &event->hw; | ||
u64 val; | ||
|
||
val = hwc->config; | ||
if (cpuc->enabled) | ||
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | ||
|
||
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | ||
} | ||
|
||
static void knc_pmu_enable_event(struct perf_event *event) | ||
{ | ||
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
struct hw_perf_event *hwc = &event->hw; | ||
u64 val; | ||
|
||
val = hwc->config; | ||
if (cpuc->enabled) | ||
val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
|
||
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | ||
} | ||
|
||
/*
 * Format attributes: each declaration names a field and gives, in its
 * string, the bit range of the event config that the field occupies.
 * They are collected in intel_knc_formats_attr[].
 */
PMU_FORMAT_ATTR(event,	"config:0-7"	);
PMU_FORMAT_ATTR(umask,	"config:8-15"	);
PMU_FORMAT_ATTR(edge,	"config:18"	);
PMU_FORMAT_ATTR(inv,	"config:23"	);
PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
|
||
static struct attribute *intel_knc_formats_attr[] = { | ||
&format_attr_event.attr, | ||
&format_attr_umask.attr, | ||
&format_attr_edge.attr, | ||
&format_attr_inv.attr, | ||
&format_attr_cmask.attr, | ||
NULL, | ||
}; | ||
|
||
static __initconst struct x86_pmu knc_pmu = { | ||
.name = "knc", | ||
.handle_irq = x86_pmu_handle_irq, | ||
.disable_all = knc_pmu_disable_all, | ||
.enable_all = knc_pmu_enable_all, | ||
.enable = knc_pmu_enable_event, | ||
.disable = knc_pmu_disable_event, | ||
.hw_config = x86_pmu_hw_config, | ||
.schedule_events = x86_schedule_events, | ||
.eventsel = MSR_KNC_EVNTSEL0, | ||
.perfctr = MSR_KNC_PERFCTR0, | ||
.event_map = knc_pmu_event_map, | ||
.max_events = ARRAY_SIZE(knc_perfmon_event_map), | ||
.apic = 1, | ||
.max_period = (1ULL << 31) - 1, | ||
.version = 0, | ||
.num_counters = 2, | ||
/* in theory 40 bits, early silicon is buggy though */ | ||
.cntval_bits = 32, | ||
.cntval_mask = (1ULL << 32) - 1, | ||
.get_event_constraints = x86_get_event_constraints, | ||
.event_constraints = knc_event_constraints, | ||
.format_attrs = intel_knc_formats_attr, | ||
}; | ||
|
||
__init int knc_pmu_init(void) | ||
{ | ||
x86_pmu = knc_pmu; | ||
|
||
memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, | ||
sizeof(hw_cache_event_ids)); | ||
|
||
return 0; | ||
} |
Oops, something went wrong.