Skip to content

Commit

Permalink
Merge tag 'perf-core-2023-06-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Browse files Browse the repository at this point in the history

Pull perf events updates from Ingo Molnar:

 - Rework & fix the event forwarding logic by extending the core
   interface.

   This fixes AMD PMU events that have to be forwarded from the
   core PMU to the IBS PMU.

 - Add self-tests to test AMD IBS invocation via core PMU events

 - Clean up Intel FixCntrCtl MSR encoding & handling

* tag 'perf-core-2023-06-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf: Re-instate the linear PMU search
  perf/x86/intel: Define bit macros for FixCntrCtl MSR
  perf test: Add selftest to test IBS invocation via core pmu events
  perf/core: Remove pmu linear searching code
  perf/ibs: Fix interface via core pmu events
  perf/core: Rework forwarding of {task|cpu}-clock events
  • Loading branch information
torvalds committed Jun 27, 2023
2 parents bc6cb4d + 228020b commit a193cc7
Show file tree
Hide file tree
Showing 10 changed files with 174 additions and 73 deletions.
2 changes: 1 addition & 1 deletion arch/x86/events/amd/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ static int amd_pmu_hw_config(struct perf_event *event)

/* pass precise event sampling to ibs: */
if (event->attr.precise_ip && get_ibs_caps())
return -ENOENT;
return forward_event_to_ibs(event);

if (has_branch_stack(event) && !x86_pmu.lbr_nr)
return -EOPNOTSUPP;
Expand Down
53 changes: 26 additions & 27 deletions arch/x86/events/amd/ibs.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ static struct perf_ibs *get_ibs_pmu(int type)
}

/*
* Use IBS for precise event sampling:
* core pmu config -> IBS config
*
* perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
* perf record -a -e r076:p ... # same as -e cpu-cycles:p
Expand All @@ -199,25 +199,9 @@ static struct perf_ibs *get_ibs_pmu(int type)
* IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
* MSRC001_1033) is used to select either cycle or micro-ops counting
* mode.
*
* The rip of IBS samples has skid 0. Thus, IBS supports precise
* levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
* rip is invalid when IBS was not able to record the rip correctly.
* We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
*
*/
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
static int core_pmu_ibs_config(struct perf_event *event, u64 *config)
{
switch (event->attr.precise_ip) {
case 0:
return -ENOENT;
case 1:
case 2:
break;
default:
return -EOPNOTSUPP;
}

switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
switch (event->attr.config) {
Expand All @@ -243,22 +227,37 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
return -EOPNOTSUPP;
}

/*
 * The rip of IBS samples has skid 0. Thus, IBS supports precise
 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
 * rip is invalid when IBS was not able to record the rip correctly.
 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
 */
/*
 * Forward a precise core-PMU event to the IBS op PMU.
 *
 * On success the kernel copy of event->attr is rewritten in place
 * (attr.type becomes the IBS op PMU type, attr.config the translated
 * IBS config) without the user being aware of it.
 *
 * Always returns -ENOENT: the caller's event_init must fail here so
 * that the perf core re-resolves the event against the (possibly
 * rewritten) attr.type and lands on the IBS PMU. -EOPNOTSUPP is
 * returned for precise levels IBS cannot provide (0, or > 2).
 */
int forward_event_to_ibs(struct perf_event *event)
{
u64 config = 0;

/* IBS only supports precise_ip levels 1 and 2 (skid-0 rip). */
if (!event->attr.precise_ip || event->attr.precise_ip > 2)
return -EOPNOTSUPP;

/* Translate the core-PMU config; on success redirect to IBS op. */
if (!core_pmu_ibs_config(event, &config)) {
event->attr.type = perf_ibs_op.pmu.type;
event->attr.config = config;
}
/* Intentional even on success: forces re-lookup with new attr.type. */
return -ENOENT;
}

static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs;
u64 max_cnt, config;
int ret;

perf_ibs = get_ibs_pmu(event->attr.type);
if (perf_ibs) {
config = event->attr.config;
} else {
perf_ibs = &perf_ibs_op;
ret = perf_ibs_precise_event(event, &config);
if (ret)
return ret;
}
if (!perf_ibs)
return -ENOENT;

config = event->attr.config;

if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
Expand Down
18 changes: 9 additions & 9 deletions arch/x86/events/intel/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2461,7 +2461,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event)

intel_clear_masks(event, idx);

mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
mask = intel_fixed_bits_by_idx(idx - INTEL_PMC_IDX_FIXED, INTEL_FIXED_BITS_MASK);
cpuc->fixed_ctrl_val &= ~mask;
}

Expand Down Expand Up @@ -2760,25 +2760,25 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
* if requested:
*/
if (!event->attr.precise_ip)
bits |= 0x8;
bits |= INTEL_FIXED_0_ENABLE_PMI;
if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
bits |= 0x2;
bits |= INTEL_FIXED_0_USER;
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
bits |= 0x1;
bits |= INTEL_FIXED_0_KERNEL;

/*
* ANY bit is supported in v3 and up
*/
if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
bits |= 0x4;
bits |= INTEL_FIXED_0_ANYTHREAD;

idx -= INTEL_PMC_IDX_FIXED;
bits <<= (idx * 4);
mask = 0xfULL << (idx * 4);
bits = intel_fixed_bits_by_idx(idx, bits);
mask = intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK);

if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
bits |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
mask |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
}

cpuc->fixed_ctrl_val &= ~mask;
Expand Down
12 changes: 12 additions & 0 deletions arch/x86/include/asm/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,21 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL

/*
 * Fixed-function counter control (IA32_FIXED_CTR_CTRL MSR): each fixed
 * counter owns a 4-bit control field; the INTEL_FIXED_0_* flags below
 * are the bit meanings within counter 0's field.
 */
#define INTEL_FIXED_BITS_MASK 0xFULL
#define INTEL_FIXED_BITS_STRIDE 4
#define INTEL_FIXED_0_KERNEL (1ULL << 0)
#define INTEL_FIXED_0_USER (1ULL << 1)
#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2)
#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3)

#define HSW_IN_TX (1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
#define ICL_FIXED_0_ADAPTIVE (1ULL << 32)

/* Shift counter-0 control bits into the field for fixed counter _idx. */
#define intel_fixed_bits_by_idx(_idx, _bits) \
((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE))

#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
Expand Down Expand Up @@ -478,8 +488,10 @@ struct pebs_xmm {

#ifdef CONFIG_X86_LOCAL_APIC
extern u32 get_ibs_caps(void);
extern int forward_event_to_ibs(struct perf_event *event);
#else
static inline u32 get_ibs_caps(void) { return 0; }
static inline int forward_event_to_ibs(struct perf_event *event) { return -ENOENT; }
#endif

#ifdef CONFIG_PERF_EVENTS
Expand Down
10 changes: 10 additions & 0 deletions include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ struct perf_event_pmu_context;

struct perf_output_handle;

#define PMU_NULL_DEV ((void *)(~0UL))

/**
* struct pmu - generic performance monitoring unit
*/
Expand Down Expand Up @@ -827,6 +829,14 @@ struct perf_event {
void *security;
#endif
struct list_head sb_list;

/*
* Certain events gets forwarded to another pmu internally by over-
* writing kernel copy of event->attr.type without user being aware
* of it. event->orig_type contains original 'type' requested by
* user.
*/
__u32 orig_type;
#endif /* CONFIG_PERF_EVENTS */
};

Expand Down
77 changes: 41 additions & 36 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -6647,7 +6647,7 @@ static void perf_sigtrap(struct perf_event *event)
return;

send_sig_perf((void __user *)event->pending_addr,
event->attr.type, event->attr.sig_data);
event->orig_type, event->attr.sig_data);
}

/*
Expand Down Expand Up @@ -9951,6 +9951,9 @@ static void sw_perf_event_destroy(struct perf_event *event)
swevent_hlist_put();
}

static struct pmu perf_cpu_clock; /* fwd declaration */
static struct pmu perf_task_clock;

static int perf_swevent_init(struct perf_event *event)
{
u64 event_id = event->attr.config;
Expand All @@ -9966,7 +9969,10 @@ static int perf_swevent_init(struct perf_event *event)

switch (event_id) {
case PERF_COUNT_SW_CPU_CLOCK:
event->attr.type = perf_cpu_clock.type;
return -ENOENT;
case PERF_COUNT_SW_TASK_CLOCK:
event->attr.type = perf_task_clock.type;
return -ENOENT;

default:
Expand Down Expand Up @@ -11098,7 +11104,7 @@ static void cpu_clock_event_read(struct perf_event *event)

static int cpu_clock_event_init(struct perf_event *event)
{
if (event->attr.type != PERF_TYPE_SOFTWARE)
if (event->attr.type != perf_cpu_clock.type)
return -ENOENT;

if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
Expand All @@ -11119,6 +11125,7 @@ static struct pmu perf_cpu_clock = {
.task_ctx_nr = perf_sw_context,

.capabilities = PERF_PMU_CAP_NO_NMI,
.dev = PMU_NULL_DEV,

.event_init = cpu_clock_event_init,
.add = cpu_clock_event_add,
Expand Down Expand Up @@ -11179,7 +11186,7 @@ static void task_clock_event_read(struct perf_event *event)

static int task_clock_event_init(struct perf_event *event)
{
if (event->attr.type != PERF_TYPE_SOFTWARE)
if (event->attr.type != perf_task_clock.type)
return -ENOENT;

if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
Expand All @@ -11200,6 +11207,7 @@ static struct pmu perf_task_clock = {
.task_ctx_nr = perf_sw_context,

.capabilities = PERF_PMU_CAP_NO_NMI,
.dev = PMU_NULL_DEV,

.event_init = task_clock_event_init,
.add = task_clock_event_add,
Expand Down Expand Up @@ -11427,31 +11435,31 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
goto unlock;

pmu->type = -1;
if (!name)
goto skip_type;
if (WARN_ONCE(!name, "Can not register anonymous pmu.\n")) {
ret = -EINVAL;
goto free_pdc;
}

pmu->name = name;

if (type != PERF_TYPE_SOFTWARE) {
if (type >= 0)
max = type;
if (type >= 0)
max = type;

ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
if (ret < 0)
goto free_pdc;
ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
if (ret < 0)
goto free_pdc;

WARN_ON(type >= 0 && ret != type);
WARN_ON(type >= 0 && ret != type);

type = ret;
}
type = ret;
pmu->type = type;

if (pmu_bus_running) {
if (pmu_bus_running && !pmu->dev) {
ret = pmu_dev_alloc(pmu);
if (ret)
goto free_idr;
}

skip_type:
ret = -ENOMEM;
pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context);
if (!pmu->cpu_pmu_context)
Expand Down Expand Up @@ -11493,16 +11501,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
if (!pmu->event_idx)
pmu->event_idx = perf_event_idx_default;

/*
* Ensure the TYPE_SOFTWARE PMUs are at the head of the list,
* since these cannot be in the IDR. This way the linear search
* is fast, provided a valid software event is provided.
*/
if (type == PERF_TYPE_SOFTWARE || !name)
list_add_rcu(&pmu->entry, &pmus);
else
list_add_tail_rcu(&pmu->entry, &pmus);

list_add_rcu(&pmu->entry, &pmus);
atomic_set(&pmu->exclusive_cnt, 0);
ret = 0;
unlock:
Expand All @@ -11511,12 +11510,13 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
return ret;

free_dev:
device_del(pmu->dev);
put_device(pmu->dev);
if (pmu->dev && pmu->dev != PMU_NULL_DEV) {
device_del(pmu->dev);
put_device(pmu->dev);
}

free_idr:
if (pmu->type != PERF_TYPE_SOFTWARE)
idr_remove(&pmu_idr, pmu->type);
idr_remove(&pmu_idr, pmu->type);

free_pdc:
free_percpu(pmu->pmu_disable_count);
Expand All @@ -11537,9 +11537,8 @@ void perf_pmu_unregister(struct pmu *pmu)
synchronize_rcu();

free_percpu(pmu->pmu_disable_count);
if (pmu->type != PERF_TYPE_SOFTWARE)
idr_remove(&pmu_idr, pmu->type);
if (pmu_bus_running) {
idr_remove(&pmu_idr, pmu->type);
if (pmu_bus_running && pmu->dev && pmu->dev != PMU_NULL_DEV) {
if (pmu->nr_addr_filters)
device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
device_del(pmu->dev);
Expand Down Expand Up @@ -11613,6 +11612,12 @@ static struct pmu *perf_init_event(struct perf_event *event)

idx = srcu_read_lock(&pmus_srcu);

/*
* Save original type before calling pmu->event_init() since certain
* pmus overwrites event->attr.type to forward event to another pmu.
*/
event->orig_type = event->attr.type;

/* Try parent's PMU first: */
if (event->parent && event->parent->pmu) {
pmu = event->parent->pmu;
Expand Down Expand Up @@ -13652,8 +13657,8 @@ void __init perf_event_init(void)
perf_event_init_all_cpus();
init_srcu_struct(&pmus_srcu);
perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
perf_pmu_register(&perf_cpu_clock, NULL, -1);
perf_pmu_register(&perf_task_clock, NULL, -1);
perf_pmu_register(&perf_cpu_clock, "cpu_clock", -1);
perf_pmu_register(&perf_task_clock, "task_clock", -1);
perf_tp_register();
perf_event_init_cpu(smp_processor_id());
register_reboot_notifier(&perf_reboot_notifier);
Expand Down Expand Up @@ -13696,7 +13701,7 @@ static int __init perf_event_sysfs_init(void)
goto unlock;

list_for_each_entry(pmu, &pmus, entry) {
if (!pmu->name || pmu->type < 0)
if (pmu->dev)
continue;

ret = pmu_dev_alloc(pmu);
Expand Down
1 change: 1 addition & 0 deletions tools/perf/arch/x86/include/arch-tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest);
int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest);
int test__bp_modify(struct test_suite *test, int subtest);
int test__x86_sample_parsing(struct test_suite *test, int subtest);
int test__amd_ibs_via_core_pmu(struct test_suite *test, int subtest);

extern struct test_suite *arch_tests[];

Expand Down
1 change: 1 addition & 0 deletions tools/perf/arch/x86/tests/Build
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ perf-y += arch-tests.o
perf-y += sample-parsing.o
perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-test.o
perf-$(CONFIG_X86_64) += bp-modify.o
perf-y += amd-ibs-via-core-pmu.o
Loading

0 comments on commit a193cc7

Please sign in to comment.