Merge tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:

 - Optimize perf_sample_data layout

 - Prepare sample data handling for BPF integration

 - Update the x86 PMU driver for Intel Meteor Lake

 - Restructure the x86 uncore code to fix a SPR (Sapphire Rapids)
   discovery breakage

 - Fix the x86 Zhaoxin PMU driver

 - Cleanups

* tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
  perf/x86/intel/uncore: Add Meteor Lake support
  x86/perf/zhaoxin: Add stepping check for ZXC
  perf/x86/intel/ds: Fix the conversion from TSC to perf time
  perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
  perf/x86/uncore: Add a quirk for UPI on SPR
  perf/x86/uncore: Ignore broken units in discovery table
  perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
  perf/x86/uncore: Factor out uncore_device_to_die()
  perf/core: Call perf_prepare_sample() before running BPF
  perf/core: Introduce perf_prepare_header()
  perf/core: Do not pass header for sample ID init
  perf/core: Set data->sample_flags in perf_prepare_sample()
  perf/core: Add perf_sample_save_brstack() helper
  perf/core: Add perf_sample_save_raw_data() helper
  perf/core: Add perf_sample_save_callchain() helper
  perf/core: Save the dynamic parts of sample data size
  x86/kprobes: Use switch-case for 0xFF opcodes in prepare_emulation
  perf/core: Change the layout of perf_sample_data
  perf/x86/msr: Add Meteor Lake support
  perf/x86/cstate: Add Meteor Lake support
  ...
torvalds committed Feb 21, 2023
2 parents 6e649d0 + c828441 commit a2f0e7e
Showing 25 changed files with 953 additions and 317 deletions.
3 changes: 1 addition & 2 deletions arch/powerpc/perf/core-book3s.c
@@ -2313,8 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
power_pmu_bhrb_read(event, cpuhw);
data.br_stack = &cpuhw->bhrb_stack;
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
}

if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
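The two assignments dropped above collapse into the new helper. A minimal sketch of perf_sample_save_brstack(), assuming it also folds the branch-stack bytes into the dynamic sample size introduced elsewhere in this series (the dyn_size field and the hw-index handling are assumptions, not quoted from the tree):

#include <linux/perf_event.h>

/* Sketch only -- not the exact kernel definition. */
static inline void perf_sample_save_brstack(struct perf_sample_data *data,
                                            struct perf_event *event,
                                            struct perf_branch_stack *brs)
{
        int size = sizeof(u64);                         /* the 'nr' word */

        if (branch_sample_hw_index(event))              /* optional hw_idx word */
                size += sizeof(u64);
        size += brs->nr * sizeof(struct perf_branch_entry);

        data->br_stack = brs;
        data->dyn_size += size;                         /* assumed bookkeeping */
        data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}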
4 changes: 1 addition & 3 deletions arch/s390/kernel/perf_cpum_cf.c
@@ -662,9 +662,7 @@ static int cfdiag_push_sample(struct perf_event *event,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = cpuhw->usedss;
raw.frag.data = cpuhw->stop;
raw.size = raw.frag.size;
data.raw = &raw;
data.sample_flags |= PERF_SAMPLE_RAW;
perf_sample_save_raw_data(&data, &raw);
}

overflow = perf_event_overflow(event, &data, &regs);
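The same consolidation happens for raw samples here and in the two PAI drivers below: sizing, padding, and flag setting move behind one call. A hedged sketch of perf_sample_save_raw_data(), assuming it walks the fragment list and pads the total to a u64 boundary (the padding math and the dyn_size accounting are assumptions):

#include <linux/perf_event.h>

/* Sketch only -- fragment walk and padding are inferred, not quoted. */
static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
                                             struct perf_raw_record *raw)
{
        struct perf_raw_frag *frag = &raw->frag;
        u32 sum = 0;
        int size;

        do {                                    /* total payload over all fragments */
                sum += frag->size;
                if (perf_raw_frag_last(frag))
                        break;
                frag = frag->next;
        } while (1);

        size = round_up(sum + sizeof(u32), sizeof(u64)); /* u32 size word + pad to u64 */
        raw->size = size - sizeof(u32);
        frag->pad = raw->size - sum;

        data->raw = raw;
        data->dyn_size += size;
        data->sample_flags |= PERF_SAMPLE_RAW;
}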
3 changes: 2 additions & 1 deletion arch/s390/kernel/perf_cpum_sf.c
@@ -672,7 +672,8 @@ static void cpumsf_output_event_pid(struct perf_event *event,
/* Protect callchain buffers, tasks */
rcu_read_lock();

perf_prepare_sample(&header, data, event, regs);
perf_prepare_sample(data, event, regs);
perf_prepare_header(&header, data, event, regs);
if (perf_output_begin(&handle, data, event, header.size))
goto out;

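The header argument disappears from perf_prepare_sample(); sizing the record becomes a separate perf_prepare_header() step, so the sample fields can be fully materialized before any consumer (for example a BPF overflow handler) inspects them. A condensed sketch of the resulting output path, mirroring the hunk above (the wrapper function name is invented for illustration):

#include <linux/perf_event.h>

/* Sketch of the two-step convention; example_output_one_sample() is hypothetical. */
static void example_output_one_sample(struct perf_event *event,
                                      struct perf_sample_data *data,
                                      struct pt_regs *regs)
{
        struct perf_output_handle handle;
        struct perf_event_header header;

        perf_prepare_sample(data, event, regs);          /* fill sample fields */
        perf_prepare_header(&header, data, event, regs); /* size + type the record */

        if (perf_output_begin(&handle, data, event, header.size))
                return;

        perf_output_sample(&handle, &header, data, event);
        perf_output_end(&handle);
}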
4 changes: 1 addition & 3 deletions arch/s390/kernel/perf_pai_crypto.c
@@ -362,9 +362,7 @@ static int paicrypt_push_sample(void)
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
raw.size = raw.frag.size;
data.raw = &raw;
data.sample_flags |= PERF_SAMPLE_RAW;
perf_sample_save_raw_data(&data, &raw);
}

overflow = perf_event_overflow(event, &data, &regs);
4 changes: 1 addition & 3 deletions arch/s390/kernel/perf_pai_ext.c
@@ -451,9 +451,7 @@ static int paiext_push_sample(void)
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
raw.size = raw.frag.size;
data.raw = &raw;
data.sample_flags |= PERF_SAMPLE_RAW;
perf_sample_save_raw_data(&data, &raw);
}

overflow = perf_event_overflow(event, &data, &regs);
6 changes: 2 additions & 4 deletions arch/x86/events/amd/core.c
@@ -928,10 +928,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!x86_perf_event_set_period(event))
continue;

if (has_branch_stack(event)) {
data.br_stack = &cpuc->lbr_stack;
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
if (has_branch_stack(event))
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);

if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
9 changes: 3 additions & 6 deletions arch/x86/events/amd/ibs.c
@@ -1110,8 +1110,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
.data = ibs_data.data,
},
};
data.raw = &raw;
data.sample_flags |= PERF_SAMPLE_RAW;
perf_sample_save_raw_data(&data, &raw);
}

if (perf_ibs == &perf_ibs_op)
@@ -1122,10 +1121,8 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
* recorded as part of interrupt regs. Thus we need to use rip from
* interrupt regs while unwinding call stack.
*/
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
data.callchain = perf_callchain(event, iregs);
data.sample_flags |= PERF_SAMPLE_CALLCHAIN;
}
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
perf_sample_save_callchain(&data, event, iregs);

throttle = perf_event_overflow(event, &data, &regs);
out:
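As with the branch-stack and raw-data cases, the open-coded callchain capture above becomes a helper. A minimal sketch of perf_sample_save_callchain(), assuming it charges one u64 per entry plus the count word to the dynamic sample size (again, dyn_size is an assumption about the bookkeeping):

#include <linux/perf_event.h>

/* Sketch only -- not the exact kernel definition. */
static inline void perf_sample_save_callchain(struct perf_sample_data *data,
                                              struct perf_event *event,
                                              struct pt_regs *regs)
{
        int size = 1;                           /* the 'nr' word */

        data->callchain = perf_callchain(event, regs);
        size += data->callchain->nr;

        data->dyn_size += size * sizeof(u64);   /* assumed bookkeeping */
        data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
}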
199 changes: 181 additions & 18 deletions arch/x86/events/intel/core.c
@@ -2119,6 +2119,16 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};

static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff3ffffffffffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0),
INTEL_UEVENT_EXTRA_REG(0x0127, MSR_SNOOP_RSP_0, 0xffffffffffffffffull, SNOOP_0),
INTEL_UEVENT_EXTRA_REG(0x0227, MSR_SNOOP_RSP_1, 0xffffffffffffffffull, SNOOP_1),
EVENT_EXTRA_END
};

#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -3026,10 +3036,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)

perf_sample_data_init(&data, 0, event->hw.last_period);

if (has_branch_stack(event)) {
data.br_stack = &cpuc->lbr_stack;
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
if (has_branch_stack(event))
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);

if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -4182,6 +4190,12 @@ static int hsw_hw_config(struct perf_event *event)
static struct event_constraint counter0_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);

static struct event_constraint counter1_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x2);

static struct event_constraint counter0_1_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x3);

static struct event_constraint counter2_constraint =
EVENT_CONSTRAINT(0, 0x4, 0);

@@ -4191,6 +4205,12 @@ static struct event_constraint fixed0_constraint =
static struct event_constraint fixed0_counter0_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);

static struct event_constraint fixed0_counter0_1_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000003ULL);

static struct event_constraint counters_1_7_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0xfeULL);

static struct event_constraint *
hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@@ -4322,6 +4342,78 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return &emptyconstraint;
}

static struct event_constraint *
cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct event_constraint *c;

c = intel_get_event_constraints(cpuc, idx, event);

/*
* The :ppp indicates the Precise Distribution (PDist) facility, which
* is only supported on the GP counter 0 & 1 and Fixed counter 0.
* If a :ppp event which is not available on the above eligible counters,
* error out.
*/
if (event->attr.precise_ip == 3) {
/* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
if (constraint_match(&fixed0_constraint, event->hw.config))
return &fixed0_counter0_1_constraint;

switch (c->idxmsk64 & 0x3ull) {
case 0x1:
return &counter0_constraint;
case 0x2:
return &counter1_constraint;
case 0x3:
return &counter0_1_constraint;
}
return &emptyconstraint;
}

return c;
}

static struct event_constraint *
rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct event_constraint *c;

c = spr_get_event_constraints(cpuc, idx, event);

/* The Retire Latency is not supported by the fixed counter 0. */
if (event->attr.precise_ip &&
(event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
constraint_match(&fixed0_constraint, event->hw.config)) {
/*
* The Instruction PDIR is only available
* on the fixed counter 0. Error out for this case.
*/
if (event->attr.precise_ip == 3)
return &emptyconstraint;
return &counters_1_7_constraint;
}

return c;
}

static struct event_constraint *
mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);

if (pmu->cpu_type == hybrid_big)
return rwc_get_event_constraints(cpuc, idx, event);
if (pmu->cpu_type == hybrid_small)
return cmt_get_event_constraints(cpuc, idx, event);

WARN_ON(1);
return &emptyconstraint;
}

static int adl_hw_config(struct perf_event *event)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
@@ -4494,6 +4586,25 @@ static void flip_smm_bit(void *data)
}
}

static void intel_pmu_check_num_counters(int *num_counters,
int *num_counters_fixed,
u64 *intel_ctrl, u64 fixed_mask);

static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
{
unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF);
unsigned int eax, ebx, ecx, edx;

if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
&eax, &ebx, &ecx, &edx);
pmu->num_counters = fls(eax);
pmu->num_counters_fixed = fls(ebx);
intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
&pmu->intel_ctrl, ebx);
}
}

static bool init_hybrid_pmu(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
@@ -4519,6 +4630,9 @@ static bool init_hybrid_pmu(int cpu)
if (!cpumask_empty(&pmu->supported_cpus))
goto end;

if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
update_pmu_cap(pmu);

if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
return false;

@@ -5463,6 +5577,12 @@ static struct attribute *adl_hybrid_mem_attrs[] = {
NULL,
};

static struct attribute *mtl_hybrid_mem_attrs[] = {
EVENT_PTR(mem_ld_adl),
EVENT_PTR(mem_st_adl),
NULL
};

EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
@@ -5490,20 +5610,40 @@ FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small);
FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small);
FORMAT_ATTR_HYBRID(frontend, hybrid_big);

#define ADL_HYBRID_RTM_FORMAT_ATTR \
FORMAT_HYBRID_PTR(in_tx), \
FORMAT_HYBRID_PTR(in_tx_cp)

#define ADL_HYBRID_FORMAT_ATTR \
FORMAT_HYBRID_PTR(offcore_rsp), \
FORMAT_HYBRID_PTR(ldlat), \
FORMAT_HYBRID_PTR(frontend)

static struct attribute *adl_hybrid_extra_attr_rtm[] = {
FORMAT_HYBRID_PTR(in_tx),
FORMAT_HYBRID_PTR(in_tx_cp),
FORMAT_HYBRID_PTR(offcore_rsp),
FORMAT_HYBRID_PTR(ldlat),
FORMAT_HYBRID_PTR(frontend),
NULL,
ADL_HYBRID_RTM_FORMAT_ATTR,
ADL_HYBRID_FORMAT_ATTR,
NULL
};

static struct attribute *adl_hybrid_extra_attr[] = {
FORMAT_HYBRID_PTR(offcore_rsp),
FORMAT_HYBRID_PTR(ldlat),
FORMAT_HYBRID_PTR(frontend),
NULL,
ADL_HYBRID_FORMAT_ATTR,
NULL
};

PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);

static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
ADL_HYBRID_RTM_FORMAT_ATTR,
ADL_HYBRID_FORMAT_ATTR,
FORMAT_HYBRID_PTR(snoop_rsp),
NULL
};

static struct attribute *mtl_hybrid_extra_attr[] = {
ADL_HYBRID_FORMAT_ATTR,
FORMAT_HYBRID_PTR(snoop_rsp),
NULL
};

static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr)
@@ -5725,6 +5865,12 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask)
}
}

static __always_inline bool is_mtl(u8 x86_model)
{
return (x86_model == INTEL_FAM6_METEORLAKE) ||
(x86_model == INTEL_FAM6_METEORLAKE_L);
}

__init int intel_pmu_init(void)
{
struct attribute **extra_skl_attr = &empty_attrs;
@@ -6382,6 +6528,8 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_RAPTORLAKE:
case INTEL_FAM6_RAPTORLAKE_P:
case INTEL_FAM6_RAPTORLAKE_S:
case INTEL_FAM6_METEORLAKE:
case INTEL_FAM6_METEORLAKE_L:
/*
* Alder Lake has 2 types of CPU, core and atom.
*
@@ -6401,9 +6549,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_adl();
x86_pmu.pebs_latency_data = adl_latency_data_small;
x86_pmu.num_topdown_events = 8;
static_call_update(intel_pmu_update_topdown_event,
@@ -6490,8 +6636,22 @@ __init int intel_pmu_init(void)
pmu->event_constraints = intel_slm_event_constraints;
pmu->pebs_constraints = intel_grt_pebs_event_constraints;
pmu->extra_regs = intel_grt_extra_regs;
pr_cont("Alderlake Hybrid events, ");
name = "alderlake_hybrid";
if (is_mtl(boot_cpu_data.x86_model)) {
x86_pmu.pebs_latency_data = mtl_latency_data_small;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
mem_attr = mtl_hybrid_mem_attrs;
intel_pmu_pebs_data_source_mtl();
x86_pmu.get_event_constraints = mtl_get_event_constraints;
pmu->extra_regs = intel_cmt_extra_regs;
pr_cont("Meteorlake Hybrid events, ");
name = "meteorlake_hybrid";
} else {
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
intel_pmu_pebs_data_source_adl();
pr_cont("Alderlake Hybrid events, ");
name = "alderlake_hybrid";
}
break;

default:
@@ -6606,6 +6766,9 @@ __init int intel_pmu_init(void)
if (is_hybrid())
intel_pmu_check_hybrid_pmus((u64)fixed_mask);

if (x86_pmu.intel_cap.pebs_timing_info)
x86_pmu.flags |= PMU_FL_RETIRE_LATENCY;

intel_aux_output_init();

return 0;
