
Commit 16b0bde

atishp04 authored and avpatel committed
RISC-V: KVM: Add perf sampling support for guests
KVM enables perf for a guest via counter virtualization. However, sampling cannot be supported yet, as there is no mechanism to trap/emulate scountovf in the ISA. Rely on the SBI PMU snapshot to provide the counter overflow data via the shared memory.

In case of a sampling event, the host first sets the guest's LCOFI interrupt and injects it into the guest via the irq filtering mechanism defined in the AIA specification. Thus, ssaia must be enabled in the host in order to use perf sampling in the guest. No other AIA dependency w.r.t. the kernel is required.

Reviewed-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Atish Patra <atishp@rivosinc.com>
Link: https://lore.kernel.org/r/20240420151741.962500-15-atishp@rivosinc.com
Signed-off-by: Anup Patel <anup@brainfault.org>
1 parent c2f41dd commit 16b0bde
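
Since this commit exposes a new user-selectable extension ID (KVM_RISCV_ISA_EXT_SSCOFPMF, see the uapi hunk below), a hedged VMM-side sketch of opting in may help. It uses KVM's existing ONE_REG interface; enable_sscofpmf() is an illustrative helper name, and it assumes a RISC-V host built with this series and a vcpu_fd obtained from KVM_CREATE_VCPU.

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hedged sketch, not part of this commit: enable Sscofpmf for one vCPU. */
    static int enable_sscofpmf(int vcpu_fd)
    {
            unsigned long enabled = 1;
            struct kvm_one_reg reg = {
                    .id = KVM_REG_RISCV | KVM_REG_SIZE_ULONG |
                          KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE |
                          KVM_RISCV_ISA_EXT_SSCOFPMF,
                    .addr = (unsigned long)&enabled,
            };

            /* Fails with EINVAL when the host lacks Ssaia (see the vcpu_onereg.c hunk) */
            return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
    }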

7 files changed: +93 −8 lines changed

arch/riscv/include/asm/csr.h

Lines changed: 2 additions & 1 deletion
@@ -168,7 +168,8 @@
 #define VSIP_TO_HVIP_SHIFT      (IRQ_VS_SOFT - IRQ_S_SOFT)
 #define VSIP_VALID_MASK         ((_AC(1, UL) << IRQ_S_SOFT) | \
                                  (_AC(1, UL) << IRQ_S_TIMER) | \
-                                 (_AC(1, UL) << IRQ_S_EXT))
+                                 (_AC(1, UL) << IRQ_S_EXT) | \
+                                 (_AC(1, UL) << IRQ_PMU_OVF))
 
 /* AIA CSR bits */
 #define TOPI_IID_SHIFT          16

arch/riscv/include/asm/kvm_vcpu_pmu.h

Lines changed: 3 additions & 0 deletions
@@ -36,6 +36,7 @@ struct kvm_pmc {
 	bool started;
 	/* Monitoring event ID */
 	unsigned long event_idx;
+	struct kvm_vcpu *vcpu;
 };
 
 /* PMU data structure per vcpu */
@@ -50,6 +51,8 @@ struct kvm_pmu {
 	bool init_done;
 	/* Bit map of all the virtual counter used */
 	DECLARE_BITMAP(pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+	/* Bit map of all the virtual counter overflown */
+	DECLARE_BITMAP(pmc_overflown, RISCV_KVM_MAX_COUNTERS);
 	/* The address of the counter snapshot area (guest physical address) */
 	gpa_t snapshot_addr;
 	/* The actual data of the snapshot */

arch/riscv/include/uapi/asm/kvm.h

Lines changed: 1 addition & 0 deletions
@@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID {
 	KVM_RISCV_ISA_EXT_ZFA,
 	KVM_RISCV_ISA_EXT_ZTSO,
 	KVM_RISCV_ISA_EXT_ZACAS,
+	KVM_RISCV_ISA_EXT_SSCOFPMF,
 	KVM_RISCV_ISA_EXT_MAX,
 };

arch/riscv/kvm/aia.c

Lines changed: 5 additions & 0 deletions
@@ -545,6 +545,9 @@ void kvm_riscv_aia_enable(void)
 	enable_percpu_irq(hgei_parent_irq,
 			  irq_get_trigger_type(hgei_parent_irq));
 	csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
+	/* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */
+	if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+		csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF));
 }
 
 void kvm_riscv_aia_disable(void)
@@ -558,6 +561,8 @@ void kvm_riscv_aia_disable(void)
 		return;
 	hgctrl = get_cpu_ptr(&aia_hgei);
 
+	if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+		csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF));
 	/* Disable per-CPU SGEI interrupt */
 	csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
 	disable_percpu_irq(hgei_parent_irq);

arch/riscv/kvm/vcpu.c

Lines changed: 12 additions & 3 deletions
@@ -373,6 +373,13 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	/* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */
+	if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
+		if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
+		    !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
+			clear_bit(IRQ_PMU_OVF, v->irqs_pending);
+	}
+
 	/* Sync-up AIA high interrupts */
 	kvm_riscv_vcpu_aia_sync_interrupts(vcpu);
 
@@ -390,7 +397,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
 	if (irq < IRQ_LOCAL_MAX &&
 	    irq != IRQ_VS_SOFT &&
 	    irq != IRQ_VS_TIMER &&
-	    irq != IRQ_VS_EXT)
+	    irq != IRQ_VS_EXT &&
+	    irq != IRQ_PMU_OVF)
 		return -EINVAL;
 
 	set_bit(irq, vcpu->arch.irqs_pending);
@@ -405,14 +413,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
 {
 	/*
-	 * We only allow VS-mode software, timer, and external
+	 * We only allow VS-mode software, timer, counter overflow and external
 	 * interrupts when irq is one of the local interrupts
 	 * defined by RISC-V privilege specification.
 	 */
 	if (irq < IRQ_LOCAL_MAX &&
 	    irq != IRQ_VS_SOFT &&
 	    irq != IRQ_VS_TIMER &&
-	    irq != IRQ_VS_EXT)
+	    irq != IRQ_VS_EXT &&
+	    irq != IRQ_PMU_OVF)
 		return -EINVAL;
 
 	clear_bit(irq, vcpu->arch.irqs_pending);
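
The sync hunk above only propagates clears of HVIP.LCOFIP because that is the only transition the guest can make on its own pending bit: a guest overflow handler acknowledges the interrupt by clearing sip.LCOFIP. A minimal hedged guest-side sketch of that acknowledgment follows; guest_ack_lcofi() is an illustrative name, while csr_clear(), CSR_SIP, BIT() and IRQ_PMU_OVF are the kernel's existing helpers and constants.

    #include <linux/bits.h>
    #include <asm/csr.h>

    /* Hedged sketch: the guest-side clear that the sync hunk above mirrors. */
    static inline void guest_ack_lcofi(void)
    {
            csr_clear(CSR_SIP, BIT(IRQ_PMU_OVF));
    }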

arch/riscv/kvm/vcpu_onereg.c

Lines changed: 6 additions & 0 deletions
@@ -36,6 +36,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
 	/* Multi letter extensions (alphabetically sorted) */
 	KVM_ISA_EXT_ARR(SMSTATEEN),
 	KVM_ISA_EXT_ARR(SSAIA),
+	KVM_ISA_EXT_ARR(SSCOFPMF),
 	KVM_ISA_EXT_ARR(SSTC),
 	KVM_ISA_EXT_ARR(SVINVAL),
 	KVM_ISA_EXT_ARR(SVNAPOT),
@@ -99,6 +100,9 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
 	switch (ext) {
 	case KVM_RISCV_ISA_EXT_H:
 		return false;
+	case KVM_RISCV_ISA_EXT_SSCOFPMF:
+		/* Sscofpmf depends on interrupt filtering defined in ssaia */
+		return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA);
 	case KVM_RISCV_ISA_EXT_V:
 		return riscv_v_vstate_ctrl_user_allowed();
 	default:
@@ -116,6 +120,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
 	case KVM_RISCV_ISA_EXT_C:
 	case KVM_RISCV_ISA_EXT_I:
 	case KVM_RISCV_ISA_EXT_M:
+	/* There is no architectural config bit to disable sscofpmf completely */
+	case KVM_RISCV_ISA_EXT_SSCOFPMF:
 	case KVM_RISCV_ISA_EXT_SSTC:
 	case KVM_RISCV_ISA_EXT_SVINVAL:
 	case KVM_RISCV_ISA_EXT_SVNAPOT:

arch/riscv/kvm/vcpu_pmu.c

Lines changed: 64 additions & 4 deletions
@@ -230,6 +230,47 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct
 	return 0;
 }
 
+static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
+				   struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+	struct kvm_vcpu *vcpu = pmc->vcpu;
+	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+	struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
+	u64 period;
+
+	/*
+	 * Stop the event counting by directly accessing the perf_event.
+	 * Otherwise, this needs to be deferred via a workqueue.
+	 * That will introduce skew in the counter value because the actual
+	 * physical counter would start after returning from this function.
+	 * It will be stopped again once the workqueue is scheduled.
+	 */
+	rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
+
+	/*
+	 * The hw counter would start automatically when this function returns.
+	 * Thus, the host may continue to interrupt and inject it to the guest
+	 * even without the guest configuring the next event. Depending on the hardware,
+	 * the host may have some sluggishness only if privilege mode filtering is not
+	 * available. In an ideal world, where qemu is not the only capable hardware,
+	 * this can be removed.
+	 * FYI: ARM64 does it this way while x86 doesn't do anything as such.
+	 * TODO: Should we keep it for RISC-V?
+	 */
+	period = -(local64_read(&perf_event->count));
+
+	local64_set(&perf_event->hw.period_left, 0);
+	perf_event->attr.sample_period = period;
+	perf_event->hw.sample_period = period;
+
+	set_bit(pmc->idx, kvpmu->pmc_overflown);
+	kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
+
+	rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
+}
+
 static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
 				      unsigned long flags, unsigned long eidx,
 				      unsigned long evtdata)
@@ -249,7 +290,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att
 	 */
 	attr->sample_period = kvm_pmu_get_sample_period(pmc);
 
-	event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc);
+	event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
 	if (IS_ERR(event)) {
 		pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
 		return PTR_ERR(event);
@@ -443,6 +484,8 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 		pmc_index = i + ctr_base;
 		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
 			continue;
+		/* The guest started the counter again. Reset the overflow status */
+		clear_bit(pmc_index, kvpmu->pmc_overflown);
 		pmc = &kvpmu->pmc[pmc_index];
 		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
 			pmc->counter_val = ival;
@@ -546,14 +589,29 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 		else if (pmc->perf_event)
 			pmc->counter_val += perf_event_read_value(pmc->perf_event,
 								  &enabled, &running);
-		/* TODO: Add counter overflow support when sscofpmf support is added */
+		/*
+		 * The counter and overflow indices in the snapshot region are w.r.t.
+		 * cbase. Modify the set bit in the counter mask instead of the pmc_index,
+		 * which indicates the absolute counter index.
+		 */
+		if (test_bit(pmc_index, kvpmu->pmc_overflown))
+			kvpmu->sdata->ctr_overflow_mask |= BIT(i);
 		kvpmu->sdata->ctr_values[i] = pmc->counter_val;
 		shmem_needs_update = true;
 	}
 
 	if (flags & SBI_PMU_STOP_FLAG_RESET) {
 		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
 		clear_bit(pmc_index, kvpmu->pmc_in_use);
+		clear_bit(pmc_index, kvpmu->pmc_overflown);
+		if (snap_flag_set) {
+			/*
+			 * Only clear the given counter as the caller is responsible to
+			 * validate both the overflow mask and configured counters.
+			 */
+			kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
+			shmem_needs_update = true;
+		}
 	}
 }
 
@@ -703,6 +761,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
 		pmc = &kvpmu->pmc[i];
 		pmc->idx = i;
 		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
+		pmc->vcpu = vcpu;
 		if (i < kvpmu->num_hw_ctrs) {
 			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
 			if (i < 3)
@@ -735,13 +794,14 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
 	if (!kvpmu)
 		return;
 
-	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
+	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
 		pmc = &kvpmu->pmc[i];
 		pmc->counter_val = 0;
 		kvm_pmu_release_perf_event(pmc);
 		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
 	}
-	bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
+	bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+	bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
 	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
 	kvm_pmu_clear_snapshot_area(vcpu);
 }
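
To show how the bookkeeping above surfaces in the guest, here is a hedged sketch of the consuming side: a guest PMU driver's LCOFI handler walking the SBI PMU snapshot that kvm_riscv_vcpu_pmu_ctr_stop() populates. The ctr_overflow_mask and ctr_values fields follow the sdata usage in the diff (struct riscv_pmu_snapshot_data comes from the SBI PMU snapshot support added earlier in this series); this_cpu_snapshot() and handle_sample() are assumed helpers, not real kernel APIs.

    #include <linux/bitops.h>
    #include <linux/interrupt.h>
    #include <asm/sbi.h>

    /* Hedged guest-side sketch of consuming the snapshot written above. */
    static irqreturn_t guest_pmu_ovf_handler(int irq, void *dev)
    {
            struct riscv_pmu_snapshot_data *sdata = this_cpu_snapshot();
            unsigned long overflown = sdata->ctr_overflow_mask;
            int i;

            /* Each set bit is a counter the hypervisor marked as overflown */
            for_each_set_bit(i, &overflown, BITS_PER_LONG)
                    handle_sample(i, sdata->ctr_values[i]);

            return IRQ_HANDLED;
    }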
