@@ -230,6 +230,47 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct
         return 0;
 }
 
+static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
+                                   struct perf_sample_data *data,
+                                   struct pt_regs *regs)
+{
+        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+        struct kvm_vcpu *vcpu = pmc->vcpu;
+        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+        struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
+        u64 period;
+
+        /*
+         * Stop the event counting by directly accessing the perf_event.
+         * Otherwise, this needs to be deferred via a workqueue.
+         * That will introduce skew in the counter value because the actual
+         * physical counter would start after returning from this function.
+         * It will be stopped again once the workqueue is scheduled.
+         */
+        rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
+
+        /*
+         * The hw counter would start automatically when this function returns.
+         * Thus, the host may continue to interrupt and inject it into the guest
+         * even without the guest configuring the next event. Depending on the
+         * hardware, the host may have some sluggishness only if privilege mode
+         * filtering is not available. In an ideal world, where qemu is not the
+         * only capable hardware, this can be removed.
+         * FYI: ARM64 does it this way while x86 doesn't do anything like this.
+         * TODO: Should we keep it for RISC-V?
+         */
+        period = -(local64_read(&perf_event->count));
+
+        local64_set(&perf_event->hw.period_left, 0);
+        perf_event->attr.sample_period = period;
+        perf_event->hw.sample_period = period;
+
+        set_bit(pmc->idx, kvpmu->pmc_overflown);
+        kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
+
+        rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
+}
+
 static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
                                       unsigned long flags, unsigned long eidx,
                                       unsigned long evtdata)
@@ -249,7 +290,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att
          */
         attr->sample_period = kvm_pmu_get_sample_period(pmc);
 
-        event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc);
+        event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
         if (IS_ERR(event)) {
                 pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
                 return PTR_ERR(event);
@@ -443,6 +484,8 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
                 pmc_index = i + ctr_base;
                 if (!test_bit(pmc_index, kvpmu->pmc_in_use))
                         continue;
+                /* The guest started the counter again. Reset the overflow status */
+                clear_bit(pmc_index, kvpmu->pmc_overflown);
                 pmc = &kvpmu->pmc[pmc_index];
                 if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
                         pmc->counter_val = ival;
@@ -546,14 +589,29 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
                         else if (pmc->perf_event)
                                 pmc->counter_val += perf_event_read_value(pmc->perf_event,
                                                                           &enabled, &running);
-                        /* TODO: Add counter overflow support when sscofpmf support is added */
+                        /*
+                         * The counter and overflow indices in the snapshot region are
+                         * relative to cbase. Set the bit using the counter-mask index (i)
+                         * rather than pmc_index, which is the absolute counter index.
+                         */
+                        if (test_bit(pmc_index, kvpmu->pmc_overflown))
+                                kvpmu->sdata->ctr_overflow_mask |= BIT(i);
                         kvpmu->sdata->ctr_values[i] = pmc->counter_val;
                         shmem_needs_update = true;
                 }
 
                 if (flags & SBI_PMU_STOP_FLAG_RESET) {
                         pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
                         clear_bit(pmc_index, kvpmu->pmc_in_use);
+                        clear_bit(pmc_index, kvpmu->pmc_overflown);
+                        if (snap_flag_set) {
+                                /*
+                                 * Only clear the given counter, as the caller is responsible
+                                 * for validating both the overflow mask and configured counters.
+                                 */
+                                kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
+                                shmem_needs_update = true;
+                        }
                 }
         }
 
@@ -703,6 +761,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
                 pmc = &kvpmu->pmc[i];
                 pmc->idx = i;
                 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
+                pmc->vcpu = vcpu;
                 if (i < kvpmu->num_hw_ctrs) {
                         pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
                         if (i < 3)
@@ -735,13 +794,14 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
         if (!kvpmu)
                 return;
 
-        for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
+        for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
                 pmc = &kvpmu->pmc[i];
                 pmc->counter_val = 0;
                 kvm_pmu_release_perf_event(pmc);
                 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
         }
-        bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
+        bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+        bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
         memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
         kvm_pmu_clear_snapshot_area(vcpu);
 }
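
Note on the perf API used above: the core of the change in kvm_pmu_create_perf_event() is passing an overflow callback and a context pointer to perf_event_create_kernel_counter() where NULL was passed before. The following minimal sketch illustrates only that registration pattern; the names my_ctx, my_overflow_handler, and my_create_counter are hypothetical and not part of this patch, and the attr values are arbitrary.

#include <linux/perf_event.h>
#include <linux/printk.h>
#include <linux/sched.h>

/* Hypothetical per-counter context; in this patch that role is played by struct kvm_pmc. */
struct my_ctx {
        int idx;
};

/* Invoked by the perf core each time the programmed sample_period elapses. */
static void my_overflow_handler(struct perf_event *event,
                                struct perf_sample_data *data,
                                struct pt_regs *regs)
{
        struct my_ctx *ctx = event->overflow_handler_context;

        pr_info("counter %d overflowed\n", ctx->idx);
}

static struct perf_event *my_create_counter(struct my_ctx *ctx)
{
        struct perf_event_attr attr = {
                .type = PERF_TYPE_HARDWARE,
                .size = sizeof(attr),
                .config = PERF_COUNT_HW_CPU_CYCLES,
                .sample_period = 0x10000,       /* arbitrary period for this sketch */
        };

        /* Same call shape as in kvm_pmu_create_perf_event(): the last two
         * arguments are the overflow callback and its opaque context. */
        return perf_event_create_kernel_counter(&attr, -1, current,
                                                my_overflow_handler, ctx);
}

In kvm_riscv_pmu_overflow() the patch additionally stops the event, reprograms the sample period, marks the counter in pmc_overflown, and injects IRQ_PMU_OVF before restarting the event, as shown in the diff above.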