Skip to content

Commit c9aaa89

Browse files
Glauber Costa authored and Avi Kivity committed
KVM: Steal time implementation
To implement steal time, we need the hypervisor to pass the guest information about how much time was spent running other processes outside the VM, while the vcpu had meaningful work to do - halt time does not count. This information is acquired through the run_delay field of delayacct/schedstats infrastructure, that counts time spent in a runqueue but not running. Steal time is a per-cpu information, so the traditional MSR-based infrastructure is used. A new msr, KVM_MSR_STEAL_TIME, holds the memory area address containing information about steal time This patch contains the hypervisor part of the steal time infrasructure, and can be backported independently of the guest portion. [avi, yongjie: export delayacct_on, to avoid build failures in some configs] Signed-off-by: Glauber Costa <glommer@redhat.com> Tested-by: Eric B Munson <emunson@mgebm.net> CC: Rik van Riel <riel@redhat.com> CC: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> CC: Peter Zijlstra <peterz@infradead.org> CC: Anthony Liguori <aliguori@us.ibm.com> Signed-off-by: Yongjie Ren <yongjie.ren@intel.com> Signed-off-by: Avi Kivity <avi@redhat.com>
1 parent 9ddabbe commit c9aaa89

File tree

6 files changed

+89
-2
lines changed

6 files changed

+89
-2
lines changed

arch/x86/include/asm/kvm_host.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,15 @@ struct kvm_vcpu_arch {
389389
unsigned int hw_tsc_khz;
390390
unsigned int time_offset;
391391
struct page *time_page;
392+
393+
struct {
394+
u64 msr_val;
395+
u64 last_steal;
396+
u64 accum_steal;
397+
struct gfn_to_hva_cache stime;
398+
struct kvm_steal_time steal;
399+
} st;
400+
392401
u64 last_guest_tsc;
393402
u64 last_kernel_ns;
394403
u64 last_tsc_nsec;

arch/x86/include/asm/kvm_para.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ struct kvm_steal_time {
4545
__u32 pad[12];
4646
};
4747

48+
#define KVM_STEAL_ALIGNMENT_BITS 5
49+
#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
50+
#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
51+
4852
#define KVM_MAX_MMU_OP_BATCH 32
4953

5054
#define KVM_ASYNC_PF_ENABLED (1 << 0)

arch/x86/kvm/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ config KVM
3131
select KVM_ASYNC_PF
3232
select USER_RETURN_NOTIFIER
3333
select KVM_MMIO
34+
select TASK_DELAY_ACCT
3435
---help---
3536
Support hosting fully virtualized guest machines using hardware
3637
virtualization extensions. You will need a fairly recent

arch/x86/kvm/x86.c

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -808,12 +808,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
808808
* kvm-specific. Those are put in the beginning of the list.
809809
*/
810810

811-
#define KVM_SAVE_MSRS_BEGIN 8
811+
#define KVM_SAVE_MSRS_BEGIN 9
812812
static u32 msrs_to_save[] = {
813813
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
814814
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
815815
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
816-
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN,
816+
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
817817
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
818818
MSR_STAR,
819819
#ifdef CONFIG_X86_64
@@ -1488,6 +1488,35 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
14881488
}
14891489
}
14901490

1491+
static void accumulate_steal_time(struct kvm_vcpu *vcpu)
1492+
{
1493+
u64 delta;
1494+
1495+
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
1496+
return;
1497+
1498+
delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
1499+
vcpu->arch.st.last_steal = current->sched_info.run_delay;
1500+
vcpu->arch.st.accum_steal = delta;
1501+
}
1502+
1503+
static void record_steal_time(struct kvm_vcpu *vcpu)
1504+
{
1505+
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
1506+
return;
1507+
1508+
if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
1509+
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
1510+
return;
1511+
1512+
vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
1513+
vcpu->arch.st.steal.version += 2;
1514+
vcpu->arch.st.accum_steal = 0;
1515+
1516+
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
1517+
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
1518+
}
1519+
14911520
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
14921521
{
14931522
switch (msr) {
@@ -1570,6 +1599,33 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
15701599
if (kvm_pv_enable_async_pf(vcpu, data))
15711600
return 1;
15721601
break;
1602+
case MSR_KVM_STEAL_TIME:
1603+
1604+
if (unlikely(!sched_info_on()))
1605+
return 1;
1606+
1607+
if (data & KVM_STEAL_RESERVED_MASK)
1608+
return 1;
1609+
1610+
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
1611+
data & KVM_STEAL_VALID_BITS))
1612+
return 1;
1613+
1614+
vcpu->arch.st.msr_val = data;
1615+
1616+
if (!(data & KVM_MSR_ENABLED))
1617+
break;
1618+
1619+
vcpu->arch.st.last_steal = current->sched_info.run_delay;
1620+
1621+
preempt_disable();
1622+
accumulate_steal_time(vcpu);
1623+
preempt_enable();
1624+
1625+
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
1626+
1627+
break;
1628+
15731629
case MSR_IA32_MCG_CTL:
15741630
case MSR_IA32_MCG_STATUS:
15751631
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
@@ -1855,6 +1911,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
18551911
case MSR_KVM_ASYNC_PF_EN:
18561912
data = vcpu->arch.apf.msr_val;
18571913
break;
1914+
case MSR_KVM_STEAL_TIME:
1915+
data = vcpu->arch.st.msr_val;
1916+
break;
18581917
case MSR_IA32_P5_MC_ADDR:
18591918
case MSR_IA32_P5_MC_TYPE:
18601919
case MSR_IA32_MCG_CAP:
@@ -2166,6 +2225,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
21662225
kvm_migrate_timers(vcpu);
21672226
vcpu->cpu = cpu;
21682227
}
2228+
2229+
accumulate_steal_time(vcpu);
2230+
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
21692231
}
21702232

21712233
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -2487,6 +2549,10 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
24872549
(1 << KVM_FEATURE_CLOCKSOURCE2) |
24882550
(1 << KVM_FEATURE_ASYNC_PF) |
24892551
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
2552+
2553+
if (sched_info_on())
2554+
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
2555+
24902556
entry->ebx = 0;
24912557
entry->ecx = 0;
24922558
entry->edx = 0;
@@ -5470,6 +5536,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
54705536
r = 1;
54715537
goto out;
54725538
}
5539+
if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
5540+
record_steal_time(vcpu);
5541+
54735542
}
54745543

54755544
r = kvm_mmu_reload(vcpu);
@@ -6206,6 +6275,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
62066275

62076276
kvm_make_request(KVM_REQ_EVENT, vcpu);
62086277
vcpu->arch.apf.msr_val = 0;
6278+
vcpu->arch.st.msr_val = 0;
62096279

62106280
kvmclock_reset(vcpu);
62116281

include/linux/kvm_host.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#define KVM_REQ_DEACTIVATE_FPU 10
4848
#define KVM_REQ_EVENT 11
4949
#define KVM_REQ_APF_HALT 12
50+
#define KVM_REQ_STEAL_UPDATE 13
5051

5152
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
5253

kernel/delayacct.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@
1919
#include <linux/time.h>
2020
#include <linux/sysctl.h>
2121
#include <linux/delayacct.h>
22+
#include <linux/module.h>
2223

2324
int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */
25+
EXPORT_SYMBOL_GPL(delayacct_on);
2426
struct kmem_cache *delayacct_cache;
2527

2628
static int __init delayacct_setup_disable(char *str)

0 commit comments

Comments
 (0)