From 7fe864dc942c041cc4f56e287c4025d54a8e6c1e Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:03 +0300 Subject: [PATCH 001/104] KVM: x86: Mark VEX-prefix instructions emulation as unimplemented Currently the emulator does not recognize vex-prefix instructions. However, it may incorrectly decode lgdt/lidt instructions and try to execute them. This patch returns unhandlable error on their emulation. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e4e833d3d7d7b..8ec4a3ebf4033 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4314,6 +4314,13 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) if (ctxt->d & ModRM) ctxt->modrm = insn_fetch(u8, ctxt); + /* vex-prefix instructions are not implemented */ + if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) && + (mode == X86EMUL_MODE_PROT64 || + (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) { + ctxt->d = NotImpl; + } + while (ctxt->d & GroupMask) { switch (ctxt->d & GroupMask) { case Group: From e37a75a13cdae5deaa2ea2cbf8d55b5dd08638b6 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:04 +0300 Subject: [PATCH 002/104] KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR The current implementation ignores the LDTR/TR base high 32-bits on long-mode. As a result the loaded segment descriptor may be incorrect. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8ec4a3ebf4033..136088fb038b7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1422,6 +1422,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, ulong desc_addr; int ret; u16 dummy; + u32 base3 = 0; memset(&seg_desc, 0, sizeof seg_desc); @@ -1538,9 +1539,14 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, ret = write_segment_descriptor(ctxt, selector, &seg_desc); if (ret != X86EMUL_CONTINUE) return ret; + } else if (ctxt->mode == X86EMUL_MODE_PROT64) { + ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3, + sizeof(base3), &ctxt->exception); + if (ret != X86EMUL_CONTINUE) + return ret; } load: - ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg); + ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); return X86EMUL_CONTINUE; exception: emulate_exception(ctxt, err_vec, err_code, true); From 2eedcac8a97cef43c9c5236398fc8c9d0fd9cc0c Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:05 +0300 Subject: [PATCH 003/104] KVM: x86: Loading segments on 64-bit mode may be wrong The current emulator implementation ignores the high 32 bits of the base in long-mode. During segment load from the LDT, the base of the LDT is calculated incorrectly and may cause the wrong segment to be loaded. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 136088fb038b7..7e4a45cab4001 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1358,17 +1358,19 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, u16 selector, struct desc_ptr *dt) { const struct x86_emulate_ops *ops = ctxt->ops; + u32 base3 = 0; if (selector & 1 << 2) { struct desc_struct desc; u16 sel; memset (dt, 0, sizeof *dt); - if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) + if (!ops->get_segment(ctxt, &sel, &desc, &base3, + VCPU_SREG_LDTR)) return; dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ - dt->address = get_desc_base(&desc); + dt->address = get_desc_base(&desc) | ((u64)base3 << 32); } else ops->get_gdt(ctxt, dt); } From 606b1c3e87597c2d6c9f3eb833a7251262390295 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:06 +0300 Subject: [PATCH 004/104] KVM: x86: sgdt and sidt are not privilaged The SGDT and SIDT instructions are not privilaged, i.e. they can be executed with CPL>0. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 7e4a45cab4001..a16bf225cab01 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3642,8 +3642,8 @@ static const struct opcode group6[] = { }; static const struct group_dual group7 = { { - II(Mov | DstMem | Priv, em_sgdt, sgdt), - II(Mov | DstMem | Priv, em_sidt, sidt), + II(Mov | DstMem, em_sgdt, sgdt), + II(Mov | DstMem, em_sidt, sidt), II(SrcMem | Priv, em_lgdt, lgdt), II(SrcMem | Priv, em_lidt, lidt), II(SrcNone | DstMem | Mov, em_smsw, smsw), N, From 37c564f2854bf75969d0ac26e03f5cf2bb7d639f Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:07 +0300 Subject: [PATCH 005/104] KVM: x86: cmpxchg emulation should compare in reverse order The current implementation of cmpxchg does not update the flags correctly, since the accumulator should be compared with the destination and not the other way around. The current implementation does not update the flags correctly. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a16bf225cab01..4cb0da6602839 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2052,8 +2052,10 @@ static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt) static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) { /* Save real source value, then compare EAX against destination. */ + ctxt->dst.orig_val = ctxt->dst.val; + ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX); ctxt->src.orig_val = ctxt->src.val; - ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); + ctxt->src.val = ctxt->dst.orig_val; fastop(ctxt, em_cmp); if (ctxt->eflags & EFLG_ZF) { @@ -2063,6 +2065,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) /* Failure: write the value we saw to EAX. */ ctxt->dst.type = OP_REG; ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); + ctxt->dst.val = ctxt->dst.orig_val; } return X86EMUL_CONTINUE; } From 3b32004a66e96e17d2a031c08d3304245c506dfc Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:08 +0300 Subject: [PATCH 006/104] KVM: x86: movnti minimum op size of 32-bit is not kept If the operand-size prefix (0x66) is used in 64-bit mode, the emulator would assume the destination operand is 64-bit, when it should be 32-bit. Reminder: movnti does not support 16-bit operands and its default operand size is 32-bit. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4cb0da6602839..be3f7645e39d7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4836,8 +4836,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) break; case 0xc3: /* movnti */ ctxt->dst.bytes = ctxt->op_bytes; - ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : - (u64) ctxt->src.val; + ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val : + (u32) ctxt->src.val; break; default: goto cannot_emulate; From 67f4d4288c353734d29c45f6725971c71af96791 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:09 +0300 Subject: [PATCH 007/104] KVM: x86: rdpmc emulation checks the counter incorrectly The rdpmc emulation checks that the counter (ECX) is not higher than 2, without taking into considerations bits 30:31 role (e.g., bit 30 marks whether the counter is fixed). The fix uses the pmu information for checking the validity of the pmu counter. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/pmu.c | 9 +++++++++ arch/x86/kvm/x86.c | 7 +++++++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index a04fe4eb237d3..ffa2671a7f2f8 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -194,6 +194,7 @@ struct x86_emulate_ops { int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); + int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); void (*halt)(struct x86_emulate_ctxt *ctxt); void (*wbinvd)(struct x86_emulate_ctxt *ctxt); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 49314155b66c8..63e020be3da78 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1070,6 +1070,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); +int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc); int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); void kvm_deliver_pmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index be3f7645e39d7..3da8d82acb319 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3507,7 +3507,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || - (rcx > 3)) + ctxt->ops->check_pmc(ctxt, rcx)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index cbecaa90399c1..3dd6accb64ec1 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -428,6 +428,15 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; } +int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + bool fixed = pmc & (1u << 30); + pmc &= ~(3u << 30); + return (!fixed && pmc >= pmu->nr_arch_gp_counters) || + (fixed && pmc >= pmu->nr_arch_fixed_counters); +} + int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data) { struct kvm_pmu *pmu = &vcpu->arch.pmu; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f32a02578c0d1..451d6acea808e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4762,6 +4762,12 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, return kvm_set_msr(emul_to_vcpu(ctxt), &msr); } +static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, + u32 pmc) +{ + return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc); +} + static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata) { @@ -4838,6 +4844,7 @@ static const struct x86_emulate_ops emulate_ops = { .set_dr = emulator_set_dr, .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, + .check_pmc = emulator_check_pmc, .read_pmc = emulator_read_pmc, .halt = emulator_halt, .wbinvd = emulator_wbinvd, From aaa05f2437b9450f30b301db962ec4d45ec90fbb Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:10 +0300 Subject: [PATCH 008/104] KVM: x86: Return error on cmpxchg16b emulation cmpxchg16b is currently unimplemented in the emulator. The least we can do is return error upon the emulation of this instruction. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3da8d82acb319..a151f8d24a1df 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1999,6 +1999,9 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) { u64 old = ctxt->dst.orig_val64; + if (ctxt->dst.bytes == 16) + return X86EMUL_UNHANDLEABLE; + if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); @@ -4077,7 +4080,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->orig_val = op->val; break; case OpMem64: - ctxt->memop.bytes = 8; + ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8; goto mem_common; case OpAcc: op->type = OP_REG; From 32e94d0696c26c6ba4f3ff53e70f6e0e825979bc Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:11 +0300 Subject: [PATCH 009/104] KVM: x86: smsw emulation is incorrect in 64-bit mode In 64-bit mode, when the destination is a register, the assignment is done according to the operand size. Otherwise (memory operand or no 64-bit mode), a 16-bit assignment is performed. Currently, 16-bit assignment is always done to the destination. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a151f8d24a1df..9b5d97db76313 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3235,7 +3235,8 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt) static int em_smsw(struct x86_emulate_ctxt *ctxt) { - ctxt->dst.bytes = 2; + if (ctxt->dst.type == OP_MEM) + ctxt->dst.bytes = 2; ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0); return X86EMUL_CONTINUE; } From a5457e7bcf9a76ec5c2de5d311d9b0d3b724edc6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jun 2014 17:29:34 +0200 Subject: [PATCH 010/104] KVM: emulate: POP SS triggers a MOV SS shadow too We did not do that when interruptibility was added to the emulator, because at the time pop to segment was not implemented. Now it is, add it. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9b5d97db76313..bc670675223de 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1762,6 +1762,9 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + if (ctxt->modrm_reg == VCPU_SREG_SS) + ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; + rc = load_segment_descriptor(ctxt, (u16)selector, seg); return rc; } From 968889771749d8e730d794deed2bd2e363a98a54 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2014 14:54:19 +0200 Subject: [PATCH 011/104] KVM: emulate: simplify BitOp handling Memory is always the destination for BitOp instructions. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index bc670675223de..ea976e478dcab 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4079,7 +4079,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, mem_common: *op = ctxt->memop; ctxt->memopp = op; - if ((ctxt->d & BitOp) && op == &ctxt->dst) + if (ctxt->d & BitOp) fetch_bit_operand(ctxt); op->orig_val = op->val; break; From bdc907222c5e4edd848da0c031deb55b59f1cf9a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 May 2014 14:03:29 +0200 Subject: [PATCH 012/104] KVM: emulate: fix harmless typo in MMX decoding It was using the wrong member of the union. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ea976e478dcab..8a1796b1eef25 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1093,7 +1093,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, if (ctxt->d & Mmx) { op->type = OP_MM; op->bytes = 8; - op->addr.xmm = ctxt->modrm_rm & 7; + op->addr.mm = ctxt->modrm_rm & 7; return rc; } fetch_register_operand(op); From bc39c4db7110f88f338cbbabe53d3e43c7400a59 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sat, 14 Jun 2014 23:44:29 +0200 Subject: [PATCH 013/104] arch/x86/kvm/vmx.c: use PAGE_ALIGNED instead of IS_ALIGNED(PAGE_SIZE use mm.h definition Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Fabian Frederick Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 801332edefc3c..4f84be0d2d1e1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5918,7 +5918,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, * which replaces physical address width with 32 * */ - if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { + if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { nested_vmx_failInvalid(vcpu); skip_emulated_instruction(vcpu); return 1; @@ -5936,7 +5936,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, vmx->nested.vmxon_ptr = vmptr; break; case EXIT_REASON_VMCLEAR: - if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { + if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); skip_emulated_instruction(vcpu); @@ -5951,7 +5951,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, } break; case EXIT_REASON_VMPTRLD: - if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { + if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); skip_emulated_instruction(vcpu); @@ -8113,14 +8113,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && - !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { + !PAGE_ALIGNED(vmcs12->msr_bitmap)) { /*TODO: Also verify bits beyond physical address width are 0*/ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; } if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && - !IS_ALIGNED(vmcs12->apic_access_addr, PAGE_SIZE)) { + !PAGE_ALIGNED(vmcs12->apic_access_addr)) { /*TODO: Also verify bits beyond physical address width are 0*/ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; From 7dec5603b6b8dc4c3e1c65d318bd2a5a8c62a424 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 15 Jun 2014 16:12:57 +0300 Subject: [PATCH 014/104] KVM: x86: bit-ops emulation ignores offset on 64-bit The current emulation of bit operations ignores the offset from the destination on 64-bit target memory operands. This patch fixes this behavior. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8a1796b1eef25..439a3286406a4 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1220,12 +1220,14 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt) long sv = 0, mask; if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) { - mask = ~(ctxt->dst.bytes * 8 - 1); + mask = ~((long)ctxt->dst.bytes * 8 - 1); if (ctxt->src.bytes == 2) sv = (s16)ctxt->src.val & (s16)mask; else if (ctxt->src.bytes == 4) sv = (s32)ctxt->src.val & (s32)mask; + else + sv = (s64)ctxt->src.val & (s64)mask; ctxt->dst.addr.mem.ea += (sv >> 3); } From ee212297cd425620867d4398d55d068c4203768c Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 15 Jun 2014 16:12:58 +0300 Subject: [PATCH 015/104] KVM: x86: Wrong emulation on 'xadd X, X' The emulator does not emulate the xadd instruction correctly if the two operands are the same. In this (unlikely) situation the result should be the sum of X and X (2X) when it is currently X. The solution is to first perform writeback to the source, before writing to the destination. The only instruction which should be affected is xadd, as the other instructions that perform writeback to the source use the extended accumlator (e.g., RAX:RDX). Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 439a3286406a4..0d5ecbd458824 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4736,17 +4736,17 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; writeback: - if (!(ctxt->d & NoWrite)) { - rc = writeback(ctxt, &ctxt->dst); - if (rc != X86EMUL_CONTINUE) - goto done; - } if (ctxt->d & SrcWrite) { BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR); rc = writeback(ctxt, &ctxt->src); if (rc != X86EMUL_CONTINUE) goto done; } + if (!(ctxt->d & NoWrite)) { + rc = writeback(ctxt, &ctxt->dst); + if (rc != X86EMUL_CONTINUE) + goto done; + } /* * restore dst type in case the decoding will be reused From 9e8919ae793f4edfaa29694a70f71a515ae9942a Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 15 Jun 2014 16:12:59 +0300 Subject: [PATCH 016/104] KVM: x86: Inter-privilege level ret emulation is not implemeneted Return unhandlable error on inter-privilege level ret instruction. This is since the current emulation does not check the privilege level correctly when loading the CS, and does not pop RSP/SS as needed. Cc: stable@vger.kernel.org Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0d5ecbd458824..27ce38693f096 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2033,6 +2033,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) { int rc; unsigned long cs; + int cpl = ctxt->ops->cpl(ctxt); rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2042,6 +2043,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; + /* Outer-privilege level return is not implemented */ + if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) + return X86EMUL_UNHANDLEABLE; rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); return rc; } From 140bad89fd25db1aab60f80ed7874e9a9bdbae3b Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 15 Jun 2014 16:13:00 +0300 Subject: [PATCH 017/104] KVM: x86: emulation of dword cmov on long-mode should clear [63:32] Even if the condition of cmov is not satisfied, bits[63:32] should be cleared. This is clearly stated in Intel's CMOVcc documentation. The solution is to reassign the destination onto itself if the condition is unsatisfied. For that matter the original destination value needs to be read. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 27ce38693f096..6f09b2e555ef1 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3923,7 +3923,7 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, N, N, /* 0x40 - 0x4F */ - X16(D(DstReg | SrcMem | ModRM | Mov)), + X16(D(DstReg | SrcMem | ModRM)), /* 0x50 - 0x5F */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0x60 - 0x6F */ @@ -4824,8 +4824,10 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); break; case 0x40 ... 0x4f: /* cmov */ - ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val; - if (!test_cc(ctxt->b, ctxt->eflags)) + if (test_cc(ctxt->b, ctxt->eflags)) + ctxt->dst.val = ctxt->src.val; + else if (ctxt->mode != X86EMUL_MODE_PROT64 || + ctxt->op_bytes != 4) ctxt->dst.type = OP_NONE; /* no writeback */ break; case 0x80 ... 0x8f: /* jnz rel, etc*/ From a825f5cc4a8455663562809748240169cb9bc2c0 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 15 Jun 2014 16:13:01 +0300 Subject: [PATCH 018/104] KVM: x86: NOP emulation clears (incorrectly) the high 32-bits of RAX On long-mode the current NOP (0x90) emulation still writes back to RAX. As a result, EAX is zero-extended and the high 32-bits of RAX are cleared. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6f09b2e555ef1..84dc4ba0364da 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4688,8 +4688,9 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) break; case 0x90 ... 0x97: /* nop / xchg reg, rax */ if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX)) - break; - rc = em_xchg(ctxt); + ctxt->dst.type = OP_NONE; + else + rc = em_xchg(ctxt); break; case 0x98: /* cbw/cwde/cdqe */ switch (ctxt->op_bytes) { From e4aa5288ff07766d101751de9a8420d666c61735 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 12 Jun 2014 19:40:32 +0200 Subject: [PATCH 019/104] KVM: x86: Fix constant value of VM_{EXIT_SAVE,ENTRY_LOAD}_DEBUG_CONTROLS The spec says those controls are at bit position 2 - makes 4 as value. The impact of this mistake is effectively zero as we only use them to ensure that these features are set at position 2 (or, previously, 1) in MSR_IA32_VMX_{EXIT,ENTRY}_CTLS - which is and will be always true according to the spec. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 7004d21e62190..d989829d33043 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -76,7 +76,7 @@ #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 -#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 #define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 @@ -89,7 +89,7 @@ #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff -#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 From 3dbcd8da7b564194f93271b003a1c46ef404cbdb Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Jun 2014 13:59:40 +0200 Subject: [PATCH 020/104] KVM: nVMX: Advertise support for MSR_IA32_VMX_TRUE_*_CTLS We already implemented them but failed to advertise them. Currently they all return the identical values to the capability MSRs they are augmenting. So there is no change in exposed features yet. Drop related comments at this chance that are partially incorrect and redundant anyway. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/msr-index.h | 1 + arch/x86/kvm/vmx.c | 13 ++----------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index fcf2b3ae1bf02..eaefcc66c855c 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -558,6 +558,7 @@ /* VMX_BASIC bits and bitmasks */ #define VMX_BASIC_VMCS_SIZE_SHIFT 32 +#define VMX_BASIC_TRUE_CTLS (1ULL << 55) #define VMX_BASIC_64 0x0001000000000000LLU #define VMX_BASIC_MEM_TYPE_SHIFT 50 #define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4f84be0d2d1e1..31379faf952ea 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2265,21 +2265,13 @@ static __init void nested_vmx_setup_ctls_msrs(void) /* pin-based controls */ rdmsr(MSR_IA32_VMX_PINBASED_CTLS, nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); - /* - * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is - * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. - */ nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; - /* - * Exit controls - * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and - * 17 must be 1. - */ + /* exit controls */ rdmsr(MSR_IA32_VMX_EXIT_CTLS, nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; @@ -2299,7 +2291,6 @@ static __init void nested_vmx_setup_ctls_msrs(void) /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); - /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_entry_ctls_high &= #ifdef CONFIG_X86_64 @@ -2394,7 +2385,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) * guest, and the VMCS structure we give it - not about the * VMX support of the underlying hardware. */ - *pdata = VMCS12_REVISION | + *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); break; From 3dcdf3ec6e48d918741ea11349d4436d0c5aac93 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Jun 2014 13:59:41 +0200 Subject: [PATCH 021/104] KVM: nVMX: Allow to disable CR3 access interception We already have this control enabled by exposing a broken MSR_IA32_VMX_PROCBASED_CTLS value. This will properly advertise our capability once the value is fixed by clearing the right bits in MSR_IA32_VMX_TRUE_PROCBASED_CTLS. We also have to ensure to test the right value on L2 entry. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 31379faf952ea..e55e404b5dba0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2239,6 +2239,7 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) * or other means. */ static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high; +static u32 nested_vmx_true_procbased_ctls_low; static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; @@ -2328,6 +2329,10 @@ static __init void nested_vmx_setup_ctls_msrs(void) */ nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS; + /* We support free control of CR3 access interception. */ + nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low & + ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); + /* secondary cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); @@ -2395,6 +2400,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) nested_vmx_pinbased_ctls_high); break; case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: + *pdata = vmx_control_msr(nested_vmx_true_procbased_ctls_low, + nested_vmx_procbased_ctls_high); + break; case MSR_IA32_VMX_PROCBASED_CTLS: *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); @@ -8127,7 +8135,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, - nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high) || + nested_vmx_true_procbased_ctls_low, + nested_vmx_procbased_ctls_high) || !vmx_control_verify(vmcs12->secondary_vm_exec_control, nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) || !vmx_control_verify(vmcs12->pin_based_vm_exec_control, From 560b7ee12ca5e1ebc1675d7eb4008bb22708277a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Jun 2014 13:59:42 +0200 Subject: [PATCH 022/104] KVM: nVMX: Fix returned value of MSR_IA32_VMX_PROCBASED_CTLS SDM says bits 1, 4-6, 8, 13-16, and 26 have to be set. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 3 +++ arch/x86/kvm/vmx.c | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index d989829d33043..bcbfade26d8d5 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -51,6 +51,9 @@ #define CPU_BASED_MONITOR_EXITING 0x20000000 #define CPU_BASED_PAUSE_EXITING 0x40000000 #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + /* * Definitions of Secondary Processor-Based VM-Execution Controls. */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e55e404b5dba0..66365a009cff7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2306,7 +2306,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); - nested_vmx_procbased_ctls_low = 0; + nested_vmx_procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_procbased_ctls_high &= CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | @@ -2327,7 +2327,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) * can use it to avoid exits to L1 - even when L0 runs L2 * without MSR bitmaps. */ - nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS; + nested_vmx_procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | + CPU_BASED_USE_MSR_BITMAPS; /* We support free control of CR3 access interception. */ nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low & From 2996fca0690f03a5220203588f4a0d8c5acba2b0 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Jun 2014 13:59:43 +0200 Subject: [PATCH 023/104] KVM: nVMX: Allow to disable VM_{ENTRY_LOAD,EXIT_SAVE}_DEBUG_CONTROLS Allow L1 to "leak" its debug controls into L2, i.e. permit cleared VM_{ENTRY_LOAD,EXIT_SAVE}_DEBUG_CONTROLS. This requires to manually transfer the state of DR7 and IA32_DEBUGCTLMSR from L1 into L2 as both run on different VMCS. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 66365a009cff7..b93e2ae2bb628 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -383,6 +383,9 @@ struct nested_vmx { struct hrtimer preemption_timer; bool preemption_timer_expired; + + /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ + u64 vmcs01_debugctl; }; #define POSTED_INTR_ON 0 @@ -2243,7 +2246,9 @@ static u32 nested_vmx_true_procbased_ctls_low; static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; +static u32 nested_vmx_true_exit_ctls_low; static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; +static u32 nested_vmx_true_entry_ctls_low; static u32 nested_vmx_misc_low, nested_vmx_misc_high; static u32 nested_vmx_ept_caps; static __init void nested_vmx_setup_ctls_msrs(void) @@ -2289,6 +2294,10 @@ static __init void nested_vmx_setup_ctls_msrs(void) if (vmx_mpx_supported()) nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; + /* We support free control of debug control saving. */ + nested_vmx_true_exit_ctls_low = nested_vmx_exit_ctls_low & + ~VM_EXIT_SAVE_DEBUG_CONTROLS; + /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); @@ -2303,6 +2312,10 @@ static __init void nested_vmx_setup_ctls_msrs(void) if (vmx_mpx_supported()) nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; + /* We support free control of debug control loading. */ + nested_vmx_true_entry_ctls_low = nested_vmx_entry_ctls_low & + ~VM_ENTRY_LOAD_DEBUG_CONTROLS; + /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); @@ -2409,11 +2422,17 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) nested_vmx_procbased_ctls_high); break; case MSR_IA32_VMX_TRUE_EXIT_CTLS: + *pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low, + nested_vmx_exit_ctls_high); + break; case MSR_IA32_VMX_EXIT_CTLS: *pdata = vmx_control_msr(nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); break; case MSR_IA32_VMX_TRUE_ENTRY_CTLS: + *pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low, + nested_vmx_entry_ctls_high); + break; case MSR_IA32_VMX_ENTRY_CTLS: *pdata = vmx_control_msr(nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); @@ -7836,7 +7855,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { + kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); + vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); + } else { + kvm_set_dr(vcpu, 7, vcpu->arch.dr7); + vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); + } vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, vmcs12->vm_entry_intr_info_field); vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, @@ -7846,7 +7871,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, vmcs12->guest_interruptibility_info); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); - kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); vmx_set_rflags(vcpu, vmcs12->guest_rflags); vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, vmcs12->guest_pending_dbg_exceptions); @@ -8143,9 +8167,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) !vmx_control_verify(vmcs12->pin_based_vm_exec_control, nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) || !vmx_control_verify(vmcs12->vm_exit_controls, - nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) || + nested_vmx_true_exit_ctls_low, + nested_vmx_exit_ctls_high) || !vmx_control_verify(vmcs12->vm_entry_controls, - nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high)) + nested_vmx_true_entry_ctls_low, + nested_vmx_entry_ctls_high)) { nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; @@ -8222,6 +8248,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); + if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) + vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + cpu = get_cpu(); vmx->loaded_vmcs = vmcs02; vmx_vcpu_put(vcpu); @@ -8399,7 +8428,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); - kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); @@ -8478,9 +8506,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) { + kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); + vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + } + /* TODO: These cannot have changed unless we have MSR bitmaps and * the relevant bit asks not to trap the change */ - vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) From 5381417f6a51293e7b8af1eb18aefa5d47976a71 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Jun 2014 13:59:44 +0200 Subject: [PATCH 024/104] KVM: nVMX: Fix returned value of MSR_IA32_VMX_VMCS_ENUM Many real CPUs get this wrong as well, but ours is totally off: bits 9:1 define the highest index value. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b93e2ae2bb628..a717c13b9466d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2461,7 +2461,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) *pdata = -1ULL; break; case MSR_IA32_VMX_VMCS_ENUM: - *pdata = 0x1f; + *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */ break; case MSR_IA32_VMX_PROCBASED_CTLS2: *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low, From 5777392e83c96e3a0799dd2985598e0fc76cf4aa Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 18 Jun 2014 17:19:23 +0300 Subject: [PATCH 025/104] KVM: x86: check DR6/7 high-bits are clear only on long-mode When the guest sets DR6 and DR7, KVM asserts the high 32-bits are clear, and otherwise injects a #GP exception. This exception should only be injected only if running in long-mode. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a717c13b9466d..b362a1a38e518 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5184,7 +5184,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) return 1; kvm_register_write(vcpu, reg, val); } else - if (kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg))) + if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg))) return 1; skip_emulated_instruction(vcpu); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 8c97bac9a895a..c5b61a7eb1441 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -47,6 +47,16 @@ static inline int is_long_mode(struct kvm_vcpu *vcpu) #endif } +static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu) +{ + int cs_db, cs_l; + + if (!is_long_mode(vcpu)) + return false; + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + return cs_l; +} + static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) { return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; @@ -108,6 +118,14 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) return false; } +static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + unsigned long val = kvm_register_read(vcpu, reg); + + return is_64_bit_mode(vcpu) ? val : (u32)val; +} + void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); From a449c7aa51e10c9bde0ea9bee4e682d6d067ebab Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 18 Jun 2014 17:19:24 +0300 Subject: [PATCH 026/104] KVM: x86: Hypercall handling does not considers opsize correctly Currently, the hypercall handling routine only considers LME as an indication to whether the guest uses 32/64-bit mode. This is incosistent with hyperv hypercalls handling and against the common sense of considering cs.l as well. This patch uses is_64_bit_mode instead of is_long_mode for that matter. In addition, the result is masked in respect to the guest execution mode. Last, it changes kvm_hv_hypercall to use is_64_bit_mode as well to simplify the code. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 451d6acea808e..874607ae0583e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5669,7 +5669,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) u64 param, ingpa, outgpa, ret; uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; bool fast, longmode; - int cs_db, cs_l; /* * hypercall generates UD from non zero cpl and real mode @@ -5680,8 +5679,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) return 0; } - kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - longmode = is_long_mode(vcpu) && cs_l == 1; + longmode = is_64_bit_mode(vcpu); if (!longmode) { param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | @@ -5746,7 +5744,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; - int r = 1; + int op_64_bit, r = 1; if (kvm_hv_hypercall_enabled(vcpu->kvm)) return kvm_hv_hypercall(vcpu); @@ -5759,7 +5757,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hypercall(nr, a0, a1, a2, a3); - if (!is_long_mode(vcpu)) { + op_64_bit = is_64_bit_mode(vcpu); + if (!op_64_bit) { nr &= 0xFFFFFFFF; a0 &= 0xFFFFFFFF; a1 &= 0xFFFFFFFF; @@ -5785,6 +5784,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) break; } out: + if (!op_64_bit) + ret = (u32)ret; kvm_register_write(vcpu, VCPU_REGS_RAX, ret); ++vcpu->stat.hypercalls; return r; From 1e32c07955b43e7f827174bf320ed35971117275 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 18 Jun 2014 17:19:25 +0300 Subject: [PATCH 027/104] KVM: vmx: handle_cr ignores 32/64-bit mode On 32-bit mode only bits [31:0] of the CR should be used for setting the CR value. Otherwise, the host may incorrectly assume the value is invalid if bits [63:32] are not zero. Moreover, the CR is currently being read twice when CR8 is used. Last, nested mov-cr exiting is modified to handle the CR value correctly as well. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b362a1a38e518..45024bf0e2293 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5058,7 +5058,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) reg = (exit_qualification >> 8) & 15; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ - val = kvm_register_read(vcpu, reg); + val = kvm_register_readl(vcpu, reg); trace_kvm_cr_write(cr, val); switch (cr) { case 0: @@ -5075,7 +5075,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) return 1; case 8: { u8 cr8_prev = kvm_get_cr8(vcpu); - u8 cr8 = kvm_register_read(vcpu, reg); + u8 cr8 = (u8)val; err = kvm_set_cr8(vcpu, cr8); kvm_complete_insn_gp(vcpu, err); if (irqchip_in_kernel(vcpu->kvm)) @@ -6770,7 +6770,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); int cr = exit_qualification & 15; int reg = (exit_qualification >> 8) & 15; - unsigned long val = kvm_register_read(vcpu, reg); + unsigned long val = kvm_register_readl(vcpu, reg); switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ From 27e6fb5dae2819d17f38dc9224692b771e989981 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 18 Jun 2014 17:19:26 +0300 Subject: [PATCH 028/104] KVM: vmx: vmx instructions handling does not consider cs.l VMX instructions use 32-bit operands in 32-bit mode, and 64-bit operands in 64-bit mode. The current implementation is broken since it does not use the register operands correctly, and always uses 64-bit for reads and writes. Moreover, write to memory in vmwrite only considers long-mode, so it ignores cs.l. This patch fixes this behavior. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 12 ++++++------ arch/x86/kvm/x86.h | 9 +++++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 45024bf0e2293..8748c2e19ed67 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6403,7 +6403,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) return 1; /* Decode instruction info and find the field to read */ - field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); + field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); /* Read the field, zero-extended to a u64 field_value */ if (!vmcs12_read_any(vcpu, field, &field_value)) { nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); @@ -6416,7 +6416,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) * on the guest's mode (32 or 64 bit), not on the given field's length. */ if (vmx_instruction_info & (1u << 10)) { - kvm_register_write(vcpu, (((vmx_instruction_info) >> 3) & 0xf), + kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf), field_value); } else { if (get_vmx_mem_address(vcpu, exit_qualification, @@ -6453,21 +6453,21 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return 1; if (vmx_instruction_info & (1u << 10)) - field_value = kvm_register_read(vcpu, + field_value = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 3) & 0xf)); else { if (get_vmx_mem_address(vcpu, exit_qualification, vmx_instruction_info, &gva)) return 1; if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, - &field_value, (is_long_mode(vcpu) ? 8 : 4), &e)) { + &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } } - field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); + field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); if (vmcs_field_readonly(field)) { nested_vmx_failValid(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); @@ -6590,7 +6590,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) } vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); + type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c5b61a7eb1441..306a1b77581fd 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -126,6 +126,15 @@ static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, return is_64_bit_mode(vcpu) ? val : (u32)val; } +static inline void kvm_register_writel(struct kvm_vcpu *vcpu, + enum kvm_reg reg, + unsigned long val) +{ + if (!is_64_bit_mode(vcpu)) + val = (u32)val; + return kvm_register_write(vcpu, reg, val); +} + void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); From d116e812f9026e3cca46ce1009e577afec62916d Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Thu, 26 Jun 2014 12:11:34 -0700 Subject: [PATCH 029/104] MIPS: KVM: Reformat code and comments No logic changes inside. Signed-off-by: Deng-Cheng Zhu Reviewed-by: James Hogan Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 10 +- arch/mips/include/asm/r4kcache.h | 3 + arch/mips/kvm/kvm_locore.S | 55 +++-- arch/mips/kvm/kvm_mips.c | 179 +++++++------- arch/mips/kvm/kvm_mips_comm.h | 21 +- arch/mips/kvm/kvm_mips_commpage.c | 21 +- arch/mips/kvm/kvm_mips_dyntrans.c | 38 ++- arch/mips/kvm/kvm_mips_emul.c | 389 +++++++++++++++--------------- arch/mips/kvm/kvm_mips_int.c | 45 ++-- arch/mips/kvm/kvm_mips_int.h | 22 +- arch/mips/kvm/kvm_mips_opcode.h | 26 +- arch/mips/kvm/kvm_mips_stats.c | 18 +- arch/mips/kvm/kvm_tlb.c | 194 +++++++-------- arch/mips/kvm/kvm_trap_emul.c | 77 +++--- arch/mips/kvm/trace.h | 18 +- 15 files changed, 564 insertions(+), 552 deletions(-) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index b0aa95565752a..3f813f295134b 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -359,13 +359,17 @@ enum emulation_result { #define MIPS3_PG_FRAME 0x3fffffc0 #define VPN2_MASK 0xffffe000 -#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && \ +#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && \ ((x).tlb_lo1 & MIPS3_PG_G)) #define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK) #define TLB_ASID(x) ((x).tlb_hi & ASID_MASK) -#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) \ - ? ((x).tlb_lo1 & MIPS3_PG_V) \ +#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) \ + ? ((x).tlb_lo1 & MIPS3_PG_V) \ : ((x).tlb_lo0 & MIPS3_PG_V)) +#define TLB_HI_VPN2_HIT(x, y) ((TLB_VPN2(x) & ~(x).tlb_mask) == \ + ((y) & VPN2_MASK & ~(x).tlb_mask)) +#define TLB_HI_ASID_HIT(x, y) (TLB_IS_GLOBAL(x) || \ + TLB_ASID(x) == ((y) & ASID_MASK)) struct kvm_mips_tlb { long tlb_mask; diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index 0b8bd28a0df1c..4520adc8699b9 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -19,6 +19,9 @@ #include #include /* for segment_eq() */ +extern void (*r4k_blast_dcache)(void); +extern void (*r4k_blast_icache)(void); + /* * This macro return a properly sign-extended address suitable as base address * for indexed cache operations. Two issues here: diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S index 033ac343e72c8..d7279c03c517a 100644 --- a/arch/mips/kvm/kvm_locore.S +++ b/arch/mips/kvm/kvm_locore.S @@ -16,7 +16,6 @@ #include #include - #define _C_LABEL(x) x #define MIPSX(name) mips32_ ## name #define CALLFRAME_SIZ 32 @@ -91,7 +90,10 @@ FEXPORT(__kvm_mips_vcpu_run) LONG_S $24, PT_R24(k1) LONG_S $25, PT_R25(k1) - /* XXXKYMA k0/k1 not saved, not being used if we got here through an ioctl() */ + /* + * XXXKYMA k0/k1 not saved, not being used if we got here through + * an ioctl() + */ LONG_S $28, PT_R28(k1) LONG_S $29, PT_R29(k1) @@ -132,7 +134,10 @@ FEXPORT(__kvm_mips_vcpu_run) /* Save the kernel gp as well */ LONG_S gp, VCPU_HOST_GP(k1) - /* Setup status register for running the guest in UM, interrupts are disabled */ + /* + * Setup status register for running the guest in UM, interrupts + * are disabled + */ li k0, (ST0_EXL | KSU_USER | ST0_BEV) mtc0 k0, CP0_STATUS ehb @@ -152,7 +157,6 @@ FEXPORT(__kvm_mips_vcpu_run) mtc0 k0, CP0_STATUS ehb - /* Set Guest EPC */ LONG_L t0, VCPU_PC(k1) mtc0 t0, CP0_EPC @@ -165,7 +169,7 @@ FEXPORT(__kvm_mips_load_asid) INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: - /* t1: contains the base of the ASID array, need to get the cpu id */ + /* t1: contains the base of the ASID array, need to get the cpu id */ LONG_L t2, TI_CPU($28) /* smp_processor_id */ INT_SLL t2, t2, 2 /* x4 */ REG_ADDU t3, t1, t2 @@ -229,9 +233,7 @@ FEXPORT(__kvm_mips_load_k0k1) eret VECTOR(MIPSX(exception), unknown) -/* - * Find out what mode we came from and jump to the proper handler. - */ +/* Find out what mode we came from and jump to the proper handler. */ mtc0 k0, CP0_ERROREPC #01: Save guest k0 ehb #02: @@ -239,7 +241,8 @@ VECTOR(MIPSX(exception), unknown) INT_SRL k0, k0, 10 #03: Get rid of CPUNum INT_SLL k0, k0, 10 #04 LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 - INT_ADDIU k0, k0, 0x2000 #06: Exception handler is installed @ offset 0x2000 + INT_ADDIU k0, k0, 0x2000 #06: Exception handler is + # installed @ offset 0x2000 j k0 #07: jump to the function nop #08: branch delay slot VECTOR_END(MIPSX(exceptionEnd)) @@ -248,7 +251,6 @@ VECTOR_END(MIPSX(exceptionEnd)) /* * Generic Guest exception handler. We end up here when the guest * does something that causes a trap to kernel mode. - * */ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Get the VCPU pointer from DDTATA_LO */ @@ -290,9 +292,7 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) LONG_S $30, VCPU_R30(k1) LONG_S $31, VCPU_R31(k1) - /* We need to save hi/lo and restore them on - * the way out - */ + /* We need to save hi/lo and restore them on the way out */ mfhi t0 LONG_S t0, VCPU_HI(k1) @@ -321,8 +321,10 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Save pointer to run in s0, will be saved by the compiler */ move s0, a0 - /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to - * process the exception */ + /* + * Save Host level EPC, BadVaddr and Cause to VCPU, useful to + * process the exception + */ mfc0 k0,CP0_EPC LONG_S k0, VCPU_PC(k1) @@ -351,7 +353,6 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) LONG_L k0, VCPU_HOST_EBASE(k1) mtc0 k0,CP0_EBASE - /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ .set at and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) @@ -369,7 +370,8 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Saved host state */ INT_ADDIU sp, sp, -PT_SIZE - /* XXXKYMA do we need to load the host ASID, maybe not because the + /* + * XXXKYMA do we need to load the host ASID, maybe not because the * kernel entries are marked GLOBAL, need to verify */ @@ -383,9 +385,11 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Jump to handler */ FEXPORT(__kvm_mips_jump_to_handler) - /* XXXKYMA: not sure if this is safe, how large is the stack?? + /* + * XXXKYMA: not sure if this is safe, how large is the stack?? * Now jump to the kvm_mips_handle_exit() to see if we can deal - * with this in the kernel */ + * with this in the kernel + */ PTR_LA t9, kvm_mips_handle_exit jalr.hb t9 INT_ADDIU sp, sp, -CALLFRAME_SIZ /* BD Slot */ @@ -394,7 +398,8 @@ FEXPORT(__kvm_mips_jump_to_handler) di ehb - /* XXXKYMA: k0/k1 could have been blown away if we processed + /* + * XXXKYMA: k0/k1 could have been blown away if we processed * an exception while we were handling the exception from the * guest, reload k1 */ @@ -402,7 +407,8 @@ FEXPORT(__kvm_mips_jump_to_handler) move k1, s1 INT_ADDIU k1, k1, VCPU_HOST_ARCH - /* Check return value, should tell us if we are returning to the + /* + * Check return value, should tell us if we are returning to the * host (handle I/O etc)or resuming the guest */ andi t0, v0, RESUME_HOST @@ -521,8 +527,10 @@ __kvm_mips_return_to_host: LONG_L $0, PT_R0(k1) LONG_L $1, PT_R1(k1) - /* r2/v0 is the return code, shift it down by 2 (arithmetic) - * to recover the err code */ + /* + * r2/v0 is the return code, shift it down by 2 (arithmetic) + * to recover the err code + */ INT_SRA k0, v0, 2 move $2, k0 @@ -566,7 +574,6 @@ __kvm_mips_return_to_host: PTR_LI k0, 0x2000000F mtc0 k0, CP0_HWRENA - /* Restore RA, which is the address we will return to */ LONG_L ra, PT_R31(k1) j ra diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index cd5e4f568439e..52be52adf0308 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -7,7 +7,7 @@ * * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. * Authors: Sanjay Lal -*/ + */ #include #include @@ -31,38 +31,41 @@ #define VECTORSPACING 0x100 /* for EI/VI mode */ #endif -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x) struct kvm_stats_debugfs_item debugfs_entries[] = { - { "wait", VCPU_STAT(wait_exits) }, - { "cache", VCPU_STAT(cache_exits) }, - { "signal", VCPU_STAT(signal_exits) }, - { "interrupt", VCPU_STAT(int_exits) }, - { "cop_unsuable", VCPU_STAT(cop_unusable_exits) }, - { "tlbmod", VCPU_STAT(tlbmod_exits) }, - { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits) }, - { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits) }, - { "addrerr_st", VCPU_STAT(addrerr_st_exits) }, - { "addrerr_ld", VCPU_STAT(addrerr_ld_exits) }, - { "syscall", VCPU_STAT(syscall_exits) }, - { "resvd_inst", VCPU_STAT(resvd_inst_exits) }, - { "break_inst", VCPU_STAT(break_inst_exits) }, - { "flush_dcache", VCPU_STAT(flush_dcache_exits) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, + { "wait", VCPU_STAT(wait_exits), KVM_STAT_VCPU }, + { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU }, + { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU }, + { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU }, + { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, + { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU }, + { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU }, + { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU }, + { "addrerr_st", VCPU_STAT(addrerr_st_exits), KVM_STAT_VCPU }, + { "addrerr_ld", VCPU_STAT(addrerr_ld_exits), KVM_STAT_VCPU }, + { "syscall", VCPU_STAT(syscall_exits), KVM_STAT_VCPU }, + { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, + { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, + { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, + { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, {NULL} }; static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu) { int i; + for_each_possible_cpu(i) { vcpu->arch.guest_kernel_asid[i] = 0; vcpu->arch.guest_user_asid[i] = 0; } + return 0; } -/* XXXKYMA: We are simulatoring a processor that has the WII bit set in Config7, so we - * are "runnable" if interrupts are pending +/* + * XXXKYMA: We are simulatoring a processor that has the WII bit set in + * Config7, so we are "runnable" if interrupts are pending */ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { @@ -103,7 +106,10 @@ static void kvm_mips_init_tlbs(struct kvm *kvm) { unsigned long wired; - /* Add a wired entry to the TLB, it is used to map the commpage to the Guest kernel */ + /* + * Add a wired entry to the TLB, it is used to map the commpage to + * the Guest kernel + */ wired = read_c0_wired(); write_c0_wired(wired + 1); mtc0_tlbw_hazard(); @@ -130,7 +136,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1); } - return 0; } @@ -185,8 +190,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } -long -kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) { return -ENOIOCTLCMD; } @@ -207,17 +212,17 @@ void kvm_arch_memslots_updated(struct kvm *kvm) } int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, - enum kvm_mr_change change) + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { unsigned long npages = 0; int i, err = 0; @@ -246,9 +251,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, npages, kvm->arch.guest_pmap); /* Now setup the page table */ - for (i = 0; i < npages; i++) { + for (i = 0; i < npages; i++) kvm->arch.guest_pmap[i] = KVM_INVALID_PAGE; - } } } out: @@ -270,8 +274,6 @@ void kvm_arch_flush_shadow(struct kvm *kvm) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { - extern char mips32_exception[], mips32_exceptionEnd[]; - extern char mips32_GuestException[], mips32_GuestExceptionEnd[]; int err, size, offset; void *gebase; int i; @@ -290,14 +292,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) kvm_debug("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu); - /* Allocate space for host mode exception handlers that handle + /* + * Allocate space for host mode exception handlers that handle * guest mode exits */ - if (cpu_has_veic || cpu_has_vint) { + if (cpu_has_veic || cpu_has_vint) size = 0x200 + VECTORSPACING * 64; - } else { + else size = 0x4000; - } /* Save Linux EBASE */ vcpu->arch.host_ebase = (void *)read_c0_ebase(); @@ -345,7 +347,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) local_flush_icache_range((unsigned long)gebase, (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); - /* Allocate comm page for guest kernel, a TLB will be reserved for mapping GVA @ 0xFFFF8000 to this page */ + /* + * Allocate comm page for guest kernel, a TLB will be reserved for + * mapping GVA @ 0xFFFF8000 to this page + */ vcpu->arch.kseg0_commpage = kzalloc(PAGE_SIZE << 1, GFP_KERNEL); if (!vcpu->arch.kseg0_commpage) { @@ -391,9 +396,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_arch_vcpu_free(vcpu); } -int -kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { return -ENOIOCTLCMD; } @@ -430,8 +434,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) return r; } -int -kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq) +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) { int intr = (int)irq->irq; struct kvm_vcpu *dvcpu = NULL; @@ -458,23 +462,20 @@ kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq) dvcpu->arch.wait = 0; - if (waitqueue_active(&dvcpu->wq)) { + if (waitqueue_active(&dvcpu->wq)) wake_up_interruptible(&dvcpu->wq); - } return 0; } -int -kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) { return -ENOIOCTLCMD; } -int -kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) { return -ENOIOCTLCMD; } @@ -631,10 +632,12 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, } if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) { u64 __user *uaddr64 = (u64 __user *)(long)reg->addr; + return put_user(v, uaddr64); } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) { u32 __user *uaddr32 = (u32 __user *)(long)reg->addr; u32 v32 = (u32)v; + return put_user(v32, uaddr32); } else { return -EINVAL; @@ -727,8 +730,8 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, return 0; } -long -kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; @@ -738,6 +741,7 @@ kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { struct kvm_one_reg reg; + if (copy_from_user(®, argp, sizeof(reg))) return -EFAULT; if (ioctl == KVM_SET_ONE_REG) @@ -772,6 +776,7 @@ kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) case KVM_INTERRUPT: { struct kvm_mips_interrupt irq; + r = -EFAULT; if (copy_from_user(&irq, argp, sizeof(irq))) goto out; @@ -790,9 +795,7 @@ kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return r; } -/* - * Get (and clear) the dirty memory log for a memory slot. - */ +/* Get (and clear) the dirty memory log for a memory slot. */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { struct kvm_memory_slot *memslot; @@ -859,14 +862,14 @@ void kvm_arch_exit(void) kvm_mips_callbacks = NULL; } -int -kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { return -ENOIOCTLCMD; } -int -kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { return -ENOIOCTLCMD; } @@ -979,14 +982,11 @@ static void kvm_mips_comparecount_func(unsigned long data) kvm_mips_callbacks->queue_timer_int(vcpu); vcpu->arch.wait = 0; - if (waitqueue_active(&vcpu->wq)) { + if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); - } } -/* - * low level hrtimer wake routine. - */ +/* low level hrtimer wake routine */ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; @@ -1010,8 +1010,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) return; } -int -kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) { return 0; } @@ -1022,8 +1022,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return kvm_mips_callbacks->vcpu_setup(vcpu); } -static -void kvm_mips_set_c0_status(void) +static void kvm_mips_set_c0_status(void) { uint32_t status = read_c0_status(); @@ -1053,7 +1052,10 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; - /* Set the appropriate status bits based on host CPU features, before we hit the scheduler */ + /* + * Set the appropriate status bits based on host CPU features, + * before we hit the scheduler + */ kvm_mips_set_c0_status(); local_irq_enable(); @@ -1061,7 +1063,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) kvm_debug("kvm_mips_handle_exit: cause: %#x, PC: %p, kvm_run: %p, kvm_vcpu: %p\n", cause, opc, run, vcpu); - /* Do a privilege check, if in UM most of these exit conditions end up + /* + * Do a privilege check, if in UM most of these exit conditions end up * causing an exception to be delivered to the Guest Kernel */ er = kvm_mips_check_privilege(cause, opc, run, vcpu); @@ -1080,9 +1083,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) ++vcpu->stat.int_exits; trace_kvm_exit(vcpu, INT_EXITS); - if (need_resched()) { + if (need_resched()) cond_resched(); - } ret = RESUME_GUEST; break; @@ -1094,9 +1096,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) trace_kvm_exit(vcpu, COP_UNUSABLE_EXITS); ret = kvm_mips_callbacks->handle_cop_unusable(vcpu); /* XXXKYMA: Might need to return to user space */ - if (run->exit_reason == KVM_EXIT_IRQ_WINDOW_OPEN) { + if (run->exit_reason == KVM_EXIT_IRQ_WINDOW_OPEN) ret = RESUME_HOST; - } break; case T_TLB_MOD: @@ -1106,10 +1107,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case T_TLB_ST_MISS: - kvm_debug - ("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n", - cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc, - badvaddr); + kvm_debug("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n", + cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc, + badvaddr); ++vcpu->stat.tlbmiss_st_exits; trace_kvm_exit(vcpu, TLBMISS_ST_EXITS); @@ -1156,10 +1156,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) break; default: - kvm_err - ("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", - exccode, opc, kvm_get_inst(opc, vcpu), badvaddr, - kvm_read_c0_guest_status(vcpu->arch.cop0)); + kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", + exccode, opc, kvm_get_inst(opc, vcpu), badvaddr, + kvm_read_c0_guest_status(vcpu->arch.cop0)); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; @@ -1174,7 +1173,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) kvm_mips_deliver_interrupts(vcpu, cause); if (!(ret & RESUME_HOST)) { - /* Only check for signals if not already exiting to userspace */ + /* Only check for signals if not already exiting to userspace */ if (signal_pending(current)) { run->exit_reason = KVM_EXIT_INTR; ret = (-EINTR << 2) | RESUME_HOST; @@ -1195,11 +1194,13 @@ int __init kvm_mips_init(void) if (ret) return ret; - /* On MIPS, kernel modules are executed from "mapped space", which requires TLBs. - * The TLB handling code is statically linked with the rest of the kernel (kvm_tlb.c) - * to avoid the possibility of double faulting. The issue is that the TLB code - * references routines that are part of the the KVM module, - * which are only available once the module is loaded. + /* + * On MIPS, kernel modules are executed from "mapped space", which + * requires TLBs. The TLB handling code is statically linked with + * the rest of the kernel (kvm_tlb.c) to avoid the possibility of + * double faulting. The issue is that the TLB code references + * routines that are part of the the KVM module, which are only + * available once the module is loaded. */ kvm_mips_gfn_to_pfn = gfn_to_pfn; kvm_mips_release_pfn_clean = kvm_release_pfn_clean; diff --git a/arch/mips/kvm/kvm_mips_comm.h b/arch/mips/kvm/kvm_mips_comm.h index a4a8c85cc8f71..08c5fa2bbc0f7 100644 --- a/arch/mips/kvm/kvm_mips_comm.h +++ b/arch/mips/kvm/kvm_mips_comm.h @@ -1,19 +1,20 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: commpage: mapped into get kernel space -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: commpage: mapped into get kernel space + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal + */ #ifndef __KVM_MIPS_COMMPAGE_H__ #define __KVM_MIPS_COMMPAGE_H__ struct kvm_mips_commpage { - struct mips_coproc cop0; /* COP0 state is mapped into Guest kernel via commpage */ + /* COP0 state is mapped into Guest kernel via commpage */ + struct mips_coproc cop0; }; #define KVM_MIPS_COMM_EIDI_OFFSET 0x0 diff --git a/arch/mips/kvm/kvm_mips_commpage.c b/arch/mips/kvm/kvm_mips_commpage.c index 3873b1ecc40f6..ab7096ec0666a 100644 --- a/arch/mips/kvm/kvm_mips_commpage.c +++ b/arch/mips/kvm/kvm_mips_commpage.c @@ -1,14 +1,14 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* commpage, currently used for Virtual COP0 registers. -* Mapped into the guest kernel @ 0x0. -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * commpage, currently used for Virtual COP0 registers. + * Mapped into the guest kernel @ 0x0. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal + */ #include #include @@ -27,6 +27,7 @@ void kvm_mips_commpage_init(struct kvm_vcpu *vcpu) { struct kvm_mips_commpage *page = vcpu->arch.kseg0_commpage; + memset(page, 0, sizeof(struct kvm_mips_commpage)); /* Specific init values for fields */ diff --git a/arch/mips/kvm/kvm_mips_dyntrans.c b/arch/mips/kvm/kvm_mips_dyntrans.c index b80e41d858fd7..fa7184df5450e 100644 --- a/arch/mips/kvm/kvm_mips_dyntrans.c +++ b/arch/mips/kvm/kvm_mips_dyntrans.c @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: Binary Patching for privileged instructions, reduces traps. -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Binary Patching for privileged instructions, reduces traps. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal + */ #include #include @@ -28,9 +28,8 @@ #define CLEAR_TEMPLATE 0x00000020 #define SW_TEMPLATE 0xac000000 -int -kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc, - struct kvm_vcpu *vcpu) +int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc, + struct kvm_vcpu *vcpu) { int result = 0; unsigned long kseg0_opc; @@ -47,12 +46,11 @@ kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc, } /* - * Address based CACHE instructions are transformed into synci(s). A little heavy - * for just D-cache invalidates, but avoids an expensive trap + * Address based CACHE instructions are transformed into synci(s). A little + * heavy for just D-cache invalidates, but avoids an expensive trap */ -int -kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc, - struct kvm_vcpu *vcpu) +int kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc, + struct kvm_vcpu *vcpu) { int result = 0; unsigned long kseg0_opc; @@ -72,8 +70,7 @@ kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc, return result; } -int -kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) +int kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) { int32_t rt, rd, sel; uint32_t mfc0_inst; @@ -115,8 +112,7 @@ kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) return 0; } -int -kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) +int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) { int32_t rt, rd, sel; uint32_t mtc0_inst = SW_TEMPLATE; diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c index 8d48400900828..9ec9f1d54b9bf 100644 --- a/arch/mips/kvm/kvm_mips_emul.c +++ b/arch/mips/kvm/kvm_mips_emul.c @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: Instruction/Exception emulation -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Instruction/Exception emulation + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal + */ #include #include @@ -51,18 +51,14 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, if (epc & 3) goto unaligned; - /* - * Read the instruction - */ + /* Read the instruction */ insn.word = kvm_get_inst((uint32_t *) epc, vcpu); if (insn.word == KVM_INVALID_INST) return KVM_INVALID_INST; switch (insn.i_format.opcode) { - /* - * jr and jalr are in r_format format. - */ + /* jr and jalr are in r_format format. */ case spec_op: switch (insn.r_format.func) { case jalr_op: @@ -124,18 +120,16 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, dspcontrol = rddsp(0x01); - if (dspcontrol >= 32) { + if (dspcontrol >= 32) epc = epc + 4 + (insn.i_format.simmediate << 2); - } else + else epc += 8; nextpc = epc; break; } break; - /* - * These are unconditional and in j_format. - */ + /* These are unconditional and in j_format. */ case jal_op: arch->gprs[31] = instpc + 8; case j_op: @@ -146,9 +140,7 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, nextpc = epc; break; - /* - * These are conditional and in i_format. - */ + /* These are conditional and in i_format. */ case beq_op: case beql_op: if (arch->gprs[insn.i_format.rs] == @@ -189,9 +181,7 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, nextpc = epc; break; - /* - * And now the FPA/cp1 branch instructions. - */ + /* And now the FPA/cp1 branch instructions. */ case cop1_op: printk("%s: unsupported cop1_op\n", __func__); break; @@ -219,7 +209,8 @@ enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause) er = EMULATE_FAIL; } else { vcpu->arch.pc = branch_pc; - kvm_debug("BD update_pc(): New PC: %#lx\n", vcpu->arch.pc); + kvm_debug("BD update_pc(): New PC: %#lx\n", + vcpu->arch.pc); } } else vcpu->arch.pc += 4; @@ -240,6 +231,7 @@ enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause) static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; + return (vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) || (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC); } @@ -392,7 +384,6 @@ static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, return now; } - /** * kvm_mips_resume_hrtimer() - Resume hrtimer, updating expiry. * @vcpu: Virtual CPU. @@ -781,8 +772,9 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) vcpu->arch.wait = 1; kvm_vcpu_block(vcpu); - /* We we are runnable, then definitely go off to user space to check if any - * I/O interrupts are pending. + /* + * We we are runnable, then definitely go off to user space to + * check if any I/O interrupts are pending. */ if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { clear_bit(KVM_REQ_UNHALT, &vcpu->requests); @@ -793,8 +785,9 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) return er; } -/* XXXKYMA: Linux doesn't seem to use TLBR, return EMULATE_FAIL for now so that we can catch - * this, if things ever change +/* + * XXXKYMA: Linux doesn't seem to use TLBR, return EMULATE_FAIL for now so that + * we can catch this, if things ever change */ enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) { @@ -827,21 +820,22 @@ enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu) } tlb = &vcpu->arch.guest_tlb[index]; -#if 1 - /* Probe the shadow host TLB for the entry being overwritten, if one matches, invalidate it */ + /* + * Probe the shadow host TLB for the entry being overwritten, if one + * matches, invalidate it + */ kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi); -#endif tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0); tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0); tlb->tlb_lo0 = kvm_read_c0_guest_entrylo0(cop0); tlb->tlb_lo1 = kvm_read_c0_guest_entrylo1(cop0); - kvm_debug - ("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", - pc, index, kvm_read_c0_guest_entryhi(cop0), - kvm_read_c0_guest_entrylo0(cop0), kvm_read_c0_guest_entrylo1(cop0), - kvm_read_c0_guest_pagemask(cop0)); + kvm_debug("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", + pc, index, kvm_read_c0_guest_entryhi(cop0), + kvm_read_c0_guest_entrylo0(cop0), + kvm_read_c0_guest_entrylo1(cop0), + kvm_read_c0_guest_pagemask(cop0)); return er; } @@ -855,12 +849,8 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu) uint32_t pc = vcpu->arch.pc; int index; -#if 1 get_random_bytes(&index, sizeof(index)); index &= (KVM_MIPS_GUEST_TLB_SIZE - 1); -#else - index = jiffies % KVM_MIPS_GUEST_TLB_SIZE; -#endif if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) { printk("%s: illegal index: %d\n", __func__, index); @@ -869,21 +859,21 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu) tlb = &vcpu->arch.guest_tlb[index]; -#if 1 - /* Probe the shadow host TLB for the entry being overwritten, if one matches, invalidate it */ + /* + * Probe the shadow host TLB for the entry being overwritten, if one + * matches, invalidate it + */ kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi); -#endif tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0); tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0); tlb->tlb_lo0 = kvm_read_c0_guest_entrylo0(cop0); tlb->tlb_lo1 = kvm_read_c0_guest_entrylo1(cop0); - kvm_debug - ("[%#x] COP0_TLBWR[%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx)\n", - pc, index, kvm_read_c0_guest_entryhi(cop0), - kvm_read_c0_guest_entrylo0(cop0), - kvm_read_c0_guest_entrylo1(cop0)); + kvm_debug("[%#x] COP0_TLBWR[%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx)\n", + pc, index, kvm_read_c0_guest_entryhi(cop0), + kvm_read_c0_guest_entrylo0(cop0), + kvm_read_c0_guest_entrylo1(cop0)); return er; } @@ -906,9 +896,9 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu) return er; } -enum emulation_result -kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, + uint32_t cause, struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; enum emulation_result er = EMULATE_DONE; @@ -922,9 +912,8 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, */ curr_pc = vcpu->arch.pc; er = update_pc(vcpu, cause); - if (er == EMULATE_FAIL) { + if (er == EMULATE_FAIL) return er; - } copz = (inst >> 21) & 0x1f; rt = (inst >> 16) & 0x1f; @@ -973,8 +962,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, #ifdef CONFIG_KVM_MIPS_DYN_TRANS kvm_mips_trans_mfc0(inst, opc, vcpu); #endif - } - else { + } else { vcpu->arch.gprs[rt] = cop0->reg[rd][sel]; #ifdef CONFIG_KVM_MIPS_DYN_TRANS @@ -1014,17 +1002,15 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, kvm_read_c0_guest_ebase(cop0)); } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { uint32_t nasid = - vcpu->arch.gprs[rt] & ASID_MASK; - if ((KSEGX(vcpu->arch.gprs[rt]) != CKSEG0) - && + vcpu->arch.gprs[rt] & ASID_MASK; + if ((KSEGX(vcpu->arch.gprs[rt]) != CKSEG0) && ((kvm_read_c0_guest_entryhi(cop0) & ASID_MASK) != nasid)) { - - kvm_debug - ("MTCz, change ASID from %#lx to %#lx\n", - kvm_read_c0_guest_entryhi(cop0) & - ASID_MASK, - vcpu->arch.gprs[rt] & ASID_MASK); + kvm_debug("MTCz, change ASID from %#lx to %#lx\n", + kvm_read_c0_guest_entryhi(cop0) + & ASID_MASK, + vcpu->arch.gprs[rt] + & ASID_MASK); /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); @@ -1049,7 +1035,10 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { kvm_write_c0_guest_status(cop0, vcpu->arch.gprs[rt]); - /* Make sure that CU1 and NMI bits are never set */ + /* + * Make sure that CU1 and NMI bits are + * never set + */ kvm_clear_c0_guest_status(cop0, (ST0_CU1 | ST0_NMI)); @@ -1058,6 +1047,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, #endif } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) { uint32_t old_cause, new_cause; + old_cause = kvm_read_c0_guest_cause(cop0); new_cause = vcpu->arch.gprs[rt]; /* Update R/W bits */ @@ -1115,7 +1105,10 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, cop0->reg[MIPS_CP0_STATUS][2] & 0xf; uint32_t pss = (cop0->reg[MIPS_CP0_STATUS][2] >> 6) & 0xf; - /* We don't support any shadow register sets, so SRSCtl[PSS] == SRSCtl[CSS] = 0 */ + /* + * We don't support any shadow register sets, so + * SRSCtl[PSS] == SRSCtl[CSS] = 0 + */ if (css || pss) { er = EMULATE_FAIL; break; @@ -1135,12 +1128,9 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, } done: - /* - * Rollback PC only if emulation was unsuccessful - */ - if (er == EMULATE_FAIL) { + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) vcpu->arch.pc = curr_pc; - } dont_update_pc: /* @@ -1152,9 +1142,9 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, return er; } -enum emulation_result -kvm_mips_emulate_store(uint32_t inst, uint32_t cause, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DO_MMIO; int32_t op, base, rt, offset; @@ -1257,19 +1247,16 @@ kvm_mips_emulate_store(uint32_t inst, uint32_t cause, break; } - /* - * Rollback PC if emulation was unsuccessful - */ - if (er == EMULATE_FAIL) { + /* Rollback PC if emulation was unsuccessful */ + if (er == EMULATE_FAIL) vcpu->arch.pc = curr_pc; - } return er; } -enum emulation_result -kvm_mips_emulate_load(uint32_t inst, uint32_t cause, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_load(uint32_t inst, uint32_t cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DO_MMIO; int32_t op, base, rt, offset; @@ -1410,13 +1397,12 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu) #define MIPS_CACHE_DCACHE 0x1 #define MIPS_CACHE_SEC 0x3 -enum emulation_result -kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, + uint32_t cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - extern void (*r4k_blast_dcache) (void); - extern void (*r4k_blast_icache) (void); enum emulation_result er = EMULATE_DONE; int32_t offset, cache, op_inst, op, base; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1443,14 +1429,15 @@ kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, kvm_debug("CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", cache, op, base, arch->gprs[base], offset); - /* Treat INDEX_INV as a nop, basically issued by Linux on startup to invalidate - * the caches entirely by stepping through all the ways/indexes + /* + * Treat INDEX_INV as a nop, basically issued by Linux on startup to + * invalidate the caches entirely by stepping through all the + * ways/indexes */ if (op == MIPS_CACHE_OP_INDEX_INV) { - kvm_debug - ("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", - vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base, - arch->gprs[base], offset); + kvm_debug("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base, + arch->gprs[base], offset); if (cache == MIPS_CACHE_DCACHE) r4k_blast_dcache(); @@ -1470,21 +1457,19 @@ kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, preempt_disable(); if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) { - - if (kvm_mips_host_tlb_lookup(vcpu, va) < 0) { + if (kvm_mips_host_tlb_lookup(vcpu, va) < 0) kvm_mips_handle_kseg0_tlb_fault(va, vcpu); - } } else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) || KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) { int index; /* If an entry already exists then skip */ - if (kvm_mips_host_tlb_lookup(vcpu, va) >= 0) { + if (kvm_mips_host_tlb_lookup(vcpu, va) >= 0) goto skip_fault; - } - /* If address not in the guest TLB, then give the guest a fault, the - * resulting handler will do the right thing + /* + * If address not in the guest TLB, then give the guest a fault, + * the resulting handler will do the right thing */ index = kvm_mips_guest_tlb_lookup(vcpu, (va & VPN2_MASK) | (kvm_read_c0_guest_entryhi @@ -1499,14 +1484,20 @@ kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, goto dont_update_pc; } else { struct kvm_mips_tlb *tlb = &vcpu->arch.guest_tlb[index]; - /* Check if the entry is valid, if not then setup a TLB invalid exception to the guest */ + /* + * Check if the entry is valid, if not then setup a TLB + * invalid exception to the guest + */ if (!TLB_IS_VALID(*tlb, va)) { er = kvm_mips_emulate_tlbinv_ld(cause, NULL, run, vcpu); preempt_enable(); goto dont_update_pc; } else { - /* We fault an entry from the guest tlb to the shadow host TLB */ + /* + * We fault an entry from the guest tlb to the + * shadow host TLB + */ kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL, NULL); @@ -1530,7 +1521,10 @@ kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, flush_dcache_line(va); #ifdef CONFIG_KVM_MIPS_DYN_TRANS - /* Replace the CACHE instruction, with a SYNCI, not the same, but avoids a trap */ + /* + * Replace the CACHE instruction, with a SYNCI, not the same, + * but avoids a trap + */ kvm_mips_trans_cache_va(inst, opc, vcpu); #endif } else if (op == MIPS_CACHE_OP_HIT_INV && cache == MIPS_CACHE_ICACHE) { @@ -1552,28 +1546,23 @@ kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, preempt_enable(); - dont_update_pc: - /* - * Rollback PC - */ +dont_update_pc: + /* Rollback PC */ vcpu->arch.pc = curr_pc; - done: +done: return er; } -enum emulation_result -kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DONE; uint32_t inst; - /* - * Fetch the instruction. - */ - if (cause & CAUSEF_BD) { + /* Fetch the instruction. */ + if (cause & CAUSEF_BD) opc += 1; - } inst = kvm_get_inst(opc, vcpu); @@ -1611,9 +1600,10 @@ kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_syscall(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_syscall(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1645,9 +1635,10 @@ kvm_mips_emulate_syscall(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_tlbmiss_ld(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1691,9 +1682,10 @@ kvm_mips_emulate_tlbmiss_ld(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_tlbinv_ld(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1737,9 +1729,10 @@ kvm_mips_emulate_tlbinv_ld(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_tlbmiss_st(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1781,9 +1774,10 @@ kvm_mips_emulate_tlbmiss_st(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_tlbinv_st(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1826,9 +1820,9 @@ kvm_mips_emulate_tlbinv_st(unsigned long cause, uint32_t *opc, } /* TLBMOD: store into address matching TLB with Dirty bit off */ -enum emulation_result -kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DONE; #ifdef DEBUG @@ -1837,9 +1831,7 @@ kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc, (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); int index; - /* - * If address not in the guest TLB, then we are in trouble - */ + /* If address not in the guest TLB, then we are in trouble */ index = kvm_mips_guest_tlb_lookup(vcpu, entryhi); if (index < 0) { /* XXXKYMA Invalidate and retry */ @@ -1856,9 +1848,10 @@ kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_tlbmod(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | @@ -1898,9 +1891,10 @@ kvm_mips_emulate_tlbmod(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_fpu_exc(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1927,9 +1921,10 @@ kvm_mips_emulate_fpu_exc(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_ri_exc(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1961,9 +1956,10 @@ kvm_mips_emulate_ri_exc(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_bp_exc(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1995,9 +1991,7 @@ kvm_mips_emulate_bp_exc(unsigned long cause, uint32_t *opc, return er; } -/* - * ll/sc, rdhwr, sync emulation - */ +/* ll/sc, rdhwr, sync emulation */ #define OPCODE 0xfc000000 #define BASE 0x03e00000 @@ -2012,9 +2006,9 @@ kvm_mips_emulate_bp_exc(unsigned long cause, uint32_t *opc, #define SYNC 0x0000000f #define RDHWR 0x0000003b -enum emulation_result -kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -2031,9 +2025,7 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, if (er == EMULATE_FAIL) return er; - /* - * Fetch the instruction. - */ + /* Fetch the instruction. */ if (cause & CAUSEF_BD) opc += 1; @@ -2099,8 +2091,8 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, return kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); } -enum emulation_result -kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) +enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, + struct kvm_run *run) { unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr]; enum emulation_result er = EMULATE_DONE; @@ -2142,18 +2134,18 @@ kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) } if (vcpu->arch.pending_load_cause & CAUSEF_BD) - kvm_debug - ("[%#lx] Completing %d byte BD Load to gpr %d (0x%08lx) type %d\n", - vcpu->arch.pc, run->mmio.len, vcpu->arch.io_gpr, *gpr, - vcpu->mmio_needed); + kvm_debug("[%#lx] Completing %d byte BD Load to gpr %d (0x%08lx) type %d\n", + vcpu->arch.pc, run->mmio.len, vcpu->arch.io_gpr, *gpr, + vcpu->mmio_needed); done: return er; } -static enum emulation_result -kvm_mips_emulate_exc(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +static enum emulation_result kvm_mips_emulate_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; struct mips_coproc *cop0 = vcpu->arch.cop0; @@ -2188,9 +2180,10 @@ kvm_mips_emulate_exc(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_check_privilege(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_check_privilege(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DONE; uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; @@ -2215,7 +2208,10 @@ kvm_mips_check_privilege(unsigned long cause, uint32_t *opc, break; case T_TLB_LD_MISS: - /* We we are accessing Guest kernel space, then send an address error exception to the guest */ + /* + * We we are accessing Guest kernel space, then send an + * address error exception to the guest + */ if (badvaddr >= (unsigned long) KVM_GUEST_KSEG0) { printk("%s: LD MISS @ %#lx\n", __func__, badvaddr); @@ -2226,7 +2222,10 @@ kvm_mips_check_privilege(unsigned long cause, uint32_t *opc, break; case T_TLB_ST_MISS: - /* We we are accessing Guest kernel space, then send an address error exception to the guest */ + /* + * We we are accessing Guest kernel space, then send an + * address error exception to the guest + */ if (badvaddr >= (unsigned long) KVM_GUEST_KSEG0) { printk("%s: ST MISS @ %#lx\n", __func__, badvaddr); @@ -2260,21 +2259,23 @@ kvm_mips_check_privilege(unsigned long cause, uint32_t *opc, } } - if (er == EMULATE_PRIV_FAIL) { + if (er == EMULATE_PRIV_FAIL) kvm_mips_emulate_exc(cause, opc, run, vcpu); - } + return er; } -/* User Address (UA) fault, this could happen if +/* + * User Address (UA) fault, this could happen if * (1) TLB entry not present/valid in both Guest and shadow host TLBs, in this * case we pass on the fault to the guest kernel and let it handle it. * (2) TLB entry is present in the Guest TLB but not in the shadow, in this * case we inject the TLB from the Guest TLB into the shadow host TLB */ -enum emulation_result -kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DONE; uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; @@ -2284,10 +2285,11 @@ kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc, kvm_debug("kvm_mips_handle_tlbmiss: badvaddr: %#lx, entryhi: %#lx\n", vcpu->arch.host_cp0_badvaddr, vcpu->arch.host_cp0_entryhi); - /* KVM would not have got the exception if this entry was valid in the shadow host TLB - * Check the Guest TLB, if the entry is not there then send the guest an - * exception. The guest exc handler should then inject an entry into the - * guest TLB + /* + * KVM would not have got the exception if this entry was valid in the + * shadow host TLB. Check the Guest TLB, if the entry is not there then + * send the guest an exception. The guest exc handler should then inject + * an entry into the guest TLB. */ index = kvm_mips_guest_tlb_lookup(vcpu, (va & VPN2_MASK) | @@ -2305,7 +2307,10 @@ kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc, } else { struct kvm_mips_tlb *tlb = &vcpu->arch.guest_tlb[index]; - /* Check if the entry is valid, if not then setup a TLB invalid exception to the guest */ + /* + * Check if the entry is valid, if not then setup a TLB invalid + * exception to the guest + */ if (!TLB_IS_VALID(*tlb, va)) { if (exccode == T_TLB_LD_MISS) { er = kvm_mips_emulate_tlbinv_ld(cause, opc, run, @@ -2319,10 +2324,12 @@ kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc, er = EMULATE_FAIL; } } else { - kvm_debug - ("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n", - tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1); - /* OK we have a Guest TLB entry, now inject it into the shadow host TLB */ + kvm_debug("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n", + tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1); + /* + * OK we have a Guest TLB entry, now inject it into the + * shadow host TLB + */ kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL, NULL); } diff --git a/arch/mips/kvm/kvm_mips_int.c b/arch/mips/kvm/kvm_mips_int.c index 1e5de16afe297..d458c042d5589 100644 --- a/arch/mips/kvm/kvm_mips_int.c +++ b/arch/mips/kvm/kvm_mips_int.c @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: Interrupt delivery -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Interrupt delivery + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal + */ #include #include @@ -34,7 +34,8 @@ void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, uint32_t priority) void kvm_mips_queue_timer_int_cb(struct kvm_vcpu *vcpu) { - /* Cause bits to reflect the pending timer interrupt, + /* + * Cause bits to reflect the pending timer interrupt, * the EXC code will be set when we are actually * delivering the interrupt: */ @@ -51,12 +52,13 @@ void kvm_mips_dequeue_timer_int_cb(struct kvm_vcpu *vcpu) kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_TIMER); } -void -kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq) +void kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) { int intr = (int)irq->irq; - /* Cause bits to reflect the pending IO interrupt, + /* + * Cause bits to reflect the pending IO interrupt, * the EXC code will be set when we are actually * delivering the interrupt: */ @@ -83,11 +85,11 @@ kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq) } -void -kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, - struct kvm_mips_interrupt *irq) +void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) { int intr = (int)irq->irq; + switch (intr) { case -2: kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ0)); @@ -111,9 +113,8 @@ kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, } /* Deliver the interrupt of the corresponding priority, if possible. */ -int -kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause) +int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, + uint32_t cause) { int allowed = 0; uint32_t exccode; @@ -164,7 +165,6 @@ kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, /* Are we allowed to deliver the interrupt ??? */ if (allowed) { - if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { /* save old pc */ kvm_write_c0_guest_epc(cop0, arch->pc); @@ -195,9 +195,8 @@ kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, return allowed; } -int -kvm_mips_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause) +int kvm_mips_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, + uint32_t cause) { return 1; } diff --git a/arch/mips/kvm/kvm_mips_int.h b/arch/mips/kvm/kvm_mips_int.h index 20da7d29eeded..4ab4bdfad703e 100644 --- a/arch/mips/kvm/kvm_mips_int.h +++ b/arch/mips/kvm/kvm_mips_int.h @@ -1,14 +1,15 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: Interrupts -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Interrupts + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal + */ -/* MIPS Exception Priorities, exceptions (including interrupts) are queued up +/* + * MIPS Exception Priorities, exceptions (including interrupts) are queued up * for the guest in the order specified by their priorities */ @@ -27,6 +28,9 @@ #define MIPS_EXC_MAX 12 /* XXXSL More to follow */ +extern char mips32_exception[], mips32_exceptionEnd[]; +extern char mips32_GuestException[], mips32_GuestExceptionEnd[]; + #define C_TI (_ULCAST_(1) << 30) #define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0) diff --git a/arch/mips/kvm/kvm_mips_opcode.h b/arch/mips/kvm/kvm_mips_opcode.h index 86d3b4cc348b5..03a6ae84c7df4 100644 --- a/arch/mips/kvm/kvm_mips_opcode.h +++ b/arch/mips/kvm/kvm_mips_opcode.h @@ -1,24 +1,22 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal -*/ - -/* - * Define opcode values not defined in + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal */ +/* Define opcode values not defined in */ + #ifndef __KVM_MIPS_OPCODE_H__ #define __KVM_MIPS_OPCODE_H__ /* COP0 Ops */ -#define mfmcz_op 0x0b /* 01011 */ -#define wrpgpr_op 0x0e /* 01110 */ +#define mfmcz_op 0x0b /* 01011 */ +#define wrpgpr_op 0x0e /* 01110 */ -/* COP0 opcodes (only if COP0 and CO=1): */ -#define wait_op 0x20 /* 100000 */ +/* COP0 opcodes (only if COP0 and CO=1): */ +#define wait_op 0x20 /* 100000 */ #endif /* __KVM_MIPS_OPCODE_H__ */ diff --git a/arch/mips/kvm/kvm_mips_stats.c b/arch/mips/kvm/kvm_mips_stats.c index 075904bcac1ba..6efef38a324da 100644 --- a/arch/mips/kvm/kvm_mips_stats.c +++ b/arch/mips/kvm/kvm_mips_stats.c @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: COP0 access histogram -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: COP0 access histogram + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal + */ #include diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/kvm_tlb.c index 8a5a700ad8dee..bb7418bd95b97 100644 --- a/arch/mips/kvm/kvm_tlb.c +++ b/arch/mips/kvm/kvm_tlb.c @@ -1,14 +1,14 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS TLB handling, this file is part of the Linux host kernel so that -* TLB handlers run from KSEG0 -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS TLB handling, this file is part of the Linux host kernel so that + * TLB handlers run from KSEG0 + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal + */ #include #include @@ -18,7 +18,6 @@ #include #include - #include #include #include @@ -39,13 +38,13 @@ atomic_t kvm_mips_instance; EXPORT_SYMBOL(kvm_mips_instance); /* These function pointers are initialized once the KVM module is loaded */ -pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); +pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn); EXPORT_SYMBOL(kvm_mips_gfn_to_pfn); -void (*kvm_mips_release_pfn_clean) (pfn_t pfn); +void (*kvm_mips_release_pfn_clean)(pfn_t pfn); EXPORT_SYMBOL(kvm_mips_release_pfn_clean); -bool(*kvm_mips_is_error_pfn) (pfn_t pfn); +bool (*kvm_mips_is_error_pfn)(pfn_t pfn); EXPORT_SYMBOL(kvm_mips_is_error_pfn); uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) @@ -53,21 +52,17 @@ uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) return vcpu->arch.guest_kernel_asid[smp_processor_id()] & ASID_MASK; } - uint32_t kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) { return vcpu->arch.guest_user_asid[smp_processor_id()] & ASID_MASK; } -inline uint32_t kvm_mips_get_commpage_asid (struct kvm_vcpu *vcpu) +inline uint32_t kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.commpage_tlb; } - -/* - * Structure defining an tlb entry data set. - */ +/* Structure defining an tlb entry data set. */ void kvm_mips_dump_host_tlbs(void) { @@ -116,6 +111,7 @@ void kvm_mips_dump_host_tlbs(void) mtc0_tlbw_hazard(); local_irq_restore(flags); } +EXPORT_SYMBOL(kvm_mips_dump_host_tlbs); void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu) { @@ -143,6 +139,7 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu) (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask); } } +EXPORT_SYMBOL(kvm_mips_dump_guest_tlbs); static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) { @@ -152,7 +149,7 @@ static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) if (kvm->arch.guest_pmap[gfn] != KVM_INVALID_PAGE) return 0; - srcu_idx = srcu_read_lock(&kvm->srcu); + srcu_idx = srcu_read_lock(&kvm->srcu); pfn = kvm_mips_gfn_to_pfn(kvm, gfn); if (kvm_mips_is_error_pfn(pfn)) { @@ -169,7 +166,7 @@ static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) /* Translate guest KSEG0 addresses to Host PA */ unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu, - unsigned long gva) + unsigned long gva) { gfn_t gfn; uint32_t offset = gva & ~PAGE_MASK; @@ -194,12 +191,13 @@ unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu, return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset; } +EXPORT_SYMBOL(kvm_mips_translate_guest_kseg0_to_hpa); /* XXXKYMA: Must be called with interrupts disabled */ /* set flush_dcache_mask == 0 if no dcache flush required */ -int -kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, - unsigned long entrylo0, unsigned long entrylo1, int flush_dcache_mask) +int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, + unsigned long entrylo0, unsigned long entrylo1, + int flush_dcache_mask) { unsigned long flags; unsigned long old_entryhi; @@ -207,7 +205,6 @@ kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, local_irq_save(flags); - old_entryhi = read_c0_entryhi(); write_c0_entryhi(entryhi); mtc0_tlbw_hazard(); @@ -240,12 +237,14 @@ kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, if (flush_dcache_mask) { if (entrylo0 & MIPS3_PG_V) { ++vcpu->stat.flush_dcache_exits; - flush_data_cache_page((entryhi & VPN2_MASK) & ~flush_dcache_mask); + flush_data_cache_page((entryhi & VPN2_MASK) & + ~flush_dcache_mask); } if (entrylo1 & MIPS3_PG_V) { ++vcpu->stat.flush_dcache_exits; - flush_data_cache_page(((entryhi & VPN2_MASK) & ~flush_dcache_mask) | - (0x1 << PAGE_SHIFT)); + flush_data_cache_page(((entryhi & VPN2_MASK) & + ~flush_dcache_mask) | + (0x1 << PAGE_SHIFT)); } } @@ -257,10 +256,9 @@ kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, return 0; } - /* XXXKYMA: Must be called with interrupts disabled */ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, - struct kvm_vcpu *vcpu) + struct kvm_vcpu *vcpu) { gfn_t gfn; pfn_t pfn0, pfn1; @@ -270,7 +268,6 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, struct kvm *kvm = vcpu->kvm; const int flush_dcache_mask = 0; - if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) { kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr); kvm_mips_dump_host_tlbs(); @@ -302,14 +299,15 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, } entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu)); - entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | (1 << 2) | - (0x1 << 1); - entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | (1 << 2) | - (0x1 << 1); + entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | + (1 << 2) | (0x1 << 1); + entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | + (1 << 2) | (0x1 << 1); return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, flush_dcache_mask); } +EXPORT_SYMBOL(kvm_mips_handle_kseg0_tlb_fault); int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, struct kvm_vcpu *vcpu) @@ -318,11 +316,10 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, unsigned long flags, old_entryhi = 0, vaddr = 0; unsigned long entrylo0 = 0, entrylo1 = 0; - pfn0 = CPHYSADDR(vcpu->arch.kseg0_commpage) >> PAGE_SHIFT; pfn1 = 0; - entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | (1 << 2) | - (0x1 << 1); + entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | + (1 << 2) | (0x1 << 1); entrylo1 = 0; local_irq_save(flags); @@ -341,9 +338,9 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, mtc0_tlbw_hazard(); tlbw_use_hazard(); - kvm_debug ("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n", - vcpu->arch.pc, read_c0_index(), read_c0_entryhi(), - read_c0_entrylo0(), read_c0_entrylo1()); + kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n", + vcpu->arch.pc, read_c0_index(), read_c0_entryhi(), + read_c0_entrylo0(), read_c0_entrylo1()); /* Restore old ASID */ write_c0_entryhi(old_entryhi); @@ -353,28 +350,33 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, return 0; } +EXPORT_SYMBOL(kvm_mips_handle_commpage_tlb_fault); -int -kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, - struct kvm_mips_tlb *tlb, unsigned long *hpa0, unsigned long *hpa1) +int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, + struct kvm_mips_tlb *tlb, + unsigned long *hpa0, + unsigned long *hpa1) { unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; struct kvm *kvm = vcpu->kvm; pfn_t pfn0, pfn1; - if ((tlb->tlb_hi & VPN2_MASK) == 0) { pfn0 = 0; pfn1 = 0; } else { - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo0) >> PAGE_SHIFT) < 0) + if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo0) + >> PAGE_SHIFT) < 0) return -1; - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo1) >> PAGE_SHIFT) < 0) + if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo1) + >> PAGE_SHIFT) < 0) return -1; - pfn0 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo0) >> PAGE_SHIFT]; - pfn1 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo1) >> PAGE_SHIFT]; + pfn0 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo0) + >> PAGE_SHIFT]; + pfn1 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo1) + >> PAGE_SHIFT]; } if (hpa0) @@ -385,11 +387,12 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, /* Get attributes from the Guest TLB */ entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ? - kvm_mips_get_kernel_asid(vcpu) : kvm_mips_get_user_asid(vcpu)); + kvm_mips_get_kernel_asid(vcpu) : + kvm_mips_get_user_asid(vcpu)); entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | - (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V); + (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V); entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | - (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V); + (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V); kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, tlb->tlb_lo0, tlb->tlb_lo1); @@ -397,6 +400,7 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, tlb->tlb_mask); } +EXPORT_SYMBOL(kvm_mips_handle_mapped_seg_tlb_fault); int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) { @@ -404,10 +408,9 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) int index = -1; struct kvm_mips_tlb *tlb = vcpu->arch.guest_tlb; - for (i = 0; i < KVM_MIPS_GUEST_TLB_SIZE; i++) { - if (((TLB_VPN2(tlb[i]) & ~tlb[i].tlb_mask) == ((entryhi & VPN2_MASK) & ~tlb[i].tlb_mask)) && - (TLB_IS_GLOBAL(tlb[i]) || (TLB_ASID(tlb[i]) == (entryhi & ASID_MASK)))) { + if (TLB_HI_VPN2_HIT(tlb[i], entryhi) && + TLB_HI_ASID_HIT(tlb[i], entryhi)) { index = i; break; } @@ -418,21 +421,23 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) return index; } +EXPORT_SYMBOL(kvm_mips_guest_tlb_lookup); int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr) { unsigned long old_entryhi, flags; volatile int idx; - local_irq_save(flags); old_entryhi = read_c0_entryhi(); if (KVM_GUEST_KERNEL_MODE(vcpu)) - write_c0_entryhi((vaddr & VPN2_MASK) | kvm_mips_get_kernel_asid(vcpu)); + write_c0_entryhi((vaddr & VPN2_MASK) | + kvm_mips_get_kernel_asid(vcpu)); else { - write_c0_entryhi((vaddr & VPN2_MASK) | kvm_mips_get_user_asid(vcpu)); + write_c0_entryhi((vaddr & VPN2_MASK) | + kvm_mips_get_user_asid(vcpu)); } mtc0_tlbw_hazard(); @@ -452,6 +457,7 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr) return idx; } +EXPORT_SYMBOL(kvm_mips_host_tlb_lookup); int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) { @@ -460,7 +466,6 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) local_irq_save(flags); - old_entryhi = read_c0_entryhi(); write_c0_entryhi((va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu)); @@ -499,8 +504,9 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) return 0; } +EXPORT_SYMBOL(kvm_mips_host_tlb_inv); -/* XXXKYMA: Fix Guest USER/KERNEL no longer share the same ASID*/ +/* XXXKYMA: Fix Guest USER/KERNEL no longer share the same ASID */ int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index) { unsigned long flags, old_entryhi; @@ -510,7 +516,6 @@ int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index) local_irq_save(flags); - old_entryhi = read_c0_entryhi(); write_c0_entryhi(UNIQUE_ENTRYHI(index)); @@ -546,7 +551,6 @@ void kvm_mips_flush_host_tlb(int skip_kseg0) int entry = 0; int maxentry = current_cpu_data.tlbsize; - local_irq_save(flags); old_entryhi = read_c0_entryhi(); @@ -554,7 +558,6 @@ void kvm_mips_flush_host_tlb(int skip_kseg0) /* Blast 'em all away. */ for (entry = 0; entry < maxentry; entry++) { - write_c0_index(entry); mtc0_tlbw_hazard(); @@ -565,9 +568,8 @@ void kvm_mips_flush_host_tlb(int skip_kseg0) entryhi = read_c0_entryhi(); /* Don't blow away guest kernel entries */ - if (KVM_GUEST_KSEGX(entryhi) == KVM_GUEST_KSEG0) { + if (KVM_GUEST_KSEGX(entryhi) == KVM_GUEST_KSEG0) continue; - } } /* Make sure all entries differ. */ @@ -591,17 +593,17 @@ void kvm_mips_flush_host_tlb(int skip_kseg0) local_irq_restore(flags); } +EXPORT_SYMBOL(kvm_mips_flush_host_tlb); -void -kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, - struct kvm_vcpu *vcpu) +void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, + struct kvm_vcpu *vcpu) { unsigned long asid = asid_cache(cpu); - if (!((asid += ASID_INC) & ASID_MASK)) { - if (cpu_has_vtag_icache) { + asid += ASID_INC; + if (!(asid & ASID_MASK)) { + if (cpu_has_vtag_icache) flush_icache_all(); - } kvm_local_flush_tlb_all(); /* start new asid cycle */ @@ -639,6 +641,7 @@ void kvm_local_flush_tlb_all(void) local_irq_restore(flags); } +EXPORT_SYMBOL(kvm_local_flush_tlb_all); /** * kvm_mips_migrate_count() - Migrate timer. @@ -699,7 +702,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } if (!newasid) { - /* If we preempted while the guest was executing, then reload the pre-empted ASID */ + /* + * If we preempted while the guest was executing, then reload + * the pre-empted ASID + */ if (current->flags & PF_VCPU) { write_c0_entryhi(vcpu->arch. preempt_entryhi & ASID_MASK); @@ -708,9 +714,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } else { /* New ASIDs were allocated for the VM */ - /* Were we in guest context? If so then the pre-empted ASID is no longer - * valid, we need to set it to what it should be based on the mode of - * the Guest (Kernel/User) + /* + * Were we in guest context? If so then the pre-empted ASID is + * no longer valid, we need to set it to what it should be based + * on the mode of the Guest (Kernel/User) */ if (current->flags & PF_VCPU) { if (KVM_GUEST_KERNEL_MODE(vcpu)) @@ -728,6 +735,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) local_irq_restore(flags); } +EXPORT_SYMBOL(kvm_arch_vcpu_load); /* ASID can change if another task is scheduled during preemption */ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -739,7 +747,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) cpu = smp_processor_id(); - vcpu->arch.preempt_entryhi = read_c0_entryhi(); vcpu->arch.last_sched_cpu = cpu; @@ -754,11 +761,12 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) local_irq_restore(flags); } +EXPORT_SYMBOL(kvm_arch_vcpu_put); uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - unsigned long paddr, flags; + unsigned long paddr, flags, vpn2, asid; uint32_t inst; int index; @@ -769,16 +777,12 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu) if (index >= 0) { inst = *(opc); } else { - index = - kvm_mips_guest_tlb_lookup(vcpu, - ((unsigned long) opc & VPN2_MASK) - | - (kvm_read_c0_guest_entryhi - (cop0) & ASID_MASK)); + vpn2 = (unsigned long) opc & VPN2_MASK; + asid = kvm_read_c0_guest_entryhi(cop0) & ASID_MASK; + index = kvm_mips_guest_tlb_lookup(vcpu, vpn2 | asid); if (index < 0) { - kvm_err - ("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n", - __func__, opc, vcpu, read_c0_entryhi()); + kvm_err("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n", + __func__, opc, vcpu, read_c0_entryhi()); kvm_mips_dump_host_tlbs(); local_irq_restore(flags); return KVM_INVALID_INST; @@ -793,7 +797,7 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu) } else if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) { paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu, - (unsigned long) opc); + (unsigned long) opc); inst = *(uint32_t *) CKSEG0ADDR(paddr); } else { kvm_err("%s: illegal address: %p\n", __func__, opc); @@ -802,18 +806,4 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu) return inst; } - -EXPORT_SYMBOL(kvm_local_flush_tlb_all); -EXPORT_SYMBOL(kvm_mips_handle_mapped_seg_tlb_fault); -EXPORT_SYMBOL(kvm_mips_handle_commpage_tlb_fault); -EXPORT_SYMBOL(kvm_mips_dump_host_tlbs); -EXPORT_SYMBOL(kvm_mips_handle_kseg0_tlb_fault); -EXPORT_SYMBOL(kvm_mips_host_tlb_lookup); -EXPORT_SYMBOL(kvm_mips_flush_host_tlb); -EXPORT_SYMBOL(kvm_mips_guest_tlb_lookup); -EXPORT_SYMBOL(kvm_mips_host_tlb_inv); -EXPORT_SYMBOL(kvm_mips_translate_guest_kseg0_to_hpa); -EXPORT_SYMBOL(kvm_mips_dump_guest_tlbs); EXPORT_SYMBOL(kvm_get_inst); -EXPORT_SYMBOL(kvm_arch_vcpu_load); -EXPORT_SYMBOL(kvm_arch_vcpu_put); diff --git a/arch/mips/kvm/kvm_trap_emul.c b/arch/mips/kvm/kvm_trap_emul.c index 693f952b2fbb5..106335b36861d 100644 --- a/arch/mips/kvm/kvm_trap_emul.c +++ b/arch/mips/kvm/kvm_trap_emul.c @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: Deliver/Emulate exceptions to the guest kernel -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Deliver/Emulate exceptions to the guest kernel + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal + */ #include #include @@ -37,7 +37,6 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) return gpa; } - static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; @@ -46,9 +45,9 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; - if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) { + if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu); - } else + else er = kvm_mips_emulate_inst(cause, opc, run, vcpu); switch (er) { @@ -83,9 +82,8 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { - kvm_debug - ("USER/KSEG23 ADDR TLB MOD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_debug("USER/KSEG23 ADDR TLB MOD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu); if (er == EMULATE_DONE) @@ -95,8 +93,10 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { - /* XXXKYMA: The guest kernel does not expect to get this fault when we are not - * using HIGHMEM. Need to address this in a HIGHMEM kernel + /* + * XXXKYMA: The guest kernel does not expect to get this fault + * when we are not using HIGHMEM. Need to address this in a + * HIGHMEM kernel */ printk ("TLB MOD fault not handled, cause %#lx, PC: %p, BadVaddr: %#lx\n", @@ -134,9 +134,8 @@ static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu) } } else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { - kvm_debug - ("USER ADDR TLB LD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_debug("USER ADDR TLB LD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu); if (er == EMULATE_DONE) ret = RESUME_GUEST; @@ -145,8 +144,9 @@ static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { - /* All KSEG0 faults are handled by KVM, as the guest kernel does not - * expect to ever get them + /* + * All KSEG0 faults are handled by KVM, as the guest kernel does + * not expect to ever get them */ if (kvm_mips_handle_kseg0_tlb_fault (vcpu->arch.host_cp0_badvaddr, vcpu) < 0) { @@ -154,9 +154,8 @@ static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else { - kvm_err - ("Illegal TLB LD fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("Illegal TLB LD fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -185,11 +184,14 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) kvm_debug("USER ADDR TLB ST fault: PC: %#lx, BadVaddr: %#lx\n", vcpu->arch.pc, badvaddr); - /* User Address (UA) fault, this could happen if - * (1) TLB entry not present/valid in both Guest and shadow host TLBs, in this - * case we pass on the fault to the guest kernel and let it handle it. - * (2) TLB entry is present in the Guest TLB but not in the shadow, in this - * case we inject the TLB from the Guest TLB into the shadow host TLB + /* + * User Address (UA) fault, this could happen if + * (1) TLB entry not present/valid in both Guest and shadow host + * TLBs, in this case we pass on the fault to the guest + * kernel and let it handle it. + * (2) TLB entry is present in the Guest TLB but not in the + * shadow, in this case we inject the TLB from the Guest TLB + * into the shadow host TLB */ er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu); @@ -349,9 +351,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) uint32_t config1; int vcpu_id = vcpu->vcpu_id; - /* Arch specific stuff, set up config registers properly so that the - * guest will come up as expected, for now we simulate a - * MIPS 24kc + /* + * Arch specific stuff, set up config registers properly so that the + * guest will come up as expected, for now we simulate a MIPS 24kc */ kvm_write_c0_guest_prid(cop0, 0x00019300); kvm_write_c0_guest_config(cop0, @@ -373,14 +375,15 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) kvm_write_c0_guest_config2(cop0, MIPS_CONFIG2); /* MIPS_CONFIG2 | (read_c0_config2() & 0xfff) */ - kvm_write_c0_guest_config3(cop0, - MIPS_CONFIG3 | (0 << CP0C3_VInt) | (1 << - CP0C3_ULRI)); + kvm_write_c0_guest_config3(cop0, MIPS_CONFIG3 | (0 << CP0C3_VInt) | + (1 << CP0C3_ULRI)); /* Set Wait IE/IXMT Ignore in Config7, IAR, AR */ kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10)); - /* Setup IntCtl defaults, compatibilty mode for timer interrupts (HW5) */ + /* + * Setup IntCtl defaults, compatibilty mode for timer interrupts (HW5) + */ kvm_write_c0_guest_intctl(cop0, 0xFC000000); /* Put in vcpu id as CPUNum into Ebase Reg to handle SMP Guests */ diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h index bc9e0f406c088..c1388d40663b0 100644 --- a/arch/mips/kvm/trace.h +++ b/arch/mips/kvm/trace.h @@ -1,11 +1,11 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal + */ #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KVM_H @@ -17,9 +17,7 @@ #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE trace -/* - * Tracepoints for VM eists - */ +/* Tracepoints for VM eists */ extern char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES]; TRACE_EVENT(kvm_exit, From 6ad78a5c75c5bcfdac4551f7d09b777b3dc3c19c Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Thu, 26 Jun 2014 12:11:35 -0700 Subject: [PATCH 030/104] MIPS: KVM: Use KVM internal logger Replace printks with kvm_[err|info|debug]. Signed-off-by: Deng-Cheng Zhu Reviewed-by: James Hogan Signed-off-by: Paolo Bonzini --- arch/mips/kvm/kvm_mips.c | 23 +++---- arch/mips/kvm/kvm_mips_emul.c | 107 ++++++++++++++++----------------- arch/mips/kvm/kvm_mips_stats.c | 6 +- arch/mips/kvm/kvm_tlb.c | 60 +++++++++--------- arch/mips/kvm/kvm_trap_emul.c | 31 ++++------ 5 files changed, 110 insertions(+), 117 deletions(-) diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index 52be52adf0308..330b3af701a64 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -817,8 +817,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) ga = memslot->base_gfn << PAGE_SHIFT; ga_end = ga + (memslot->npages << PAGE_SHIFT); - printk("%s: dirty, ga: %#lx, ga_end %#lx\n", __func__, ga, - ga_end); + kvm_info("%s: dirty, ga: %#lx, ga_end %#lx\n", __func__, ga, + ga_end); n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); @@ -925,24 +925,25 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) if (!vcpu) return -1; - printk("VCPU Register Dump:\n"); - printk("\tpc = 0x%08lx\n", vcpu->arch.pc); - printk("\texceptions: %08lx\n", vcpu->arch.pending_exceptions); + kvm_debug("VCPU Register Dump:\n"); + kvm_debug("\tpc = 0x%08lx\n", vcpu->arch.pc); + kvm_debug("\texceptions: %08lx\n", vcpu->arch.pending_exceptions); for (i = 0; i < 32; i += 4) { - printk("\tgpr%02d: %08lx %08lx %08lx %08lx\n", i, + kvm_debug("\tgpr%02d: %08lx %08lx %08lx %08lx\n", i, vcpu->arch.gprs[i], vcpu->arch.gprs[i + 1], vcpu->arch.gprs[i + 2], vcpu->arch.gprs[i + 3]); } - printk("\thi: 0x%08lx\n", vcpu->arch.hi); - printk("\tlo: 0x%08lx\n", vcpu->arch.lo); + kvm_debug("\thi: 0x%08lx\n", vcpu->arch.hi); + kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo); cop0 = vcpu->arch.cop0; - printk("\tStatus: 0x%08lx, Cause: 0x%08lx\n", - kvm_read_c0_guest_status(cop0), kvm_read_c0_guest_cause(cop0)); + kvm_debug("\tStatus: 0x%08lx, Cause: 0x%08lx\n", + kvm_read_c0_guest_status(cop0), + kvm_read_c0_guest_cause(cop0)); - printk("\tEPC: 0x%08lx\n", kvm_read_c0_guest_epc(cop0)); + kvm_debug("\tEPC: 0x%08lx\n", kvm_read_c0_guest_epc(cop0)); return 0; } diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c index 9ec9f1d54b9bf..bdd1421b78fc9 100644 --- a/arch/mips/kvm/kvm_mips_emul.c +++ b/arch/mips/kvm/kvm_mips_emul.c @@ -183,18 +183,18 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, /* And now the FPA/cp1 branch instructions. */ case cop1_op: - printk("%s: unsupported cop1_op\n", __func__); + kvm_err("%s: unsupported cop1_op\n", __func__); break; } return nextpc; unaligned: - printk("%s: unaligned epc\n", __func__); + kvm_err("%s: unaligned epc\n", __func__); return nextpc; sigill: - printk("%s: DSP branch but not DSP ASE\n", __func__); + kvm_err("%s: DSP branch but not DSP ASE\n", __func__); return nextpc; } @@ -751,8 +751,8 @@ enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu) kvm_clear_c0_guest_status(cop0, ST0_ERL); vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0); } else { - printk("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n", - vcpu->arch.pc); + kvm_err("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n", + vcpu->arch.pc); er = EMULATE_FAIL; } @@ -795,7 +795,7 @@ enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) enum emulation_result er = EMULATE_FAIL; uint32_t pc = vcpu->arch.pc; - printk("[%#x] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0)); + kvm_err("[%#x] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0)); return er; } @@ -809,13 +809,12 @@ enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu) uint32_t pc = vcpu->arch.pc; if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) { - printk("%s: illegal index: %d\n", __func__, index); - printk - ("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", - pc, index, kvm_read_c0_guest_entryhi(cop0), - kvm_read_c0_guest_entrylo0(cop0), - kvm_read_c0_guest_entrylo1(cop0), - kvm_read_c0_guest_pagemask(cop0)); + kvm_debug("%s: illegal index: %d\n", __func__, index); + kvm_debug("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", + pc, index, kvm_read_c0_guest_entryhi(cop0), + kvm_read_c0_guest_entrylo0(cop0), + kvm_read_c0_guest_entrylo1(cop0), + kvm_read_c0_guest_pagemask(cop0)); index = (index & ~0x80000000) % KVM_MIPS_GUEST_TLB_SIZE; } @@ -853,7 +852,7 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu) index &= (KVM_MIPS_GUEST_TLB_SIZE - 1); if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) { - printk("%s: illegal index: %d\n", __func__, index); + kvm_err("%s: illegal index: %d\n", __func__, index); return EMULATE_FAIL; } @@ -938,7 +937,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, er = kvm_mips_emul_tlbp(vcpu); break; case rfe_op: - printk("!!!COP0_RFE!!!\n"); + kvm_err("!!!COP0_RFE!!!\n"); break; case eret_op: er = kvm_mips_emul_eret(vcpu); @@ -987,8 +986,8 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, if ((rd == MIPS_CP0_TLB_INDEX) && (vcpu->arch.gprs[rt] >= KVM_MIPS_GUEST_TLB_SIZE)) { - printk("Invalid TLB Index: %ld", - vcpu->arch.gprs[rt]); + kvm_err("Invalid TLB Index: %ld", + vcpu->arch.gprs[rt]); er = EMULATE_FAIL; break; } @@ -998,8 +997,8 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, kvm_change_c0_guest_ebase(cop0, ~(C0_EBASE_CORE_MASK), vcpu->arch.gprs[rt]); - printk("MTCz, cop0->reg[EBASE]: %#lx\n", - kvm_read_c0_guest_ebase(cop0)); + kvm_err("MTCz, cop0->reg[EBASE]: %#lx\n", + kvm_read_c0_guest_ebase(cop0)); } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { uint32_t nasid = vcpu->arch.gprs[rt] & ASID_MASK; @@ -1072,9 +1071,8 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, break; case dmtc_op: - printk - ("!!!!!!![%#lx]dmtc_op: rt: %d, rd: %d, sel: %d!!!!!!\n", - vcpu->arch.pc, rt, rd, sel); + kvm_err("!!!!!!![%#lx]dmtc_op: rt: %d, rd: %d, sel: %d!!!!!!\n", + vcpu->arch.pc, rt, rd, sel); er = EMULATE_FAIL; break; @@ -1119,9 +1117,8 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, } break; default: - printk - ("[%#lx]MachEmulateCP0: unsupported COP0, copz: 0x%x\n", - vcpu->arch.pc, copz); + kvm_err("[%#lx]MachEmulateCP0: unsupported COP0, copz: 0x%x\n", + vcpu->arch.pc, copz); er = EMULATE_FAIL; break; } @@ -1242,7 +1239,7 @@ enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause, break; default: - printk("Store not yet supported"); + kvm_err("Store not yet supported"); er = EMULATE_FAIL; break; } @@ -1351,7 +1348,7 @@ enum emulation_result kvm_mips_emulate_load(uint32_t inst, uint32_t cause, break; default: - printk("Load not yet supported"); + kvm_err("Load not yet supported"); er = EMULATE_FAIL; break; } @@ -1370,7 +1367,7 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu) gfn = va >> PAGE_SHIFT; if (gfn >= kvm->arch.guest_pmap_npages) { - printk("%s: Invalid gfn: %#llx\n", __func__, gfn); + kvm_err("%s: Invalid gfn: %#llx\n", __func__, gfn); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); return -1; @@ -1378,7 +1375,8 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu) pfn = kvm->arch.guest_pmap[gfn]; pa = (pfn << PAGE_SHIFT) | offset; - printk("%s: va: %#lx, unmapped: %#x\n", __func__, va, CKSEG0ADDR(pa)); + kvm_debug("%s: va: %#lx, unmapped: %#x\n", __func__, va, + CKSEG0ADDR(pa)); local_flush_icache_range(CKSEG0ADDR(pa), 32); return 0; @@ -1444,8 +1442,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, else if (cache == MIPS_CACHE_ICACHE) r4k_blast_icache(); else { - printk("%s: unsupported CACHE INDEX operation\n", - __func__); + kvm_err("%s: unsupported CACHE INDEX operation\n", + __func__); return EMULATE_FAIL; } @@ -1504,9 +1502,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, } } } else { - printk - ("INVALID CACHE INDEX/ADDRESS (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", - cache, op, base, arch->gprs[base], offset); + kvm_err("INVALID CACHE INDEX/ADDRESS (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + cache, op, base, arch->gprs[base], offset); er = EMULATE_FAIL; preempt_enable(); goto dont_update_pc; @@ -1536,9 +1533,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, kvm_mips_trans_cache_va(inst, opc, vcpu); #endif } else { - printk - ("NO-OP CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", - cache, op, base, arch->gprs[base], offset); + kvm_err("NO-OP CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + cache, op, base, arch->gprs[base], offset); er = EMULATE_FAIL; preempt_enable(); goto dont_update_pc; @@ -1590,8 +1586,8 @@ enum emulation_result kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc, break; default: - printk("Instruction emulation not supported (%p/%#x)\n", opc, - inst); + kvm_err("Instruction emulation not supported (%p/%#x)\n", opc, + inst); kvm_arch_vcpu_dump_regs(vcpu); er = EMULATE_FAIL; break; @@ -1628,7 +1624,7 @@ enum emulation_result kvm_mips_emulate_syscall(unsigned long cause, arch->pc = KVM_GUEST_KSEG0 + 0x180; } else { - printk("Trying to deliver SYSCALL when EXL is already set\n"); + kvm_err("Trying to deliver SYSCALL when EXL is already set\n"); er = EMULATE_FAIL; } @@ -1984,7 +1980,7 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause, arch->pc = KVM_GUEST_KSEG0 + 0x180; } else { - printk("Trying to deliver BP when EXL is already set\n"); + kvm_err("Trying to deliver BP when EXL is already set\n"); er = EMULATE_FAIL; } @@ -2032,7 +2028,7 @@ enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, inst = kvm_get_inst(opc, vcpu); if (inst == KVM_INVALID_INST) { - printk("%s: Cannot get inst @ %p\n", __func__, opc); + kvm_err("%s: Cannot get inst @ %p\n", __func__, opc); return EMULATE_FAIL; } @@ -2099,7 +2095,7 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, unsigned long curr_pc; if (run->mmio.len > sizeof(*gpr)) { - printk("Bad MMIO length: %d", run->mmio.len); + kvm_err("Bad MMIO length: %d", run->mmio.len); er = EMULATE_FAIL; goto done; } @@ -2173,7 +2169,7 @@ static enum emulation_result kvm_mips_emulate_exc(unsigned long cause, exccode, kvm_read_c0_guest_epc(cop0), kvm_read_c0_guest_badvaddr(cop0)); } else { - printk("Trying to deliver EXC when EXL is already set\n"); + kvm_err("Trying to deliver EXC when EXL is already set\n"); er = EMULATE_FAIL; } @@ -2213,8 +2209,8 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause, * address error exception to the guest */ if (badvaddr >= (unsigned long) KVM_GUEST_KSEG0) { - printk("%s: LD MISS @ %#lx\n", __func__, - badvaddr); + kvm_debug("%s: LD MISS @ %#lx\n", __func__, + badvaddr); cause &= ~0xff; cause |= (T_ADDR_ERR_LD << CAUSEB_EXCCODE); er = EMULATE_PRIV_FAIL; @@ -2227,8 +2223,8 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause, * address error exception to the guest */ if (badvaddr >= (unsigned long) KVM_GUEST_KSEG0) { - printk("%s: ST MISS @ %#lx\n", __func__, - badvaddr); + kvm_debug("%s: ST MISS @ %#lx\n", __func__, + badvaddr); cause &= ~0xff; cause |= (T_ADDR_ERR_ST << CAUSEB_EXCCODE); er = EMULATE_PRIV_FAIL; @@ -2236,8 +2232,8 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause, break; case T_ADDR_ERR_ST: - printk("%s: address error ST @ %#lx\n", __func__, - badvaddr); + kvm_debug("%s: address error ST @ %#lx\n", __func__, + badvaddr); if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) { cause &= ~0xff; cause |= (T_TLB_ST_MISS << CAUSEB_EXCCODE); @@ -2245,8 +2241,8 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause, er = EMULATE_PRIV_FAIL; break; case T_ADDR_ERR_LD: - printk("%s: address error LD @ %#lx\n", __func__, - badvaddr); + kvm_debug("%s: address error LD @ %#lx\n", __func__, + badvaddr); if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) { cause &= ~0xff; cause |= (T_TLB_LD_MISS << CAUSEB_EXCCODE); @@ -2301,7 +2297,8 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause, } else if (exccode == T_TLB_ST_MISS) { er = kvm_mips_emulate_tlbmiss_st(cause, opc, run, vcpu); } else { - printk("%s: invalid exc code: %d\n", __func__, exccode); + kvm_err("%s: invalid exc code: %d\n", __func__, + exccode); er = EMULATE_FAIL; } } else { @@ -2319,8 +2316,8 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause, er = kvm_mips_emulate_tlbinv_st(cause, opc, run, vcpu); } else { - printk("%s: invalid exc code: %d\n", __func__, - exccode); + kvm_err("%s: invalid exc code: %d\n", __func__, + exccode); er = EMULATE_FAIL; } } else { diff --git a/arch/mips/kvm/kvm_mips_stats.c b/arch/mips/kvm/kvm_mips_stats.c index 6efef38a324da..1ae9f88b4d388 100644 --- a/arch/mips/kvm/kvm_mips_stats.c +++ b/arch/mips/kvm/kvm_mips_stats.c @@ -68,12 +68,12 @@ int kvm_mips_dump_stats(struct kvm_vcpu *vcpu) #ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS int i, j; - printk("\nKVM VCPU[%d] COP0 Access Profile:\n", vcpu->vcpu_id); + kvm_info("\nKVM VCPU[%d] COP0 Access Profile:\n", vcpu->vcpu_id); for (i = 0; i < N_MIPS_COPROC_REGS; i++) { for (j = 0; j < N_MIPS_COPROC_SEL; j++) { if (vcpu->arch.cop0->stat[i][j]) - printk("%s[%d]: %lu\n", kvm_cop0_str[i], j, - vcpu->arch.cop0->stat[i][j]); + kvm_info("%s[%d]: %lu\n", kvm_cop0_str[i], j, + vcpu->arch.cop0->stat[i][j]); } } #endif diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/kvm_tlb.c index bb7418bd95b97..29a5bdb19a515 100644 --- a/arch/mips/kvm/kvm_tlb.c +++ b/arch/mips/kvm/kvm_tlb.c @@ -77,8 +77,8 @@ void kvm_mips_dump_host_tlbs(void) old_entryhi = read_c0_entryhi(); old_pagemask = read_c0_pagemask(); - printk("HOST TLBs:\n"); - printk("ASID: %#lx\n", read_c0_entryhi() & ASID_MASK); + kvm_info("HOST TLBs:\n"); + kvm_info("ASID: %#lx\n", read_c0_entryhi() & ASID_MASK); for (i = 0; i < current_cpu_data.tlbsize; i++) { write_c0_index(i); @@ -92,19 +92,19 @@ void kvm_mips_dump_host_tlbs(void) tlb.tlb_lo1 = read_c0_entrylo1(); tlb.tlb_mask = read_c0_pagemask(); - printk("TLB%c%3d Hi 0x%08lx ", - (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*', - i, tlb.tlb_hi); - printk("Lo0=0x%09" PRIx64 " %c%c attr %lx ", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0), - (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo0 >> 3) & 7); - printk("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1), - (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask); + kvm_info("TLB%c%3d Hi 0x%08lx ", + (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*', + i, tlb.tlb_hi); + kvm_info("Lo0=0x%09" PRIx64 " %c%c attr %lx ", + (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0), + (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ', + (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ', + (tlb.tlb_lo0 >> 3) & 7); + kvm_info("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n", + (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1), + (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ', + (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ', + (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask); } write_c0_entryhi(old_entryhi); write_c0_pagemask(old_pagemask); @@ -119,24 +119,24 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu) struct kvm_mips_tlb tlb; int i; - printk("Guest TLBs:\n"); - printk("Guest EntryHi: %#lx\n", kvm_read_c0_guest_entryhi(cop0)); + kvm_info("Guest TLBs:\n"); + kvm_info("Guest EntryHi: %#lx\n", kvm_read_c0_guest_entryhi(cop0)); for (i = 0; i < KVM_MIPS_GUEST_TLB_SIZE; i++) { tlb = vcpu->arch.guest_tlb[i]; - printk("TLB%c%3d Hi 0x%08lx ", - (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*', - i, tlb.tlb_hi); - printk("Lo0=0x%09" PRIx64 " %c%c attr %lx ", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0), - (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo0 >> 3) & 7); - printk("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1), - (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask); + kvm_info("TLB%c%3d Hi 0x%08lx ", + (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*', + i, tlb.tlb_hi); + kvm_info("Lo0=0x%09" PRIx64 " %c%c attr %lx ", + (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0), + (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ', + (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ', + (tlb.tlb_lo0 >> 3) & 7); + kvm_info("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n", + (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1), + (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ', + (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ', + (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask); } } EXPORT_SYMBOL(kvm_mips_dump_guest_tlbs); diff --git a/arch/mips/kvm/kvm_trap_emul.c b/arch/mips/kvm/kvm_trap_emul.c index 106335b36861d..bd2f6bc64d454 100644 --- a/arch/mips/kvm/kvm_trap_emul.c +++ b/arch/mips/kvm/kvm_trap_emul.c @@ -27,7 +27,7 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) if ((kseg == CKSEG0) || (kseg == CKSEG1)) gpa = CPHYSADDR(gva); else { - printk("%s: cannot find GPA for GVA: %#lx\n", __func__, gva); + kvm_err("%s: cannot find GPA for GVA: %#lx\n", __func__, gva); kvm_mips_dump_host_tlbs(); gpa = KVM_INVALID_ADDR; } @@ -98,17 +98,15 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) * when we are not using HIGHMEM. Need to address this in a * HIGHMEM kernel */ - printk - ("TLB MOD fault not handled, cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("TLB MOD fault not handled, cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; } else { - printk - ("Illegal TLB Mod fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("Illegal TLB Mod fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -208,9 +206,8 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else { - printk - ("Illegal TLB ST fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("Illegal TLB ST fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -233,7 +230,7 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu) kvm_debug("Emulate Store to MMIO space\n"); er = kvm_mips_emulate_inst(cause, opc, run, vcpu); if (er == EMULATE_FAIL) { - printk("Emulate Store to MMIO space failed\n"); + kvm_err("Emulate Store to MMIO space failed\n"); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; } else { @@ -241,9 +238,8 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else { - printk - ("Address Error (STORE): cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("Address Error (STORE): cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; } @@ -263,7 +259,7 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu) kvm_debug("Emulate Load from MMIO space @ %#lx\n", badvaddr); er = kvm_mips_emulate_inst(cause, opc, run, vcpu); if (er == EMULATE_FAIL) { - printk("Emulate Load from MMIO space failed\n"); + kvm_err("Emulate Load from MMIO space failed\n"); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; } else { @@ -271,9 +267,8 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else { - printk - ("Address Error (LOAD): cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("Address Error (LOAD): cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; er = EMULATE_FAIL; From d98403a525fe1648c516f33e2bf674d277d69135 Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Thu, 26 Jun 2014 12:11:36 -0700 Subject: [PATCH 031/104] MIPS: KVM: Simplify functions by removing redundancy No logic changes inside. Reviewed-by: James Hogan Signed-off-by: Deng-Cheng Zhu Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 2 +- arch/mips/kvm/kvm_mips.c | 18 ++++------------ arch/mips/kvm/kvm_mips_commpage.c | 2 -- arch/mips/kvm/kvm_mips_emul.c | 34 ++++++++++--------------------- arch/mips/kvm/kvm_mips_stats.c | 4 +--- 5 files changed, 17 insertions(+), 43 deletions(-) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 3f813f295134b..7a3fc67bd7f97 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -764,7 +764,7 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu); /* Misc */ -extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu); +extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index 330b3af701a64..289b4d2ae45c7 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -97,9 +97,7 @@ void kvm_arch_hardware_unsetup(void) void kvm_arch_check_processor_compat(void *rtn) { - int *r = (int *)rtn; - *r = 0; - return; + *(int *)rtn = 0; } static void kvm_mips_init_tlbs(struct kvm *kvm) @@ -225,7 +223,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change) { unsigned long npages = 0; - int i, err = 0; + int i; kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n", __func__, kvm, mem->slot, mem->guest_phys_addr, @@ -243,8 +241,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, if (!kvm->arch.guest_pmap) { kvm_err("Failed to allocate guest PMAP"); - err = -ENOMEM; - goto out; + return; } kvm_debug("Allocated space for Guest PMAP Table (%ld pages) @ %p\n", @@ -255,8 +252,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvm->arch.guest_pmap[i] = KVM_INVALID_PAGE; } } -out: - return; } void kvm_arch_flush_shadow_all(struct kvm *kvm) @@ -845,16 +840,12 @@ long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) int kvm_arch_init(void *opaque) { - int ret; - if (kvm_mips_callbacks) { kvm_err("kvm: module already exists\n"); return -EEXIST; } - ret = kvm_mips_emulation_init(&kvm_mips_callbacks); - - return ret; + return kvm_mips_emulation_init(&kvm_mips_callbacks); } void kvm_arch_exit(void) @@ -1008,7 +999,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { - return; } int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, diff --git a/arch/mips/kvm/kvm_mips_commpage.c b/arch/mips/kvm/kvm_mips_commpage.c index ab7096ec0666a..4b5612b34fef6 100644 --- a/arch/mips/kvm/kvm_mips_commpage.c +++ b/arch/mips/kvm/kvm_mips_commpage.c @@ -33,6 +33,4 @@ void kvm_mips_commpage_init(struct kvm_vcpu *vcpu) /* Specific init values for fields */ vcpu->arch.cop0 = &page->cop0; memset(vcpu->arch.cop0, 0, sizeof(struct mips_coproc)); - - return; } diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c index bdd1421b78fc9..f9b4f0fa425d5 100644 --- a/arch/mips/kvm/kvm_mips_emul.c +++ b/arch/mips/kvm/kvm_mips_emul.c @@ -761,8 +761,6 @@ enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu) enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) { - enum emulation_result er = EMULATE_DONE; - kvm_debug("[%#lx] !!!WAIT!!! (%#lx)\n", vcpu->arch.pc, vcpu->arch.pending_exceptions); @@ -782,7 +780,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) } } - return er; + return EMULATE_DONE; } /* @@ -792,11 +790,10 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - enum emulation_result er = EMULATE_FAIL; uint32_t pc = vcpu->arch.pc; kvm_err("[%#x] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0)); - return er; + return EMULATE_FAIL; } /* Write Guest TLB Entry @ Index */ @@ -804,7 +801,6 @@ enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; int index = kvm_read_c0_guest_index(cop0); - enum emulation_result er = EMULATE_DONE; struct kvm_mips_tlb *tlb = NULL; uint32_t pc = vcpu->arch.pc; @@ -836,14 +832,13 @@ enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu) kvm_read_c0_guest_entrylo1(cop0), kvm_read_c0_guest_pagemask(cop0)); - return er; + return EMULATE_DONE; } /* Write Guest TLB Entry @ Random Index */ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - enum emulation_result er = EMULATE_DONE; struct kvm_mips_tlb *tlb = NULL; uint32_t pc = vcpu->arch.pc; int index; @@ -874,14 +869,13 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu) kvm_read_c0_guest_entrylo0(cop0), kvm_read_c0_guest_entrylo1(cop0)); - return er; + return EMULATE_DONE; } enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; long entryhi = kvm_read_c0_guest_entryhi(cop0); - enum emulation_result er = EMULATE_DONE; uint32_t pc = vcpu->arch.pc; int index = -1; @@ -892,7 +886,7 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu) kvm_debug("[%#x] COP0_TLBP (entryhi: %#lx), index: %d\n", pc, entryhi, index); - return er; + return EMULATE_DONE; } enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, @@ -1638,7 +1632,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause, { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; unsigned long entryhi = (vcpu->arch. host_cp0_badvaddr & VPN2_MASK) | (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); @@ -1675,7 +1668,7 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause, /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); - return er; + return EMULATE_DONE; } enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause, @@ -1685,7 +1678,6 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause, { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); @@ -1722,7 +1714,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause, /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); - return er; + return EMULATE_DONE; } enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause, @@ -1732,7 +1724,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause, { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); @@ -1767,7 +1758,7 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause, /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); - return er; + return EMULATE_DONE; } enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause, @@ -1777,7 +1768,6 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause, { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); @@ -1812,7 +1802,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause, /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); - return er; + return EMULATE_DONE; } /* TLBMOD: store into address matching TLB with Dirty bit off */ @@ -1853,7 +1843,6 @@ enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause, unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { /* save old pc */ @@ -1884,7 +1873,7 @@ enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause, /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); - return er; + return EMULATE_DONE; } enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause, @@ -1894,7 +1883,6 @@ enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause, { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { /* save old pc */ @@ -1914,7 +1902,7 @@ enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause, (T_COP_UNUSABLE << CAUSEB_EXCCODE)); kvm_change_c0_guest_cause(cop0, (CAUSEF_CE), (0x1 << CAUSEB_CE)); - return er; + return EMULATE_DONE; } enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause, diff --git a/arch/mips/kvm/kvm_mips_stats.c b/arch/mips/kvm/kvm_mips_stats.c index 1ae9f88b4d388..a74d6024c5ad5 100644 --- a/arch/mips/kvm/kvm_mips_stats.c +++ b/arch/mips/kvm/kvm_mips_stats.c @@ -63,7 +63,7 @@ char *kvm_cop0_str[N_MIPS_COPROC_REGS] = { "DESAVE" }; -int kvm_mips_dump_stats(struct kvm_vcpu *vcpu) +void kvm_mips_dump_stats(struct kvm_vcpu *vcpu) { #ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS int i, j; @@ -77,6 +77,4 @@ int kvm_mips_dump_stats(struct kvm_vcpu *vcpu) } } #endif - - return 0; } From b045c40620eb51fb10b19a14f9ebc118925bc0f8 Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Thu, 26 Jun 2014 12:11:37 -0700 Subject: [PATCH 032/104] MIPS: KVM: Remove unneeded volatile The keyword volatile for idx in the TLB functions is unnecessary. Reviewed-by: James Hogan Signed-off-by: Deng-Cheng Zhu Signed-off-by: Paolo Bonzini --- arch/mips/kvm/kvm_tlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/kvm_tlb.c index 29a5bdb19a515..bbcd82242059d 100644 --- a/arch/mips/kvm/kvm_tlb.c +++ b/arch/mips/kvm/kvm_tlb.c @@ -201,7 +201,7 @@ int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, { unsigned long flags; unsigned long old_entryhi; - volatile int idx; + int idx; local_irq_save(flags); @@ -426,7 +426,7 @@ EXPORT_SYMBOL(kvm_mips_guest_tlb_lookup); int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr) { unsigned long old_entryhi, flags; - volatile int idx; + int idx; local_irq_save(flags); From d7d5b05faf1679849c5220627c7263b4041e15ef Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Thu, 26 Jun 2014 12:11:38 -0700 Subject: [PATCH 033/104] MIPS: KVM: Rename files to remove the prefix "kvm_" and "kvm_mips_" Since all the files are in arch/mips/kvm/, there's no need of the prefixes "kvm_" and "kvm_mips_". Reviewed-by: James Hogan Signed-off-by: Deng-Cheng Zhu Signed-off-by: Paolo Bonzini --- arch/mips/kvm/Makefile | 8 ++++---- arch/mips/kvm/{kvm_cb.c => callback.c} | 0 arch/mips/kvm/{kvm_mips_commpage.c => commpage.c} | 2 +- arch/mips/kvm/{kvm_mips_comm.h => commpage.h} | 0 arch/mips/kvm/{kvm_mips_dyntrans.c => dyntrans.c} | 2 +- arch/mips/kvm/{kvm_mips_emul.c => emulate.c} | 6 +++--- arch/mips/kvm/{kvm_mips_int.c => interrupt.c} | 2 +- arch/mips/kvm/{kvm_mips_int.h => interrupt.h} | 0 arch/mips/kvm/{kvm_locore.S => locore.S} | 0 arch/mips/kvm/{kvm_mips.c => mips.c} | 6 +++--- arch/mips/kvm/{kvm_mips_opcode.h => opcode.h} | 0 arch/mips/kvm/{kvm_mips_stats.c => stats.c} | 0 arch/mips/kvm/{kvm_tlb.c => tlb.c} | 0 arch/mips/kvm/{kvm_trap_emul.c => trap_emul.c} | 4 ++-- 14 files changed, 15 insertions(+), 15 deletions(-) rename arch/mips/kvm/{kvm_cb.c => callback.c} (100%) rename arch/mips/kvm/{kvm_mips_commpage.c => commpage.c} (97%) rename arch/mips/kvm/{kvm_mips_comm.h => commpage.h} (100%) rename arch/mips/kvm/{kvm_mips_dyntrans.c => dyntrans.c} (99%) rename arch/mips/kvm/{kvm_mips_emul.c => emulate.c} (99%) rename arch/mips/kvm/{kvm_mips_int.c => interrupt.c} (99%) rename arch/mips/kvm/{kvm_mips_int.h => interrupt.h} (100%) rename arch/mips/kvm/{kvm_locore.S => locore.S} (100%) rename arch/mips/kvm/{kvm_mips.c => mips.c} (99%) rename arch/mips/kvm/{kvm_mips_opcode.h => opcode.h} (100%) rename arch/mips/kvm/{kvm_mips_stats.c => stats.c} (100%) rename arch/mips/kvm/{kvm_tlb.c => tlb.c} (100%) rename arch/mips/kvm/{kvm_trap_emul.c => trap_emul.c} (99%) diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile index 78d87bbc99db7..401fe027c2612 100644 --- a/arch/mips/kvm/Makefile +++ b/arch/mips/kvm/Makefile @@ -5,9 +5,9 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm -kvm-objs := $(common-objs) kvm_mips.o kvm_mips_emul.o kvm_locore.o \ - kvm_mips_int.o kvm_mips_stats.o kvm_mips_commpage.o \ - kvm_mips_dyntrans.o kvm_trap_emul.o +kvm-objs := $(common-objs) mips.o emulate.o locore.o \ + interrupt.o stats.o commpage.o \ + dyntrans.o trap_emul.o obj-$(CONFIG_KVM) += kvm.o -obj-y += kvm_cb.o kvm_tlb.o +obj-y += callback.o tlb.o diff --git a/arch/mips/kvm/kvm_cb.c b/arch/mips/kvm/callback.c similarity index 100% rename from arch/mips/kvm/kvm_cb.c rename to arch/mips/kvm/callback.c diff --git a/arch/mips/kvm/kvm_mips_commpage.c b/arch/mips/kvm/commpage.c similarity index 97% rename from arch/mips/kvm/kvm_mips_commpage.c rename to arch/mips/kvm/commpage.c index 4b5612b34fef6..61b9c043ea09c 100644 --- a/arch/mips/kvm/kvm_mips_commpage.c +++ b/arch/mips/kvm/commpage.c @@ -22,7 +22,7 @@ #include -#include "kvm_mips_comm.h" +#include "commpage.h" void kvm_mips_commpage_init(struct kvm_vcpu *vcpu) { diff --git a/arch/mips/kvm/kvm_mips_comm.h b/arch/mips/kvm/commpage.h similarity index 100% rename from arch/mips/kvm/kvm_mips_comm.h rename to arch/mips/kvm/commpage.h diff --git a/arch/mips/kvm/kvm_mips_dyntrans.c b/arch/mips/kvm/dyntrans.c similarity index 99% rename from arch/mips/kvm/kvm_mips_dyntrans.c rename to arch/mips/kvm/dyntrans.c index fa7184df5450e..521121bdebff9 100644 --- a/arch/mips/kvm/kvm_mips_dyntrans.c +++ b/arch/mips/kvm/dyntrans.c @@ -18,7 +18,7 @@ #include #include -#include "kvm_mips_comm.h" +#include "commpage.h" #define SYNCI_TEMPLATE 0x041f0000 #define SYNCI_BASE(x) (((x) >> 21) & 0x1f) diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/emulate.c similarity index 99% rename from arch/mips/kvm/kvm_mips_emul.c rename to arch/mips/kvm/emulate.c index f9b4f0fa425d5..1a6068892e663 100644 --- a/arch/mips/kvm/kvm_mips_emul.c +++ b/arch/mips/kvm/emulate.c @@ -29,9 +29,9 @@ #include #define CONFIG_MIPS_MT -#include "kvm_mips_opcode.h" -#include "kvm_mips_int.h" -#include "kvm_mips_comm.h" +#include "opcode.h" +#include "interrupt.h" +#include "commpage.h" #include "trace.h" diff --git a/arch/mips/kvm/kvm_mips_int.c b/arch/mips/kvm/interrupt.c similarity index 99% rename from arch/mips/kvm/kvm_mips_int.c rename to arch/mips/kvm/interrupt.c index d458c042d5589..9b4445940c2bc 100644 --- a/arch/mips/kvm/kvm_mips_int.c +++ b/arch/mips/kvm/interrupt.c @@ -20,7 +20,7 @@ #include -#include "kvm_mips_int.h" +#include "interrupt.h" void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, uint32_t priority) { diff --git a/arch/mips/kvm/kvm_mips_int.h b/arch/mips/kvm/interrupt.h similarity index 100% rename from arch/mips/kvm/kvm_mips_int.h rename to arch/mips/kvm/interrupt.h diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/locore.S similarity index 100% rename from arch/mips/kvm/kvm_locore.S rename to arch/mips/kvm/locore.S diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/mips.c similarity index 99% rename from arch/mips/kvm/kvm_mips.c rename to arch/mips/kvm/mips.c index 289b4d2ae45c7..d687c6e3258d7 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/mips.c @@ -21,8 +21,8 @@ #include -#include "kvm_mips_int.h" -#include "kvm_mips_comm.h" +#include "interrupt.h" +#include "commpage.h" #define CREATE_TRACE_POINTS #include "trace.h" @@ -1188,7 +1188,7 @@ int __init kvm_mips_init(void) /* * On MIPS, kernel modules are executed from "mapped space", which * requires TLBs. The TLB handling code is statically linked with - * the rest of the kernel (kvm_tlb.c) to avoid the possibility of + * the rest of the kernel (tlb.c) to avoid the possibility of * double faulting. The issue is that the TLB code references * routines that are part of the the KVM module, which are only * available once the module is loaded. diff --git a/arch/mips/kvm/kvm_mips_opcode.h b/arch/mips/kvm/opcode.h similarity index 100% rename from arch/mips/kvm/kvm_mips_opcode.h rename to arch/mips/kvm/opcode.h diff --git a/arch/mips/kvm/kvm_mips_stats.c b/arch/mips/kvm/stats.c similarity index 100% rename from arch/mips/kvm/kvm_mips_stats.c rename to arch/mips/kvm/stats.c diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/tlb.c similarity index 100% rename from arch/mips/kvm/kvm_tlb.c rename to arch/mips/kvm/tlb.c diff --git a/arch/mips/kvm/kvm_trap_emul.c b/arch/mips/kvm/trap_emul.c similarity index 99% rename from arch/mips/kvm/kvm_trap_emul.c rename to arch/mips/kvm/trap_emul.c index bd2f6bc64d454..fd7257b70e656 100644 --- a/arch/mips/kvm/kvm_trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -16,8 +16,8 @@ #include -#include "kvm_mips_opcode.h" -#include "kvm_mips_int.h" +#include "opcode.h" +#include "interrupt.h" static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) { From db2fb7f202ba1d1b2c6532b359b7c763bf17fe72 Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Thu, 26 Jun 2014 12:11:39 -0700 Subject: [PATCH 034/104] MIPS: KVM: Skip memory cleaning in kvm_mips_commpage_init() The commpage is allocated using kzalloc(), so there's no need of cleaning the memory of the kvm_mips_commpage struct and its internal mips_coproc. Reviewed-by: James Hogan Signed-off-by: Deng-Cheng Zhu Signed-off-by: Paolo Bonzini --- arch/mips/kvm/commpage.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/mips/kvm/commpage.c b/arch/mips/kvm/commpage.c index 61b9c043ea09c..2d6e976d1add9 100644 --- a/arch/mips/kvm/commpage.c +++ b/arch/mips/kvm/commpage.c @@ -28,9 +28,6 @@ void kvm_mips_commpage_init(struct kvm_vcpu *vcpu) { struct kvm_mips_commpage *page = vcpu->arch.kseg0_commpage; - memset(page, 0, sizeof(struct kvm_mips_commpage)); - /* Specific init values for fields */ vcpu->arch.cop0 = &page->cop0; - memset(vcpu->arch.cop0, 0, sizeof(struct mips_coproc)); } From ab76228a48d434d67cf72f8753307db104e81cc6 Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Thu, 26 Jun 2014 12:11:40 -0700 Subject: [PATCH 035/104] MIPS: KVM: Remove dead code of TLB index error in kvm_mips_emul_tlbwr() It's impossible to fall into the error handling of the TLB index after being masked by (KVM_MIPS_GUEST_TLB_SIZE - 1). Remove the dead code. Reported-by: James Hogan Signed-off-by: Deng-Cheng Zhu Reviewed-by: James Hogan Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 1a6068892e663..fb3e8dfd1ff64 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -846,11 +846,6 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu) get_random_bytes(&index, sizeof(index)); index &= (KVM_MIPS_GUEST_TLB_SIZE - 1); - if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) { - kvm_err("%s: illegal index: %d\n", __func__, index); - return EMULATE_FAIL; - } - tlb = &vcpu->arch.guest_tlb[index]; /* From 9f6226a762c7ae02f6a23a3d4fc552dafa57ea23 Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Wed, 25 Jun 2014 14:25:58 +0200 Subject: [PATCH 036/104] arch: x86: kvm: x86.c: Cleaning up variable is set more than once A struct member variable is set to the same value more than once This was found using a static code analysis program called cppcheck. Signed-off-by: Rickard Strandqvist Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 874607ae0583e..5a8691b0ed76a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4730,7 +4730,6 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, if (desc->g) var.limit = (var.limit << 12) | 0xfff; var.type = desc->type; - var.present = desc->p; var.dpl = desc->dpl; var.db = desc->d; var.s = desc->s; From 62baf44cad3bc6b37115cc21e4228fe53d4f3474 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 29 Jun 2014 21:55:53 +0200 Subject: [PATCH 037/104] KVM: nSVM: Do not report CLTS via SVM_EXIT_WRITE_CR0 to L1 CLTS only changes TS which is not monitored by selected CR0 interception. So skip any attempt to translate WRITE_CR0 to CR0_SEL_WRITE for this instruction. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b5e994ad01359..c79766e1f1e04 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4205,7 +4205,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, if (info->intercept == x86_intercept_cr_write) icpt_info.exit_code += info->modrm_reg; - if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) + if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 || + info->intercept == x86_intercept_clts) break; intercept = svm->nested.intercept; From 9bf418335e24da995ea682a028926d7e1036be6f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 30 Jun 2014 10:54:17 +0200 Subject: [PATCH 038/104] KVM: nSVM: Fix IOIO bitmap evaluation First, kvm_read_guest returns 0 on success. And then we need to take the access size into account when testing the bitmap: intercept if any of bits corresponding to the access is set. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c79766e1f1e04..3483ac978c76e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2116,22 +2116,27 @@ static void nested_svm_unmap(struct page *page) static int nested_svm_intercept_ioio(struct vcpu_svm *svm) { - unsigned port; - u8 val, bit; + unsigned port, size, iopm_len; + u16 val, mask; + u8 start_bit; u64 gpa; if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) return NESTED_EXIT_HOST; port = svm->vmcb->control.exit_info_1 >> 16; + size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >> + SVM_IOIO_SIZE_SHIFT; gpa = svm->nested.vmcb_iopm + (port / 8); - bit = port % 8; - val = 0; + start_bit = port % 8; + iopm_len = (start_bit + size > 8) ? 2 : 1; + mask = (0xf >> (4 - size)) << start_bit; + val = 0; - if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) - val &= (1 << bit); + if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len)) + return NESTED_EXIT_DONE; - return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; + return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; } static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) From 6493f1574e898b46370e2b2315836d76a1980f2c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 30 Jun 2014 11:07:05 +0200 Subject: [PATCH 039/104] KVM: nSVM: Fix IOIO size reported on emulation The access size of an in/ins is reported in dst_bytes, and that of out/outs in src_bytes. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3483ac978c76e..1824949821f9b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4261,9 +4261,9 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, if (info->intercept == x86_intercept_in || info->intercept == x86_intercept_ins) { exit_info |= SVM_IOIO_TYPE_MASK; - bytes = info->src_bytes; - } else { bytes = info->dst_bytes; + } else { + bytes = info->src_bytes; } if (info->intercept == x86_intercept_outs || From 6cbc5f5a80a9ae5a80bc81efc574b5a85bfd4a84 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 30 Jun 2014 12:52:55 +0200 Subject: [PATCH 040/104] KVM: nSVM: Set correct port for IOIO interception evaluation Obtaining the port number from DX is bogus as a) there are immediate port accesses and b) user space may have changed the register content while processing the PIO access. Forward the correct value from the instruction emulator instead. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 1 + arch/x86/kvm/svm.c | 6 +++--- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index ffa2671a7f2f8..0e0151c13b2c9 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -37,6 +37,7 @@ struct x86_instruction_info { u8 modrm_reg; /* index of register used */ u8 modrm_rm; /* rm part of modrm */ u64 src_val; /* value of source operand */ + u64 dst_val; /* value of destination operand */ u8 src_bytes; /* size of source operand */ u8 dst_bytes; /* size of destination operand */ u8 ad_bytes; /* size of src/dst address */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 84dc4ba0364da..15453e569f3d6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -426,6 +426,7 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, .modrm_reg = ctxt->modrm_reg, .modrm_rm = ctxt->modrm_rm, .src_val = ctxt->src.val64, + .dst_val = ctxt->dst.val64, .src_bytes = ctxt->src.bytes, .dst_bytes = ctxt->dst.bytes, .ad_bytes = ctxt->ad_bytes, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1824949821f9b..85d4458a0b359 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4256,13 +4256,13 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, u64 exit_info; u32 bytes; - exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; - if (info->intercept == x86_intercept_in || info->intercept == x86_intercept_ins) { - exit_info |= SVM_IOIO_TYPE_MASK; + exit_info = ((info->src_val & 0xffff) << 16) | + SVM_IOIO_TYPE_MASK; bytes = info->dst_bytes; } else { + exit_info = (info->dst_val & 0xffff) << 16; bytes = info->src_bytes; } From 0d3da0d26e3c3515997c99451ce3b0ad1a69a36c Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Tue, 24 Jun 2014 09:42:43 +0200 Subject: [PATCH 041/104] KVM: x86: fix TSC matching I've observed kvmclock being marked as unstable on a modern single-socket system with a stable TSC and qemu-1.6.2 or qemu-2.0.0. The culprit was failure in TSC matching because of overflow of kvm_arch::nr_vcpus_matched_tsc in case there were multiple TSC writes in a single synchronization cycle. Turns out that qemu does multiple TSC writes during init, below is the evidence of that (qemu-2.0.0): The first one: 0xffffffffa08ff2b4 : vmx_write_tsc_offset+0xa4/0xb0 [kvm_intel] 0xffffffffa04c9c05 : kvm_write_tsc+0x1a5/0x360 [kvm] 0xffffffffa04cfd6b : kvm_arch_vcpu_postcreate+0x4b/0x80 [kvm] 0xffffffffa04b8188 : kvm_vm_ioctl+0x418/0x750 [kvm] The second one: 0xffffffffa08ff2b4 : vmx_write_tsc_offset+0xa4/0xb0 [kvm_intel] 0xffffffffa04c9c05 : kvm_write_tsc+0x1a5/0x360 [kvm] 0xffffffffa090610d : vmx_set_msr+0x29d/0x350 [kvm_intel] 0xffffffffa04be83b : do_set_msr+0x3b/0x60 [kvm] 0xffffffffa04c10a8 : msr_io+0xc8/0x160 [kvm] 0xffffffffa04caeb6 : kvm_arch_vcpu_ioctl+0xc86/0x1060 [kvm] 0xffffffffa04b6797 : kvm_vcpu_ioctl+0xc7/0x5a0 [kvm] #0 kvm_vcpu_ioctl at /build/buildd/qemu-2.0.0+dfsg/kvm-all.c:1780 #1 kvm_put_msrs at /build/buildd/qemu-2.0.0+dfsg/target-i386/kvm.c:1270 #2 kvm_arch_put_registers at /build/buildd/qemu-2.0.0+dfsg/target-i386/kvm.c:1909 #3 kvm_cpu_synchronize_post_init at /build/buildd/qemu-2.0.0+dfsg/kvm-all.c:1641 #4 cpu_synchronize_post_init at /build/buildd/qemu-2.0.0+dfsg/include/sysemu/kvm.h:330 #5 cpu_synchronize_all_post_init () at /build/buildd/qemu-2.0.0+dfsg/cpus.c:521 #6 main at /build/buildd/qemu-2.0.0+dfsg/vl.c:4390 The third one: 0xffffffffa08ff2b4 : vmx_write_tsc_offset+0xa4/0xb0 [kvm_intel] 0xffffffffa04c9c05 : kvm_write_tsc+0x1a5/0x360 [kvm] 0xffffffffa090610d : vmx_set_msr+0x29d/0x350 [kvm_intel] 0xffffffffa04be83b : do_set_msr+0x3b/0x60 [kvm] 0xffffffffa04c10a8 : msr_io+0xc8/0x160 [kvm] 0xffffffffa04caeb6 : kvm_arch_vcpu_ioctl+0xc86/0x1060 [kvm] 0xffffffffa04b6797 : kvm_vcpu_ioctl+0xc7/0x5a0 [kvm] #0 kvm_vcpu_ioctl at /build/buildd/qemu-2.0.0+dfsg/kvm-all.c:1780 #1 kvm_put_msrs at /build/buildd/qemu-2.0.0+dfsg/target-i386/kvm.c:1270 #2 kvm_arch_put_registers at /build/buildd/qemu-2.0.0+dfsg/target-i386/kvm.c:1909 #3 kvm_cpu_synchronize_post_reset at /build/buildd/qemu-2.0.0+dfsg/kvm-all.c:1635 #4 cpu_synchronize_post_reset at /build/buildd/qemu-2.0.0+dfsg/include/sysemu/kvm.h:323 #5 cpu_synchronize_all_post_reset () at /build/buildd/qemu-2.0.0+dfsg/cpus.c:512 #6 main at /build/buildd/qemu-2.0.0+dfsg/vl.c:4482 The fix is to count each vCPU only once when matched, so that nr_vcpus_matched_tsc holds the size of the matched set. This is achieved by reusing generation counters. Every vCPU with this_tsc_generation == cur_tsc_generation is in the matched set. The match set is cleared by setting cur_tsc_generation to a value which no other vCPU is set to (by incrementing it). I needed to bump up the counter size form u8 to u64 to ensure it never overflows. Otherwise in cases TSC is not written the same number of times on each vCPU the counter could overflow and incorrectly indicate some vCPUs as being in the matched set. This scenario seems unlikely but I'm not sure if it can be disregarded. Signed-off-by: Tomasz Grabiec Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/x86.c | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 63e020be3da78..af36f89fe67a0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -448,7 +448,7 @@ struct kvm_vcpu_arch { u64 tsc_offset_adjustment; u64 this_tsc_nsec; u64 this_tsc_write; - u8 this_tsc_generation; + u64 this_tsc_generation; bool tsc_catchup; bool tsc_always_catchup; s8 virtual_tsc_shift; @@ -591,7 +591,7 @@ struct kvm_arch { u64 cur_tsc_nsec; u64 cur_tsc_write; u64 cur_tsc_offset; - u8 cur_tsc_generation; + u64 cur_tsc_generation; int nr_vcpus_matched_tsc; spinlock_t pvclock_gtod_sync_lock; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5a8691b0ed76a..f056f855f8e66 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1215,6 +1215,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) unsigned long flags; s64 usdiff; bool matched; + bool already_matched; u64 data = msr->data; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); @@ -1279,6 +1280,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } matched = true; + already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation); } else { /* * We split periods of matched TSC writes into generations. @@ -1294,7 +1296,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm->arch.cur_tsc_write = data; kvm->arch.cur_tsc_offset = offset; matched = false; - pr_debug("kvm: new tsc generation %u, clock %llu\n", + pr_debug("kvm: new tsc generation %llu, clock %llu\n", kvm->arch.cur_tsc_generation, data); } @@ -1319,10 +1321,11 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); spin_lock(&kvm->arch.pvclock_gtod_sync_lock); - if (matched) - kvm->arch.nr_vcpus_matched_tsc++; - else + if (!matched) { kvm->arch.nr_vcpus_matched_tsc = 0; + } else if (!already_matched) { + kvm->arch.nr_vcpus_matched_tsc++; + } kvm_track_tsc_matching(vcpu); spin_unlock(&kvm->arch.pvclock_gtod_sync_lock); From 98eff52ab5c0ff5cb96940a93e99a1aeb2f11c89 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 29 Jun 2014 12:28:51 +0300 Subject: [PATCH 042/104] KVM: x86: Fix lapic.c debug prints In two cases lapic.c does not use the apic_debug macro correctly. This patch fixes them. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0069118581742..3855103f71fda 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1451,7 +1451,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) vcpu->arch.apic_arb_prio = 0; vcpu->arch.apic_attention = 0; - apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" + apic_debug("%s: vcpu=%p, id=%d, base_msr=" "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, vcpu, kvm_apic_id(apic), vcpu->arch.apic_base, apic->base_address); @@ -1895,7 +1895,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) /* evaluate pending_events before reading the vector */ smp_rmb(); sipi_vector = apic->sipi_vector; - pr_debug("vcpu %d received sipi with vector # %x\n", + apic_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, sipi_vector); kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; From 572e09290ad3f3cc83e8dc291eee3815d7b21ba5 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 4 Jul 2014 15:11:33 +0100 Subject: [PATCH 043/104] KVM: Document KVM_SET_SIGNAL_MASK as universal KVM_SET_SIGNAL_MASK is implemented in generic code and isn't x86 specific, so document it as being applicable for all architectures. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Gleb Natapov Cc: kvm@vger.kernel.org Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0fe36497642c9..e7be9f4851697 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -512,7 +512,7 @@ struct kvm_cpuid { 4.21 KVM_SET_SIGNAL_MASK Capability: basic -Architectures: x86 +Architectures: all Type: vcpu ioctl Parameters: struct kvm_signal_mask (in) Returns: 0 on success, -1 on error From bf5590f379196aa3ca48a6c778a0ae511ba67d77 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 4 Jul 2014 15:11:34 +0100 Subject: [PATCH 044/104] KVM: Reformat KVM_SET_ONE_REG register documentation Some of the MIPS registers that can be accessed with the KVM_{GET,SET}_ONE_REG interface have fairly long names, so widen the Register column of the table in the KVM_SET_ONE_REG documentation to allow them to fit. Tabs in the table are replaced with spaces at the same time for consistency. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Gleb Natapov Cc: kvm@vger.kernel.org Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 222 +++++++++++++++--------------- 1 file changed, 111 insertions(+), 111 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e7be9f4851697..c9e91cca7c529 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1774,122 +1774,122 @@ and architecture specific registers. Each have their own range of operation and their own constants and width. To keep track of the implemented registers, find a list below: - Arch | Register | Width (bits) - | | - PPC | KVM_REG_PPC_HIOR | 64 - PPC | KVM_REG_PPC_IAC1 | 64 - PPC | KVM_REG_PPC_IAC2 | 64 - PPC | KVM_REG_PPC_IAC3 | 64 - PPC | KVM_REG_PPC_IAC4 | 64 - PPC | KVM_REG_PPC_DAC1 | 64 - PPC | KVM_REG_PPC_DAC2 | 64 - PPC | KVM_REG_PPC_DABR | 64 - PPC | KVM_REG_PPC_DSCR | 64 - PPC | KVM_REG_PPC_PURR | 64 - PPC | KVM_REG_PPC_SPURR | 64 - PPC | KVM_REG_PPC_DAR | 64 - PPC | KVM_REG_PPC_DSISR | 32 - PPC | KVM_REG_PPC_AMR | 64 - PPC | KVM_REG_PPC_UAMOR | 64 - PPC | KVM_REG_PPC_MMCR0 | 64 - PPC | KVM_REG_PPC_MMCR1 | 64 - PPC | KVM_REG_PPC_MMCRA | 64 - PPC | KVM_REG_PPC_MMCR2 | 64 - PPC | KVM_REG_PPC_MMCRS | 64 - PPC | KVM_REG_PPC_SIAR | 64 - PPC | KVM_REG_PPC_SDAR | 64 - PPC | KVM_REG_PPC_SIER | 64 - PPC | KVM_REG_PPC_PMC1 | 32 - PPC | KVM_REG_PPC_PMC2 | 32 - PPC | KVM_REG_PPC_PMC3 | 32 - PPC | KVM_REG_PPC_PMC4 | 32 - PPC | KVM_REG_PPC_PMC5 | 32 - PPC | KVM_REG_PPC_PMC6 | 32 - PPC | KVM_REG_PPC_PMC7 | 32 - PPC | KVM_REG_PPC_PMC8 | 32 - PPC | KVM_REG_PPC_FPR0 | 64 + Arch | Register | Width (bits) + | | + PPC | KVM_REG_PPC_HIOR | 64 + PPC | KVM_REG_PPC_IAC1 | 64 + PPC | KVM_REG_PPC_IAC2 | 64 + PPC | KVM_REG_PPC_IAC3 | 64 + PPC | KVM_REG_PPC_IAC4 | 64 + PPC | KVM_REG_PPC_DAC1 | 64 + PPC | KVM_REG_PPC_DAC2 | 64 + PPC | KVM_REG_PPC_DABR | 64 + PPC | KVM_REG_PPC_DSCR | 64 + PPC | KVM_REG_PPC_PURR | 64 + PPC | KVM_REG_PPC_SPURR | 64 + PPC | KVM_REG_PPC_DAR | 64 + PPC | KVM_REG_PPC_DSISR | 32 + PPC | KVM_REG_PPC_AMR | 64 + PPC | KVM_REG_PPC_UAMOR | 64 + PPC | KVM_REG_PPC_MMCR0 | 64 + PPC | KVM_REG_PPC_MMCR1 | 64 + PPC | KVM_REG_PPC_MMCRA | 64 + PPC | KVM_REG_PPC_MMCR2 | 64 + PPC | KVM_REG_PPC_MMCRS | 64 + PPC | KVM_REG_PPC_SIAR | 64 + PPC | KVM_REG_PPC_SDAR | 64 + PPC | KVM_REG_PPC_SIER | 64 + PPC | KVM_REG_PPC_PMC1 | 32 + PPC | KVM_REG_PPC_PMC2 | 32 + PPC | KVM_REG_PPC_PMC3 | 32 + PPC | KVM_REG_PPC_PMC4 | 32 + PPC | KVM_REG_PPC_PMC5 | 32 + PPC | KVM_REG_PPC_PMC6 | 32 + PPC | KVM_REG_PPC_PMC7 | 32 + PPC | KVM_REG_PPC_PMC8 | 32 + PPC | KVM_REG_PPC_FPR0 | 64 ... - PPC | KVM_REG_PPC_FPR31 | 64 - PPC | KVM_REG_PPC_VR0 | 128 + PPC | KVM_REG_PPC_FPR31 | 64 + PPC | KVM_REG_PPC_VR0 | 128 ... - PPC | KVM_REG_PPC_VR31 | 128 - PPC | KVM_REG_PPC_VSR0 | 128 + PPC | KVM_REG_PPC_VR31 | 128 + PPC | KVM_REG_PPC_VSR0 | 128 ... - PPC | KVM_REG_PPC_VSR31 | 128 - PPC | KVM_REG_PPC_FPSCR | 64 - PPC | KVM_REG_PPC_VSCR | 32 - PPC | KVM_REG_PPC_VPA_ADDR | 64 - PPC | KVM_REG_PPC_VPA_SLB | 128 - PPC | KVM_REG_PPC_VPA_DTL | 128 - PPC | KVM_REG_PPC_EPCR | 32 - PPC | KVM_REG_PPC_EPR | 32 - PPC | KVM_REG_PPC_TCR | 32 - PPC | KVM_REG_PPC_TSR | 32 - PPC | KVM_REG_PPC_OR_TSR | 32 - PPC | KVM_REG_PPC_CLEAR_TSR | 32 - PPC | KVM_REG_PPC_MAS0 | 32 - PPC | KVM_REG_PPC_MAS1 | 32 - PPC | KVM_REG_PPC_MAS2 | 64 - PPC | KVM_REG_PPC_MAS7_3 | 64 - PPC | KVM_REG_PPC_MAS4 | 32 - PPC | KVM_REG_PPC_MAS6 | 32 - PPC | KVM_REG_PPC_MMUCFG | 32 - PPC | KVM_REG_PPC_TLB0CFG | 32 - PPC | KVM_REG_PPC_TLB1CFG | 32 - PPC | KVM_REG_PPC_TLB2CFG | 32 - PPC | KVM_REG_PPC_TLB3CFG | 32 - PPC | KVM_REG_PPC_TLB0PS | 32 - PPC | KVM_REG_PPC_TLB1PS | 32 - PPC | KVM_REG_PPC_TLB2PS | 32 - PPC | KVM_REG_PPC_TLB3PS | 32 - PPC | KVM_REG_PPC_EPTCFG | 32 - PPC | KVM_REG_PPC_ICP_STATE | 64 - PPC | KVM_REG_PPC_TB_OFFSET | 64 - PPC | KVM_REG_PPC_SPMC1 | 32 - PPC | KVM_REG_PPC_SPMC2 | 32 - PPC | KVM_REG_PPC_IAMR | 64 - PPC | KVM_REG_PPC_TFHAR | 64 - PPC | KVM_REG_PPC_TFIAR | 64 - PPC | KVM_REG_PPC_TEXASR | 64 - PPC | KVM_REG_PPC_FSCR | 64 - PPC | KVM_REG_PPC_PSPB | 32 - PPC | KVM_REG_PPC_EBBHR | 64 - PPC | KVM_REG_PPC_EBBRR | 64 - PPC | KVM_REG_PPC_BESCR | 64 - PPC | KVM_REG_PPC_TAR | 64 - PPC | KVM_REG_PPC_DPDES | 64 - PPC | KVM_REG_PPC_DAWR | 64 - PPC | KVM_REG_PPC_DAWRX | 64 - PPC | KVM_REG_PPC_CIABR | 64 - PPC | KVM_REG_PPC_IC | 64 - PPC | KVM_REG_PPC_VTB | 64 - PPC | KVM_REG_PPC_CSIGR | 64 - PPC | KVM_REG_PPC_TACR | 64 - PPC | KVM_REG_PPC_TCSCR | 64 - PPC | KVM_REG_PPC_PID | 64 - PPC | KVM_REG_PPC_ACOP | 64 - PPC | KVM_REG_PPC_VRSAVE | 32 - PPC | KVM_REG_PPC_LPCR | 64 - PPC | KVM_REG_PPC_PPR | 64 - PPC | KVM_REG_PPC_ARCH_COMPAT 32 - PPC | KVM_REG_PPC_DABRX | 32 - PPC | KVM_REG_PPC_WORT | 64 - PPC | KVM_REG_PPC_TM_GPR0 | 64 + PPC | KVM_REG_PPC_VSR31 | 128 + PPC | KVM_REG_PPC_FPSCR | 64 + PPC | KVM_REG_PPC_VSCR | 32 + PPC | KVM_REG_PPC_VPA_ADDR | 64 + PPC | KVM_REG_PPC_VPA_SLB | 128 + PPC | KVM_REG_PPC_VPA_DTL | 128 + PPC | KVM_REG_PPC_EPCR | 32 + PPC | KVM_REG_PPC_EPR | 32 + PPC | KVM_REG_PPC_TCR | 32 + PPC | KVM_REG_PPC_TSR | 32 + PPC | KVM_REG_PPC_OR_TSR | 32 + PPC | KVM_REG_PPC_CLEAR_TSR | 32 + PPC | KVM_REG_PPC_MAS0 | 32 + PPC | KVM_REG_PPC_MAS1 | 32 + PPC | KVM_REG_PPC_MAS2 | 64 + PPC | KVM_REG_PPC_MAS7_3 | 64 + PPC | KVM_REG_PPC_MAS4 | 32 + PPC | KVM_REG_PPC_MAS6 | 32 + PPC | KVM_REG_PPC_MMUCFG | 32 + PPC | KVM_REG_PPC_TLB0CFG | 32 + PPC | KVM_REG_PPC_TLB1CFG | 32 + PPC | KVM_REG_PPC_TLB2CFG | 32 + PPC | KVM_REG_PPC_TLB3CFG | 32 + PPC | KVM_REG_PPC_TLB0PS | 32 + PPC | KVM_REG_PPC_TLB1PS | 32 + PPC | KVM_REG_PPC_TLB2PS | 32 + PPC | KVM_REG_PPC_TLB3PS | 32 + PPC | KVM_REG_PPC_EPTCFG | 32 + PPC | KVM_REG_PPC_ICP_STATE | 64 + PPC | KVM_REG_PPC_TB_OFFSET | 64 + PPC | KVM_REG_PPC_SPMC1 | 32 + PPC | KVM_REG_PPC_SPMC2 | 32 + PPC | KVM_REG_PPC_IAMR | 64 + PPC | KVM_REG_PPC_TFHAR | 64 + PPC | KVM_REG_PPC_TFIAR | 64 + PPC | KVM_REG_PPC_TEXASR | 64 + PPC | KVM_REG_PPC_FSCR | 64 + PPC | KVM_REG_PPC_PSPB | 32 + PPC | KVM_REG_PPC_EBBHR | 64 + PPC | KVM_REG_PPC_EBBRR | 64 + PPC | KVM_REG_PPC_BESCR | 64 + PPC | KVM_REG_PPC_TAR | 64 + PPC | KVM_REG_PPC_DPDES | 64 + PPC | KVM_REG_PPC_DAWR | 64 + PPC | KVM_REG_PPC_DAWRX | 64 + PPC | KVM_REG_PPC_CIABR | 64 + PPC | KVM_REG_PPC_IC | 64 + PPC | KVM_REG_PPC_VTB | 64 + PPC | KVM_REG_PPC_CSIGR | 64 + PPC | KVM_REG_PPC_TACR | 64 + PPC | KVM_REG_PPC_TCSCR | 64 + PPC | KVM_REG_PPC_PID | 64 + PPC | KVM_REG_PPC_ACOP | 64 + PPC | KVM_REG_PPC_VRSAVE | 32 + PPC | KVM_REG_PPC_LPCR | 64 + PPC | KVM_REG_PPC_PPR | 64 + PPC | KVM_REG_PPC_ARCH_COMPAT | 32 + PPC | KVM_REG_PPC_DABRX | 32 + PPC | KVM_REG_PPC_WORT | 64 + PPC | KVM_REG_PPC_TM_GPR0 | 64 ... - PPC | KVM_REG_PPC_TM_GPR31 | 64 - PPC | KVM_REG_PPC_TM_VSR0 | 128 + PPC | KVM_REG_PPC_TM_GPR31 | 64 + PPC | KVM_REG_PPC_TM_VSR0 | 128 ... - PPC | KVM_REG_PPC_TM_VSR63 | 128 - PPC | KVM_REG_PPC_TM_CR | 64 - PPC | KVM_REG_PPC_TM_LR | 64 - PPC | KVM_REG_PPC_TM_CTR | 64 - PPC | KVM_REG_PPC_TM_FPSCR | 64 - PPC | KVM_REG_PPC_TM_AMR | 64 - PPC | KVM_REG_PPC_TM_PPR | 64 - PPC | KVM_REG_PPC_TM_VRSAVE | 64 - PPC | KVM_REG_PPC_TM_VSCR | 32 - PPC | KVM_REG_PPC_TM_DSCR | 64 - PPC | KVM_REG_PPC_TM_TAR | 64 + PPC | KVM_REG_PPC_TM_VSR63 | 128 + PPC | KVM_REG_PPC_TM_CR | 64 + PPC | KVM_REG_PPC_TM_LR | 64 + PPC | KVM_REG_PPC_TM_CTR | 64 + PPC | KVM_REG_PPC_TM_FPSCR | 64 + PPC | KVM_REG_PPC_TM_AMR | 64 + PPC | KVM_REG_PPC_TM_PPR | 64 + PPC | KVM_REG_PPC_TM_VRSAVE | 64 + PPC | KVM_REG_PPC_TM_VSCR | 32 + PPC | KVM_REG_PPC_TM_DSCR | 64 + PPC | KVM_REG_PPC_TM_TAR | 64 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: From c2d2c21bff27617ff2ada064f7b70079a2123364 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 4 Jul 2014 15:11:35 +0100 Subject: [PATCH 045/104] KVM: MIPS: Document MIPS specifics of KVM API. Document the MIPS specific parts of the KVM API, including: - The layout of the kvm_regs structure. - The interrupt number passed to KVM_INTERRUPT. - The registers supported by the KVM_{GET,SET}_ONE_REG interface, and the encoding of those register ids. - That KVM_INTERRUPT and KVM_GET_REG_LIST are supported on MIPS. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Gleb Natapov Cc: kvm@vger.kernel.org Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 63 ++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c9e91cca7c529..5833266407f52 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -297,6 +297,15 @@ struct kvm_regs { __u64 rip, rflags; }; +/* mips */ +struct kvm_regs { + /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ + __u64 gpr[32]; + __u64 hi; + __u64 lo; + __u64 pc; +}; + 4.12 KVM_SET_REGS @@ -378,7 +387,7 @@ struct kvm_translation { 4.16 KVM_INTERRUPT Capability: basic -Architectures: x86, ppc +Architectures: x86, ppc, mips Type: vcpu ioctl Parameters: struct kvm_interrupt (in) Returns: 0 on success, -1 on error @@ -423,6 +432,11 @@ c) KVM_INTERRUPT_SET_LEVEL Note that any value for 'irq' other than the ones stated above is invalid and incurs unexpected behavior. +MIPS: + +Queues an external interrupt to be injected into the virtual CPU. A negative +interrupt number dequeues the interrupt. + 4.17 KVM_DEBUG_GUEST @@ -1890,6 +1904,35 @@ registers, find a list below: PPC | KVM_REG_PPC_TM_VSCR | 32 PPC | KVM_REG_PPC_TM_DSCR | 64 PPC | KVM_REG_PPC_TM_TAR | 64 + | | + MIPS | KVM_REG_MIPS_R0 | 64 + ... + MIPS | KVM_REG_MIPS_R31 | 64 + MIPS | KVM_REG_MIPS_HI | 64 + MIPS | KVM_REG_MIPS_LO | 64 + MIPS | KVM_REG_MIPS_PC | 64 + MIPS | KVM_REG_MIPS_CP0_INDEX | 32 + MIPS | KVM_REG_MIPS_CP0_CONTEXT | 64 + MIPS | KVM_REG_MIPS_CP0_USERLOCAL | 64 + MIPS | KVM_REG_MIPS_CP0_PAGEMASK | 32 + MIPS | KVM_REG_MIPS_CP0_WIRED | 32 + MIPS | KVM_REG_MIPS_CP0_HWRENA | 32 + MIPS | KVM_REG_MIPS_CP0_BADVADDR | 64 + MIPS | KVM_REG_MIPS_CP0_COUNT | 32 + MIPS | KVM_REG_MIPS_CP0_ENTRYHI | 64 + MIPS | KVM_REG_MIPS_CP0_COMPARE | 32 + MIPS | KVM_REG_MIPS_CP0_STATUS | 32 + MIPS | KVM_REG_MIPS_CP0_CAUSE | 32 + MIPS | KVM_REG_MIPS_CP0_EPC | 64 + MIPS | KVM_REG_MIPS_CP0_CONFIG | 32 + MIPS | KVM_REG_MIPS_CP0_CONFIG1 | 32 + MIPS | KVM_REG_MIPS_CP0_CONFIG2 | 32 + MIPS | KVM_REG_MIPS_CP0_CONFIG3 | 32 + MIPS | KVM_REG_MIPS_CP0_CONFIG7 | 32 + MIPS | KVM_REG_MIPS_CP0_ERROREPC | 64 + MIPS | KVM_REG_MIPS_COUNT_CTL | 64 + MIPS | KVM_REG_MIPS_COUNT_RESUME | 64 + MIPS | KVM_REG_MIPS_COUNT_HZ | 64 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: @@ -1928,6 +1971,22 @@ arm64 CCSIDR registers are demultiplexed by CSSELR value: arm64 system registers have the following id bit patterns: 0x6030 0000 0013 + +MIPS registers are mapped using the lower 32 bits. The upper 16 of that is +the register group type: + +MIPS core registers (see above) have the following id bit patterns: + 0x7030 0000 0000 + +MIPS CP0 registers (see KVM_REG_MIPS_CP0_* above) have the following id bit +patterns depending on whether they're 32-bit or 64-bit registers: + 0x7020 0000 0001 00 (32-bit) + 0x7030 0000 0001 00 (64-bit) + +MIPS KVM control registers (see above) have the following id bit patterns: + 0x7030 0000 0002 + + 4.69 KVM_GET_ONE_REG Capability: KVM_CAP_ONE_REG @@ -2415,7 +2474,7 @@ in VCPU matching underlying host. 4.84 KVM_GET_REG_LIST Capability: basic -Architectures: arm, arm64 +Architectures: arm, arm64, mips Type: vcpu ioctl Parameters: struct kvm_reg_list (in/out) Returns: 0 on success; -1 on error From 7dfc63cf977447e09b1072911c22564f900fc578 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 May 2014 14:25:25 +0200 Subject: [PATCH 046/104] KVM: s390: allow only one SIGP STOP (AND STORE STATUS) at a time A SIGP STOP (AND STORE STATUS) order is complete as soon as the VCPU has been stopped. This patch makes sure that only one SIGP STOP (AND STORE STATUS) may be pending at a time (as defined by the architecture). If the action_bits are still set, a SIGP STOP has been issued but not completed yet. The VCPU is busy for further SIGP STOP orders. Also set the CPUSTAT_STOP_INT after the action_bits variable has been modified (the same order that is used when injecting a KVM_S390_SIGP_STOP from userspace). Both changes are needed in preparation for a user space driven VCPU state change (to avoid race conditions). Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/kvm/sigp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 43079a48cc98d..fd7fb5c5ef5dc 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -136,6 +136,11 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) inti->type = KVM_S390_SIGP_STOP; spin_lock_bh(&li->lock); + if (li->action_bits & ACTION_STOP_ON_STOP) { + /* another SIGP STOP is pending */ + rc = SIGP_CC_BUSY; + goto out; + } if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { kfree(inti); if ((action & ACTION_STORE_ON_STOP) != 0) @@ -144,8 +149,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) } list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); - atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); li->action_bits |= action; + atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); if (waitqueue_active(li->wq)) wake_up_interruptible(li->wq); out: From 32f5ff63ff9c87195d06896e6ab4086b6369546a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 14 Apr 2014 12:40:03 +0200 Subject: [PATCH 047/104] KVM: s390: move finalization of SIGP STOP orders to kvm_s390_vcpu_stop Let's move the finalization of SIGP STOP and SIGP STOP AND STORE STATUS orders to the point where the VCPU is actually stopped. This change is needed to prepare for a user space driven VCPU state change. The action_bits may only be cleared when setting the cpu state to STOPPED while holding the local irq lock. Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/kvm/intercept.c | 31 ++++++++++++------------------- arch/s390/kvm/kvm-s390.c | 8 ++++++++ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index a0b586c1913c1..ac6b32585a36d 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -56,32 +56,25 @@ static int handle_noop(struct kvm_vcpu *vcpu) static int handle_stop(struct kvm_vcpu *vcpu) { int rc = 0; + unsigned int action_bits; vcpu->stat.exit_stop_request++; - spin_lock_bh(&vcpu->arch.local_int.lock); - trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); - if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { - kvm_s390_vcpu_stop(vcpu); - vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; - VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); - rc = -EOPNOTSUPP; - } + action_bits = vcpu->arch.local_int.action_bits; - if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { - vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; - /* store status must be called unlocked. Since local_int.lock - * only protects local_int.* and not guest memory we can give - * up the lock here */ - spin_unlock_bh(&vcpu->arch.local_int.lock); + if (!(action_bits & ACTION_STOP_ON_STOP)) + return 0; + + if (action_bits & ACTION_STORE_ON_STOP) { rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_NOADDR); - if (rc >= 0) - rc = -EOPNOTSUPP; - } else - spin_unlock_bh(&vcpu->arch.local_int.lock); - return rc; + if (rc) + return rc; + } + + kvm_s390_vcpu_stop(vcpu); + return -EOPNOTSUPP; } static int handle_validity(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2f3e14fe91a48..c5077899de5bf 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1494,7 +1494,15 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); + /* Need to lock access to action_bits to avoid a SIGP race condition */ + spin_lock_bh(&vcpu->arch.local_int.lock); atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + + /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ + vcpu->arch.local_int.action_bits &= + ~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP); + spin_unlock_bh(&vcpu->arch.local_int.lock); + __disable_ibs_on_vcpu(vcpu); for (i = 0; i < online_vcpus; i++) { From 7a42fdc20f1ff31bb47b05a4283c17129d0ecca3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 5 May 2014 16:26:19 +0200 Subject: [PATCH 048/104] KVM: s390: remove __cpu_is_stopped and expose is_vcpu_stopped The function "__cpu_is_stopped" is not used any more. Let's remove it and expose the function "is_vcpu_stopped" instead, which is actually what we want. This patch also converts an open coded check for CPUSTAT_STOPPED to is_vcpu_stopped(). Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 7 +------ arch/s390/kvm/kvm-s390.h | 4 ++-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c5077899de5bf..342895350825c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -926,7 +926,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) { int rc = 0; - if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED)) + if (!is_vcpu_stopped(vcpu)) rc = -EBUSY; else { vcpu->run->psw_mask = psw.mask; @@ -1413,11 +1413,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) return kvm_s390_store_status_unloaded(vcpu, addr); } -static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) -{ - return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; -} - static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) { kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index a8655ed316167..77ed846342d47 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -45,9 +45,9 @@ do { \ d_args); \ } while (0) -static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) +static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) { - return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; } static inline int kvm_is_ucontrol(struct kvm *kvm) From 0b4820d6d8b6448bc9f7fac1bb1a801a53b425e1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 May 2014 16:05:13 +0200 Subject: [PATCH 049/104] KVM: prepare for KVM_(S|G)ET_MP_STATE on other architectures Highlight the aspects of the ioctls that are actually specific to x86 and ia64. As defined restrictions (irqchip) and mp states may not apply to other architectures, these parts are flagged to belong to x86 and ia64. In preparation for the use of KVM_(S|G)ET_MP_STATE by s390. Fix a spelling error (KVM_SET_MP_STATE vs. KVM_SET_MPSTATE) on the way. Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/api.txt | 21 ++++++++++++--------- include/uapi/linux/kvm.h | 3 ++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0fe36497642c9..904c61cdd311b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -988,18 +988,20 @@ uniprocessor guests). Possible values are: - - KVM_MP_STATE_RUNNABLE: the vcpu is currently running + - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86, ia64] - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) - which has not yet received an INIT signal + which has not yet received an INIT signal [x86, + ia64] - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is - now ready for a SIPI + now ready for a SIPI [x86, ia64] - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and - is waiting for an interrupt + is waiting for an interrupt [x86, ia64] - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector - accessible via KVM_GET_VCPU_EVENTS) + accessible via KVM_GET_VCPU_EVENTS) [x86, ia64] -This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel -irqchip, the multiprocessing state must be maintained by userspace. +On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +in-kernel irqchip, the multiprocessing state must be maintained by userspace on +these architectures. 4.39 KVM_SET_MP_STATE @@ -1013,8 +1015,9 @@ Returns: 0 on success; -1 on error Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for arguments. -This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel -irqchip, the multiprocessing state must be maintained by userspace. +On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +in-kernel irqchip, the multiprocessing state must be maintained by userspace on +these architectures. 4.40 KVM_SET_IDENTITY_MAP_ADDR diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e11d8f170a629..37d4ec6f14d8a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -399,8 +399,9 @@ struct kvm_vapic_addr { __u64 vapic_addr; }; -/* for KVM_SET_MPSTATE */ +/* for KVM_SET_MP_STATE */ +/* not all states are valid on all architectures */ #define KVM_MP_STATE_RUNNABLE 0 #define KVM_MP_STATE_UNINITIALIZED 1 #define KVM_MP_STATE_INIT_RECEIVED 2 From 6352e4d2dd9a349024a41356148eced553e1dce4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 10 Apr 2014 17:35:00 +0200 Subject: [PATCH 050/104] KVM: s390: implement KVM_(S|G)ET_MP_STATE for user space state control This patch - adds s390 specific MP states to linux headers and documents them - implements the KVM_{SET,GET}_MP_STATE ioctls - enables KVM_CAP_MP_STATE - allows user space to control the VCPU state on s390. If user space sets the VCPU state using the ioctl KVM_SET_MP_STATE, we can disable manual changing of the VCPU state and trust user space to do the right thing. Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/api.txt | 10 +++++++-- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/diag.c | 3 ++- arch/s390/kvm/intercept.c | 3 ++- arch/s390/kvm/kvm-s390.c | 37 +++++++++++++++++++++++++++---- arch/s390/kvm/kvm-s390.h | 6 +++++ include/uapi/linux/kvm.h | 4 ++++ 7 files changed, 56 insertions(+), 8 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 904c61cdd311b..a41465bd6a5c3 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -974,7 +974,7 @@ for vm-wide capabilities. 4.38 KVM_GET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, ia64 +Architectures: x86, ia64, s390 Type: vcpu ioctl Parameters: struct kvm_mp_state (out) Returns: 0 on success; -1 on error @@ -998,6 +998,12 @@ Possible values are: is waiting for an interrupt [x86, ia64] - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector accessible via KVM_GET_VCPU_EVENTS) [x86, ia64] + - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390] + - KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390] + - KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted) + [s390] + - KVM_MP_STATE_LOAD: the vcpu is in a special load/startup state + [s390] On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on @@ -1007,7 +1013,7 @@ these architectures. 4.39 KVM_SET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, ia64 +Architectures: x86, ia64, s390 Type: vcpu ioctl Parameters: struct kvm_mp_state (in) Returns: 0 on success; -1 on error diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 4181d7baabba9..c2ba0208a0e14 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -418,6 +418,7 @@ struct kvm_arch{ int css_support; int use_irqchip; int use_cmma; + int user_cpu_state_ctrl; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; spinlock_t start_stop_lock; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 0161675878a24..59bd8f991b986 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -176,7 +176,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - kvm_s390_vcpu_stop(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index ac6b32585a36d..eaf46291d3619 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -73,7 +73,8 @@ static int handle_stop(struct kvm_vcpu *vcpu) return rc; } - kvm_s390_vcpu_stop(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); return -EOPNOTSUPP; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 342895350825c..fdf88f7a539c3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -167,6 +167,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_DEVICE_CTRL: case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_VM_ATTRIBUTES: + case KVM_CAP_MP_STATE: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -595,7 +596,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->pp = 0; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); - kvm_s390_vcpu_stop(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); kvm_s390_clear_local_irqs(vcpu); } @@ -980,13 +982,34 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; /* not implemented yet */ + /* CHECK_STOP and LOAD are not supported yet */ + return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : + KVM_MP_STATE_OPERATING; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; /* not implemented yet */ + int rc = 0; + + /* user space knows about this interface - let it control the state */ + vcpu->kvm->arch.user_cpu_state_ctrl = 1; + + switch (mp_state->mp_state) { + case KVM_MP_STATE_STOPPED: + kvm_s390_vcpu_stop(vcpu); + break; + case KVM_MP_STATE_OPERATING: + kvm_s390_vcpu_start(vcpu); + break; + case KVM_MP_STATE_LOAD: + case KVM_MP_STATE_CHECK_STOP: + /* fall through - CHECK_STOP and LOAD are not supported yet */ + default: + rc = -ENXIO; + } + + return rc; } bool kvm_s390_cmma_enabled(struct kvm *kvm) @@ -1284,7 +1307,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - kvm_s390_vcpu_start(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { + kvm_s390_vcpu_start(vcpu); + } else if (is_vcpu_stopped(vcpu)) { + pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n", + vcpu->vcpu_id); + return -EINVAL; + } switch (kvm_run->exit_reason) { case KVM_EXIT_S390_SIEIC: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 77ed846342d47..33a0e4bed2a5f 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -129,6 +129,12 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) vcpu->arch.sie_block->gpsw.mask |= cc << 44; } +/* are cpu states controlled by user space */ +static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) +{ + return kvm->arch.user_cpu_state_ctrl != 0; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 37d4ec6f14d8a..9b744af871d74 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -407,6 +407,10 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_INIT_RECEIVED 2 #define KVM_MP_STATE_HALTED 3 #define KVM_MP_STATE_SIPI_RECEIVED 4 +#define KVM_MP_STATE_STOPPED 5 +#define KVM_MP_STATE_CHECK_STOP 6 +#define KVM_MP_STATE_OPERATING 7 +#define KVM_MP_STATE_LOAD 8 struct kvm_mp_state { __u32 mp_state; From 80112c89ed872c725e7dc39ccf6c37d1a585e161 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 8 Jul 2014 09:47:41 +0530 Subject: [PATCH 051/104] KVM: Synthesize G bit for all segments. We have noticed that qemu-kvm hangs early in the BIOS when runnning nested under some versions of VMware ESXi. The problem we believe is because KVM assumes that the platform preserves the 'G' but for any segment register. The SVM specification itemizes the segment attribute bits that are observed by the CPU, but the (G)ranularity bit is not one of the bits itemized, for any segment. Though current AMD CPUs keep track of the (G)ranularity bit for all segment registers other than CS, the specification does not require it. VMware's virtual CPU may not track the (G)ranularity bit for any segment register. Since kvm already synthesizes the (G)ranularity bit for the CS segment. It should do so for all segments. The patch below does that, and helps get rid of the hangs. Patch applies on top of Linus' tree. Signed-off-by: Jim Mattson Signed-off-by: Alok N Kataria Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 85d4458a0b359..4925a94a970a4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1415,7 +1415,16 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1; var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; - var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; + + /* + * AMD CPUs circa 2014 track the G bit for all segments except CS. + * However, the SVM spec states that the G bit is not observed by the + * CPU, and some VMware virtual CPUs drop the G bit for all segments. + * So let's synthesize a legal G bit for all segments, this helps + * running KVM nested. It also helps cross-vendor migration, because + * Intel's vmentry has a check on the 'G' bit. + */ + var->g = s->limit > 0xfffff; /* * AMD's VMCB does not have an explicit unusable field, so emulate it @@ -1424,14 +1433,6 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->unusable = !var->present || (var->type == 0); switch (seg) { - case VCPU_SREG_CS: - /* - * SVM always stores 0 for the 'G' bit in the CS selector in - * the VMCB on a VMEXIT. This hurts cross-vendor migration: - * Intel's VMENTRY has a check on the 'G' bit. - */ - var->g = s->limit > 0xfffff; - break; case VCPU_SREG_TR: /* * Work around a bug where the busy flag in the tr selector From 5f7552d4a56c21a882c9854ac63c6eb73ca7d7c8 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 30 Jun 2014 12:03:02 +0300 Subject: [PATCH 052/104] KVM: x86: Pending interrupt may be delivered after INIT We encountered a scenario in which after an INIT is delivered, a pending interrupt is delivered, although it was sent before the INIT. As the SDM states in section 10.4.7.1, the ISR and the IRR should be cleared after INIT as KVM does. This also means that pending interrupts should be cleared. This patch clears upon reset (and INIT) the pending interrupts; and at the same occassion clears the pending exceptions, since they may cause a similar issue. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f056f855f8e66..b13f3a39ebecf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6845,6 +6845,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; vcpu->arch.nmi_injected = false; + kvm_clear_interrupt_queue(vcpu); + kvm_clear_exception_queue(vcpu); memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); vcpu->arch.dr6 = DR6_FIXED_1; From 22d48b2d2aa0b078816eaa1e15e485811a2d03fa Mon Sep 17 00:00:00 2001 From: Matthias Lange Date: Thu, 26 Jun 2014 13:50:15 +0200 Subject: [PATCH 053/104] KVM: svm: writes to MSR_K7_HWCR generates GPE in guest Since commit 575203 the MCE subsystem in the Linux kernel for AMD sets bit 18 in MSR_K7_HWCR. Running such a kernel as a guest in KVM on an AMD host results in a GPE injected into the guest because kvm_set_msr_common returns 1. This patch fixes this by masking bit 18 from the MSR value desired by the guest. Signed-off-by: Matthias Lange Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b13f3a39ebecf..7553530e35022 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2035,6 +2035,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) data &= ~(u64)0x40; /* ignore flush filter disable */ data &= ~(u64)0x100; /* ignore ignne emulation enable */ data &= ~(u64)0x8; /* ignore TLB cache disable */ + data &= ~(u64)0x40000; /* ignore Mc status write enable */ if (data != 0) { vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", data); From 98eb2f8b145cee711984d42eff5d6f19b6b1df69 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Mar 2014 09:51:52 +0100 Subject: [PATCH 054/104] KVM: vmx: speed up emulation of invalid guest state About 25% of the time spent in emulation of invalid guest state is wasted in checking whether emulation is required for the next instruction. However, this almost never changes except when a segment register (or TR or LDTR) changes, or when there is a mode transition (i.e. CR0 changes). In fact, vmx_set_segment and vmx_set_cr0 already modify vmx->emulation_required (except that the former for some reason uses |= instead of just an assignment). So there is no need to call guest_state_valid in the emulation loop. Emulation performance test results indicate 1650-2600 cycles for common instructions, versus 2300-3200 before this patch on a Sandy Bridge Xeon. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8748c2e19ed67..5c7bbde25bbf6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3672,7 +3672,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); out: - vmx->emulation_required |= emulation_required(vcpu); + vmx->emulation_required = emulation_required(vcpu); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) @@ -5640,7 +5640,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; - while (!guest_state_valid(vcpu) && count-- != 0) { + while (vmx->emulation_required && count-- != 0) { if (intr_window_requested && vmx_interrupt_allowed(vcpu)) return handle_interrupt_window(&vmx->vcpu); @@ -5674,7 +5674,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) schedule(); } - vmx->emulation_required = emulation_required(vcpu); out: return ret; } From 37ccdcbe0757196ec98c0dcf9754bec8423807a5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 20 May 2014 14:29:47 +0200 Subject: [PATCH 055/104] KVM: x86: return all bits from get_interrupt_shadow For the next patch we will need to know the full state of the interrupt shadow; we will then set KVM_REQ_EVENT when one bit is cleared. However, right now get_interrupt_shadow only returns the one corresponding to the emulated instruction, or an unconditional 0 if the emulated instruction does not have an interrupt shadow. This is confusing and does not allow us to check for cleared bits as mentioned above. Clean the callback up, and modify toggle_interruptibility to match the comment above the call. As a small result, the call to set_interrupt_shadow will be skipped in the common case where int_shadow == 0 && mask == 0. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 6 +++--- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 10 +++++----- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index af36f89fe67a0..b8a4480176b9c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -717,7 +717,7 @@ struct kvm_x86_ops { int (*handle_exit)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); - u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); + u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); void (*set_irq)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4925a94a970a4..ddf742768ecf2 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -486,14 +486,14 @@ static int is_external_interrupt(u32 info) return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); } -static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) +static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); u32 ret = 0; if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) - ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; - return ret & mask; + ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; + return ret; } static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5c7bbde25bbf6..0c9569b994f93 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1943,7 +1943,7 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) vmcs_writel(GUEST_RFLAGS, rflags); } -static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) +static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) { u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); int ret = 0; @@ -1953,7 +1953,7 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) if (interruptibility & GUEST_INTR_STATE_MOV_SS) ret |= KVM_X86_SHADOW_INT_MOV_SS; - return ret & mask; + return ret; } static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7553530e35022..a56126e6bd75f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2978,9 +2978,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; events->interrupt.nr = vcpu->arch.interrupt.nr; events->interrupt.soft = 0; - events->interrupt.shadow = - kvm_x86_ops->get_interrupt_shadow(vcpu, - KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); + events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); events->nmi.injected = vcpu->arch.nmi_injected; events->nmi.pending = vcpu->arch.nmi_pending != 0; @@ -4860,7 +4858,7 @@ static const struct x86_emulate_ops emulate_ops = { static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) { - u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); + u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); /* * an sti; sti; sequence only disable interrupts for the first * instruction. So, if the last instruction, be it emulated or @@ -4868,7 +4866,9 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) * means that the last instruction is an sti. We should not * leave the flag on in this case. The same goes for mov ss */ - if (!(int_shadow & mask)) + if (int_shadow & mask) + mask = 0; + if (unlikely(int_shadow || mask)) kvm_x86_ops->set_interrupt_shadow(vcpu, mask); } From 6addfc42992be4b073c39137ecfdf4b2aa2d487f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Mar 2014 11:29:28 +0100 Subject: [PATCH 056/104] KVM: x86: avoid useless set of KVM_REQ_EVENT after emulation Despite the provisions to emulate up to 130 consecutive instructions, in practice KVM will emulate just one before exiting handle_invalid_guest_state, because x86_emulate_instruction always sets KVM_REQ_EVENT. However, we only need to do this if an interrupt could be injected, which happens a) if an interrupt shadow bit (STI or MOV SS) has gone away; b) if the interrupt flag has just been set (other instructions than STI can set it without enabling an interrupt shadow). This cuts another 700-900 cycles from the cost of emulating an instruction (measured on a Sandy Bridge Xeon: 1650-2600 cycles before the patch on kvm-unit-tests, 925-1700 afterwards). Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a56126e6bd75f..cd9316786dca6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -87,6 +87,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); +static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); struct kvm_x86_ops *kvm_x86_ops; EXPORT_SYMBOL_GPL(kvm_x86_ops); @@ -4868,8 +4869,11 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) */ if (int_shadow & mask) mask = 0; - if (unlikely(int_shadow || mask)) + if (unlikely(int_shadow || mask)) { kvm_x86_ops->set_interrupt_shadow(vcpu, mask); + if (!mask) + kvm_make_request(KVM_REQ_EVENT, vcpu); + } } static void inject_emulated_exception(struct kvm_vcpu *vcpu) @@ -5095,20 +5099,18 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, return dr6; } -static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r) +static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r) { struct kvm_run *kvm_run = vcpu->run; /* - * Use the "raw" value to see if TF was passed to the processor. - * Note that the new value of the flags has not been saved yet. + * rflags is the old, "raw" value of the flags. The new value has + * not been saved yet. * * This is correct even for TF set by the guest, because "the * processor will not generate this exception after the instruction * that sets the TF flag". */ - unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); - if (unlikely(rflags & X86_EFLAGS_TF)) { if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1; @@ -5275,13 +5277,22 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, r = EMULATE_DONE; if (writeback) { + unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); toggle_interruptibility(vcpu, ctxt->interruptibility); - kvm_make_request(KVM_REQ_EVENT, vcpu); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); if (r == EMULATE_DONE) - kvm_vcpu_check_singlestep(vcpu, &r); - kvm_set_rflags(vcpu, ctxt->eflags); + kvm_vcpu_check_singlestep(vcpu, rflags, &r); + __kvm_set_rflags(vcpu, ctxt->eflags); + + /* + * For STI, interrupts are shadowed; so KVM_REQ_EVENT will + * do nothing, and it will be requested again as soon as + * the shadow expires. But we still need to check here, + * because POPF has no interrupt shadow. + */ + if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF)) + kvm_make_request(KVM_REQ_EVENT, vcpu); } else vcpu->arch.emulate_regs_need_sync_to_vcpu = true; @@ -7406,12 +7417,17 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_rflags); -void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) rflags |= X86_EFLAGS_TF; kvm_x86_ops->set_rflags(vcpu, rflags); +} + +void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +{ + __kvm_set_rflags(vcpu, rflags); kvm_make_request(KVM_REQ_EVENT, vcpu); } EXPORT_SYMBOL_GPL(kvm_set_rflags); From e24186e097b80c5995ff75e1bbcd541d09c9e42b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Mar 2014 12:00:57 +0100 Subject: [PATCH 057/104] KVM: emulate: move around some checks The only purpose of this patch is to make the next patch simpler to review. No semantic change. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 15453e569f3d6..d79677c6056d6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4388,12 +4388,15 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->d |= opcode.flags; } + /* Unrecognised? */ + if (ctxt->d == 0) + return EMULATION_FAILED; + ctxt->execute = opcode.u.execute; ctxt->check_perm = opcode.check_perm; ctxt->intercept = opcode.intercept; - /* Unrecognised? */ - if (ctxt->d == 0 || (ctxt->d & NotImpl)) + if (ctxt->d & NotImpl) return EMULATION_FAILED; if (!(ctxt->d & EmulateOnUD) && ctxt->ud) @@ -4535,19 +4538,19 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) ctxt->mem_read.pos = 0; - if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || - (ctxt->d & Undefined)) { + /* LOCK prefix is allowed only with some instructions */ + if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) { rc = emulate_ud(ctxt); goto done; } - /* LOCK prefix is allowed only with some instructions */ - if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) { + if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) { rc = emulate_ud(ctxt); goto done; } - if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) { + if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || + (ctxt->d & Undefined)) { rc = emulate_ud(ctxt); goto done; } From d40a6898e50c2589ca3d345ef5ca6671e2b35b1a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Mar 2014 11:58:02 +0100 Subject: [PATCH 058/104] KVM: emulate: protect checks on ctxt->d by a common "if (unlikely())" There are several checks for "peculiar" aspects of instructions in both x86_decode_insn and x86_emulate_insn. Group them together, and guard them with a single "if" that lets the processor quickly skip them all. Make this more effective by adding two more flag bits that say whether the .intercept and .check_perm fields are valid. We will reuse these flags later to avoid initializing fields of the emulate_ctxt struct. This skims about 30 cycles for each emulated instructions, which is approximately a 3% improvement. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 175 ++++++++++++++++++++++------------------- 1 file changed, 94 insertions(+), 81 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d79677c6056d6..ea56dae3e67c3 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -162,6 +162,8 @@ #define NoWrite ((u64)1 << 45) /* No writeback */ #define SrcWrite ((u64)1 << 46) /* Write back src operand */ #define NoMod ((u64)1 << 47) /* Mod field is ignored */ +#define Intercept ((u64)1 << 48) /* Has valid intercept field */ +#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -3546,9 +3548,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) } #define D(_y) { .flags = (_y) } -#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } -#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ - .check_perm = (_p) } +#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i } +#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \ + .intercept = x86_intercept_##_i, .check_perm = (_p) } #define N D(NotImpl) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } @@ -3557,10 +3559,10 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } #define II(_f, _e, _i) \ - { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } + { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i } #define IIP(_f, _e, _i, _p) \ - { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \ - .check_perm = (_p) } + { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \ + .intercept = x86_intercept_##_i, .check_perm = (_p) } #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) @@ -4393,29 +4395,37 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) return EMULATION_FAILED; ctxt->execute = opcode.u.execute; - ctxt->check_perm = opcode.check_perm; - ctxt->intercept = opcode.intercept; - if (ctxt->d & NotImpl) - return EMULATION_FAILED; + if (unlikely(ctxt->d & + (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) { + /* + * These are copied unconditionally here, and checked unconditionally + * in x86_emulate_insn. + */ + ctxt->check_perm = opcode.check_perm; + ctxt->intercept = opcode.intercept; - if (!(ctxt->d & EmulateOnUD) && ctxt->ud) - return EMULATION_FAILED; + if (ctxt->d & NotImpl) + return EMULATION_FAILED; - if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) - ctxt->op_bytes = 8; + if (!(ctxt->d & EmulateOnUD) && ctxt->ud) + return EMULATION_FAILED; - if (ctxt->d & Op3264) { - if (mode == X86EMUL_MODE_PROT64) + if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) ctxt->op_bytes = 8; - else - ctxt->op_bytes = 4; - } - if (ctxt->d & Sse) - ctxt->op_bytes = 16; - else if (ctxt->d & Mmx) - ctxt->op_bytes = 8; + if (ctxt->d & Op3264) { + if (mode == X86EMUL_MODE_PROT64) + ctxt->op_bytes = 8; + else + ctxt->op_bytes = 4; + } + + if (ctxt->d & Sse) + ctxt->op_bytes = 16; + else if (ctxt->d & Mmx) + ctxt->op_bytes = 8; + } /* ModRM and SIB bytes. */ if (ctxt->d & ModRM) { @@ -4549,75 +4559,78 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || - (ctxt->d & Undefined)) { - rc = emulate_ud(ctxt); - goto done; - } - - if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) - || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { - rc = emulate_ud(ctxt); - goto done; - } - - if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { - rc = emulate_nm(ctxt); - goto done; - } + if (unlikely(ctxt->d & + (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) { + if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || + (ctxt->d & Undefined)) { + rc = emulate_ud(ctxt); + goto done; + } - if (ctxt->d & Mmx) { - rc = flush_pending_x87_faults(ctxt); - if (rc != X86EMUL_CONTINUE) + if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) + || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { + rc = emulate_ud(ctxt); goto done; - /* - * Now that we know the fpu is exception safe, we can fetch - * operands from it. - */ - fetch_possible_mmx_operand(ctxt, &ctxt->src); - fetch_possible_mmx_operand(ctxt, &ctxt->src2); - if (!(ctxt->d & Mov)) - fetch_possible_mmx_operand(ctxt, &ctxt->dst); - } + } - if (unlikely(ctxt->guest_mode) && ctxt->intercept) { - rc = emulator_check_intercept(ctxt, ctxt->intercept, - X86_ICPT_PRE_EXCEPT); - if (rc != X86EMUL_CONTINUE) + if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { + rc = emulate_nm(ctxt); goto done; - } + } - /* Privileged instruction can be executed only in CPL=0 */ - if ((ctxt->d & Priv) && ops->cpl(ctxt)) { - rc = emulate_gp(ctxt, 0); - goto done; - } + if (ctxt->d & Mmx) { + rc = flush_pending_x87_faults(ctxt); + if (rc != X86EMUL_CONTINUE) + goto done; + /* + * Now that we know the fpu is exception safe, we can fetch + * operands from it. + */ + fetch_possible_mmx_operand(ctxt, &ctxt->src); + fetch_possible_mmx_operand(ctxt, &ctxt->src2); + if (!(ctxt->d & Mov)) + fetch_possible_mmx_operand(ctxt, &ctxt->dst); + } - /* Instruction can only be executed in protected mode */ - if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { - rc = emulate_ud(ctxt); - goto done; - } + if (unlikely(ctxt->guest_mode) && ctxt->intercept) { + rc = emulator_check_intercept(ctxt, ctxt->intercept, + X86_ICPT_PRE_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } - /* Do instruction specific permission checks */ - if (ctxt->check_perm) { - rc = ctxt->check_perm(ctxt); - if (rc != X86EMUL_CONTINUE) + /* Privileged instruction can be executed only in CPL=0 */ + if ((ctxt->d & Priv) && ops->cpl(ctxt)) { + rc = emulate_gp(ctxt, 0); goto done; - } + } - if (unlikely(ctxt->guest_mode) && ctxt->intercept) { - rc = emulator_check_intercept(ctxt, ctxt->intercept, - X86_ICPT_POST_EXCEPT); - if (rc != X86EMUL_CONTINUE) + /* Instruction can only be executed in protected mode */ + if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { + rc = emulate_ud(ctxt); goto done; - } + } - if (ctxt->rep_prefix && (ctxt->d & String)) { - /* All REP prefixes have the same first termination condition */ - if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { - ctxt->eip = ctxt->_eip; - goto done; + /* Do instruction specific permission checks */ + if (ctxt->check_perm) { + rc = ctxt->check_perm(ctxt); + if (rc != X86EMUL_CONTINUE) + goto done; + } + + if (unlikely(ctxt->guest_mode) && ctxt->intercept) { + rc = emulator_check_intercept(ctxt, ctxt->intercept, + X86_ICPT_POST_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + + if (ctxt->rep_prefix && (ctxt->d & String)) { + /* All REP prefixes have the same first termination condition */ + if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { + ctxt->eip = ctxt->_eip; + goto done; + } } } From 54cfdb3e95d4f70409a7d3432a42cffc9a232be7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Mar 2014 11:36:25 +0100 Subject: [PATCH 059/104] KVM: emulate: speed up emulated moves We can just blindly move all 16 bytes of ctxt->src's value to ctxt->dst. write_register_operand will take care of writing only the lower bytes. Avoiding a call to memcpy (the compiler optimizes it out) gains about 200 cycles on kvm-unit-tests for register-to-register moves, and makes them about as fast as arithmetic instructions. We could perhaps get a larger speedup by moving all instructions _except_ moves out of x86_emulate_insn, removing opcode_len, and replacing the switch statement with an inlined em_mov. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 2 +- arch/x86/kvm/emulate.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0e0151c13b2c9..4324473700442 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -233,7 +233,7 @@ struct operand { union { unsigned long val; u64 val64; - char valptr[sizeof(unsigned long) + 2]; + char valptr[sizeof(sse128_t)]; sse128_t vec_val; u64 mm_val; void *data; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ea56dae3e67c3..27f677ef703e2 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2990,7 +2990,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt) static int em_mov(struct x86_emulate_ctxt *ctxt) { - memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes); + memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr)); return X86EMUL_CONTINUE; } From f5f87dfbc777f89148c3c66438741139845d3ac6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2014 13:23:24 +0200 Subject: [PATCH 060/104] KVM: emulate: simplify writeback The "if/return" checks are useless, because we return X86EMUL_CONTINUE anyway if we do not return. Reviewed-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 27f677ef703e2..32d3da82da2e6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1588,34 +1588,28 @@ static void write_register_operand(struct operand *op) static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) { - int rc; - switch (op->type) { case OP_REG: write_register_operand(op); break; case OP_MEM: if (ctxt->lock_prefix) - rc = segmented_cmpxchg(ctxt, + return segmented_cmpxchg(ctxt, + op->addr.mem, + &op->orig_val, + &op->val, + op->bytes); + else + return segmented_write(ctxt, op->addr.mem, - &op->orig_val, &op->val, op->bytes); - else - rc = segmented_write(ctxt, - op->addr.mem, - &op->val, - op->bytes); - if (rc != X86EMUL_CONTINUE) - return rc; break; case OP_MEM_STR: - rc = segmented_write(ctxt, - op->addr.mem, - op->data, - op->bytes * op->count); - if (rc != X86EMUL_CONTINUE) - return rc; + return segmented_write(ctxt, + op->addr.mem, + op->data, + op->bytes * op->count); break; case OP_XMM: write_sse_reg(ctxt, &op->vec_val, op->addr.xmm); From 1498507a47867596de158d4db8728e92385a4919 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Wed, 16 Apr 2014 12:46:09 -0400 Subject: [PATCH 061/104] KVM: emulate: move init_decode_cache to emulate.c Core emulator functions all belong in emulator.c, x86 should have no knowledge of emulator internals Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 13 +++++++++++++ arch/x86/kvm/x86.c | 13 ------------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 4324473700442..c22bd9af43117 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -409,6 +409,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); #define EMULATION_OK 0 #define EMULATION_RESTART 1 #define EMULATION_INTERCEPTED 2 +void init_decode_cache(struct x86_emulate_ctxt *ctxt); int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int idt_index, int reason, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 32d3da82da2e6..851315d93bf77 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4534,6 +4534,19 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) return X86EMUL_CONTINUE; } +void init_decode_cache(struct x86_emulate_ctxt *ctxt) +{ + memset(&ctxt->opcode_len, 0, + (void *)&ctxt->_regs - (void *)&ctxt->opcode_len); + + ctxt->fetch.start = 0; + ctxt->fetch.end = 0; + ctxt->io_read.pos = 0; + ctxt->io_read.end = 0; + ctxt->mem_read.pos = 0; + ctxt->mem_read.end = 0; +} + int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) { const struct x86_emulate_ops *ops = ctxt->ops; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cd9316786dca6..905edf8557e78 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4888,19 +4888,6 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) kvm_queue_exception(vcpu, ctxt->exception.vector); } -static void init_decode_cache(struct x86_emulate_ctxt *ctxt) -{ - memset(&ctxt->opcode_len, 0, - (void *)&ctxt->_regs - (void *)&ctxt->opcode_len); - - ctxt->fetch.start = 0; - ctxt->fetch.end = 0; - ctxt->io_read.pos = 0; - ctxt->io_read.end = 0; - ctxt->mem_read.pos = 0; - ctxt->mem_read.end = 0; -} - static void init_emulate_ctxt(struct kvm_vcpu *vcpu) { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; From 685bbf4ac406364a84a1d4237b4970dc570fd4cb Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Wed, 16 Apr 2014 12:46:10 -0400 Subject: [PATCH 062/104] KVM: emulate: Remove ctxt->intercept and ctxt->check_perm checks The same information can be gleaned from ctxt->d and avoids having to zero/NULL initialize intercept and check_perm Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 851315d93bf77..000fc73988320 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4599,7 +4599,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) fetch_possible_mmx_operand(ctxt, &ctxt->dst); } - if (unlikely(ctxt->guest_mode) && ctxt->intercept) { + if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_PRE_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -4619,13 +4619,13 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } /* Do instruction specific permission checks */ - if (ctxt->check_perm) { + if (ctxt->d & CheckPerm) { rc = ctxt->check_perm(ctxt); if (rc != X86EMUL_CONTINUE) goto done; } - if (unlikely(ctxt->guest_mode) && ctxt->intercept) { + if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -4671,7 +4671,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: - if (unlikely(ctxt->guest_mode) && ctxt->intercept) { + if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_MEMACCESS); if (rc != X86EMUL_CONTINUE) From 02357bdc8c30a60cd33dd438f851c1306c34f435 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Wed, 16 Apr 2014 12:46:11 -0400 Subject: [PATCH 063/104] KVM: emulate: cleanup decode_modrm Remove the if conditional - that will help us avoid an "else initialize to 0" Also, rearrange operators for slightly better code. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 000fc73988320..94f5f8b94ce94 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1066,19 +1066,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, struct operand *op) { u8 sib; - int index_reg = 0, base_reg = 0, scale; + int index_reg, base_reg, scale; int rc = X86EMUL_CONTINUE; ulong modrm_ea = 0; - if (ctxt->rex_prefix) { - ctxt->modrm_reg = (ctxt->rex_prefix & 4) << 1; /* REX.R */ - index_reg = (ctxt->rex_prefix & 2) << 2; /* REX.X */ - ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */ - } + ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */ + index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */ + base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */ - ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; + ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6; ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; - ctxt->modrm_rm |= (ctxt->modrm & 0x07); + ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07); ctxt->modrm_seg = VCPU_SREG_DS; if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) { From c44b4c6ab80eef3a9c52c7b3f0c632942e6489aa Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Wed, 16 Apr 2014 12:46:12 -0400 Subject: [PATCH 064/104] KVM: emulate: clean up initializations in init_decode_cache A lot of initializations are unnecessary as they get set to appropriate values before actually being used. Optimize placement of fields in x86_emulate_ctxt Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 20 +++++++++++--------- arch/x86/kvm/emulate.c | 7 ++----- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index c22bd9af43117..d40a10a38a80d 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -288,30 +288,32 @@ struct x86_emulate_ctxt { u8 opcode_len; u8 b; u8 intercept; - u8 lock_prefix; - u8 rep_prefix; u8 op_bytes; u8 ad_bytes; - u8 rex_prefix; struct operand src; struct operand src2; struct operand dst; - bool has_seg_override; - u8 seg_override; - u64 d; int (*execute)(struct x86_emulate_ctxt *ctxt); int (*check_perm)(struct x86_emulate_ctxt *ctxt); + bool has_seg_override; + bool rip_relative; + u8 rex_prefix; + u8 lock_prefix; + u8 rep_prefix; + u8 seg_override; + /* bitmaps of registers in _regs[] that can be read */ + u32 regs_valid; + /* bitmaps of registers in _regs[] that have been written */ + u32 regs_dirty; /* modrm */ u8 modrm; u8 modrm_mod; u8 modrm_reg; u8 modrm_rm; u8 modrm_seg; - bool rip_relative; + u64 d; unsigned long _eip; struct operand memop; - u32 regs_valid; /* bitmaps of registers in _regs[] that can be read */ - u32 regs_dirty; /* bitmaps of registers in _regs[] that have been written */ /* Fields above regs are cleared together. */ unsigned long _regs[NR_VCPU_REGS]; struct operand *memopp; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 94f5f8b94ce94..3e9bbdc4c76ac 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4534,14 +4534,11 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) void init_decode_cache(struct x86_emulate_ctxt *ctxt) { - memset(&ctxt->opcode_len, 0, - (void *)&ctxt->_regs - (void *)&ctxt->opcode_len); + memset(&ctxt->has_seg_override, 0, + (void *)&ctxt->modrm - (void *)&ctxt->has_seg_override); - ctxt->fetch.start = 0; - ctxt->fetch.end = 0; ctxt->io_read.pos = 0; ctxt->io_read.end = 0; - ctxt->mem_read.pos = 0; ctxt->mem_read.end = 0; } From 573e80fe04db1aa44e8303037f65716ba5c3a343 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Wed, 16 Apr 2014 12:46:13 -0400 Subject: [PATCH 065/104] KVM: emulate: rework seg_override x86_decode_insn already sets a default for seg_override, so remove it from the zeroed area. Also replace set/get functions with direct access to the field. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 3 +-- arch/x86/kvm/emulate.c | 41 ++++++++++++------------------ 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index d40a10a38a80d..6c808408326fc 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -295,12 +295,10 @@ struct x86_emulate_ctxt { struct operand dst; int (*execute)(struct x86_emulate_ctxt *ctxt); int (*check_perm)(struct x86_emulate_ctxt *ctxt); - bool has_seg_override; bool rip_relative; u8 rex_prefix; u8 lock_prefix; u8 rep_prefix; - u8 seg_override; /* bitmaps of registers in _regs[] that can be read */ u32 regs_valid; /* bitmaps of registers in _regs[] that have been written */ @@ -311,6 +309,7 @@ struct x86_emulate_ctxt { u8 modrm_reg; u8 modrm_rm; u8 modrm_seg; + u8 seg_override; u64 d; unsigned long _eip; struct operand memop; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3e9bbdc4c76ac..08badf638fb0f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -514,12 +514,6 @@ static u32 desc_limit_scaled(struct desc_struct *desc) return desc->g ? (limit << 12) | 0xfff : limit; } -static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg) -{ - ctxt->has_seg_override = true; - ctxt->seg_override = seg; -} - static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) { if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) @@ -528,14 +522,6 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) return ctxt->ops->get_cached_segment_base(ctxt, seg); } -static unsigned seg_override(struct x86_emulate_ctxt *ctxt) -{ - if (!ctxt->has_seg_override) - return 0; - - return ctxt->seg_override; -} - static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, u32 error, bool valid) { @@ -4169,7 +4155,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); - op->addr.mem.seg = seg_override(ctxt); + op->addr.mem.seg = ctxt->seg_override; op->val = 0; op->count = 1; break; @@ -4180,7 +4166,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, register_address(ctxt, reg_read(ctxt, VCPU_REGS_RBX) + (reg_read(ctxt, VCPU_REGS_RAX) & 0xff)); - op->addr.mem.seg = seg_override(ctxt); + op->addr.mem.seg = ctxt->seg_override; op->val = 0; break; case OpImmFAddr: @@ -4227,6 +4213,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) int mode = ctxt->mode; int def_op_bytes, def_ad_bytes, goffset, simd_prefix; bool op_prefix = false; + bool has_seg_override = false; struct opcode opcode; ctxt->memop.type = OP_NONE; @@ -4280,11 +4267,13 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) case 0x2e: /* CS override */ case 0x36: /* SS override */ case 0x3e: /* DS override */ - set_seg_override(ctxt, (ctxt->b >> 3) & 3); + has_seg_override = true; + ctxt->seg_override = (ctxt->b >> 3) & 3; break; case 0x64: /* FS override */ case 0x65: /* GS override */ - set_seg_override(ctxt, ctxt->b & 7); + has_seg_override = true; + ctxt->seg_override = ctxt->b & 7; break; case 0x40 ... 0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) @@ -4422,17 +4411,19 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) /* ModRM and SIB bytes. */ if (ctxt->d & ModRM) { rc = decode_modrm(ctxt, &ctxt->memop); - if (!ctxt->has_seg_override) - set_seg_override(ctxt, ctxt->modrm_seg); + if (!has_seg_override) { + has_seg_override = true; + ctxt->seg_override = ctxt->modrm_seg; + } } else if (ctxt->d & MemAbs) rc = decode_abs(ctxt, &ctxt->memop); if (rc != X86EMUL_CONTINUE) goto done; - if (!ctxt->has_seg_override) - set_seg_override(ctxt, VCPU_SREG_DS); + if (!has_seg_override) + ctxt->seg_override = VCPU_SREG_DS; - ctxt->memop.addr.mem.seg = seg_override(ctxt); + ctxt->memop.addr.mem.seg = ctxt->seg_override; if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8) ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea; @@ -4534,8 +4525,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) void init_decode_cache(struct x86_emulate_ctxt *ctxt) { - memset(&ctxt->has_seg_override, 0, - (void *)&ctxt->modrm - (void *)&ctxt->has_seg_override); + memset(&ctxt->rip_relative, 0, + (void *)&ctxt->modrm - (void *)&ctxt->rip_relative); ctxt->io_read.pos = 0; ctxt->io_read.end = 0; From 41061cdb98a0bec464278b4db8e894a3121671f5 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Wed, 16 Apr 2014 12:46:14 -0400 Subject: [PATCH 066/104] KVM: emulate: do not initialize memopp rip_relative is only set if decode_modrm runs, and if you have ModRM you will also have a memopp. We can then access memopp unconditionally. Note that rip_relative cannot be hoisted up to decode_modrm, or you break "mov $0, xyz(%rip)". Also, move typecast on "out of range value" of mem.ea to decode_modrm. Together, all these optimizations save about 50 cycles on each emulated instructions (4-6%). Signed-off-by: Bandan Das [Fix immediate operands with rip-relative addressing. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 5 +++++ arch/x86/kvm/emulate.c | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 6c808408326fc..fcf58cd25ebda 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -295,6 +295,11 @@ struct x86_emulate_ctxt { struct operand dst; int (*execute)(struct x86_emulate_ctxt *ctxt); int (*check_perm)(struct x86_emulate_ctxt *ctxt); + /* + * The following six fields are cleared together, + * the rest are initialized unconditionally in x86_decode_insn + * or elsewhere + */ bool rip_relative; u8 rex_prefix; u8 lock_prefix; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 08badf638fb0f..390400a54a9c5 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1177,6 +1177,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, } } op->addr.mem.ea = modrm_ea; + if (ctxt->ad_bytes != 8) + ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea; + done: return rc; } @@ -4425,9 +4428,6 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->memop.addr.mem.seg = ctxt->seg_override; - if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8) - ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea; - /* * Decode and fetch the source operand: register, memory * or immediate. @@ -4448,7 +4448,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); done: - if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative) + if (ctxt->rip_relative) ctxt->memopp->addr.mem.ea += ctxt->_eip; return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; From 285ca9e948fa047e51fe47082528034de5369e8d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 May 2014 12:24:32 +0200 Subject: [PATCH 067/104] KVM: emulate: speed up do_insn_fetch Hoist the common case up from do_insn_fetch_byte to do_insn_fetch, and prime the fetch_cache in x86_decode_insn. This helps a bit the compiler and the branch predictor, but above all it lays the ground for further changes in the next few patches. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 67 +++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 390400a54a9c5..ea188a338afbc 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -705,51 +705,51 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, } /* - * Fetch the next byte of the instruction being emulated which is pointed to - * by ctxt->_eip, then increment ctxt->_eip. - * - * Also prefetch the remaining bytes of the instruction without crossing page + * Prefetch the remaining bytes of the instruction without crossing page * boundary if they are not in fetch_cache yet. */ -static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest) +static int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt) { struct fetch_cache *fc = &ctxt->fetch; int rc; int size, cur_size; - - if (ctxt->_eip == fc->end) { - unsigned long linear; - struct segmented_address addr = { .seg = VCPU_SREG_CS, - .ea = ctxt->_eip }; - cur_size = fc->end - fc->start; - size = min(15UL - cur_size, - PAGE_SIZE - offset_in_page(ctxt->_eip)); - rc = __linearize(ctxt, addr, size, false, true, &linear); - if (unlikely(rc != X86EMUL_CONTINUE)) - return rc; - rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size, - size, &ctxt->exception); - if (unlikely(rc != X86EMUL_CONTINUE)) - return rc; - fc->end += size; - } - *dest = fc->data[ctxt->_eip - fc->start]; - ctxt->_eip++; + unsigned long linear; + + struct segmented_address addr = { .seg = VCPU_SREG_CS, + .ea = fc->end }; + cur_size = fc->end - fc->start; + size = min(15UL - cur_size, + PAGE_SIZE - offset_in_page(fc->end)); + if (unlikely(size == 0)) + return X86EMUL_UNHANDLEABLE; + rc = __linearize(ctxt, addr, size, false, true, &linear); + if (unlikely(rc != X86EMUL_CONTINUE)) + return rc; + rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size, + size, &ctxt->exception); + if (unlikely(rc != X86EMUL_CONTINUE)) + return rc; + fc->end += size; return X86EMUL_CONTINUE; } static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, - void *dest, unsigned size) + void *__dest, unsigned size) { int rc; + struct fetch_cache *fc = &ctxt->fetch; + u8 *dest = __dest; + u8 *src = &fc->data[ctxt->_eip - fc->start]; - /* x86 instructions are limited to 15 bytes. */ - if (unlikely(ctxt->_eip + size - ctxt->eip > 15)) - return X86EMUL_UNHANDLEABLE; while (size--) { - rc = do_insn_fetch_byte(ctxt, dest++); - if (rc != X86EMUL_CONTINUE) - return rc; + if (unlikely(ctxt->_eip == fc->end)) { + rc = do_insn_fetch_bytes(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + } + *dest++ = *src++; + ctxt->_eip++; + continue; } return X86EMUL_CONTINUE; } @@ -4227,6 +4227,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->opcode_len = 1; if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); + else { + rc = do_insn_fetch_bytes(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + } switch (mode) { case X86EMUL_MODE_REAL: From 5cfc7e0f5e5e1adf998df94f8e36edaf5d30d38e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 May 2014 13:05:25 +0200 Subject: [PATCH 068/104] KVM: emulate: avoid repeated calls to do_insn_fetch_bytes do_insn_fetch_bytes will only be called once in a given insn_fetch and insn_fetch_arr, because in fact it will only be called at most twice for any instruction and the first call is explicit in x86_decode_insn. This observation lets us hoist the call out of the memory copying loop. It does not buy performance, because most fetches are one byte long anyway, but it prepares for the next patch. The overflow check is tricky, but correct. Because do_insn_fetch_bytes has already been called once, we know that fc->end is at least 15. So it is okay to subtract the number of bytes we want to read. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ea188a338afbc..ca82ec9c5ffe6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -708,7 +708,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, * Prefetch the remaining bytes of the instruction without crossing page * boundary if they are not in fetch_cache yet. */ -static int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt) +static int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) { struct fetch_cache *fc = &ctxt->fetch; int rc; @@ -720,7 +720,14 @@ static int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt) cur_size = fc->end - fc->start; size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(fc->end)); - if (unlikely(size == 0)) + + /* + * One instruction can only straddle two pages, + * and one has been loaded at the beginning of + * x86_decode_insn. So, if not enough bytes + * still, we must have hit the 15-byte boundary. + */ + if (unlikely(size < op_size)) return X86EMUL_UNHANDLEABLE; rc = __linearize(ctxt, addr, size, false, true, &linear); if (unlikely(rc != X86EMUL_CONTINUE)) @@ -736,17 +743,18 @@ static int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt) static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, void *__dest, unsigned size) { - int rc; struct fetch_cache *fc = &ctxt->fetch; u8 *dest = __dest; u8 *src = &fc->data[ctxt->_eip - fc->start]; + /* We have to be careful about overflow! */ + if (unlikely(ctxt->_eip > fc->end - size)) { + int rc = do_insn_fetch_bytes(ctxt, size); + if (rc != X86EMUL_CONTINUE) + return rc; + } + while (size--) { - if (unlikely(ctxt->_eip == fc->end)) { - rc = do_insn_fetch_bytes(ctxt); - if (rc != X86EMUL_CONTINUE) - return rc; - } *dest++ = *src++; ctxt->_eip++; continue; @@ -4228,7 +4236,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); else { - rc = do_insn_fetch_bytes(ctxt); + rc = do_insn_fetch_bytes(ctxt, 1); if (rc != X86EMUL_CONTINUE) return rc; } From 9506d57de3bc8277a4e306e0d439976862f68c6d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 May 2014 13:05:25 +0200 Subject: [PATCH 069/104] KVM: emulate: avoid per-byte copying in instruction fetches We do not need a memory copying loop anymore in insn_fetch; we can use a byte-aligned pointer to access instruction fields directly from the fetch_cache. This eliminates 50-150 cycles (corresponding to a 5-10% improvement in performance) from each instruction. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 46 ++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ca82ec9c5ffe6..02c668aca2b68 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -708,7 +708,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, * Prefetch the remaining bytes of the instruction without crossing page * boundary if they are not in fetch_cache yet. */ -static int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) +static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) { struct fetch_cache *fc = &ctxt->fetch; int rc; @@ -740,41 +740,39 @@ static int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) return X86EMUL_CONTINUE; } -static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, - void *__dest, unsigned size) +static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, + unsigned size) { - struct fetch_cache *fc = &ctxt->fetch; - u8 *dest = __dest; - u8 *src = &fc->data[ctxt->_eip - fc->start]; - /* We have to be careful about overflow! */ - if (unlikely(ctxt->_eip > fc->end - size)) { - int rc = do_insn_fetch_bytes(ctxt, size); - if (rc != X86EMUL_CONTINUE) - return rc; - } - - while (size--) { - *dest++ = *src++; - ctxt->_eip++; - continue; - } - return X86EMUL_CONTINUE; + if (unlikely(ctxt->_eip > ctxt->fetch.end - size)) + return __do_insn_fetch_bytes(ctxt, size); + else + return X86EMUL_CONTINUE; } /* Fetch next part of the instruction being emulated. */ #define insn_fetch(_type, _ctxt) \ -({ unsigned long _x; \ - rc = do_insn_fetch(_ctxt, &_x, sizeof(_type)); \ +({ _type _x; \ + struct fetch_cache *_fc; \ + \ + rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ - (_type)_x; \ + _fc = &ctxt->fetch; \ + _x = *(_type __aligned(1) *) &_fc->data[ctxt->_eip - _fc->start]; \ + ctxt->_eip += sizeof(_type); \ + _x; \ }) #define insn_fetch_arr(_arr, _size, _ctxt) \ -({ rc = do_insn_fetch(_ctxt, _arr, (_size)); \ +({ \ + struct fetch_cache *_fc; \ + rc = do_insn_fetch_bytes(_ctxt, _size); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ + _fc = &ctxt->fetch; \ + memcpy(_arr, &_fc->data[ctxt->_eip - _fc->start], _size); \ + ctxt->_eip += (_size); \ }) /* @@ -4236,7 +4234,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); else { - rc = do_insn_fetch_bytes(ctxt, 1); + rc = __do_insn_fetch_bytes(ctxt, 1); if (rc != X86EMUL_CONTINUE) return rc; } From 17052f16a51af6d8f4b7eee0631af675ac204f65 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 May 2014 16:33:01 +0200 Subject: [PATCH 070/104] KVM: emulate: put pointers in the fetch_cache This simplifies the code a bit, especially the overflow checks. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 4 ++-- arch/x86/kvm/emulate.c | 34 +++++++++++++----------------- arch/x86/kvm/trace.h | 6 +++--- 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index fcf58cd25ebda..eb181178fe0b3 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -242,8 +242,8 @@ struct operand { struct fetch_cache { u8 data[15]; - unsigned long start; - unsigned long end; + u8 *ptr; + u8 *end; }; struct read_cache { diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 02c668aca2b68..c163148077561 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -710,16 +710,15 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, */ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) { - struct fetch_cache *fc = &ctxt->fetch; int rc; - int size, cur_size; + int size; unsigned long linear; - + int cur_size = ctxt->fetch.end - ctxt->fetch.data; struct segmented_address addr = { .seg = VCPU_SREG_CS, - .ea = fc->end }; - cur_size = fc->end - fc->start; - size = min(15UL - cur_size, - PAGE_SIZE - offset_in_page(fc->end)); + .ea = ctxt->eip + cur_size }; + + size = min(15UL ^ cur_size, + PAGE_SIZE - offset_in_page(addr.ea)); /* * One instruction can only straddle two pages, @@ -732,19 +731,18 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) rc = __linearize(ctxt, addr, size, false, true, &linear); if (unlikely(rc != X86EMUL_CONTINUE)) return rc; - rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size, + rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end, size, &ctxt->exception); if (unlikely(rc != X86EMUL_CONTINUE)) return rc; - fc->end += size; + ctxt->fetch.end += size; return X86EMUL_CONTINUE; } static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, unsigned size) { - /* We have to be careful about overflow! */ - if (unlikely(ctxt->_eip > ctxt->fetch.end - size)) + if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size)) return __do_insn_fetch_bytes(ctxt, size); else return X86EMUL_CONTINUE; @@ -753,26 +751,24 @@ static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, /* Fetch next part of the instruction being emulated. */ #define insn_fetch(_type, _ctxt) \ ({ _type _x; \ - struct fetch_cache *_fc; \ \ rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ - _fc = &ctxt->fetch; \ - _x = *(_type __aligned(1) *) &_fc->data[ctxt->_eip - _fc->start]; \ ctxt->_eip += sizeof(_type); \ + _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \ + ctxt->fetch.ptr += sizeof(_type); \ _x; \ }) #define insn_fetch_arr(_arr, _size, _ctxt) \ ({ \ - struct fetch_cache *_fc; \ rc = do_insn_fetch_bytes(_ctxt, _size); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ - _fc = &ctxt->fetch; \ - memcpy(_arr, &_fc->data[ctxt->_eip - _fc->start], _size); \ ctxt->_eip += (_size); \ + memcpy(_arr, ctxt->fetch.ptr, _size); \ + ctxt->fetch.ptr += (_size); \ }) /* @@ -4228,8 +4224,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->memop.type = OP_NONE; ctxt->memopp = NULL; ctxt->_eip = ctxt->eip; - ctxt->fetch.start = ctxt->_eip; - ctxt->fetch.end = ctxt->fetch.start + insn_len; + ctxt->fetch.ptr = ctxt->fetch.data; + ctxt->fetch.end = ctxt->fetch.data + insn_len; ctxt->opcode_len = 1; if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 33574c95220d1..e850a7d332be3 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -721,10 +721,10 @@ TRACE_EVENT(kvm_emulate_insn, ), TP_fast_assign( - __entry->rip = vcpu->arch.emulate_ctxt.fetch.start; __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); - __entry->len = vcpu->arch.emulate_ctxt._eip - - vcpu->arch.emulate_ctxt.fetch.start; + __entry->len = vcpu->arch.emulate_ctxt.fetch.ptr + - vcpu->arch.emulate_ctxt.fetch.data; + __entry->rip = vcpu->arch.emulate_ctxt._eip - __entry->len; memcpy(__entry->insn, vcpu->arch.emulate_ctxt.fetch.data, 15); From 719d5a9b2487e0562f178f61e323c3dc18a8b200 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 19 Jun 2014 11:37:06 +0200 Subject: [PATCH 071/104] KVM: x86: ensure emulator fetches do not span multiple pages When the CS base is not page-aligned, the linear address of the code could get close to the page boundary (e.g. 0x...ffe) even if the EIP value is not. So we need to first linearize the address, and only then compute the number of valid bytes that can be fetched. This happens relatively often when executing real mode code. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c163148077561..6a1d60956d63d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -711,14 +711,18 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) { int rc; - int size; + unsigned size; unsigned long linear; int cur_size = ctxt->fetch.end - ctxt->fetch.data; struct segmented_address addr = { .seg = VCPU_SREG_CS, .ea = ctxt->eip + cur_size }; - size = min(15UL ^ cur_size, - PAGE_SIZE - offset_in_page(addr.ea)); + size = 15UL ^ cur_size; + rc = __linearize(ctxt, addr, size, false, true, &linear); + if (unlikely(rc != X86EMUL_CONTINUE)) + return rc; + + size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear)); /* * One instruction can only straddle two pages, @@ -728,9 +732,6 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) */ if (unlikely(size < op_size)) return X86EMUL_UNHANDLEABLE; - rc = __linearize(ctxt, addr, size, false, true, &linear); - if (unlikely(rc != X86EMUL_CONTINUE)) - return rc; rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end, size, &ctxt->exception); if (unlikely(rc != X86EMUL_CONTINUE)) From 44583cba9188b29b20ceeefe8ae23ad19e26d9a4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 13 May 2014 14:02:13 +0200 Subject: [PATCH 072/104] KVM: x86: use kvm_read_guest_page for emulator accesses Emulator accesses are always done a page at a time, either by the emulator itself (for fetches) or because we need to query the MMU for address translations. Speed up these accesses by using kvm_read_guest_page and, in the case of fetches, by inlining kvm_read_guest_virt_helper and dropping the loop around kvm_read_guest_page. This final tweak saves 30-100 more clock cycles (4-10%), bringing the count (as measured by kvm-unit-tests) down to 720-1100 clock cycles on a Sandy Bridge Xeon host, compared to 2300-3200 before the whole series and 925-1700 after the first two low-hanging fruit changes. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 905edf8557e78..f750b69ca4431 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4085,7 +4085,8 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, if (gpa == UNMAPPED_GVA) return X86EMUL_PROPAGATE_FAULT; - ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); + ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data, + offset, toread); if (ret < 0) { r = X86EMUL_IO_NEEDED; goto out; @@ -4106,10 +4107,24 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + unsigned offset; + int ret; - return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, - access | PFERR_FETCH_MASK, - exception); + /* Inline kvm_read_guest_virt_helper for speed. */ + gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK, + exception); + if (unlikely(gpa == UNMAPPED_GVA)) + return X86EMUL_PROPAGATE_FAULT; + + offset = addr & (PAGE_SIZE-1); + if (WARN_ON(offset + bytes > PAGE_SIZE)) + bytes = (unsigned)PAGE_SIZE - offset; + ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val, + offset, bytes); + if (unlikely(ret < 0)) + return X86EMUL_IO_NEEDED; + + return X86EMUL_CONTINUE; } int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, From 10e38fc7cabc668738e6a7b7b57cbcddb2234440 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 18 Jun 2014 17:19:34 +0300 Subject: [PATCH 073/104] KVM: x86: Emulator flag for instruction that only support 16-bit addresses in real mode Certain instructions, such as monitor and xsave do not support big real mode and cause a #GP exception if any of the accessed bytes effective address are not within [0, 0xffff]. This patch introduces a flag to mark these instructions, including the necassary checks. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6a1d60956d63d..b61ffe9d86a7a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -164,6 +164,7 @@ #define NoMod ((u64)1 << 47) /* Mod field is ignored */ #define Intercept ((u64)1 << 48) /* Has valid intercept field */ #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */ +#define NoBigReal ((u64)1 << 50) /* No big real mode */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -640,7 +641,12 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, if (!fetch && (desc.type & 8) && !(desc.type & 2)) goto bad; lim = desc_limit_scaled(&desc); - if ((desc.type & 8) || !(desc.type & 4)) { + if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch && + (ctxt->d & NoBigReal)) { + /* la is between zero and 0xffff */ + if (la > 0xffff || (u32)(la + size - 1) > 0xffff) + goto bad; + } else if ((desc.type & 8) || !(desc.type & 4)) { /* expand-up segment */ if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) goto bad; From 68efa764f3429f2bd71f431e91c04b0bcb7d34f1 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 18 Jun 2014 17:19:35 +0300 Subject: [PATCH 074/104] KVM: x86: Emulator support for #UD on CPL>0 Certain instructions (e.g., mwait and monitor) cause a #UD exception when they are executed in user mode. This is in contrast to the regular privileged instructions which cause #GP. In order not to mess with SVM interception of mwait and monitor which assumes privilege level assertions take place before interception, a flag has been added. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b61ffe9d86a7a..dd074106d0c9b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -165,6 +165,7 @@ #define Intercept ((u64)1 << 48) /* Has valid intercept field */ #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */ #define NoBigReal ((u64)1 << 50) /* No big real mode */ +#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -4608,7 +4609,10 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) /* Privileged instruction can be executed only in CPL=0 */ if ((ctxt->d & Priv) && ops->cpl(ctxt)) { - rc = emulate_gp(ctxt, 0); + if (ctxt->d & PrivUD) + rc = emulate_ud(ctxt); + else + rc = emulate_gp(ctxt, 0); goto done; } From 963fee1656603ce2e91ebb988cd5a92f2af41369 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 17 Jul 2014 19:03:00 +0800 Subject: [PATCH 075/104] KVM: nVMX: Fix virtual interrupt delivery injection This patch fix bug reported in https://bugzilla.kernel.org/show_bug.cgi?id=73331, after the patch http://www.spinics.net/lists/kvm/msg105230.html applied, there is some progress and the L2 can boot up, however, slowly. The original idea of this fix vid injection patch is from "Zhang, Yang Z" . Interrupt which delivered by vid should be injected to L1 by L0 if current is in L1, or should be injected to L2 by L0 through the old injection way if L1 doesn't have set External-interrupt exiting bit. The current logic doen't consider these cases. This patch fix it by vid intr to L1 if current is L1 or L2 through old injection way if L1 doen't have External-interrupt exiting bit set. Signed-off-by: Wanpeng Li Signed-off-by: "Zhang, Yang Z" Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0c9569b994f93..7534a9f67cc86 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7130,7 +7130,26 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) if (max_irr == -1) return; - vmx_set_rvi(max_irr); + /* + * If a vmexit is needed, vmx_check_nested_events handles it. + */ + if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) + return; + + if (!is_guest_mode(vcpu)) { + vmx_set_rvi(max_irr); + return; + } + + /* + * Fall back to pre-APICv interrupt injection since L2 + * is run without virtual interrupt delivery. + */ + if (!kvm_event_needs_reinjection(vcpu) && + vmx_interrupt_allowed(vcpu)) { + kvm_queue_interrupt(vcpu, max_irr, false); + vmx_inject_irq(vcpu); + } } static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) From 0759d0681cae279e77ebb4b76175e330360b01d9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 13 May 2014 16:54:32 +0200 Subject: [PATCH 076/104] KVM: s390: cleanup handle_wait by reusing kvm_vcpu_block This patch cleans up the code in handle_wait by reusing the common code function kvm_vcpu_block. signal_pending(), kvm_cpu_has_pending_timer() and kvm_arch_vcpu_runnable() are sufficient for checking if we need to wake-up that VCPU. kvm_vcpu_block uses these functions, so no checks are lost. The flag "timer_due" can be removed - kvm_cpu_has_pending_timer() tests whether the timer is pending, thus the vcpu is correctly woken up. Signed-off-by: David Hildenbrand Acked-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 - arch/s390/kvm/interrupt.c | 41 ++++---------------------------- arch/s390/kvm/kvm-s390.c | 3 +++ 3 files changed, 8 insertions(+), 37 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index c2ba0208a0e14..b3acf28c8c963 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -305,7 +305,6 @@ struct kvm_s390_local_interrupt { struct list_head list; atomic_t active; struct kvm_s390_float_interrupt *float_int; - int timer_due; /* event indicator for waitqueue below */ wait_queue_head_t *wq; atomic_t *cpuflags; unsigned int action_bits; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 90c8de22a2a02..5fd11ce3dc3d3 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -585,60 +585,32 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) { u64 now, sltime; - DECLARE_WAITQUEUE(wait, current); vcpu->stat.exit_wait_state++; - if (kvm_cpu_has_interrupt(vcpu)) - return 0; - __set_cpu_idle(vcpu); - spin_lock_bh(&vcpu->arch.local_int.lock); - vcpu->arch.local_int.timer_due = 0; - spin_unlock_bh(&vcpu->arch.local_int.lock); + /* fast path */ + if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu)) + return 0; if (psw_interrupts_disabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "disabled wait"); - __unset_cpu_idle(vcpu); return -EOPNOTSUPP; /* disabled wait */ } + __set_cpu_idle(vcpu); if (!ckc_interrupts_enabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); goto no_timer; } now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; - if (vcpu->arch.sie_block->ckc < now) { - __unset_cpu_idle(vcpu); - return 0; - } - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); - hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); no_timer: srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - spin_lock(&vcpu->arch.local_int.float_int->lock); - spin_lock_bh(&vcpu->arch.local_int.lock); - add_wait_queue(&vcpu->wq, &wait); - while (list_empty(&vcpu->arch.local_int.list) && - list_empty(&vcpu->arch.local_int.float_int->list) && - (!vcpu->arch.local_int.timer_due) && - !signal_pending(current) && - !kvm_s390_si_ext_call_pending(vcpu)) { - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_bh(&vcpu->arch.local_int.lock); - spin_unlock(&vcpu->arch.local_int.float_int->lock); - schedule(); - spin_lock(&vcpu->arch.local_int.float_int->lock); - spin_lock_bh(&vcpu->arch.local_int.lock); - } + kvm_vcpu_block(vcpu); __unset_cpu_idle(vcpu); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->wq, &wait); - spin_unlock_bh(&vcpu->arch.local_int.lock); - spin_unlock(&vcpu->arch.local_int.float_int->lock); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); @@ -649,11 +621,8 @@ void kvm_s390_tasklet(unsigned long parm) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm; - spin_lock(&vcpu->arch.local_int.lock); - vcpu->arch.local_int.timer_due = 1; if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); - spin_unlock(&vcpu->arch.local_int.lock); } /* diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fdf88f7a539c3..ecb135702313a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1068,6 +1068,9 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) goto retry; } + /* nothing to do, just clear the request */ + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + return 0; } From 4ae3c0815fb63cbed1afcd5bacc7705c6d1b9fec Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 16 May 2014 10:23:53 +0200 Subject: [PATCH 077/104] KVM: s390: remove _bh locking from local_int.lock local_int.lock is not used in a bottom-half handler anymore, therefore we can turn it into an ordinary spin_lock at all occurrences. Signed-off-by: David Hildenbrand Reviewed-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 32 ++++++++++++++++---------------- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/s390/kvm/sigp.c | 20 ++++++++++---------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5fd11ce3dc3d3..86575b4cdc1c4 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -544,13 +544,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) int rc = 0; if (atomic_read(&li->active)) { - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry(inti, &li->list, list) if (__interrupt_is_deliverable(vcpu, inti)) { rc = 1; break; } - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); } if ((!rc) && atomic_read(&fi->active)) { @@ -645,13 +645,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_interrupt_info *n, *inti = NULL; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry_safe(inti, n, &li->list, list) { list_del(&inti->list); kfree(inti); } atomic_set(&li->active, 0); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); /* clear pending external calls set by sigp interpretation facility */ atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); @@ -670,7 +670,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) if (atomic_read(&li->active)) { do { deliver = 0; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry_safe(inti, n, &li->list, list) { if (__interrupt_is_deliverable(vcpu, inti)) { list_del(&inti->list); @@ -681,7 +681,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } if (list_empty(&li->list)) atomic_set(&li->active, 0); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); if (deliver) { __do_deliver_interrupt(vcpu, inti); kfree(inti); @@ -727,7 +727,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) if (atomic_read(&li->active)) { do { deliver = 0; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry_safe(inti, n, &li->list, list) { if ((inti->type == KVM_S390_MCHK) && __interrupt_is_deliverable(vcpu, inti)) { @@ -739,7 +739,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) } if (list_empty(&li->list)) atomic_set(&li->active, 0); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); if (deliver) { __do_deliver_interrupt(vcpu, inti); kfree(inti); @@ -786,11 +786,11 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1); - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_add(&inti->list, &li->list); atomic_set(&li->active, 1); BUG_ON(waitqueue_active(li->wq)); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return 0; } @@ -811,11 +811,11 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, inti->type = KVM_S390_PROGRAM_INT; memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm)); - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_add(&inti->list, &li->list); atomic_set(&li->active, 1); BUG_ON(waitqueue_active(li->wq)); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return 0; } @@ -903,12 +903,12 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) } dst_vcpu = kvm_get_vcpu(kvm, sigcpu); li = &dst_vcpu->arch.local_int; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); if (waitqueue_active(li->wq)) wake_up_interruptible(li->wq); kvm_get_vcpu(kvm, sigcpu)->preempted = true; - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); unlock_fi: spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); @@ -1050,7 +1050,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, mutex_lock(&vcpu->kvm->lock); li = &vcpu->arch.local_int; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); if (inti->type == KVM_S390_PROGRAM_INT) list_add(&inti->list, &li->list); else @@ -1062,7 +1062,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); vcpu->preempted = true; - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); mutex_unlock(&vcpu->kvm->lock); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ecb135702313a..a7bda180fe65e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1522,13 +1522,13 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); /* Need to lock access to action_bits to avoid a SIGP race condition */ - spin_lock_bh(&vcpu->arch.local_int.lock); + spin_lock(&vcpu->arch.local_int.lock); atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ vcpu->arch.local_int.action_bits &= ~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP); - spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock(&vcpu->arch.local_int.lock); __disable_ibs_on_vcpu(vcpu); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index fd7fb5c5ef5dc..946992f7bb252 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -135,7 +135,7 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) return -ENOMEM; inti->type = KVM_S390_SIGP_STOP; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) { /* another SIGP STOP is pending */ rc = SIGP_CC_BUSY; @@ -154,7 +154,7 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) if (waitqueue_active(li->wq)) wake_up_interruptible(li->wq); out: - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return rc; } @@ -243,7 +243,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, if (!inti) return SIGP_CC_BUSY; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); /* cpu must be in stopped state */ if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; @@ -264,7 +264,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); out_li: - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return rc; } @@ -280,9 +280,9 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; - spin_lock_bh(&dst_vcpu->arch.local_int.lock); + spin_lock(&dst_vcpu->arch.local_int.lock); flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); - spin_unlock_bh(&dst_vcpu->arch.local_int.lock); + spin_unlock(&dst_vcpu->arch.local_int.lock); if (!(flags & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; @@ -343,10 +343,10 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) rc = SIGP_CC_BUSY; - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return rc; } @@ -466,11 +466,11 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); - spin_lock_bh(&dest_vcpu->arch.local_int.lock); + spin_lock(&dest_vcpu->arch.local_int.lock); if (waitqueue_active(&dest_vcpu->wq)) wake_up_interruptible(&dest_vcpu->wq); dest_vcpu->preempted = true; - spin_unlock_bh(&dest_vcpu->arch.local_int.lock); + spin_unlock(&dest_vcpu->arch.local_int.lock); kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED); return 0; From 433b9ee43c233790c0ae7c02785d6d73fd4d4455 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 6 May 2014 16:11:14 +0200 Subject: [PATCH 078/104] KVM: s390: remove _bh locking from start_stop_lock The start_stop_lock is no longer acquired when in atomic context, therefore we can convert it into an ordinary spin_lock. Signed-off-by: David Hildenbrand Reviewed-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index a7bda180fe65e..b29a03132ecb0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1478,7 +1478,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); /* Only one cpu at a time may enter/leave the STOPPED state. */ - spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_lock(&vcpu->kvm->arch.start_stop_lock); online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); for (i = 0; i < online_vcpus; i++) { @@ -1504,7 +1504,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) * Let's play safe and flush the VCPU at startup. */ vcpu->arch.sie_block->ihcpu = 0xffff; - spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_unlock(&vcpu->kvm->arch.start_stop_lock); return; } @@ -1518,7 +1518,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); /* Only one cpu at a time may enter/leave the STOPPED state. */ - spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_lock(&vcpu->kvm->arch.start_stop_lock); online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); /* Need to lock access to action_bits to avoid a SIGP race condition */ @@ -1547,7 +1547,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) __enable_ibs_on_vcpu(started_vcpu); } - spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_unlock(&vcpu->kvm->arch.start_stop_lock); return; } From 0e9c85a5a312fef3e2e79d3ce2d8b6e5b6115e90 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 16 May 2014 11:59:46 +0200 Subject: [PATCH 079/104] KVM: s390: move vcpu wakeup code to a central point Let's move the vcpu wakeup code to a central point. We should set the vcpu->preempted flag only if the target is actually sleeping and before the real wakeup happens. Otherwise the preempted flag might be set, when not necessary. This may result in immediate reschedules after schedule() in some scenarios. The wakeup code doesn't require the local_int.lock to be held. Signed-off-by: David Hildenbrand Reviewed-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 24 +++++++++++++++--------- arch/s390/kvm/kvm-s390.h | 1 + arch/s390/kvm/sigp.c | 20 ++++++-------------- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 86575b4cdc1c4..65396e14ff058 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -617,12 +617,22 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return 0; } +void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) +{ + if (waitqueue_active(&vcpu->wq)) { + /* + * The vcpu gave up the cpu voluntarily, mark it as a good + * yield-candidate. + */ + vcpu->preempted = true; + wake_up_interruptible(&vcpu->wq); + } +} + void kvm_s390_tasklet(unsigned long parm) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm; - - if (waitqueue_active(&vcpu->wq)) - wake_up_interruptible(&vcpu->wq); + kvm_s390_vcpu_wakeup(vcpu); } /* @@ -905,10 +915,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) li = &dst_vcpu->arch.local_int; spin_lock(&li->lock); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); - kvm_get_vcpu(kvm, sigcpu)->preempted = true; spin_unlock(&li->lock); + kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); unlock_fi: spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); @@ -1059,11 +1067,9 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, if (inti->type == KVM_S390_SIGP_STOP) li->action_bits |= ACTION_STOP_ON_STOP; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&vcpu->wq)) - wake_up_interruptible(&vcpu->wq); - vcpu->preempted = true; spin_unlock(&li->lock); mutex_unlock(&vcpu->kvm->lock); + kvm_s390_vcpu_wakeup(vcpu); return 0; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 33a0e4bed2a5f..665eaccb9ca57 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -136,6 +136,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) } int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); +void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 946992f7bb252..c6f1c2bc97539 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -125,8 +125,9 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; } -static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) +static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action) { + struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; struct kvm_s390_interrupt_info *inti; int rc = SIGP_CC_ORDER_CODE_ACCEPTED; @@ -151,8 +152,7 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) atomic_set(&li->active, 1); li->action_bits |= action; atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); + kvm_s390_vcpu_wakeup(dst_vcpu); out: spin_unlock(&li->lock); @@ -161,7 +161,6 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) { - struct kvm_s390_local_interrupt *li; struct kvm_vcpu *dst_vcpu = NULL; int rc; @@ -171,9 +170,8 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; - li = &dst_vcpu->arch.local_int; - rc = __inject_sigp_stop(li, action); + rc = __inject_sigp_stop(dst_vcpu, action); VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); @@ -258,8 +256,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); + kvm_s390_vcpu_wakeup(dst_vcpu); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); @@ -466,12 +463,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); - spin_lock(&dest_vcpu->arch.local_int.lock); - if (waitqueue_active(&dest_vcpu->wq)) - wake_up_interruptible(&dest_vcpu->wq); - dest_vcpu->preempted = true; - spin_unlock(&dest_vcpu->arch.local_int.lock); - + kvm_s390_vcpu_wakeup(dest_vcpu); kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED); return 0; } From ea74c0ea1b24a6978a6ebc80ba4dbc7b7848b32d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 16 May 2014 12:08:29 +0200 Subject: [PATCH 080/104] KVM: s390: remove the tasklet used by the hrtimer We can get rid of the tasklet used for waking up a VCPU in the hrtimer code but wakeup the VCPU directly. Signed-off-by: David Hildenbrand Reviewed-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 - arch/s390/kvm/interrupt.c | 13 +------------ arch/s390/kvm/kvm-s390.c | 2 -- arch/s390/kvm/kvm-s390.h | 1 - 4 files changed, 1 insertion(+), 16 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b3acf28c8c963..773bef7614d8d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -366,7 +366,6 @@ struct kvm_vcpu_arch { s390_fp_regs guest_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; - struct tasklet_struct tasklet; struct kvm_s390_pgm_info pgm; union { struct cpuid cpu_id; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 65396e14ff058..1be3d8da49e91 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -629,23 +629,12 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) } } -void kvm_s390_tasklet(unsigned long parm) -{ - struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm; - kvm_s390_vcpu_wakeup(vcpu); -} - -/* - * low level hrtimer wake routine. Because this runs in hardirq context - * we schedule a tasklet to do the real work. - */ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); - vcpu->preempted = true; - tasklet_schedule(&vcpu->arch.tasklet); + kvm_s390_vcpu_wakeup(vcpu); return HRTIMER_NORESTART; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b29a03132ecb0..dd902e64c033d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -649,8 +649,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return rc; } hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, - (unsigned long) vcpu); vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; get_cpu_id(&vcpu->arch.cpu_id); vcpu->arch.cpu_id.version = 0xff; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 665eaccb9ca57..3862fa2cefe08 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -138,7 +138,6 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); -void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); From 0907c855b3b2006b11a96e6c81f91e36a5278d0e Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 27 Jun 2014 09:29:26 +0200 Subject: [PATCH 081/104] KVM: document target of capability enablement Capabilities can be enabled on a vcpu or (since recently) on a vm. Document this and note for the existing capabilites whether they are per-vcpu or per-vm. Signed-off-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/api.txt | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a41465bd6a5c3..7ab41e9154c25 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2875,15 +2875,18 @@ The fields in each entry are defined as follows: 6. Capabilities that can be enabled ----------------------------------- -There are certain capabilities that change the behavior of the virtual CPU when -enabled. To enable them, please see section 4.37. Below you can find a list of -capabilities and what their effect on the vCPU is when enabling them. +There are certain capabilities that change the behavior of the virtual CPU or +the virtual machine when enabled. To enable them, please see section 4.37. +Below you can find a list of capabilities and what their effect on the vCPU or +the virtual machine is when enabling them. The following information is provided along with the description: Architectures: which instruction set architectures provide this ioctl. x86 includes both i386 and x86_64. + Target: whether this is a per-vcpu or per-vm capability. + Parameters: what parameters are accepted by the capability. Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) @@ -2893,6 +2896,7 @@ The following information is provided along with the description: 6.1 KVM_CAP_PPC_OSI Architectures: ppc +Target: vcpu Parameters: none Returns: 0 on success; -1 on error @@ -2907,6 +2911,7 @@ When this capability is enabled, KVM_EXIT_OSI can occur. 6.2 KVM_CAP_PPC_PAPR Architectures: ppc +Target: vcpu Parameters: none Returns: 0 on success; -1 on error @@ -2926,6 +2931,7 @@ When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur. 6.3 KVM_CAP_SW_TLB Architectures: ppc +Target: vcpu Parameters: args[0] is the address of a struct kvm_config_tlb Returns: 0 on success; -1 on error @@ -2968,6 +2974,7 @@ For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV: 6.4 KVM_CAP_S390_CSS_SUPPORT Architectures: s390 +Target: vcpu Parameters: none Returns: 0 on success; -1 on error @@ -2979,9 +2986,13 @@ handled in-kernel, while the other I/O instructions are passed to userspace. When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST SUBCHANNEL intercepts. +Note that even though this capability is enabled per-vcpu, the complete +virtual machine is affected. + 6.5 KVM_CAP_PPC_EPR Architectures: ppc +Target: vcpu Parameters: args[0] defines whether the proxy facility is active Returns: 0 on success; -1 on error @@ -3007,6 +3018,7 @@ This capability connects the vcpu to an in-kernel MPIC device. 6.7 KVM_CAP_IRQ_XICS Architectures: ppc +Target: vcpu Parameters: args[0] is the XICS device fd args[1] is the XICS CPU number (server ID) for this vcpu From 8a366a4bae154b986df475176d7465c1db7d5f40 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 27 Jun 2014 11:06:25 +0200 Subject: [PATCH 082/104] KVM: s390: document KVM_CAP_S390_IRQCHIP Let's document that this is a capability that may be enabled per-vm. Signed-off-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/api.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 7ab41e9154c25..f1979c77ac1c3 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3023,3 +3023,12 @@ Parameters: args[0] is the XICS device fd args[1] is the XICS CPU number (server ID) for this vcpu This capability connects the vcpu to an in-kernel XICS device. + +6.8 KVM_CAP_S390_IRQCHIP + +Architectures: s390 +Target: vm +Parameters: none + +This capability enables the in-kernel irqchip for s390. Please refer to +"4.24 KVM_CREATE_IRQCHIP" for details. From 78599d90041e0af5702eb45081a77b65d2941a00 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 15 Jul 2014 09:54:39 +0200 Subject: [PATCH 083/104] KVM: s390: advertise KVM_CAP_S390_IRQCHIP We should advertise all capabilities, including those that can be enabled. Signed-off-by: Cornelia Huck Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dd902e64c033d..339b34a02fb8b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -166,6 +166,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_ENABLE_CAP_VM: + case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: case KVM_CAP_MP_STATE: r = 1; From e59d120f96687a606db0513c427f10e30a427cc4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 16 Jul 2014 13:58:19 +0200 Subject: [PATCH 084/104] KVM: s390: add ipte to trace event decoding IPTE intercept can happen, let's decode that. Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck --- arch/s390/include/uapi/asm/sie.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index 5d9cc19462c4f..d4096fdfc6ab4 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -108,6 +108,7 @@ exit_code_ipa0(0xB2, 0x17, "STETR"), \ exit_code_ipa0(0xB2, 0x18, "PC"), \ exit_code_ipa0(0xB2, 0x20, "SERVC"), \ + exit_code_ipa0(0xB2, 0x21, "IPTE"), \ exit_code_ipa0(0xB2, 0x28, "PT"), \ exit_code_ipa0(0xB2, 0x29, "ISKE"), \ exit_code_ipa0(0xB2, 0x2a, "RRBE"), \ From bb663c7ada380f3c89c2f83fdbe2b3626621385d Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 21 Jul 2014 14:37:26 +0300 Subject: [PATCH 085/104] KVM: x86: Clearing rflags.rf upon skipped emulated instruction When skipping an emulated instruction, rflags.rf should be cleared as it would be on real x86 CPU. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f750b69ca4431..1fd806cb96d48 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5229,6 +5229,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (emulation_type & EMULTYPE_SKIP) { kvm_rip_write(vcpu, ctxt->_eip); + if (ctxt->eflags & X86_EFLAGS_RF) + kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); return EMULATE_DONE; } From 163b135e7b09e9158f7eb0aa74e716865e3005d2 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 21 Jul 2014 14:37:28 +0300 Subject: [PATCH 086/104] KVM: x86: popf emulation should not change RF RFLAGS.RF is always zero after popf. Therefore, popf should not updated RF, as anyhow emulating popf, just as any other instruction should clear RFLAGS.RF. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index dd074106d0c9b..cf117bfe95219 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1675,7 +1675,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, return rc; change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF - | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID; + | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID; switch(ctxt->mode) { case X86EMUL_MODE_PROT64: From 4467c3f1ad16e3640e2b61e1a5e0bd55281a925d Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 21 Jul 2014 14:37:29 +0300 Subject: [PATCH 087/104] KVM: x86: Clear rflags.rf on emulated instructions When an instruction is emulated RFLAGS.RF should be cleared. KVM previously did not do so. This patch clears RFLAGS.RF after interception is done. If a fault occurs during the instruction, RFLAGS.RF will be set by a previous patch. This patch does not handle the case of traps/interrupts during rep-strings. Traps are only expected to occur on debug watchpoints, and those are anyhow not handled by the emulator. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index cf117bfe95219..189b8bd86e31e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4640,6 +4640,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) /* All REP prefixes have the same first termination condition */ if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { ctxt->eip = ctxt->_eip; + ctxt->eflags &= ~EFLG_RF; goto done; } } @@ -4682,6 +4683,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + ctxt->eflags &= ~EFLG_RF; + if (ctxt->execute) { if (ctxt->d & Fastop) { void (*fop)(struct fastop *) = (void *)ctxt->execute; From 6c6cb69b8e974049cca2cc4480052fb9e7df767b Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 21 Jul 2014 14:37:30 +0300 Subject: [PATCH 088/104] KVM: x86: Cleanup of rflags.rf cleaning RFLAGS.RF was cleaned in several functions (e.g., syscall) in the x86 emulator. Now that we clear it before the execution of an instruction in the emulator, we can remove the specific cleanup of RFLAGS.RF. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 189b8bd86e31e..8d415563e05b8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2211,7 +2211,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip; if (efer & EFER_LMA) { #ifdef CONFIG_X86_64 - *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF; + *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags; ops->get_msr(ctxt, ctxt->mode == X86EMUL_MODE_PROT64 ? @@ -2219,14 +2219,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ctxt->_eip = msr_data; ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); - ctxt->eflags &= ~(msr_data | EFLG_RF); + ctxt->eflags &= ~msr_data; #endif } else { /* legacy mode */ ops->get_msr(ctxt, MSR_STAR, &msr_data); ctxt->_eip = (u32)msr_data; - ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); + ctxt->eflags &= ~(EFLG_VM | EFLG_IF); } return X86EMUL_CONTINUE; @@ -2275,7 +2275,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) break; } - ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); + ctxt->eflags &= ~(EFLG_VM | EFLG_IF); cs_sel = (u16)msr_data; cs_sel &= ~SELECTOR_RPL_MASK; ss_sel = cs_sel + 8; From 4161a569065b17954848069d5209182083ce876b Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 17 Jul 2014 01:19:31 +0300 Subject: [PATCH 089/104] KVM: x86: emulator injects #DB when RFLAGS.RF is set If the RFLAGS.RF is set, then no #DB should occur on instruction breakpoints. However, the KVM emulator injects #DB regardless to RFLAGS.RF. This patch fixes this behavior. KVM, however, still appears not to update RFLAGS.RF correctly, regardless of this patch. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1fd806cb96d48..cc4f65f39b875 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5158,7 +5158,8 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) } } - if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) { + if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) && + !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) { dr6 = kvm_vcpu_check_hw_bp(eip, 0, vcpu->arch.dr7, vcpu->arch.db); From c9cdd085bb75226879fd468b88e2e7eb467325b7 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 21 Jul 2014 14:37:24 +0300 Subject: [PATCH 090/104] KVM: x86: Defining missing x86 vectors Defining XE, XM and VE vector numbers. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index d3a87780c70bd..d7dcef58aefa9 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -23,7 +23,10 @@ #define GP_VECTOR 13 #define PF_VECTOR 14 #define MF_VECTOR 16 +#define AC_VECTOR 17 #define MC_VECTOR 18 +#define XM_VECTOR 19 +#define VE_VECTOR 20 /* Select x86 specific features in */ #define __KVM_HAVE_PIT From 4fa7734c62cdd8c07edd54fa5a5e91482273071a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Jul 2014 12:25:16 +0200 Subject: [PATCH 091/104] KVM: nVMX: fix lifetime issues for vmcs02 free_nested needs the loaded_vmcs to be valid if it is a vmcs02, in order to detach it from the shadow vmcs. However, this is not available anymore after commit 26a865f4aa8e (KVM: VMX: fix use after free of vmx->loaded_vmcs, 2014-01-03). Revert that patch, and fix its problem by forcing a vmcs01 as the active VMCS before freeing all the nested VMX state. Reported-by: Wanpeng Li Tested-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7534a9f67cc86..462334eaa3c00 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5772,22 +5772,27 @@ static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) /* * Free all VMCSs saved for this vcpu, except the one pointed by - * vmx->loaded_vmcs. These include the VMCSs in vmcs02_pool (except the one - * currently used, if running L2), and vmcs01 when running L2. + * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs + * must be &vmx->vmcs01. */ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) { struct vmcs02_list *item, *n; + + WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { - if (vmx->loaded_vmcs != &item->vmcs02) - free_loaded_vmcs(&item->vmcs02); + /* + * Something will leak if the above WARN triggers. Better than + * a use-after-free. + */ + if (vmx->loaded_vmcs == &item->vmcs02) + continue; + + free_loaded_vmcs(&item->vmcs02); list_del(&item->list); kfree(item); + vmx->nested.vmcs02_num--; } - vmx->nested.vmcs02_num = 0; - - if (vmx->loaded_vmcs != &vmx->vmcs01) - free_loaded_vmcs(&vmx->vmcs01); } /* @@ -7557,13 +7562,31 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_complete_interrupts(vmx); } +static void vmx_load_vmcs01(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int cpu; + + if (vmx->loaded_vmcs == &vmx->vmcs01) + return; + + cpu = get_cpu(); + vmx->loaded_vmcs = &vmx->vmcs01; + vmx_vcpu_put(vcpu); + vmx_vcpu_load(vcpu, cpu); + vcpu->cpu = cpu; + put_cpu(); +} + static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); free_vpid(vmx); - free_loaded_vmcs(vmx->loaded_vmcs); + leave_guest_mode(vcpu); + vmx_load_vmcs01(vcpu); free_nested(vmx); + free_loaded_vmcs(vmx->loaded_vmcs); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vmx); @@ -8721,7 +8744,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, unsigned long exit_qualification) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int cpu; struct vmcs12 *vmcs12 = get_vmcs12(vcpu); /* trying to cancel vmlaunch/vmresume is a bug */ @@ -8746,12 +8768,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs12->vm_exit_intr_error_code, KVM_ISA_VMX); - cpu = get_cpu(); - vmx->loaded_vmcs = &vmx->vmcs01; - vmx_vcpu_put(vcpu); - vmx_vcpu_load(vcpu, cpu); - vcpu->cpu = cpu; - put_cpu(); + vmx_load_vmcs01(vcpu); vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); From 9a2a05b9ed618b1bb6d4cbec0c2e1f80d6636609 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Jul 2014 11:55:46 +0200 Subject: [PATCH 092/104] KVM: nVMX: clean up nested_release_vmcs12 and code around it Make nested_release_vmcs12 idempotent. Tested-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 462334eaa3c00..3300f4f2da486 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6109,20 +6109,27 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) { u32 exec_control; + if (vmx->nested.current_vmptr == -1ull) + return; + + /* current_vmptr and current_vmcs12 are always set/reset together */ + if (WARN_ON(vmx->nested.current_vmcs12 == NULL)) + return; + if (enable_shadow_vmcs) { - if (vmx->nested.current_vmcs12 != NULL) { - /* copy to memory all shadowed fields in case - they were modified */ - copy_shadow_to_vmcs12(vmx); - vmx->nested.sync_shadow_vmcs = false; - exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); - vmcs_write64(VMCS_LINK_POINTER, -1ull); - } + /* copy to memory all shadowed fields in case + they were modified */ + copy_shadow_to_vmcs12(vmx); + vmx->nested.sync_shadow_vmcs = false; + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); + vmcs_write64(VMCS_LINK_POINTER, -1ull); } kunmap(vmx->nested.current_vmcs12_page); nested_release_page(vmx->nested.current_vmcs12_page); + vmx->nested.current_vmptr = -1ull; + vmx->nested.current_vmcs12 = NULL; } /* @@ -6133,12 +6140,9 @@ static void free_nested(struct vcpu_vmx *vmx) { if (!vmx->nested.vmxon) return; + vmx->nested.vmxon = false; - if (vmx->nested.current_vmptr != -1ull) { - nested_release_vmcs12(vmx); - vmx->nested.current_vmptr = -1ull; - vmx->nested.current_vmcs12 = NULL; - } + nested_release_vmcs12(vmx); if (enable_shadow_vmcs) free_vmcs(vmx->nested.current_shadow_vmcs); /* Unpin physical memory we referred to in current vmcs02 */ @@ -6175,11 +6179,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr)) return 1; - if (vmptr == vmx->nested.current_vmptr) { + if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); - vmx->nested.current_vmptr = -1ull; - vmx->nested.current_vmcs12 = NULL; - } page = nested_get_page(vcpu, vmptr); if (page == NULL) { @@ -6521,9 +6522,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); return 1; } - if (vmx->nested.current_vmptr != -1ull) - nested_release_vmcs12(vmx); + nested_release_vmcs12(vmx); vmx->nested.current_vmptr = vmptr; vmx->nested.current_vmcs12 = new_vmcs12; vmx->nested.current_vmcs12_page = page; From 6f43ed01e87c8a8dbd8c826eaf0f714c1342c039 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 15 Jul 2014 17:37:46 +0300 Subject: [PATCH 093/104] KVM: x86: DR6/7.RTM cannot be written Haswell and newer Intel CPUs have support for RTM, and in that case DR6.RTM is not fixed to 1 and DR7.RTM is not fixed to zero. That is not the case in the current KVM implementation. This bug is apparent only if the MOV-DR instruction is emulated or the host also debugs the guest. This patch is a partial fix which enables DR6.RTM and DR7.RTM to be cleared and set respectively. It also sets DR6.RTM upon every debug exception. Obviously, it is not a complete fix, as debugging of RTM is still unsupported. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 8 +++++--- arch/x86/kvm/cpuid.h | 8 ++++++++ arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 22 ++++++++++++++++------ 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b8a4480176b9c..a84eaf7ba33fd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -152,14 +152,16 @@ enum { #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) -#define DR6_FIXED_1 0xffff0ff0 -#define DR6_VOLATILE 0x0000e00f +#define DR6_RTM (1 << 16) +#define DR6_FIXED_1 0xfffe0ff0 +#define DR6_INIT 0xffff0ff0 +#define DR6_VOLATILE 0x0001e00f #define DR7_BP_EN_MASK 0x000000ff #define DR7_GE (1 << 9) #define DR7_GD (1 << 13) #define DR7_FIXED_1 0x00000400 -#define DR7_VOLATILE 0xffff23ff +#define DR7_VOLATILE 0xffff2bff /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index f9087315e0cda..a5380590ab0e1 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -95,4 +95,12 @@ static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); return best && (best->edx & bit(X86_FEATURE_GBPAGES)); } + +static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->ebx & bit(X86_FEATURE_RTM)); +} #endif diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3300f4f2da486..fd24f68378a7d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4892,7 +4892,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= dr6; + vcpu->arch.dr6 |= dr6 | DR6_RTM; if (!(dr6 & ~DR6_RESERVED)) /* icebp */ skip_emulated_instruction(vcpu); @@ -5151,7 +5151,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) return 0; } else { vcpu->arch.dr7 &= ~DR7_GD; - vcpu->arch.dr6 |= DR6_BD; + vcpu->arch.dr6 |= DR6_BD | DR6_RTM; vmcs_writel(GUEST_DR7, vcpu->arch.dr7); kvm_queue_exception(vcpu, DB_VECTOR); return 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cc4f65f39b875..08c48a3c7c3ac 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -759,6 +759,15 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu) vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; } +static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) +{ + u64 fixed = DR6_FIXED_1; + + if (!guest_cpuid_has_rtm(vcpu)) + fixed |= DR6_RTM; + return fixed; +} + static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { switch (dr) { @@ -774,7 +783,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) case 6: if (val & 0xffffffff00000000ULL) return -1; /* #GP */ - vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; + vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu); kvm_update_dr6(vcpu); break; case 5: @@ -5115,7 +5124,8 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag */ if (unlikely(rflags & X86_EFLAGS_TF)) { if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1; + kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | + DR6_RTM; kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; @@ -5128,7 +5138,7 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag * cleared by the processor". */ vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= DR6_BS; + vcpu->arch.dr6 |= DR6_BS | DR6_RTM; kvm_queue_exception(vcpu, DB_VECTOR); } } @@ -5147,7 +5157,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) vcpu->arch.eff_db); if (dr6 != 0) { - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; + kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + get_segment_base(vcpu, VCPU_SREG_CS); @@ -5166,7 +5176,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) if (dr6 != 0) { vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= dr6; + vcpu->arch.dr6 |= dr6 | DR6_RTM; kvm_queue_exception(vcpu, DB_VECTOR); *r = EMULATE_DONE; return true; @@ -6866,7 +6876,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_clear_exception_queue(vcpu); memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); - vcpu->arch.dr6 = DR6_FIXED_1; + vcpu->arch.dr6 = DR6_INIT; kvm_update_dr6(vcpu); vcpu->arch.dr7 = DR7_FIXED_1; kvm_update_dr7(vcpu); From b9a1ecb909e8f772934cc4bf1f164124c9fbb0d0 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 24 Jul 2014 14:51:23 +0300 Subject: [PATCH 094/104] KVM: x86: Setting rflags.rf during rep-string emulation This patch updates RF for rep-string emulation. The flag is set upon the first iteration, and cleared after the last (if emulated). It is intended to make sure that if a trap (in future data/io #DB emulation) or interrupt is delivered to the guest during the rep-string instruction, RF will be set correctly. RF affects whether instruction breakpoint in the guest is masked. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8d415563e05b8..57743edfb4e21 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4683,7 +4683,10 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - ctxt->eflags &= ~EFLG_RF; + if (ctxt->rep_prefix && (ctxt->d & String)) + ctxt->eflags |= EFLG_RF; + else + ctxt->eflags &= ~EFLG_RF; if (ctxt->execute) { if (ctxt->d & Fastop) { @@ -4824,6 +4827,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } goto done; /* skip rip writeback */ } + ctxt->eflags &= ~EFLG_RF; } ctxt->eip = ctxt->_eip; From d6e8c8545651b05a86c5b9d29d2fe11ad4cbb9aa Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 24 Jul 2014 14:51:24 +0300 Subject: [PATCH 095/104] KVM: x86: set rflags.rf during fault injection x86 does not automatically set rflags.rf during event injection. This patch does partial job, setting rflags.rf upon fault injection. It does not handle the setting of RF upon interrupt injection on rep-string instruction. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 08c48a3c7c3ac..439f96bf424df 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -311,6 +311,31 @@ static int exception_class(int vector) return EXCPT_BENIGN; } +#define EXCPT_FAULT 0 +#define EXCPT_TRAP 1 +#define EXCPT_ABORT 2 +#define EXCPT_INTERRUPT 3 + +static int exception_type(int vector) +{ + unsigned int mask; + + if (WARN_ON(vector > 31 || vector == NMI_VECTOR)) + return EXCPT_INTERRUPT; + + mask = 1 << vector; + + /* #DB is trap, as instruction watchpoints are handled elsewhere */ + if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR))) + return EXCPT_TRAP; + + if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR))) + return EXCPT_ABORT; + + /* Reserved exceptions will result in fault */ + return EXCPT_FAULT; +} + static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code, bool reinject) @@ -5893,6 +5918,11 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) trace_kvm_inj_exception(vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code); + + if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) + __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | + X86_EFLAGS_RF); + kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code, From 0123be429fef40f067e5b1811576c3994229f59e Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 24 Jul 2014 15:06:56 +0300 Subject: [PATCH 096/104] KVM: x86: Assertions to check no overrun in MSR lists Currently there is no check whether shared MSRs list overrun the allocated size which can results in bugs. In addition there is no check that vmx->guest_msrs has sufficient space to accommodate all the VMX msrs. This patch adds the assertions. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 ++ arch/x86/kvm/x86.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fd24f68378a7d..3397a88b74633 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7608,6 +7608,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vcpu; vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + BUILD_BUG_ON(PAGE_SIZE / sizeof(struct shared_msr_entry) < NR_VMX_MSR); + err = -ENOMEM; if (!vmx->guest_msrs) { goto uninit_vcpu; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 439f96bf424df..d38abc81db650 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -212,6 +212,7 @@ static void shared_msr_update(unsigned slot, u32 msr) void kvm_define_shared_msr(unsigned slot, u32 msr) { + BUG_ON(slot >= KVM_NR_SHARED_MSRS); if (slot >= shared_msrs_global.nr) shared_msrs_global.nr = slot + 1; shared_msrs_global.msrs[slot] = msr; From 03916db9348c079d8d214f971cc114bb51c6b869 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 24 Jul 2014 14:21:57 +0200 Subject: [PATCH 097/104] Replace NR_VMX_MSR with its definition Using ARRAY_SIZE directly makes it easier to read the code. While touching the code, replace the division by a multiplication in the recently added BUILD_BUG_ON. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3397a88b74633..a3845b8e2b800 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -823,7 +823,6 @@ static const u32 vmx_msr_index[] = { #endif MSR_EFER, MSR_TSC_AUX, MSR_STAR, }; -#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) static inline bool is_page_fault(u32 intr_info) { @@ -4441,7 +4440,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx->vcpu.arch.pat = host_pat; } - for (i = 0; i < NR_VMX_MSR; ++i) { + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { u32 index = vmx_msr_index[i]; u32 data_low, data_high; int j = vmx->nmsrs; @@ -7608,7 +7607,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vcpu; vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - BUILD_BUG_ON(PAGE_SIZE / sizeof(struct shared_msr_entry) < NR_VMX_MSR); + BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) + > PAGE_SIZE); err = -ENOMEM; if (!vmx->guest_msrs) { @@ -8960,7 +8960,7 @@ static int __init vmx_init(void) rdmsrl_safe(MSR_EFER, &host_efer); - for (i = 0; i < NR_VMX_MSR; ++i) + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) kvm_define_shared_msr(i, vmx_msr_index[i]); vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); From 25f97ff451a4aab534afc1290af97d23ea0b4fb3 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 25 Jul 2014 06:27:03 -0700 Subject: [PATCH 098/104] kvm: Resolve missing-field-initializers warnings Resolve missing-field-initializers warnings seen in W=2 kernel builds by having macros generate more elaborated initializers. That is enough to silence the warnings. Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: Paolo Bonzini --- virt/kvm/irq_comm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index ced4a542a0313..a228ee82bad26 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -323,13 +323,13 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } + .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) #ifdef CONFIG_X86 # define PIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 } + .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } # define ROUTING_ENTRY2(irq) \ IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) #else From b55a8144d1807f9e74c51cb584f0dd198483d86c Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 25 Jul 2014 06:27:05 -0700 Subject: [PATCH 099/104] x86/kvm: Resolve shadow warning from min macro Resolve a shadow warning generated in W=2 builds by the nested use of the min macro by instead using the min3 macro for the minimum of 3 values. Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 57743edfb4e21..56657b0bb3bb1 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1317,8 +1317,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, in_page = (ctxt->eflags & EFLG_DF) ? offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); - n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, - count); + n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count); if (n == 0) n = 1; rc->pos = rc->end = 0; From d514f42641e1976e9eae6ea500c13274e62cdba3 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 28 Jul 2014 11:52:02 +0200 Subject: [PATCH 100/104] KVM: s390: Fix memory leak on busy SIGP stop commit 7dfc63cf977447e09b1072911c22564f900fc578 (KVM: s390: allow only one SIGP STOP (AND STORE STATUS) at a time) introduced a memory leak if a sigp stop is already pending. Free the allocated inti structure. Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand --- arch/s390/kvm/sigp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index c6f1c2bc97539..cf243ba3d50f2 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -139,6 +139,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action) spin_lock(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) { /* another SIGP STOP is pending */ + kfree(inti); rc = SIGP_CC_BUSY; goto out; } From 296f047502f1b3ddfd63adbc192624ce80740081 Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Tue, 29 Jul 2014 16:14:10 -0500 Subject: [PATCH 101/104] KVM: vmx: remove duplicate vmx_mpx_supported() prototype Remove a prototype which was added by both 93c4adc7afe and 36be0b9deb2. Signed-off-by: Chris J Arges Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a3845b8e2b800..e618f34bde2d7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -743,7 +743,6 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); -static bool vmx_mpx_supported(void); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); From 0f6c0a740b7d3e1f3697395922d674000f83d060 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 30 Jul 2014 18:07:24 +0200 Subject: [PATCH 102/104] KVM: x86: always exit on EOIs for interrupts listed in the IOAPIC redir table Currently, the EOI exit bitmap (used for APICv) does not include interrupts that are masked. However, this can cause a bug that manifests as an interrupt storm inside the guest. Alex Williamson reported the bug and is the one who really debugged this; I only wrote the patch. :) The scenario involves a multi-function PCI device with OHCI and EHCI USB functions and an audio function, all assigned to the guest, where both USB functions use legacy INTx interrupts. As soon as the guest boots, interrupts for these devices turn into an interrupt storm in the guest; the host does not see the interrupt storm. Basically the EOI path does not work, and the guest continues to see the interrupt over and over, even after it attempts to mask it at the APIC. The bug is only visible with older kernels (RHEL6.5, based on 2.6.32 with not many changes in the area of APIC/IOAPIC handling). Alex then tried forcing bit 59 (corresponding to the USB functions' IRQ) on in the eoi_exit_bitmap and TMR, and things then work. What happens is that VFIO asserts IRQ11, then KVM recomputes the EOI exit bitmap. It does not have set bit 59 because the RTE was masked, so the IOAPIC never sees the EOI and the interrupt continues to fire in the guest. My guess was that the guest is masking the interrupt in the redirection table in the interrupt routine, i.e. while the interrupt is set in a LAPIC's ISR, The simplest fix is to ignore the masking state, we would rather have an unnecessary exit rather than a missed IRQ ACK and anyway IOAPIC interrupts are not as performance-sensitive as for example MSIs. Alex tested this patch and it fixed his bug. [Thanks to Alex for his precise description of the problem and initial debugging effort. A lot of the text above is based on emails exchanged with him.] Reported-by: Alex Williamson Tested-by: Alex Williamson Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- virt/kvm/ioapic.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 2458a1dc2ba9f..e8ce34c9db32c 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -254,10 +254,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, spin_lock(&ioapic->lock); for (index = 0; index < IOAPIC_NUM_PINS; index++) { e = &ioapic->redirtbl[index]; - if (!e->fields.mask && - (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || - kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, - index) || index == RTC_GSI)) { + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || + kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || + index == RTC_GSI) { if (kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, e->fields.dest_mode)) { __set_bit(e->fields.vector, From db3738614767e1f2dfe69afca070d7bc46266cca Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 28 Jul 2014 14:05:41 +0200 Subject: [PATCH 103/104] KVM: s390: rework broken SIGP STOP interrupt handling A VCPU might never stop if it intercepts (for whatever reason) between "fake interrupt delivery" and execution of the stop function. Heart of the problem is that SIGP STOP is an interrupt that has to be processed on every SIE entry until the VCPU finally executes the stop function. This problem was made apparent by commit 7dfc63cf977447e09b1072911c2 (KVM: s390: allow only one SIGP STOP (AND STORE STATUS) at a time). With the old code, the guest could (incorrectly) inject SIGP STOPs multiple times. The bug of losing a sigp stop exists in KVM before 7dfc63cf97, but it was hidden by Linux guests doing a sigp stop loop. The new code (rightfully) returns CC=2 and does not queue a new interrupt. This patch is a simple fix of the problem. Longterm we are going to rework that code - e.g. get rid of the action bits and so on. Signed-off-by: David Hildenbrand Reviewed-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger [some additional patch description] --- arch/s390/kvm/interrupt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1be3d8da49e91..92528a0bdda6c 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -158,6 +158,9 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) LCTL_CR10 | LCTL_CR11); vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT); } + + if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) + atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); } static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) From 42cbc04fd3b5e3f9b011bf9fa3ce0b3d1e10b58b Mon Sep 17 00:00:00 2001 From: Mark D Rustad Date: Wed, 30 Jul 2014 14:19:26 -0700 Subject: [PATCH 104/104] x86/kvm: Resolve shadow warnings in macro expansion Resolve shadow warnings that appear in W=2 builds. Instead of using ret to hold the return pointer, save the length in a new variable saved_len and compute the pointer on exit. This also resolves a very technical error, in that ret was declared as a const char *, when it really was a char * const. Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmutrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 9d2e0ffcb1909..5aaf356417682 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -22,7 +22,7 @@ __entry->unsync = sp->unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ - const char *ret = p->buffer + p->len; \ + const u32 saved_len = p->len; \ static const char *access_str[] = { \ "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \ }; \ @@ -41,7 +41,7 @@ role.nxe ? "" : "!", \ __entry->root_count, \ __entry->unsync ? "unsync" : "sync", 0); \ - ret; \ + p->buffer + saved_len; \ }) #define kvm_mmu_trace_pferr_flags \