aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-07-15 11:56:07 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-07-15 11:56:07 -0700
commit405386b02157ea1ee49ecb6917c2397985bb2a39 (patch)
tree88f25e1eb62298fb8a4afd556f66adeaa013d218
parentf3523a226dbb0c925def650a658a0755185d60a8 (diff)
parentd951b2210c1ad2dc08345bb8d97e5a172a15261e (diff)
downloadstaging-405386b02157ea1ee49ecb6917c2397985bb2a39.tar.gz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini: - Allow again loading KVM on 32-bit non-PAE builds - Fixes for host SMIs on AMD - Fixes for guest SMIs on AMD - Fixes for selftests on s390 and ARM - Fix memory leak - Enforce no-instrumentation area on vmentry when hardware breakpoints are in use. * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (25 commits) KVM: selftests: smm_test: Test SMM enter from L2 KVM: nSVM: Restore nested control upon leaving SMM KVM: nSVM: Fix L1 state corruption upon return from SMM KVM: nSVM: Introduce svm_copy_vmrun_state() KVM: nSVM: Check that VM_HSAVE_PA MSR was set before VMRUN KVM: nSVM: Check the value written to MSR_VM_HSAVE_PA KVM: SVM: Fix sev_pin_memory() error checks in SEV migration utilities KVM: SVM: Return -EFAULT if copy_to_user() for SEV mig packet header fails KVM: SVM: add module param to control the #SMI interception KVM: SVM: remove INIT intercept handler KVM: SVM: #SMI interception must not skip the instruction KVM: VMX: Remove vmx_msr_index from vmx.h KVM: X86: Disable hardware breakpoints unconditionally before kvm_x86->run() KVM: selftests: Address extra memslot parameters in vm_vaddr_alloc kvm: debugfs: fix memory leak in kvm_create_vm_debugfs KVM: x86/pmu: Clear anythread deprecated bit when 0xa leaf is unsupported on the SVM KVM: mmio: Fix use-after-free Read in kvm_vm_ioctl_unregister_coalesced_mmio KVM: SVM: Revert clearing of C-bit on GPA in #NPF handler KVM: x86/mmu: Do not apply HPA (memory encryption) mask to GPAs KVM: x86: Use kernel's x86_phys_bits to handle reduced MAXPHYADDR ...
-rw-r--r--arch/x86/kvm/cpuid.c30
-rw-r--r--arch/x86/kvm/mmu/mmu.c2
-rw-r--r--arch/x86/kvm/mmu/paging.h14
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h4
-rw-r--r--arch/x86/kvm/mmu/spte.h6
-rw-r--r--arch/x86/kvm/svm/nested.c53
-rw-r--r--arch/x86/kvm/svm/sev.c14
-rw-r--r--arch/x86/kvm/svm/svm.c77
-rw-r--r--arch/x86/kvm/svm/svm.h5
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
-rw-r--r--arch/x86/kvm/x86.c5
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h3
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/guest_modes.c16
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c5
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_features.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmu_role_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c70
-rw-r--r--virt/kvm/coalesced_mmio.c2
-rw-r--r--virt/kvm/kvm_main.c2
21 files changed, 255 insertions, 64 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c42613cfb5ba6d..739be5da3bca78 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -765,7 +765,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
edx.split.bit_width_fixed = cap.bit_width_fixed;
- edx.split.anythread_deprecated = 1;
+ if (cap.version)
+ edx.split.anythread_deprecated = 1;
edx.split.reserved1 = 0;
edx.split.reserved2 = 0;
@@ -940,8 +941,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
unsigned phys_as = entry->eax & 0xff;
- if (!g_phys_as)
+ /*
+ * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
+ * the guest operates in the same PA space as the host, i.e.
+ * reductions in MAXPHYADDR for memory encryption affect shadow
+ * paging, too.
+ *
+ * If TDP is enabled but an explicit guest MAXPHYADDR is not
+ * provided, use the raw bare metal MAXPHYADDR as reductions to
+ * the HPAs do not affect GPAs.
+ */
+ if (!tdp_enabled)
+ g_phys_as = boot_cpu_data.x86_phys_bits;
+ else if (!g_phys_as)
g_phys_as = phys_as;
+
entry->eax = g_phys_as | (virt_as << 8);
entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0008_EBX);
@@ -964,12 +978,18 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
case 0x8000001a:
case 0x8000001e:
break;
- /* Support memory encryption cpuid if host supports it */
case 0x8000001F:
- if (!kvm_cpu_cap_has(X86_FEATURE_SEV))
+ if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
- else
+ } else {
cpuid_entry_override(entry, CPUID_8000_001F_EAX);
+
+ /*
+ * Enumerate '0' for "PA bits reduction", the adjusted
+ * MAXPHYADDR is enumerated directly (see 0x80000008).
+ */
+ entry->ebx &= ~GENMASK(11, 6);
+ }
break;
/*Add support for Centaur's CPUID instruction*/
case 0xC0000000:
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 845d114ae07547..66f7f5bc3482c5 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -53,6 +53,8 @@
#include <asm/kvm_page_track.h>
#include "trace.h"
+#include "paging.h"
+
extern bool itlb_multihit_kvm_mitigation;
int __read_mostly nx_huge_pages = -1;
diff --git a/arch/x86/kvm/mmu/paging.h b/arch/x86/kvm/mmu/paging.h
new file mode 100644
index 00000000000000..de8ab323bb7076
--- /dev/null
+++ b/arch/x86/kvm/mmu/paging.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Shadow paging constants/helpers that don't need to be #undef'd. */
+#ifndef __KVM_X86_PAGING_H
+#define __KVM_X86_PAGING_H
+
+#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#define PT64_LVL_ADDR_MASK(level) \
+ (GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+ * PT64_LEVEL_BITS))) - 1))
+#define PT64_LVL_OFFSET_MASK(level) \
+ (GUEST_PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+ * PT64_LEVEL_BITS))) - 1))
+#endif /* __KVM_X86_PAGING_H */
+
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 490a028ddabe98..ee044d357b5f98 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -24,7 +24,7 @@
#define pt_element_t u64
#define guest_walker guest_walker64
#define FNAME(name) paging##64_##name
- #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+ #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
@@ -57,7 +57,7 @@
#define pt_element_t u64
#define guest_walker guest_walkerEPT
#define FNAME(name) ept_##name
- #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+ #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 7a5ce931410755..eb7b227fc6cfe1 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -38,12 +38,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
#else
#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#endif
-#define PT64_LVL_ADDR_MASK(level) \
- (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
- * PT64_LEVEL_BITS))) - 1))
-#define PT64_LVL_OFFSET_MASK(level) \
- (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
- * PT64_LEVEL_BITS))) - 1))
#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
| shadow_x_mask | shadow_nx_mask | shadow_me_mask)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 21d03e3a5dfd52..3bd09c50c98b63 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -154,6 +154,10 @@ void recalc_intercepts(struct vcpu_svm *svm)
for (i = 0; i < MAX_INTERCEPT; i++)
c->intercepts[i] |= g->intercepts[i];
+
+ /* If SMI is not intercepted, ignore guest SMI intercept as well */
+ if (!intercept_smi)
+ vmcb_clr_intercept(c, INTERCEPT_SMI);
}
static void copy_vmcb_control_area(struct vmcb_control_area *dst,
@@ -304,8 +308,8 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
return true;
}
-static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
- struct vmcb_control_area *control)
+void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+ struct vmcb_control_area *control)
{
copy_vmcb_control_area(&svm->nested.ctl, control);
@@ -618,6 +622,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
u64 vmcb12_gpa;
+ if (!svm->nested.hsave_msr) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
if (is_smm(vcpu)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -692,6 +701,27 @@ out:
return ret;
}
+/* Copy state save area fields which are handled by VMRUN */
+void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
+ struct vmcb_save_area *to_save)
+{
+ to_save->es = from_save->es;
+ to_save->cs = from_save->cs;
+ to_save->ss = from_save->ss;
+ to_save->ds = from_save->ds;
+ to_save->gdtr = from_save->gdtr;
+ to_save->idtr = from_save->idtr;
+ to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
+ to_save->efer = from_save->efer;
+ to_save->cr0 = from_save->cr0;
+ to_save->cr3 = from_save->cr3;
+ to_save->cr4 = from_save->cr4;
+ to_save->rax = from_save->rax;
+ to_save->rsp = from_save->rsp;
+ to_save->rip = from_save->rip;
+ to_save->cpl = 0;
+}
+
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
to_vmcb->save.fs = from_vmcb->save.fs;
@@ -1355,28 +1385,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
- svm->vmcb01.ptr->save.es = save->es;
- svm->vmcb01.ptr->save.cs = save->cs;
- svm->vmcb01.ptr->save.ss = save->ss;
- svm->vmcb01.ptr->save.ds = save->ds;
- svm->vmcb01.ptr->save.gdtr = save->gdtr;
- svm->vmcb01.ptr->save.idtr = save->idtr;
- svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED;
- svm->vmcb01.ptr->save.efer = save->efer;
- svm->vmcb01.ptr->save.cr0 = save->cr0;
- svm->vmcb01.ptr->save.cr3 = save->cr3;
- svm->vmcb01.ptr->save.cr4 = save->cr4;
- svm->vmcb01.ptr->save.rax = save->rax;
- svm->vmcb01.ptr->save.rsp = save->rsp;
- svm->vmcb01.ptr->save.rip = save->rip;
- svm->vmcb01.ptr->save.cpl = 0;
-
+ svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save);
nested_load_control_from_vmcb12(svm, ctl);
svm_switch_vmcb(svm, &svm->nested.vmcb02);
-
nested_vmcb02_prepare_control(svm);
-
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
ret = 0;
out_free:
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 62926f1a5f7ba4..6710d9ee2e4b2c 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1272,8 +1272,8 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
PAGE_SIZE, &n, 0);
- if (!guest_page)
- return -EFAULT;
+ if (IS_ERR(guest_page))
+ return PTR_ERR(guest_page);
/* allocate memory for header and transport buffer */
ret = -ENOMEM;
@@ -1310,8 +1310,9 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
}
/* Copy packet header to userspace. */
- ret = copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
- params.hdr_len);
+ if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
+ params.hdr_len))
+ ret = -EFAULT;
e_free_trans_data:
kfree(trans_data);
@@ -1463,11 +1464,12 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.trans_len = params.trans_len;
/* Pin guest memory */
- ret = -EFAULT;
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
PAGE_SIZE, &n, 0);
- if (!guest_page)
+ if (IS_ERR(guest_page)) {
+ ret = PTR_ERR(guest_page);
goto e_free_trans;
+ }
/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8834822c00cdcc..664d20f0689c8b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -198,6 +198,11 @@ module_param(avic, bool, 0444);
bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);
+
+bool intercept_smi = true;
+module_param(intercept_smi, bool, 0444);
+
+
static bool svm_gp_erratum_intercept = true;
static u8 rsm_ins_bytes[] = "\x0f\xaa";
@@ -1185,7 +1190,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_set_intercept(svm, INTERCEPT_INTR);
svm_set_intercept(svm, INTERCEPT_NMI);
- svm_set_intercept(svm, INTERCEPT_SMI);
+
+ if (intercept_smi)
+ svm_set_intercept(svm, INTERCEPT_SMI);
+
svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
svm_set_intercept(svm, INTERCEPT_RDPMC);
svm_set_intercept(svm, INTERCEPT_CPUID);
@@ -1923,7 +1931,7 @@ static int npf_interception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
+ u64 fault_address = svm->vmcb->control.exit_info_2;
u64 error_code = svm->vmcb->control.exit_info_1;
trace_kvm_page_fault(fault_address, error_code);
@@ -2106,6 +2114,11 @@ static int nmi_interception(struct kvm_vcpu *vcpu)
return 1;
}
+static int smi_interception(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
static int intr_interception(struct kvm_vcpu *vcpu)
{
++vcpu->stat.irq_exits;
@@ -2941,7 +2954,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm_disable_lbrv(vcpu);
break;
case MSR_VM_HSAVE_PA:
- svm->nested.hsave_msr = data;
+ /*
+ * Old kernels did not validate the value written to
+ * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid
+ * value to allow live migrating buggy or malicious guests
+ * originating from those kernels.
+ */
+ if (!msr->host_initiated && !page_address_valid(vcpu, data))
+ return 1;
+
+ svm->nested.hsave_msr = data & PAGE_MASK;
break;
case MSR_VM_CR:
return svm_set_vm_cr(vcpu, data);
@@ -3080,8 +3102,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
[SVM_EXIT_INTR] = intr_interception,
[SVM_EXIT_NMI] = nmi_interception,
- [SVM_EXIT_SMI] = kvm_emulate_as_nop,
- [SVM_EXIT_INIT] = kvm_emulate_as_nop,
+ [SVM_EXIT_SMI] = smi_interception,
[SVM_EXIT_VINTR] = interrupt_window_interception,
[SVM_EXIT_RDPMC] = kvm_emulate_rdpmc,
[SVM_EXIT_CPUID] = kvm_emulate_cpuid,
@@ -4288,6 +4309,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_host_map map_save;
int ret;
if (is_guest_mode(vcpu)) {
@@ -4303,6 +4325,29 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
ret = nested_svm_vmexit(svm);
if (ret)
return ret;
+
+ /*
+ * KVM uses VMCB01 to store L1 host state while L2 runs but
+ * VMCB01 is going to be used during SMM and thus the state will
+ * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
+ * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
+ * format of the area is identical to guest save area offsetted
+ * by 0x400 (matches the offset of 'struct vmcb_save_area'
+ * within 'struct vmcb'). Note: HSAVE area may also be used by
+ * L1 hypervisor to save additional host context (e.g. KVM does
+ * that, see svm_prepare_guest_switch()) which must be
+ * preserved.
+ */
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+ &map_save) == -EINVAL)
+ return 1;
+
+ BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
+
+ svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
+ map_save.hva + 0x400);
+
+ kvm_vcpu_unmap(vcpu, &map_save, true);
}
return 0;
}
@@ -4310,13 +4355,14 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct kvm_host_map map;
+ struct kvm_host_map map, map_save;
int ret = 0;
if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
+ struct vmcb *vmcb12;
if (guest) {
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
@@ -4332,8 +4378,25 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (svm_allocate_nested(svm))
return 1;
- ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, map.hva);
+ vmcb12 = map.hva;
+
+ nested_load_control_from_vmcb12(svm, &vmcb12->control);
+
+ ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
kvm_vcpu_unmap(vcpu, &map, true);
+
+ /*
+ * Restore L1 host state from L1 HSAVE area as VMCB01 was
+ * used during SMM (see svm_enter_smm())
+ */
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+ &map_save) == -EINVAL)
+ return 1;
+
+ svm_copy_vmrun_state(map_save.hva + 0x400,
+ &svm->vmcb01.ptr->save);
+
+ kvm_vcpu_unmap(vcpu, &map_save, true);
}
}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f89b623bb591e8..7e2090752d8fce 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -31,6 +31,7 @@
#define MSRPM_OFFSETS 16
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
+extern bool intercept_smi;
/*
* Clean bits in VMCB.
@@ -463,6 +464,8 @@ void svm_leave_nested(struct vcpu_svm *svm);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct kvm_vcpu *vcpu);
+void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
+ struct vmcb_save_area *to_save);
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
int nested_svm_vmexit(struct vcpu_svm *svm);
@@ -479,6 +482,8 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
+void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+ struct vmcb_control_area *control);
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 3979a947933afb..db88ed4f2121c1 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -14,8 +14,6 @@
#include "vmx_ops.h"
#include "cpuid.h"
-extern const u32 vmx_msr_index[];
-
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
#define MSR_TYPE_RW 3
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c6dc1b44523156..a4fd10604f7228 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9601,6 +9601,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
set_debugreg(vcpu->arch.eff_db[3], 3);
set_debugreg(vcpu->arch.dr6, 6);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
+ } else if (unlikely(hw_breakpoint_active())) {
+ set_debugreg(0, 7);
}
for (;;) {
@@ -10985,9 +10987,6 @@ int kvm_arch_hardware_setup(void *opaque)
int r;
rdmsrl_safe(MSR_EFER, &host_efer);
- if (WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_NX) &&
- !(host_efer & EFER_NX)))
- return -EIO;
if (boot_cpu_has(X86_FEATURE_XSAVES))
rdmsrl(MSR_IA32_XSS, host_xss);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 615ab254899d3b..010b59b1391764 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -45,6 +45,7 @@ enum vm_guest_mode {
VM_MODE_P40V48_64K,
VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
VM_MODE_P47V64_4K,
+ VM_MODE_P44V64_4K,
NUM_VM_MODES,
};
@@ -62,7 +63,7 @@ enum vm_guest_mode {
#elif defined(__s390x__)
-#define VM_MODE_DEFAULT VM_MODE_P47V64_4K
+#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 16)
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 9f49f6caafe5d6..632b74d6b3cac3 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -401,7 +401,7 @@ unexpected_exception:
void vm_init_descriptor_tables(struct kvm_vm *vm)
{
vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
- vm->page_size, 0, 0);
+ vm->page_size);
*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
index 25bff307c71f2a..c330f414ef96dc 100644
--- a/tools/testing/selftests/kvm/lib/guest_modes.c
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -22,6 +22,22 @@ void guest_modes_append_default(void)
}
}
#endif
+#ifdef __s390x__
+ {
+ int kvm_fd, vm_fd;
+ struct kvm_s390_vm_cpu_processor info;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
+ kvm_device_access(vm_fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_PROCESSOR, &info, false);
+ close(vm_fd);
+ close(kvm_fd);
+ /* Starting with z13 we have 47bits of physical address */
+ if (info.ibc >= 0x30)
+ guest_mode_append(VM_MODE_P47V64_4K, true, true);
+ }
+#endif
}
void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 5b56b57b3c2078..10a8ed691c6693 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -176,6 +176,7 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
[VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
+ [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
@@ -194,6 +195,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
{ 40, 48, 0x10000, 16 },
{ 0, 0, 0x1000, 12 },
{ 47, 64, 0x1000, 12 },
+ { 44, 64, 0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
@@ -282,6 +284,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
case VM_MODE_P47V64_4K:
vm->pgtable_levels = 5;
break;
+ case VM_MODE_P44V64_4K:
+ vm->pgtable_levels = 5;
+ break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 85b18bb8f7624a..72a1c9b4882cb8 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -377,7 +377,8 @@ static void test_add_max_memory_regions(void)
(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 42bd658f52a827..af27c7e829c195 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -615,7 +615,7 @@ int main(void)
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
pr_info("Testing access to Hyper-V specific MSRs\n");
guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva),
diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
index 523371cf8e8f7b..da2325fcad87bf 100644
--- a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
@@ -71,7 +71,7 @@ static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val)
/* Set up a #PF handler to eat the RSVD #PF and signal all done! */
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, PF_VECTOR, guest_pf_handler);
+ vm_install_exception_handler(vm, PF_VECTOR, guest_pf_handler);
r = _vcpu_run(vm, VCPU_ID);
TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index c1f831803ad2df..d0fe2fdce58c47 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -53,15 +53,28 @@ static inline void sync_with_host(uint64_t phase)
: "+a" (phase));
}
-void self_smi(void)
+static void self_smi(void)
{
x2apic_write_reg(APIC_ICR,
APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
}
-void guest_code(void *arg)
+static void l2_guest_code(void)
{
+ sync_with_host(8);
+
+ sync_with_host(10);
+
+ vmcall();
+}
+
+static void guest_code(void *arg)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+ struct svm_test_data *svm = arg;
+ struct vmx_pages *vmx_pages = arg;
sync_with_host(1);
@@ -74,21 +87,50 @@ void guest_code(void *arg)
sync_with_host(4);
if (arg) {
- if (cpu_has_svm())
- generic_svm_setup(arg, NULL, NULL);
- else
- GUEST_ASSERT(prepare_for_vmx_operation(arg));
+ if (cpu_has_svm()) {
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ }
sync_with_host(5);
self_smi();
sync_with_host(7);
+
+ if (cpu_has_svm()) {
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ svm->vmcb->save.rip += 3;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ } else {
+ vmlaunch();
+ vmresume();
+ }
+
+ /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
+ sync_with_host(12);
}
sync_with_host(DONE);
}
+void inject_smi(struct kvm_vm *vm)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+
+ events.smi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_SMM;
+
+ vcpu_events_set(vm, VCPU_ID, &events);
+}
+
int main(int argc, char *argv[])
{
vm_vaddr_t nested_gva = 0;
@@ -147,6 +189,22 @@ int main(int argc, char *argv[])
"Unexpected stage: #%x, got %x",
stage, stage_reported);
+ /*
+ * Enter SMM during L2 execution and check that we correctly
+ * return from it. Do not perform save/restore while in SMM yet.
+ */
+ if (stage == 8) {
+ inject_smi(vm);
+ continue;
+ }
+
+ /*
+ * Perform save/restore while the guest is in SMM triggered
+ * during L2 execution.
+ */
+ if (stage == 10)
+ inject_smi(vm);
+
state = vcpu_save_state(vm, VCPU_ID);
kvm_vm_release(vm);
kvm_vm_restart(vm, O_RDWR);
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index f08f5e82460b1d..0be80c213f7f25 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -186,7 +186,6 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
r = kvm_io_bus_unregister_dev(kvm,
zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);
- kvm_iodevice_destructor(&dev->dev);
/*
* On failure, unregister destroys all devices on the
@@ -196,6 +195,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
*/
if (r)
break;
+ kvm_iodevice_destructor(&dev->dev);
}
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7d95126cda9e12..986959833d701e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -935,7 +935,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
stat_data->kvm = kvm;
stat_data->desc = pdesc;
stat_data->kind = KVM_STAT_VCPU;
- kvm->debugfs_stat_data[i] = stat_data;
+ kvm->debugfs_stat_data[i + kvm_vm_stats_header.num_desc] = stat_data;
debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
kvm->debugfs_dentry, stat_data,
&stat_fops_per_vm);