diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2024-04-29 14:47:01 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2024-04-29 14:47:01 +1000 |
commit | eef1447f960c43dca9a96f0823e4eac94fc5bc94 (patch) | |
tree | 1f1b47feaa112f2dfbb2eae0d6cfd8d9f6bbabe4 | |
parent | b4e66e83a749a376535876b11919d5d866e90d08 (diff) | |
parent | a96cb3bf390eebfead5fc7a2092f8452a7997d1b (diff) | |
download | linux-next-history-eef1447f960c43dca9a96f0823e4eac94fc5bc94.tar.gz |
Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm.git
Notice: this object is not reachable from any branch.
# Conflicts:
# arch/x86/kvm/svm/svm.c
Notice: this object is not reachable from any branch.
54 files changed, 1181 insertions, 923 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 0b5a33ee71eea1..f0b76ff5030dcb 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8819,6 +8819,8 @@ means the VM type with value @n is supported. Possible values of @n are:: #define KVM_X86_DEFAULT_VM 0 #define KVM_X86_SW_PROTECTED_VM 1 + #define KVM_X86_SEV_VM 2 + #define KVM_X86_SEV_ES_VM 3 Note, KVM_X86_SW_PROTECTED_VM is currently only for development and testing. Do not use KVM_X86_SW_PROTECTED_VM for "real" VMs, and especially not in diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst index 84335d119ff136..3381556d596ddc 100644 --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst @@ -76,15 +76,49 @@ are defined in ``<linux/psp-dev.h>``. KVM implements the following commands to support common lifecycle events of SEV guests, such as launching, running, snapshotting, migrating and decommissioning. -1. KVM_SEV_INIT ---------------- +1. KVM_SEV_INIT2 +---------------- -The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform +The KVM_SEV_INIT2 command is used by the hypervisor to initialize the SEV platform context. In a typical workflow, this command should be the first command issued. +For this command to be accepted, either KVM_X86_SEV_VM or KVM_X86_SEV_ES_VM +must have been passed to the KVM_CREATE_VM ioctl. A virtual machine created +with those machine types in turn cannot be run until KVM_SEV_INIT2 is invoked. + +Parameters: struct kvm_sev_init (in) Returns: 0 on success, -negative on error +:: + + struct kvm_sev_init { + __u64 vmsa_features; /* initial value of features field in VMSA */ + __u32 flags; /* must be 0 */ + __u32 pad[9]; + }; + +It is an error if the hypervisor does not support any of the bits that +are set in ``flags`` or ``vmsa_features``. ``vmsa_features`` must be +0 for SEV virtual machines, as they do not have a VMSA. + +This command replaces the deprecated KVM_SEV_INIT and KVM_SEV_ES_INIT commands. +The commands did not have any parameters (the ```data``` field was unused) and +only work for the KVM_X86_DEFAULT_VM machine type (0). + +They behave as if: + +* the VM type is KVM_X86_SEV_VM for KVM_SEV_INIT, or KVM_X86_SEV_ES_VM for + KVM_SEV_ES_INIT + +* the ``flags`` and ``vmsa_features`` fields of ``struct kvm_sev_init`` are + set to zero + +If the ``KVM_X86_SEV_VMSA_FEATURES`` attribute does not exist, the hypervisor only +supports KVM_SEV_INIT and KVM_SEV_ES_INIT. In that case, note that KVM_SEV_ES_INIT +might set the debug swap VMSA feature (bit 5) depending on the value of the +``debug_swap`` parameter of ``kvm-amd.ko``. + 2. KVM_SEV_LAUNCH_START ----------------------- @@ -425,6 +459,18 @@ issued by the hypervisor to make the guest ready for execution. Returns: 0 on success, -negative on error +Device attribute API +==================== + +Attributes of the SEV implementation can be retrieved through the +``KVM_HAS_DEVICE_ATTR`` and ``KVM_GET_DEVICE_ATTR`` ioctls on the ``/dev/kvm`` +device node, using group ``KVM_X86_GRP_SEV``. + +Currently only one attribute is implemented: + +* ``KVM_X86_SEV_VMSA_FEATURES``: return the set of all bits that + are accepted in the ``vmsa_features`` of ``KVM_SEV_INIT2``. + Firmware Management =================== diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index dc04bc7678659a..ff17849be9f44b 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1768,40 +1768,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - kvm_pfn_t pfn = pte_pfn(range->arg.pte); - - if (!kvm->arch.mmu.pgt) - return false; - - WARN_ON(range->end - range->start != 1); - - /* - * If the page isn't tagged, defer to user_mem_abort() for sanitising - * the MTE tags. The S2 pte should have been unmapped by - * mmu_notifier_invalidate_range_end(). - */ - if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn))) - return false; - - /* - * We've moved a page around, probably through CoW, so let's treat - * it just like a translation fault and the map handler will clean - * the cache to the PoC. - * - * The MMU notifiers will have unmapped a huge PMD before calling - * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and - * therefore we never need to clear out a huge PMD through this - * calling path and a memcache is not required. - */ - kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, - PAGE_SIZE, __pfn_to_phys(pfn), - KVM_PGTABLE_PROT_R, NULL, 0); - - return false; -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { u64 size = (range->end - range->start) << PAGE_SHIFT; diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 2d62f7b0d377b5..69305441f40d22 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -203,7 +203,6 @@ void kvm_flush_tlb_all(void); void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa); int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write); -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index a556cff3574023..98883aa23ab827 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -494,38 +494,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) range->end << PAGE_SHIFT, &ctx); } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - unsigned long prot_bits; - kvm_pte_t *ptep; - kvm_pfn_t pfn = pte_pfn(range->arg.pte); - gpa_t gpa = range->start << PAGE_SHIFT; - - ptep = kvm_populate_gpa(kvm, NULL, gpa, 0); - if (!ptep) - return false; - - /* Replacing an absent or old page doesn't need flushes */ - if (!kvm_pte_present(NULL, ptep) || !kvm_pte_young(*ptep)) { - kvm_set_pte(ptep, 0); - return false; - } - - /* Fill new pte if write protected or page migrated */ - prot_bits = _PAGE_PRESENT | __READABLE; - prot_bits |= _CACHE_MASK & pte_val(range->arg.pte); - - /* - * Set _PAGE_WRITE or _PAGE_DIRTY iff old and new pte both support - * _PAGE_WRITE for map_page_fast if next page write fault - * _PAGE_DIRTY since gpa has already recorded as dirty page - */ - prot_bits |= __WRITEABLE & *ptep & pte_val(range->arg.pte); - kvm_set_pte(ptep, kvm_pfn_pte(pfn, __pgprot(prot_bits))); - - return true; -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_ptw_ctx ctx; diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 467ee6b95ae1c6..c17157e700c034 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -444,36 +444,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return true; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - gpa_t gpa = range->start << PAGE_SHIFT; - pte_t hva_pte = range->arg.pte; - pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); - pte_t old_pte; - - if (!gpa_pte) - return false; - - /* Mapping may need adjusting depending on memslot flags */ - old_pte = *gpa_pte; - if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte)) - hva_pte = pte_mkclean(hva_pte); - else if (range->slot->flags & KVM_MEM_READONLY) - hva_pte = pte_wrprotect(hva_pte); - - set_pte(gpa_pte, hva_pte); - - /* Replacing an absent or old page doesn't need flushes */ - if (!pte_present(old_pte) || !pte_young(old_pte)) - return false; - - /* Pages swapped, aged, moved, or cleaned require flushes */ - return !pte_present(hva_pte) || - !pte_young(hva_pte) || - pte_pfn(old_pte) != pte_pfn(hva_pte) || - (pte_dirty(old_pte) && !pte_dirty(hva_pte)); -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3281215097cc63..ca3829d47ab739 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -287,7 +287,6 @@ struct kvmppc_ops { bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range); bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); - bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); void (*free_memslot)(struct kvm_memory_slot *slot); int (*init_vm)(struct kvm *kvm); void (*destroy_vm)(struct kvm *kvm); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 8acec144120eaf..0d0624088e6b68 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -899,11 +899,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) return kvm->arch.kvm_ops->test_age_gfn(kvm, range); } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - return kvm->arch.kvm_ops->set_spte_gfn(kvm, range); -} - int kvmppc_core_init_vm(struct kvm *kvm) { diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 58391b4b32ed2d..4aa2ab89afbcae 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -12,7 +12,6 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range); extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); -extern bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2b1f0cdd8c1888..1b51b1c4713bf2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1010,18 +1010,6 @@ bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) return kvm_test_age_rmapp(kvm, range->slot, range->start); } -bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) -{ - WARN_ON(range->start + 1 != range->end); - - if (kvm_is_radix(kvm)) - kvm_unmap_radix(kvm, range->slot, range->start); - else - kvm_unmap_rmapp(kvm, range->slot, range->start); - - return false; -} - static int vcpus_running(struct kvm *kvm) { return atomic_read(&kvm->arch.vcpus_running) != 0; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8e86eb577eb8e1..35cb014a0c51d1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -6364,7 +6364,6 @@ static struct kvmppc_ops kvm_ops_hv = { .unmap_gfn_range = kvm_unmap_gfn_range_hv, .age_gfn = kvm_age_gfn_hv, .test_age_gfn = kvm_test_age_gfn_hv, - .set_spte_gfn = kvm_set_spte_gfn_hv, .free_memslot = kvmppc_core_free_memslot_hv, .init_vm = kvmppc_core_init_vm_hv, .destroy_vm = kvmppc_core_destroy_vm_hv, diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 5b92619a05fdf2..a7d7137ea0c84f 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -461,12 +461,6 @@ static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range) -{ - /* The page will get remapped properly on its next fault */ - return do_kvm_unmap_gfn(kvm, range); -} - /*****************************************/ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) @@ -2071,7 +2065,6 @@ static struct kvmppc_ops kvm_ops_pr = { .unmap_gfn_range = kvm_unmap_gfn_range_pr, .age_gfn = kvm_age_gfn_pr, .test_age_gfn = kvm_test_age_gfn_pr, - .set_spte_gfn = kvm_set_spte_gfn_pr, .free_memslot = kvmppc_core_free_memslot_pr, .init_vm = kvmppc_core_init_vm_pr, .destroy_vm = kvmppc_core_destroy_vm_pr, diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index ccb8f16ffe412f..c664fdec75b1ab 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -747,12 +747,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - /* The page will get remapped properly on its next fault */ - return kvm_e500_mmu_unmap_gfn(kvm, range); -} - /*****************************************/ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index a9e2fd7245e1e9..b63650f9b966ac 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -550,26 +550,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - int ret; - kvm_pfn_t pfn = pte_pfn(range->arg.pte); - - if (!kvm->arch.pgd) - return false; - - WARN_ON(range->end - range->start != 1); - - ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT, - __pfn_to_phys(pfn), PAGE_SIZE, true, true); - if (ret) { - kvm_debug("Failed to map G-stage page (error %d)\n", ret); - return true; - } - - return false; -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { pte_t *ptep; diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index a2be3aefff9f89..f86ad3335529d4 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -143,6 +143,9 @@ extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfe extern u64 xstate_get_guest_group_perm(void); +extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); + + /* KVM specific functions */ extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 110d7f29ca9a20..5187fcf4b610b9 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -121,6 +121,7 @@ KVM_X86_OP(enter_smm) KVM_X86_OP(leave_smm) KVM_X86_OP(enable_smi_window) #endif +KVM_X86_OP_OPTIONAL(dev_get_attr) KVM_X86_OP_OPTIONAL(mem_enc_ioctl) KVM_X86_OP_OPTIONAL(mem_enc_register_region) KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6efd1497b02636..3cb7647ea2f20d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1280,12 +1280,14 @@ enum kvm_apicv_inhibit { }; struct kvm_arch { - unsigned long vm_type; unsigned long n_used_mmu_pages; unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; u8 mmu_valid_gen; + u8 vm_type; + bool has_private_mem; + bool has_protected_state; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; @@ -1779,6 +1781,7 @@ struct kvm_x86_ops { void (*enable_smi_window)(struct kvm_vcpu *vcpu); #endif + int (*dev_get_attr)(u32 group, u64 attr, u64 *val); int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); @@ -2140,6 +2143,10 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, kvm_set_or_clear_apicv_inhibit(kvm, reason, false); } +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, @@ -2153,8 +2160,9 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level); + #ifdef CONFIG_KVM_PRIVATE_MEM -#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM) +#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) #else #define kvm_arch_has_private_mem(kvm) false #endif diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index ef11aa4cab4253..72ad5ace118d3c 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -457,8 +457,13 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 -/* attributes for system fd (group 0) */ -#define KVM_X86_XCOMP_GUEST_SUPP 0 +/* vendor-independent attributes for system fd (group 0) */ +#define KVM_X86_GRP_SYSTEM 0 +# define KVM_X86_XCOMP_GUEST_SUPP 0 + +/* vendor-specific groups and attributes for system fd */ +#define KVM_X86_GRP_SEV 1 +# define KVM_X86_SEV_VMSA_FEATURES 0 struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; @@ -689,6 +694,9 @@ enum sev_cmd_id { /* Guest Migration Extension */ KVM_SEV_SEND_CANCEL, + /* Second time is the charm; improved versions of the above ioctls. */ + KVM_SEV_INIT2, + KVM_SEV_NR_MAX, }; @@ -700,6 +708,12 @@ struct kvm_sev_cmd { __u32 sev_fd; }; +struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; + __u32 pad[9]; +}; + struct kvm_sev_launch_start { __u32 handle; __u32 policy; @@ -856,5 +870,7 @@ struct kvm_hyperv_eventfd { #define KVM_X86_DEFAULT_VM 0 #define KVM_X86_SW_PROTECTED_VM 1 +#define KVM_X86_SEV_VM 2 +#define KVM_X86_SEV_ES_VM 3 #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 6276329f5e660f..c5a026fee5e061 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -991,6 +991,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) return __raw_xsave_addr(xsave, xfeature_nr); } +EXPORT_SYMBOL_GPL(get_xsave_addr); #ifdef CONFIG_ARCH_HAS_PKEYS diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 19ca623ffa2ac7..05df04f3962823 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -54,8 +54,6 @@ extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void extern void fpu__init_cpu_xstate(void); extern void fpu__init_system_xstate(unsigned int legacy_size); -extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); - static inline u64 xfeatures_mask_supervisor(void) { return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED; diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index addc44fc7187d6..5494669a055a6e 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -16,14 +16,15 @@ kvm-$(CONFIG_KVM_XEN) += xen.o kvm-$(CONFIG_KVM_SMM) += smm.o kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ - vmx/nested.o vmx/posted_intr.o + vmx/nested.o vmx/posted_intr.o vmx/main.o kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o kvm-intel-$(CONFIG_KVM_HYPERV) += vmx/hyperv.o vmx/hyperv_evmcs.o -kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o \ - svm/sev.o -kvm-amd-$(CONFIG_KVM_HYPERV) += svm/hyperv.o +kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o + +kvm-amd-$(CONFIG_KVM_AMD_SEV) += svm/sev.o +kvm-amd-$(CONFIG_KVM_HYPERV) += svm/hyperv.o ifdef CONFIG_HYPERV kvm-y += kvm_onhyperv.o diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 77352a4abd87f8..1851b3870a9c05 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -772,7 +772,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_mask(CPUID_8000_000A_EDX, 0); kvm_cpu_cap_mask(CPUID_8000_001F_EAX, - 0 /* SME */ | F(SEV) | 0 /* VM_PAGE_FLUSH */ | F(SEV_ES) | + 0 /* SME */ | 0 /* SEV */ | 0 /* VM_PAGE_FLUSH */ | 0 /* SEV_ES */ | F(SME_COHERENT)); kvm_cpu_cap_mask(CPUID_8000_0021_EAX, diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index db007a4dffa2e1..69a872d151a9be 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -432,8 +432,8 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) * The idea using the light way get the spte on x86_32 guest is from * gup_get_pte (mm/gup.c). * - * An spte tlb flush may be pending, because kvm_set_pte_rmap - * coalesces them and we are running out of the MMU lock. Therefore + * An spte tlb flush may be pending, because they are coalesced and + * we are running out of the MMU lock. Therefore * we need to protect against in-progress updates of the spte. * * Reading the spte while an update is in progress may get the old value @@ -1448,49 +1448,11 @@ static bool __kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, } static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level, - pte_t unused) + struct kvm_memory_slot *slot, gfn_t gfn, int level) { return __kvm_zap_rmap(kvm, rmap_head, slot); } -static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level, - pte_t pte) -{ - u64 *sptep; - struct rmap_iterator iter; - bool need_flush = false; - u64 new_spte; - kvm_pfn_t new_pfn; - - WARN_ON_ONCE(pte_huge(pte)); - new_pfn = pte_pfn(pte); - -restart: - for_each_rmap_spte(rmap_head, &iter, sptep) { - need_flush = true; - - if (pte_write(pte)) { - kvm_zap_one_rmap_spte(kvm, rmap_head, sptep); - goto restart; - } else { - new_spte = kvm_mmu_changed_pte_notifier_make_spte( - *sptep, new_pfn); - - mmu_spte_clear_track_bits(kvm, sptep); - mmu_spte_set(sptep, new_spte); - } - } - - if (need_flush && kvm_available_flush_remote_tlbs_range()) { - kvm_flush_remote_tlbs_gfn(kvm, gfn, level); - return false; - } - - return need_flush; -} - struct slot_rmap_walk_iterator { /* input fields. */ const struct kvm_memory_slot *slot; @@ -1562,7 +1524,7 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, - int level, pte_t pte); + int level); static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, @@ -1574,7 +1536,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm, for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, range->start, range->end - 1, &iterator) ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn, - iterator.level, range->arg.pte); + iterator.level); return ret; } @@ -1596,22 +1558,8 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return flush; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - bool flush = false; - - if (kvm_memslots_have_rmaps(kvm)) - flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmap); - - if (tdp_mmu_enabled) - flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range); - - return flush; -} - static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level, - pte_t unused) + struct kvm_memory_slot *slot, gfn_t gfn, int level) { u64 *sptep; struct rmap_iterator iter; @@ -1624,8 +1572,7 @@ static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, } static bool kvm_test_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, - int level, pte_t unused) + struct kvm_memory_slot *slot, gfn_t gfn, int level) { u64 *sptep; struct rmap_iterator iter; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 4a599130e9c994..6c7ab3aa6aa7f9 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -322,22 +322,6 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled) return spte; } -u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn) -{ - u64 new_spte; - - new_spte = old_spte & ~SPTE_BASE_ADDR_MASK; - new_spte |= (u64)new_pfn << PAGE_SHIFT; - - new_spte &= ~PT_WRITABLE_MASK; - new_spte &= ~shadow_host_writable_mask; - new_spte &= ~shadow_mmu_writable_mask; - - new_spte = mark_spte_for_access_track(new_spte); - - return new_spte; -} - u64 mark_spte_for_access_track(u64 spte) { if (spte_ad_enabled(spte)) diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index a129951c9a8856..f5c600c52f838b 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -496,8 +496,6 @@ static inline u64 restore_acc_track_spte(u64 spte) return spte; } -u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn); - void __init kvm_mmu_spte_module_init(void); void kvm_mmu_reset_all_pte_masks(void); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 04c1f0957fea87..af21703c848c16 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1258,52 +1258,6 @@ bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) return kvm_tdp_mmu_handle_gfn(kvm, range, test_age_gfn); } -static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter, - struct kvm_gfn_range *range) -{ - u64 new_spte; - - /* Huge pages aren't expected to be modified without first being zapped. */ - WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end); - - if (iter->level != PG_LEVEL_4K || - !is_shadow_present_pte(iter->old_spte)) - return false; - - /* - * Note, when changing a read-only SPTE, it's not strictly necessary to - * zero the SPTE before setting the new PFN, but doing so preserves the - * invariant that the PFN of a present * leaf SPTE can never change. - * See handle_changed_spte(). - */ - tdp_mmu_iter_set_spte(kvm, iter, 0); - - if (!pte_write(range->arg.pte)) { - new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte, - pte_pfn(range->arg.pte)); - - tdp_mmu_iter_set_spte(kvm, iter, new_spte); - } - - return true; -} - -/* - * Handle the changed_pte MMU notifier for the TDP MMU. - * data is a pointer to the new pte_t mapping the HVA specified by the MMU - * notifier. - * Returns non-zero if a flush is needed before releasing the MMU lock. - */ -bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - /* - * No need to handle the remote TLB flush under RCU protection, the - * target SPTE _must_ be a leaf SPTE, i.e. cannot result in freeing a - * shadow page. See the WARN on pfn_changed in handle_changed_spte(). - */ - return kvm_tdp_mmu_handle_gfn(kvm, range, set_spte_gfn); -} - /* * Remove write access from all SPTEs at or above min_level that map GFNs * [start, end). Returns true if an SPTE has been changed and the TLBs need to diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 6e1ea04ca885e5..58b55e61bd33e0 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -31,7 +31,6 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool flush); bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); -bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, const struct kvm_memory_slot *slot, int min_level); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 759581bb2128da..4d813c38f0cbbc 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -23,6 +23,7 @@ #include <asm/pkru.h> #include <asm/trapnr.h> #include <asm/fpu/xcr.h> +#include <asm/fpu/xstate.h> #include <asm/debugreg.h> #include "mmu.h" @@ -32,22 +33,9 @@ #include "cpuid.h" #include "trace.h" -#ifndef CONFIG_KVM_AMD_SEV -/* - * When this config is not defined, SEV feature is not supported and APIs in - * this file are not used but this file still gets compiled into the KVM AMD - * module. - * - * We will not have MISC_CG_RES_SEV and MISC_CG_RES_SEV_ES entries in the enum - * misc_res_type {} defined in linux/misc_cgroup.h. - * - * Below macros allow compilation to succeed. - */ -#define MISC_CG_RES_SEV MISC_CG_RES_TYPES -#define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES -#endif +#define GHCB_VERSION_MAX 1ULL +#define GHCB_VERSION_MIN 1ULL -#ifdef CONFIG_KVM_AMD_SEV /* enable/disable SEV support */ static bool sev_enabled = true; module_param_named(sev, sev_enabled, bool, 0444); @@ -57,13 +45,9 @@ static bool sev_es_enabled = true; module_param_named(sev_es, sev_es_enabled, bool, 0444); /* enable/disable SEV-ES DebugSwap support */ -static bool sev_es_debug_swap_enabled = false; +static bool sev_es_debug_swap_enabled = true; module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444); -#else -#define sev_enabled false -#define sev_es_enabled false -#define sev_es_debug_swap_enabled false -#endif /* CONFIG_KVM_AMD_SEV */ +static u64 sev_supported_vmsa_features; static u8 sev_enc_bit; static DECLARE_RWSEM(sev_deactivate_lock); @@ -113,7 +97,15 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) static inline bool is_mirroring_enc_context(struct kvm *kvm) { - return !!to_kvm_svm(kvm)->sev_info.enc_context_owner; + return !!to_kvm_sev_info(kvm)->enc_context_owner; +} + +static bool sev_vcpu_has_debug_swap(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; + + return sev->vmsa_features & SVM_SEV_FEAT_DEBUG_SWAP; } /* Must be called with the sev_bitmap_lock held */ @@ -251,20 +243,32 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) sev_decommission(handle); } -static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) +static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, + struct kvm_sev_init *data, + unsigned long vm_type) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_platform_init_args init_args = {0}; + bool es_active = vm_type != KVM_X86_SEV_VM; + u64 valid_vmsa_features = es_active ? sev_supported_vmsa_features : 0; int ret; if (kvm->created_vcpus) return -EINVAL; + if (data->flags) + return -EINVAL; + + if (data->vmsa_features & ~valid_vmsa_features) + return -EINVAL; + if (unlikely(sev->active)) return -EINVAL; sev->active = true; - sev->es_active = argp->id == KVM_SEV_ES_INIT; + sev->es_active = es_active; + sev->vmsa_features = data->vmsa_features; + ret = sev_asid_new(sev); if (ret) goto e_no_asid; @@ -276,6 +280,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) INIT_LIST_HEAD(&sev->regions_list); INIT_LIST_HEAD(&sev->mirror_vms); + sev->need_init = false; kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV); @@ -286,11 +291,44 @@ e_free: sev_asid_free(sev); sev->asid = 0; e_no_asid: + sev->vmsa_features = 0; sev->es_active = false; sev->active = false; return ret; } +static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_init data = { + .vmsa_features = 0, + }; + unsigned long vm_type; + + if (kvm->arch.vm_type != KVM_X86_DEFAULT_VM) + return -EINVAL; + + vm_type = (argp->id == KVM_SEV_INIT ? KVM_X86_SEV_VM : KVM_X86_SEV_ES_VM); + return __sev_guest_init(kvm, argp, &data, vm_type); +} + +static int sev_guest_init2(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct kvm_sev_init data; + + if (!sev->need_init) + return -EINVAL; + + if (kvm->arch.vm_type != KVM_X86_SEV_VM && + kvm->arch.vm_type != KVM_X86_SEV_ES_VM) + return -EINVAL; + + if (copy_from_user(&data, u64_to_user_ptr(argp->data), sizeof(data))) + return -EFAULT; + + return __sev_guest_init(kvm, argp, &data, kvm->arch.vm_type); +} + static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { unsigned int asid = sev_get_asid(kvm); @@ -339,7 +377,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) return -EFAULT; memset(&start, 0, sizeof(start)); @@ -383,7 +421,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) /* return handle to userspace */ params.handle = start.handle; - if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) { + if (copy_to_user(u64_to_user_ptr(argp->data), ¶ms, sizeof(params))) { sev_unbind_asid(kvm, start.handle); ret = -EFAULT; goto e_free_session; @@ -522,7 +560,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) return -EFAULT; vaddr = params.uaddr; @@ -580,7 +618,13 @@ e_unpin: static int sev_es_sync_vmsa(struct vcpu_svm *svm) { + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; struct sev_es_save_area *save = svm->sev_es.vmsa; + struct xregs_state *xsave; + const u8 *s; + u8 *d; + int i; /* Check some debug related fields before encrypting the VMSA */ if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1)) @@ -621,10 +665,44 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->xss = svm->vcpu.arch.ia32_xss; save->dr6 = svm->vcpu.arch.dr6; - if (sev_es_debug_swap_enabled) { - save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP; - pr_warn_once("Enabling DebugSwap with KVM_SEV_ES_INIT. " - "This will not work starting with Linux 6.10\n"); + save->sev_features = sev->vmsa_features; + + /* + * Skip FPU and AVX setup with KVM_SEV_ES_INIT to avoid + * breaking older measurements. + */ + if (vcpu->kvm->arch.vm_type != KVM_X86_DEFAULT_VM) { + xsave = &vcpu->arch.guest_fpu.fpstate->regs.xsave; + save->x87_dp = xsave->i387.rdp; + save->mxcsr = xsave->i387.mxcsr; + save->x87_ftw = xsave->i387.twd; + save->x87_fsw = xsave->i387.swd; + save->x87_fcw = xsave->i387.cwd; + save->x87_fop = xsave->i387.fop; + save->x87_ds = 0; + save->x87_cs = 0; + save->x87_rip = xsave->i387.rip; + + for (i = 0; i < 8; i++) { + /* + * The format of the x87 save area is undocumented and + * definitely not what you would expect. It consists of + * an 8*8 bytes area with bytes 0-7, and an 8*2 bytes + * area with bytes 8-9 of each register. + */ + d = save->fpreg_x87 + i * 8; + s = ((u8 *)xsave->i387.st_space) + i * 16; + memcpy(d, s, 8); + save->fpreg_x87[64 + i * 2] = s[8]; + save->fpreg_x87[64 + i * 2 + 1] = s[9]; + } + memcpy(save->fpreg_xmm, xsave->i387.xmm_space, 256); + + s = get_xsave_addr(xsave, XFEATURE_YMM); + if (s) + memcpy(save->fpreg_ymm, s, 256); + else + memset(save->fpreg_ymm, 0, 256); } pr_debug("Virtual Machine Save Area (VMSA):\n"); @@ -658,13 +736,20 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE); vmsa.reserved = 0; - vmsa.handle = to_kvm_svm(kvm)->sev_info.handle; + vmsa.handle = to_kvm_sev_info(kvm)->handle; vmsa.address = __sme_pa(svm->sev_es.vmsa); vmsa.len = PAGE_SIZE; ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); if (ret) return ret; + /* + * SEV-ES guests maintain an encrypted version of their FPU + * state which is restored and saved on VMRUN and VMEXIT. + * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't + * do xsave/xrstor on it. + */ + fpstate_set_confidential(&vcpu->arch.guest_fpu); vcpu->arch.guest_state_protected = true; return 0; } @@ -695,7 +780,7 @@ static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) { - void __user *measure = (void __user *)(uintptr_t)argp->data; + void __user *measure = u64_to_user_ptr(argp->data); struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_measure data; struct kvm_sev_launch_measure params; @@ -715,7 +800,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!params.len) goto cmd; - p = (void __user *)(uintptr_t)params.uaddr; + p = u64_to_user_ptr(params.uaddr); if (p) { if (params.len > SEV_FW_BLOB_MAX_SIZE) return -EINVAL; @@ -788,7 +873,7 @@ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) params.state = data.state; params.handle = data.handle; - if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) + if (copy_to_user(u64_to_user_ptr(argp->data), ¶ms, sizeof(params))) ret = -EFAULT; return ret; @@ -953,7 +1038,7 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug))) + if (copy_from_user(&debug, u64_to_user_ptr(argp->data), sizeof(debug))) return -EFAULT; if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr) @@ -1037,7 +1122,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) return -EFAULT; pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1); @@ -1101,7 +1186,7 @@ e_unpin_memory: static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp) { - void __user *report = (void __user *)(uintptr_t)argp->data; + void __user *report = u64_to_user_ptr(argp->data); struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_attestation_report data; struct kvm_sev_attestation_report params; @@ -1112,7 +1197,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) return -EFAULT; memset(&data, 0, sizeof(data)); @@ -1121,7 +1206,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!params.len) goto cmd; - p = (void __user *)(uintptr_t)params.uaddr; + p = u64_to_user_ptr(params.uaddr); if (p) { if (params.len > SEV_FW_BLOB_MAX_SIZE) return -EINVAL; @@ -1174,7 +1259,7 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp, ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error); params->session_len = data.session_len; - if (copy_to_user((void __user *)(uintptr_t)argp->data, params, + if (copy_to_user(u64_to_user_ptr(argp->data), params, sizeof(struct kvm_sev_send_start))) ret = -EFAULT; @@ -1193,7 +1278,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(struct kvm_sev_send_start))) return -EFAULT; @@ -1248,7 +1333,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp) ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error); - if (!ret && copy_to_user((void __user *)(uintptr_t)params.session_uaddr, + if (!ret && copy_to_user(u64_to_user_ptr(params.session_uaddr), session_data, params.session_len)) { ret = -EFAULT; goto e_free_amd_cert; @@ -1256,7 +1341,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp) params.policy = data.policy; params.session_len = data.session_len; - if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, + if (copy_to_user(u64_to_user_ptr(argp->data), ¶ms, sizeof(struct kvm_sev_send_start))) ret = -EFAULT; @@ -1287,7 +1372,7 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp, params->hdr_len = data.hdr_len; params->trans_len = data.trans_len; - if (copy_to_user((void __user *)(uintptr_t)argp->data, params, + if (copy_to_user(u64_to_user_ptr(argp->data), params, sizeof(struct kvm_sev_send_update_data))) ret = -EFAULT; @@ -1307,7 +1392,7 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(struct kvm_sev_send_update_data))) return -EFAULT; @@ -1358,14 +1443,14 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) goto e_free_trans_data; /* copy transport buffer to user space */ - if (copy_to_user((void __user *)(uintptr_t)params.trans_uaddr, + if (copy_to_user(u64_to_user_ptr(params.trans_uaddr), trans_data, params.trans_len)) { ret = -EFAULT; goto e_free_trans_data; } /* Copy packet header to userspace. */ - if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr, + if (copy_to_user(u64_to_user_ptr(params.hdr_uaddr), hdr, params.hdr_len)) ret = -EFAULT; @@ -1417,7 +1502,7 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp) return -ENOTTY; /* Get parameter from the userspace */ - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(struct kvm_sev_receive_start))) return -EFAULT; @@ -1459,7 +1544,7 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp) } params.handle = start.handle; - if (copy_to_user((void __user *)(uintptr_t)argp->data, + if (copy_to_user(u64_to_user_ptr(argp->data), ¶ms, sizeof(struct kvm_sev_receive_start))) { ret = -EFAULT; sev_unbind_asid(kvm, start.handle); @@ -1490,7 +1575,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -EINVAL; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(struct kvm_sev_receive_update_data))) return -EFAULT; @@ -1705,6 +1790,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) dst->pages_locked = src->pages_locked; dst->enc_context_owner = src->enc_context_owner; dst->es_active = src->es_active; + dst->vmsa_features = src->vmsa_features; src->asid = 0; src->active = false; @@ -1812,7 +1898,8 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) if (ret) goto out_fput; - if (sev_guest(kvm) || !sev_guest(source_kvm)) { + if (kvm->arch.vm_type != source_kvm->arch.vm_type || + sev_guest(kvm) || !sev_guest(source_kvm)) { ret = -EINVAL; goto out_unlock; } @@ -1861,6 +1948,21 @@ out_fput: return ret; } +int sev_dev_get_attr(u32 group, u64 attr, u64 *val) +{ + if (group != KVM_X86_GRP_SEV) + return -ENXIO; + + switch (attr) { + case KVM_X86_SEV_VMSA_FEATURES: + *val = sev_supported_vmsa_features; + return 0; + + default: + return -ENXIO; + } +} + int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) { struct kvm_sev_cmd sev_cmd; @@ -1894,6 +1996,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) case KVM_SEV_INIT: r = sev_guest_init(kvm, &sev_cmd); break; + case KVM_SEV_INIT2: + r = sev_guest_init2(kvm, &sev_cmd); + break; case KVM_SEV_LAUNCH_START: r = sev_launch_start(kvm, &sev_cmd); break; @@ -2121,6 +2226,7 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd) mirror_sev->asid = source_sev->asid; mirror_sev->fd = source_sev->fd; mirror_sev->es_active = source_sev->es_active; + mirror_sev->need_init = false; mirror_sev->handle = source_sev->handle; INIT_LIST_HEAD(&mirror_sev->regions_list); INIT_LIST_HEAD(&mirror_sev->mirror_vms); @@ -2186,15 +2292,18 @@ void sev_vm_destroy(struct kvm *kvm) void __init sev_set_cpu_caps(void) { - if (!sev_enabled) - kvm_cpu_cap_clear(X86_FEATURE_SEV); - if (!sev_es_enabled) - kvm_cpu_cap_clear(X86_FEATURE_SEV_ES); + if (sev_enabled) { + kvm_cpu_cap_set(X86_FEATURE_SEV); + kvm_caps.supported_vm_types |= BIT(KVM_X86_SEV_VM); + } + if (sev_es_enabled) { + kvm_cpu_cap_set(X86_FEATURE_SEV_ES); + kvm_caps.supported_vm_types |= BIT(KVM_X86_SEV_ES_VM); + } } void __init sev_hardware_setup(void) { -#ifdef CONFIG_KVM_AMD_SEV unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count; bool sev_es_supported = false; bool sev_supported = false; @@ -2294,7 +2403,10 @@ out: if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) || !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP)) sev_es_debug_swap_enabled = false; -#endif + + sev_supported_vmsa_features = 0; + if (sev_es_debug_swap_enabled) + sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP; } void sev_hardware_unsetup(void) @@ -3063,7 +3175,7 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) svm_set_intercept(svm, TRAP_CR8_WRITE); vmcb->control.intercepts[INTERCEPT_DR] = 0; - if (!sev_es_debug_swap_enabled) { + if (!sev_vcpu_has_debug_swap(svm)) { vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); recalc_intercepts(svm); @@ -3118,7 +3230,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm) sev_enc_bit)); } -void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa) +void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa) { /* * All host state for SEV-ES guests is categorized into three swap types @@ -3146,7 +3258,7 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa) * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both * saves and loads debug registers (Type-A). */ - if (sev_es_debug_swap_enabled) { + if (sev_vcpu_has_debug_swap(svm)) { hostsa->dr0 = native_get_debugreg(0); hostsa->dr1 = native_get_debugreg(1); hostsa->dr2 = native_get_debugreg(2); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9aaf83c8d57df7..c78a8c531e09f2 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1433,14 +1433,6 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu) vmsa_page = snp_safe_alloc_page(vcpu); if (!vmsa_page) goto error_free_vmcb_page; - - /* - * SEV-ES guests maintain an encrypted version of their FPU - * state which is restored and saved on VMRUN and VMEXIT. - * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't - * do xsave/xrstor on it. - */ - fpstate_set_confidential(&vcpu->arch.guest_fpu); } err = avic_init_vcpu(svm); @@ -1525,7 +1517,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) */ vmsave(sd->save_area_pa); if (sev_es_guest(vcpu->kvm)) - sev_es_prepare_switch_to_guest(sev_es_host_save_area(sd)); + sev_es_prepare_switch_to_guest(svm, sev_es_host_save_area(sd)); if (tsc_scaling) __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); @@ -3304,7 +3296,9 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, +#ifdef CONFIG_KVM_AMD_SEV [SVM_EXIT_VMGEXIT] = sev_handle_vmgexit, +#endif }; static void dump_vmcb(struct kvm_vcpu *vcpu) @@ -4085,6 +4079,9 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu) { + if (to_kvm_sev_info(vcpu->kvm)->need_init) + return -EINVAL; + return 1; } @@ -4892,6 +4889,14 @@ static void svm_vm_destroy(struct kvm *kvm) static int svm_vm_init(struct kvm *kvm) { + int type = kvm->arch.vm_type; + + if (type != KVM_X86_DEFAULT_VM && + type != KVM_X86_SW_PROTECTED_VM) { + kvm->arch.has_protected_state = (type == KVM_X86_SEV_ES_VM); + to_kvm_sev_info(kvm)->need_init = true; + } + if (!pause_filter_count || !pause_filter_thresh) kvm->arch.pause_in_guest = true; @@ -5026,6 +5031,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .enable_smi_window = svm_enable_smi_window, #endif +#ifdef CONFIG_KVM_AMD_SEV + .dev_get_attr = sev_dev_get_attr, .mem_enc_ioctl = sev_mem_enc_ioctl, .mem_enc_register_region = sev_mem_enc_register_region, .mem_enc_unregister_region = sev_mem_enc_unregister_region, @@ -5033,7 +5040,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .vm_copy_enc_context_from = sev_vm_copy_enc_context_from, .vm_move_enc_context_from = sev_vm_move_enc_context_from, - +#endif .check_emulate_instruction = svm_check_emulate_instruction, .apic_init_signal_blocked = svm_apic_init_signal_blocked, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 33878efdebc829..eda026b5a2fdd9 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -79,12 +79,14 @@ enum { struct kvm_sev_info { bool active; /* SEV enabled guest */ bool es_active; /* SEV-ES enabled guest */ + bool need_init; /* waiting for SEV_INIT2 */ unsigned int asid; /* ASID used for this guest */ unsigned int handle; /* SEV firmware handle */ int fd; /* SEV device fd */ unsigned long pages_locked; /* Number of pages locked */ struct list_head regions_list; /* List of registered regions */ u64 ap_jump_table; /* SEV-ES AP Jump Table address */ + u64 vmsa_features; struct kvm *enc_context_owner; /* Owner of copied encryption context */ struct list_head mirror_vms; /* List of VMs mirroring */ struct list_head mirror_entry; /* Use as a list entry of mirrors */ @@ -318,6 +320,11 @@ static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) return container_of(kvm, struct kvm_svm, kvm); } +static __always_inline struct kvm_sev_info *to_kvm_sev_info(struct kvm *kvm) +{ + return &to_kvm_svm(kvm)->sev_info; +} + static __always_inline bool sev_guest(struct kvm *kvm) { #ifdef CONFIG_KVM_AMD_SEV @@ -664,13 +671,16 @@ void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu); /* sev.c */ -#define GHCB_VERSION_MAX 1ULL -#define GHCB_VERSION_MIN 1ULL - - -extern unsigned int max_sev_asid; +void pre_sev_run(struct vcpu_svm *svm, int cpu); +void sev_init_vmcb(struct vcpu_svm *svm); +void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm); +int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); +void sev_es_vcpu_reset(struct vcpu_svm *svm); +void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); +void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa); +void sev_es_unmap_ghcb(struct vcpu_svm *svm); -void sev_vm_destroy(struct kvm *kvm); +#ifdef CONFIG_KVM_AMD_SEV int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp); int sev_mem_enc_register_region(struct kvm *kvm, struct kvm_enc_region *range); @@ -679,22 +689,32 @@ int sev_mem_enc_unregister_region(struct kvm *kvm, int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd); int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd); void sev_guest_memory_reclaimed(struct kvm *kvm); +int sev_handle_vmgexit(struct kvm_vcpu *vcpu); -void pre_sev_run(struct vcpu_svm *svm, int cpu); +/* These symbols are used in common code and are stubbed below. */ +struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu); +void sev_free_vcpu(struct kvm_vcpu *vcpu); +void sev_vm_destroy(struct kvm *kvm); void __init sev_set_cpu_caps(void); void __init sev_hardware_setup(void); void sev_hardware_unsetup(void); int sev_cpu_init(struct svm_cpu_data *sd); -void sev_init_vmcb(struct vcpu_svm *svm); -void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm); -void sev_free_vcpu(struct kvm_vcpu *vcpu); -int sev_handle_vmgexit(struct kvm_vcpu *vcpu); -int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); -void sev_es_vcpu_reset(struct vcpu_svm *svm); -void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); -void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa); -void sev_es_unmap_ghcb(struct vcpu_svm *svm); -struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu); +int sev_dev_get_attr(u32 group, u64 attr, u64 *val); +extern unsigned int max_sev_asid; +#else +static inline struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) { + return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); +} + +static inline void sev_free_vcpu(struct kvm_vcpu *vcpu) {} +static inline void sev_vm_destroy(struct kvm *kvm) {} +static inline void __init sev_set_cpu_caps(void) {} +static inline void __init sev_hardware_setup(void) {} +static inline void sev_hardware_unsetup(void) {} +static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; } +static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; } +#define max_sev_asid 0 +#endif /* vmenter.S */ diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c new file mode 100644 index 00000000000000..7c546ad3e4c944 --- /dev/null +++ b/arch/x86/kvm/vmx/main.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/moduleparam.h> + +#include "x86_ops.h" +#include "vmx.h" +#include "nested.h" +#include "pmu.h" + +#define VMX_REQUIRED_APICV_INHIBITS \ + (BIT(APICV_INHIBIT_REASON_DISABLE)| \ + BIT(APICV_INHIBIT_REASON_ABSENT) | \ + BIT(APICV_INHIBIT_REASON_HYPERV) | \ + BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \ + BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \ + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \ + BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED)) + +struct kvm_x86_ops vt_x86_ops __initdata = { + .name = KBUILD_MODNAME, + + .check_processor_compatibility = vmx_check_processor_compat, + + .hardware_unsetup = vmx_hardware_unsetup, + + .hardware_enable = vmx_hardware_enable, + .hardware_disable = vmx_hardware_disable, + .has_emulated_msr = vmx_has_emulated_msr, + + .vm_size = sizeof(struct kvm_vmx), + .vm_init = vmx_vm_init, + .vm_destroy = vmx_vm_destroy, + + .vcpu_precreate = vmx_vcpu_precreate, + .vcpu_create = vmx_vcpu_create, + .vcpu_free = vmx_vcpu_free, + .vcpu_reset = vmx_vcpu_reset, + + .prepare_switch_to_guest = vmx_prepare_switch_to_guest, + .vcpu_load = vmx_vcpu_load, + .vcpu_put = vmx_vcpu_put, + + .update_exception_bitmap = vmx_update_exception_bitmap, + .get_msr_feature = vmx_get_msr_feature, + .get_msr = vmx_get_msr, + .set_msr = vmx_set_msr, + .get_segment_base = vmx_get_segment_base, + .get_segment = vmx_get_segment, + .set_segment = vmx_set_segment, + .get_cpl = vmx_get_cpl, + .get_cs_db_l_bits = vmx_get_cs_db_l_bits, + .is_valid_cr0 = vmx_is_valid_cr0, + .set_cr0 = vmx_set_cr0, + .is_valid_cr4 = vmx_is_valid_cr4, + .set_cr4 = vmx_set_cr4, + .set_efer = vmx_set_efer, + .get_idt = vmx_get_idt, + .set_idt = vmx_set_idt, + .get_gdt = vmx_get_gdt, + .set_gdt = vmx_set_gdt, + .set_dr7 = vmx_set_dr7, + .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, + .cache_reg = vmx_cache_reg, + .get_rflags = vmx_get_rflags, + .set_rflags = vmx_set_rflags, + .get_if_flag = vmx_get_if_flag, + + .flush_tlb_all = vmx_flush_tlb_all, + .flush_tlb_current = vmx_flush_tlb_current, + .flush_tlb_gva = vmx_flush_tlb_gva, + .flush_tlb_guest = vmx_flush_tlb_guest, + + .vcpu_pre_run = vmx_vcpu_pre_run, + .vcpu_run = vmx_vcpu_run, + .handle_exit = vmx_handle_exit, + .skip_emulated_instruction = vmx_skip_emulated_instruction, + .update_emulated_instruction = vmx_update_emulated_instruction, + .set_interrupt_shadow = vmx_set_interrupt_shadow, + .get_interrupt_shadow = vmx_get_interrupt_shadow, + .patch_hypercall = vmx_patch_hypercall, + .inject_irq = vmx_inject_irq, + .inject_nmi = vmx_inject_nmi, + .inject_exception = vmx_inject_exception, + .cancel_injection = vmx_cancel_injection, + .interrupt_allowed = vmx_interrupt_allowed, + .nmi_allowed = vmx_nmi_allowed, + .get_nmi_mask = vmx_get_nmi_mask, + .set_nmi_mask = vmx_set_nmi_mask, + .enable_nmi_window = vmx_enable_nmi_window, + .enable_irq_window = vmx_enable_irq_window, + .update_cr8_intercept = vmx_update_cr8_intercept, + .set_virtual_apic_mode = vmx_set_virtual_apic_mode, + .set_apic_access_page_addr = vmx_set_apic_access_page_addr, + .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, + .load_eoi_exitmap = vmx_load_eoi_exitmap, + .apicv_pre_state_restore = vmx_apicv_pre_state_restore, + .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS, + .hwapic_irr_update = vmx_hwapic_irr_update, + .hwapic_isr_update = vmx_hwapic_isr_update, + .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, + .sync_pir_to_irr = vmx_sync_pir_to_irr, + .deliver_interrupt = vmx_deliver_interrupt, + .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt, + + .set_tss_addr = vmx_set_tss_addr, + .set_identity_map_addr = vmx_set_identity_map_addr, + .get_mt_mask = vmx_get_mt_mask, + + .get_exit_info = vmx_get_exit_info, + + .vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid, + + .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, + + .get_l2_tsc_offset = vmx_get_l2_tsc_offset, + .get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier, + .write_tsc_offset = vmx_write_tsc_offset, + .write_tsc_multiplier = vmx_write_tsc_multiplier, + + .load_mmu_pgd = vmx_load_mmu_pgd, + + .check_intercept = vmx_check_intercept, + .handle_exit_irqoff = vmx_handle_exit_irqoff, + + .sched_in = vmx_sched_in, + + .cpu_dirty_log_size = PML_ENTITY_NUM, + .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging, + + .nested_ops = &vmx_nested_ops, + + .pi_update_irte = vmx_pi_update_irte, + .pi_start_assignment = vmx_pi_start_assignment, + +#ifdef CONFIG_X86_64 + .set_hv_timer = vmx_set_hv_timer, + .cancel_hv_timer = vmx_cancel_hv_timer, +#endif + + .setup_mce = vmx_setup_mce, + +#ifdef CONFIG_KVM_SMM + .smi_allowed = vmx_smi_allowed, + .enter_smm = vmx_enter_smm, + .leave_smm = vmx_leave_smm, + .enable_smi_window = vmx_enable_smi_window, +#endif + + .check_emulate_instruction = vmx_check_emulate_instruction, + .apic_init_signal_blocked = vmx_apic_init_signal_blocked, + .migrate_timers = vmx_migrate_timers, + + .msr_filter_changed = vmx_msr_filter_changed, + .complete_emulated_msr = kvm_complete_insn_gp, + + .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, + + .get_untagged_addr = vmx_get_untagged_addr, +}; + +struct kvm_x86_init_ops vt_init_ops __initdata = { + .hardware_setup = vmx_hardware_setup, + .handle_intel_pt_intr = NULL, + + .runtime_ops = &vt_x86_ops, + .pmu_ops = &intel_pmu_ops, +}; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 22411f4aff5303..94db9e4db2ceaa 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -68,6 +68,7 @@ #include "vmcs12.h" #include "vmx.h" #include "x86.h" +#include "x86_ops.h" #include "smm.h" #include "vmx_onhyperv.h" @@ -530,8 +531,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) static unsigned long host_idt_base; #if IS_ENABLED(CONFIG_HYPERV) -static struct kvm_x86_ops vmx_x86_ops __initdata; - static bool __read_mostly enlightened_vmcs = true; module_param(enlightened_vmcs, bool, 0444); @@ -581,9 +580,8 @@ static __init void hv_init_evmcs(void) } if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) - vmx_x86_ops.enable_l2_tlb_flush + vt_x86_ops.enable_l2_tlb_flush = hv_enable_l2_tlb_flush; - } else { enlightened_vmcs = false; } @@ -1477,7 +1475,7 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. */ -static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -1488,7 +1486,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vmx->host_debugctlmsr = get_debugctlmsr(); } -static void vmx_vcpu_put(struct kvm_vcpu *vcpu) +void vmx_vcpu_put(struct kvm_vcpu *vcpu) { vmx_vcpu_pi_put(vcpu); @@ -1547,7 +1545,7 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) vmx->emulation_required = vmx_emulation_required(vcpu); } -static bool vmx_get_if_flag(struct kvm_vcpu *vcpu) +bool vmx_get_if_flag(struct kvm_vcpu *vcpu) { return vmx_get_rflags(vcpu) & X86_EFLAGS_IF; } @@ -1653,8 +1651,8 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } -static int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, - void *insn, int insn_len) +int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { /* * Emulation of instructions in SGX enclaves is impossible as RIP does @@ -1738,7 +1736,7 @@ rip_updated: * Recognizes a pending MTF VM-exit and records the nested state for later * delivery. */ -static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) +void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -1769,7 +1767,7 @@ static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) } } -static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu) +int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu) { vmx_update_emulated_instruction(vcpu); return skip_emulated_instruction(vcpu); @@ -1788,7 +1786,7 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); } -static void vmx_inject_exception(struct kvm_vcpu *vcpu) +void vmx_inject_exception(struct kvm_vcpu *vcpu) { struct kvm_queued_exception *ex = &vcpu->arch.exception; u32 intr_info = ex->vector | INTR_INFO_VALID_MASK; @@ -1909,12 +1907,12 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu) return kvm_caps.default_tsc_scaling_ratio; } -static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu) +void vmx_write_tsc_offset(struct kvm_vcpu *vcpu) { vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); } -static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu) +void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu) { vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio); } @@ -1957,7 +1955,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, return !(msr->data & ~valid_bits); } -static int vmx_get_msr_feature(struct kvm_msr_entry *msr) +int vmx_get_msr_feature(struct kvm_msr_entry *msr) { switch (msr->index) { case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: @@ -1974,7 +1972,7 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr) * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmx_uret_msr *msr; @@ -2155,7 +2153,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmx_uret_msr *msr; @@ -2458,7 +2456,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return ret; } -static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) +void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { unsigned long guest_owned_bits; @@ -2759,7 +2757,7 @@ static bool kvm_is_vmx_supported(void) return supported; } -static int vmx_check_processor_compat(void) +int vmx_check_processor_compat(void) { int cpu = raw_smp_processor_id(); struct vmcs_config vmcs_conf; @@ -2801,7 +2799,7 @@ fault: return -EFAULT; } -static int vmx_hardware_enable(void) +int vmx_hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2841,7 +2839,7 @@ static void vmclear_local_loaded_vmcss(void) __loaded_vmcs_clear(v); } -static void vmx_hardware_disable(void) +void vmx_hardware_disable(void) { vmclear_local_loaded_vmcss(); @@ -3155,7 +3153,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif -static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) +void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3185,7 +3183,7 @@ static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) return to_vmx(vcpu)->vpid; } -static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) +void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; u64 root_hpa = mmu->root.hpa; @@ -3201,7 +3199,7 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) vpid_sync_context(vmx_get_current_vpid(vcpu)); } -static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) +void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) { /* * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in @@ -3210,7 +3208,7 @@ static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr); } -static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) +void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) { /* * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a @@ -3255,7 +3253,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) #define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \ CPU_BASED_CR3_STORE_EXITING) -static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) +bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { if (is_guest_mode(vcpu)) return nested_guest_cr0_valid(vcpu, cr0); @@ -3376,8 +3374,7 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) return eptp; } -static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, - int root_level) +void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) { struct kvm *kvm = vcpu->kvm; bool update_guest_cr3 = true; @@ -3406,8 +3403,7 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, vmcs_writel(GUEST_CR3, guest_cr3); } - -static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { /* * We operate under the default treatment of SMM, so VMX cannot be @@ -3523,7 +3519,7 @@ void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) var->g = (ar >> 15) & 1; } -static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) +u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { struct kvm_segment s; @@ -3600,14 +3596,14 @@ void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); } -static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { __vmx_set_segment(vcpu, var, seg); to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu); } -static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) +void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) { u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); @@ -3615,25 +3611,25 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) *l = (ar >> 13) & 1; } -static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { dt->size = vmcs_read32(GUEST_IDTR_LIMIT); dt->address = vmcs_readl(GUEST_IDTR_BASE); } -static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { vmcs_write32(GUEST_IDTR_LIMIT, dt->size); vmcs_writel(GUEST_IDTR_BASE, dt->address); } -static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { dt->size = vmcs_read32(GUEST_GDTR_LIMIT); dt->address = vmcs_readl(GUEST_GDTR_BASE); } -static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { vmcs_write32(GUEST_GDTR_LIMIT, dt->size); vmcs_writel(GUEST_GDTR_BASE, dt->address); @@ -4101,7 +4097,7 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu) } } -static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) +bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); void *vapic_page; @@ -4121,7 +4117,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) return ((rvi & 0xf0) > (vppr & 0xf0)); } -static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu) +void vmx_msr_filter_changed(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 i; @@ -4265,8 +4261,8 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) return 0; } -static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, - int trig_mode, int vector) +void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector) { struct kvm_vcpu *vcpu = apic->vcpu; @@ -4428,7 +4424,7 @@ static u32 vmx_vmexit_ctrl(void) ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); } -static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) +void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4692,7 +4688,7 @@ static int vmx_alloc_ipiv_pid_table(struct kvm *kvm) return 0; } -static int vmx_vcpu_precreate(struct kvm *kvm) +int vmx_vcpu_precreate(struct kvm *kvm) { return vmx_alloc_ipiv_pid_table(kvm); } @@ -4847,7 +4843,7 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->pi_desc.sn = 1; } -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) +void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4906,12 +4902,12 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx_update_fb_clear_dis(vcpu, vmx); } -static void vmx_enable_irq_window(struct kvm_vcpu *vcpu) +void vmx_enable_irq_window(struct kvm_vcpu *vcpu) { exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); } -static void vmx_enable_nmi_window(struct kvm_vcpu *vcpu) +void vmx_enable_nmi_window(struct kvm_vcpu *vcpu) { if (!enable_vnmi || vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { @@ -4922,7 +4918,7 @@ static void vmx_enable_nmi_window(struct kvm_vcpu *vcpu) exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); } -static void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) +void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) { struct vcpu_vmx *vmx = to_vmx(vcpu); uint32_t intr; @@ -4950,7 +4946,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) vmx_clear_hlt(vcpu); } -static void vmx_inject_nmi(struct kvm_vcpu *vcpu) +void vmx_inject_nmi(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -5028,7 +5024,7 @@ bool vmx_nmi_blocked(struct kvm_vcpu *vcpu) GUEST_INTR_STATE_NMI)); } -static int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) +int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { if (to_vmx(vcpu)->nested.nested_run_pending) return -EBUSY; @@ -5050,7 +5046,7 @@ bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu) (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); } -static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) +int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) { if (to_vmx(vcpu)->nested.nested_run_pending) return -EBUSY; @@ -5065,7 +5061,7 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) return !vmx_interrupt_blocked(vcpu); } -static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) +int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { void __user *ret; @@ -5085,7 +5081,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) return init_rmode_tss(kvm, ret); } -static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) +int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) { to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; return 0; @@ -5371,8 +5367,7 @@ static int handle_io(struct kvm_vcpu *vcpu) return kvm_fast_pio(vcpu, size, port, in); } -static void -vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) +void vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) { /* * Patch in the VMCALL instruction: @@ -5578,7 +5573,7 @@ out: return kvm_complete_insn_gp(vcpu, err); } -static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) +void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) { get_debugreg(vcpu->arch.db[0], 0); get_debugreg(vcpu->arch.db[1], 1); @@ -5597,7 +5592,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) set_debugreg(DR6_RESERVED, 6); } -static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) +void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) { vmcs_writel(GUEST_DR7, val); } @@ -5868,7 +5863,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) return 1; } -static int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu) +int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu) { if (vmx_emulation_required_with_pending_exception(vcpu)) { kvm_prepare_emulation_failure_exit(vcpu); @@ -6156,9 +6151,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); -static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, - u64 *info1, u64 *info2, - u32 *intr_info, u32 *error_code) +void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6601,7 +6595,7 @@ unexpected_vmexit: return 0; } -static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) +int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { int ret = __vmx_handle_exit(vcpu, exit_fastpath); @@ -6689,7 +6683,7 @@ static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu) : "eax", "ebx", "ecx", "edx"); } -static void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) +void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); int tpr_threshold; @@ -6759,7 +6753,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) vmx_update_msr_bitmap_x2apic(vcpu); } -static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) +void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) { const gfn_t gfn = APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT; struct kvm *kvm = vcpu->kvm; @@ -6828,7 +6822,7 @@ out: kvm_release_pfn_clean(pfn); } -static void vmx_hwapic_isr_update(int max_isr) +void vmx_hwapic_isr_update(int max_isr) { u16 status; u8 old; @@ -6862,7 +6856,7 @@ static void vmx_set_rvi(int vector) } } -static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) +void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) { /* * When running L2, updating RVI is only relevant when @@ -6876,7 +6870,7 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) vmx_set_rvi(max_irr); } -static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); int max_irr; @@ -6922,7 +6916,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) return max_irr; } -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { if (!kvm_vcpu_apicv_active(vcpu)) return; @@ -6933,7 +6927,7 @@ static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); } -static void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu) +void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6964,24 +6958,22 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu) rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err); } -static void handle_exception_irqoff(struct vcpu_vmx *vmx) +static void handle_exception_irqoff(struct kvm_vcpu *vcpu, u32 intr_info) { - u32 intr_info = vmx_get_intr_info(&vmx->vcpu); - /* if exit due to PF check for async PF */ if (is_page_fault(intr_info)) - vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags(); + vcpu->arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags(); /* if exit due to NM, handle before interrupts are enabled */ else if (is_nm_fault(intr_info)) - handle_nm_fault_irqoff(&vmx->vcpu); + handle_nm_fault_irqoff(vcpu); /* Handle machine checks before interrupts are enabled */ else if (is_machine_check(intr_info)) kvm_machine_check(); } -static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) +static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu, + u32 intr_info) { - u32 intr_info = vmx_get_intr_info(vcpu); unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK; if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm, @@ -6998,7 +6990,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) vcpu->arch.at_instruction_boundary = true; } -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) +void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7006,16 +6998,16 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) return; if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) - handle_external_interrupt_irqoff(vcpu); + handle_external_interrupt_irqoff(vcpu, vmx_get_intr_info(vcpu)); else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI) - handle_exception_irqoff(vmx); + handle_exception_irqoff(vcpu, vmx_get_intr_info(vcpu)); } /* * The kvm parameter can be NULL (module initialization, or invocation before * VM creation). Be sure to check the kvm parameter before using it. */ -static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index) +bool vmx_has_emulated_msr(struct kvm *kvm, u32 index) { switch (index) { case MSR_IA32_SMBASE: @@ -7138,7 +7130,7 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) IDT_VECTORING_ERROR_CODE); } -static void vmx_cancel_injection(struct kvm_vcpu *vcpu) +void vmx_cancel_injection(struct kvm_vcpu *vcpu) { __vmx_complete_interrupts(vcpu, vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), @@ -7308,7 +7300,7 @@ out: guest_state_exit_irqoff(); } -static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) +fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long cr3, cr4; @@ -7463,7 +7455,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) return vmx_exit_handlers_fastpath(vcpu, force_immediate_exit); } -static void vmx_vcpu_free(struct kvm_vcpu *vcpu) +void vmx_vcpu_free(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7474,7 +7466,7 @@ static void vmx_vcpu_free(struct kvm_vcpu *vcpu) free_loaded_vmcs(vmx->loaded_vmcs); } -static int vmx_vcpu_create(struct kvm_vcpu *vcpu) +int vmx_vcpu_create(struct kvm_vcpu *vcpu) { struct vmx_uret_msr *tsx_ctrl; struct vcpu_vmx *vmx; @@ -7583,7 +7575,7 @@ free_vpid: #define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" #define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" -static int vmx_vm_init(struct kvm *kvm) +int vmx_vm_init(struct kvm *kvm) { if (!ple_gap) kvm->arch.pause_in_guest = true; @@ -7614,7 +7606,7 @@ static int vmx_vm_init(struct kvm *kvm) return 0; } -static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) +u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { /* We wanted to honor guest CD/MTRR/PAT, but doing so could result in * memory aliases with conflicting memory types and sometimes MCEs. @@ -7786,7 +7778,7 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4)); } -static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) +void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -8001,10 +7993,10 @@ static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; } -static int vmx_check_intercept(struct kvm_vcpu *vcpu, - struct x86_instruction_info *info, - enum x86_intercept_stage stage, - struct x86_exception *exception) +int vmx_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage, + struct x86_exception *exception) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -8084,8 +8076,8 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift, return 0; } -static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, - bool *expired) +int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, + bool *expired) { struct vcpu_vmx *vmx; u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; @@ -8124,13 +8116,13 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, return 0; } -static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) +void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) { to_vmx(vcpu)->hv_deadline_tsc = -1; } #endif -static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) +void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) { if (!kvm_pause_in_guest(vcpu->kvm)) shrink_ple_window(vcpu); @@ -8159,7 +8151,7 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu) secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML); } -static void vmx_setup_mce(struct kvm_vcpu *vcpu) +void vmx_setup_mce(struct kvm_vcpu *vcpu) { if (vcpu->arch.mcg_cap & MCG_LMCE_P) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= @@ -8170,7 +8162,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) } #ifdef CONFIG_KVM_SMM -static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) +int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { /* we need a nested vmexit to enter SMM, postpone if run is pending */ if (to_vmx(vcpu)->nested.nested_run_pending) @@ -8178,7 +8170,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) return !is_smm(vcpu); } -static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) +int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -8199,7 +8191,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) return 0; } -static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) +int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) { struct vcpu_vmx *vmx = to_vmx(vcpu); int ret; @@ -8220,18 +8212,18 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) return 0; } -static void vmx_enable_smi_window(struct kvm_vcpu *vcpu) +void vmx_enable_smi_window(struct kvm_vcpu *vcpu) { /* RSM will cause a vmexit anyway. */ } #endif -static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) +bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) { return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu); } -static void vmx_migrate_timers(struct kvm_vcpu *vcpu) +void vmx_migrate_timers(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { struct hrtimer *timer = &to_vmx(vcpu)->nested.preemption_timer; @@ -8241,7 +8233,7 @@ static void vmx_migrate_timers(struct kvm_vcpu *vcpu) } } -static void vmx_hardware_unsetup(void) +void vmx_hardware_unsetup(void) { kvm_set_posted_intr_wakeup_handler(NULL); @@ -8251,18 +8243,7 @@ static void vmx_hardware_unsetup(void) free_kvm_area(); } -#define VMX_REQUIRED_APICV_INHIBITS \ -( \ - BIT(APICV_INHIBIT_REASON_DISABLE)| \ - BIT(APICV_INHIBIT_REASON_ABSENT) | \ - BIT(APICV_INHIBIT_REASON_HYPERV) | \ - BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \ - BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \ - BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \ - BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED) \ -) - -static void vmx_vm_destroy(struct kvm *kvm) +void vmx_vm_destroy(struct kvm *kvm) { struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); @@ -8313,148 +8294,6 @@ gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags return (sign_extend64(gva, lam_bit) & ~BIT_ULL(63)) | (gva & BIT_ULL(63)); } -static struct kvm_x86_ops vmx_x86_ops __initdata = { - .name = KBUILD_MODNAME, - - .check_processor_compatibility = vmx_check_processor_compat, - - .hardware_unsetup = vmx_hardware_unsetup, - - .hardware_enable = vmx_hardware_enable, - .hardware_disable = vmx_hardware_disable, - .has_emulated_msr = vmx_has_emulated_msr, - - .vm_size = sizeof(struct kvm_vmx), - .vm_init = vmx_vm_init, - .vm_destroy = vmx_vm_destroy, - - .vcpu_precreate = vmx_vcpu_precreate, - .vcpu_create = vmx_vcpu_create, - .vcpu_free = vmx_vcpu_free, - .vcpu_reset = vmx_vcpu_reset, - - .prepare_switch_to_guest = vmx_prepare_switch_to_guest, - .vcpu_load = vmx_vcpu_load, - .vcpu_put = vmx_vcpu_put, - - .update_exception_bitmap = vmx_update_exception_bitmap, - .get_msr_feature = vmx_get_msr_feature, - .get_msr = vmx_get_msr, - .set_msr = vmx_set_msr, - .get_segment_base = vmx_get_segment_base, - .get_segment = vmx_get_segment, - .set_segment = vmx_set_segment, - .get_cpl = vmx_get_cpl, - .get_cs_db_l_bits = vmx_get_cs_db_l_bits, - .is_valid_cr0 = vmx_is_valid_cr0, - .set_cr0 = vmx_set_cr0, - .is_valid_cr4 = vmx_is_valid_cr4, - .set_cr4 = vmx_set_cr4, - .set_efer = vmx_set_efer, - .get_idt = vmx_get_idt, - .set_idt = vmx_set_idt, - .get_gdt = vmx_get_gdt, - .set_gdt = vmx_set_gdt, - .set_dr7 = vmx_set_dr7, - .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, - .cache_reg = vmx_cache_reg, - .get_rflags = vmx_get_rflags, - .set_rflags = vmx_set_rflags, - .get_if_flag = vmx_get_if_flag, - - .flush_tlb_all = vmx_flush_tlb_all, - .flush_tlb_current = vmx_flush_tlb_current, - .flush_tlb_gva = vmx_flush_tlb_gva, - .flush_tlb_guest = vmx_flush_tlb_guest, - - .vcpu_pre_run = vmx_vcpu_pre_run, - .vcpu_run = vmx_vcpu_run, - .handle_exit = vmx_handle_exit, - .skip_emulated_instruction = vmx_skip_emulated_instruction, - .update_emulated_instruction = vmx_update_emulated_instruction, - .set_interrupt_shadow = vmx_set_interrupt_shadow, - .get_interrupt_shadow = vmx_get_interrupt_shadow, - .patch_hypercall = vmx_patch_hypercall, - .inject_irq = vmx_inject_irq, - .inject_nmi = vmx_inject_nmi, - .inject_exception = vmx_inject_exception, - .cancel_injection = vmx_cancel_injection, - .interrupt_allowed = vmx_interrupt_allowed, - .nmi_allowed = vmx_nmi_allowed, - .get_nmi_mask = vmx_get_nmi_mask, - .set_nmi_mask = vmx_set_nmi_mask, - .enable_nmi_window = vmx_enable_nmi_window, - .enable_irq_window = vmx_enable_irq_window, - .update_cr8_intercept = vmx_update_cr8_intercept, - .set_virtual_apic_mode = vmx_set_virtual_apic_mode, - .set_apic_access_page_addr = vmx_set_apic_access_page_addr, - .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, - .load_eoi_exitmap = vmx_load_eoi_exitmap, - .apicv_pre_state_restore = vmx_apicv_pre_state_restore, - .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS, - .hwapic_irr_update = vmx_hwapic_irr_update, - .hwapic_isr_update = vmx_hwapic_isr_update, - .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, - .sync_pir_to_irr = vmx_sync_pir_to_irr, - .deliver_interrupt = vmx_deliver_interrupt, - .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt, - - .set_tss_addr = vmx_set_tss_addr, - .set_identity_map_addr = vmx_set_identity_map_addr, - .get_mt_mask = vmx_get_mt_mask, - - .get_exit_info = vmx_get_exit_info, - - .vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid, - - .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, - - .get_l2_tsc_offset = vmx_get_l2_tsc_offset, - .get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier, - .write_tsc_offset = vmx_write_tsc_offset, - .write_tsc_multiplier = vmx_write_tsc_multiplier, - - .load_mmu_pgd = vmx_load_mmu_pgd, - - .check_intercept = vmx_check_intercept, - .handle_exit_irqoff = vmx_handle_exit_irqoff, - - .sched_in = vmx_sched_in, - - .cpu_dirty_log_size = PML_ENTITY_NUM, - .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging, - - .nested_ops = &vmx_nested_ops, - - .pi_update_irte = vmx_pi_update_irte, - .pi_start_assignment = vmx_pi_start_assignment, - -#ifdef CONFIG_X86_64 - .set_hv_timer = vmx_set_hv_timer, - .cancel_hv_timer = vmx_cancel_hv_timer, -#endif - - .setup_mce = vmx_setup_mce, - -#ifdef CONFIG_KVM_SMM - .smi_allowed = vmx_smi_allowed, - .enter_smm = vmx_enter_smm, - .leave_smm = vmx_leave_smm, - .enable_smi_window = vmx_enable_smi_window, -#endif - - .check_emulate_instruction = vmx_check_emulate_instruction, - .apic_init_signal_blocked = vmx_apic_init_signal_blocked, - .migrate_timers = vmx_migrate_timers, - - .msr_filter_changed = vmx_msr_filter_changed, - .complete_emulated_msr = kvm_complete_insn_gp, - - .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, - - .get_untagged_addr = vmx_get_untagged_addr, -}; - static unsigned int vmx_handle_intel_pt_intr(void) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); @@ -8520,9 +8359,7 @@ static void __init vmx_setup_me_spte_mask(void) kvm_mmu_set_me_spte_mask(0, me_mask); } -static struct kvm_x86_init_ops vmx_init_ops __initdata; - -static __init int hardware_setup(void) +__init int vmx_hardware_setup(void) { unsigned long host_bndcfgs; struct desc_ptr dt; @@ -8591,16 +8428,16 @@ static __init int hardware_setup(void) * using the APIC_ACCESS_ADDR VMCS field. */ if (!flexpriority_enabled) - vmx_x86_ops.set_apic_access_page_addr = NULL; + vt_x86_ops.set_apic_access_page_addr = NULL; if (!cpu_has_vmx_tpr_shadow()) - vmx_x86_ops.update_cr8_intercept = NULL; + vt_x86_ops.update_cr8_intercept = NULL; #if IS_ENABLED(CONFIG_HYPERV) if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH && enable_ept) { - vmx_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs; - vmx_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range; + vt_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs; + vt_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range; } #endif @@ -8615,7 +8452,7 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_apicv()) enable_apicv = 0; if (!enable_apicv) - vmx_x86_ops.sync_pir_to_irr = NULL; + vt_x86_ops.sync_pir_to_irr = NULL; if (!enable_apicv || !cpu_has_vmx_ipiv()) enable_ipiv = false; @@ -8651,7 +8488,7 @@ static __init int hardware_setup(void) enable_pml = 0; if (!enable_pml) - vmx_x86_ops.cpu_dirty_log_size = 0; + vt_x86_ops.cpu_dirty_log_size = 0; if (!cpu_has_vmx_preemption_timer()) enable_preemption_timer = false; @@ -8676,8 +8513,8 @@ static __init int hardware_setup(void) } if (!enable_preemption_timer) { - vmx_x86_ops.set_hv_timer = NULL; - vmx_x86_ops.cancel_hv_timer = NULL; + vt_x86_ops.set_hv_timer = NULL; + vt_x86_ops.cancel_hv_timer = NULL; } kvm_caps.supported_mce_cap |= MCG_LMCE_P; @@ -8688,9 +8525,9 @@ static __init int hardware_setup(void) if (!enable_ept || !enable_pmu || !cpu_has_vmx_intel_pt()) pt_mode = PT_MODE_SYSTEM; if (pt_mode == PT_MODE_HOST_GUEST) - vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr; + vt_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr; else - vmx_init_ops.handle_intel_pt_intr = NULL; + vt_init_ops.handle_intel_pt_intr = NULL; setup_default_sgx_lepubkeyhash(); @@ -8713,14 +8550,6 @@ static __init int hardware_setup(void) return r; } -static struct kvm_x86_init_ops vmx_init_ops __initdata = { - .hardware_setup = hardware_setup, - .handle_intel_pt_intr = NULL, - - .runtime_ops = &vmx_x86_ops, - .pmu_ops = &intel_pmu_ops, -}; - static void vmx_cleanup_l1d_flush(void) { if (vmx_l1d_flush_pages) { @@ -8762,7 +8591,7 @@ static int __init vmx_init(void) */ hv_init_evmcs(); - r = kvm_x86_vendor_init(&vmx_init_ops); + r = kvm_x86_vendor_init(&vt_init_ops); if (r) return r; diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h new file mode 100644 index 00000000000000..502704596c8324 --- /dev/null +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_VMX_X86_OPS_H +#define __KVM_X86_VMX_X86_OPS_H + +#include <linux/kvm_host.h> + +#include "x86.h" + +__init int vmx_hardware_setup(void); + +extern struct kvm_x86_ops vt_x86_ops __initdata; +extern struct kvm_x86_init_ops vt_init_ops __initdata; + +void vmx_hardware_unsetup(void); +int vmx_check_processor_compat(void); +int vmx_hardware_enable(void); +void vmx_hardware_disable(void); +int vmx_vm_init(struct kvm *kvm); +void vmx_vm_destroy(struct kvm *kvm); +int vmx_vcpu_precreate(struct kvm *kvm); +int vmx_vcpu_create(struct kvm_vcpu *vcpu); +int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu); +fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit); +void vmx_vcpu_free(struct kvm_vcpu *vcpu); +void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); +void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void vmx_vcpu_put(struct kvm_vcpu *vcpu); +int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath); +void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu); +int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu); +void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu); +int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); +#ifdef CONFIG_KVM_SMM +int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection); +int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram); +int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram); +void vmx_enable_smi_window(struct kvm_vcpu *vcpu); +#endif +int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); +int vmx_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage, + struct x86_exception *exception); +bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu); +void vmx_migrate_timers(struct kvm_vcpu *vcpu); +void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); +void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu); +bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason); +void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); +void vmx_hwapic_isr_update(int max_isr); +bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu); +int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu); +void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector); +void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu); +bool vmx_has_emulated_msr(struct kvm *kvm, u32 index); +void vmx_msr_filter_changed(struct kvm_vcpu *vcpu); +void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); +void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); +int vmx_get_msr_feature(struct kvm_msr_entry *msr); +int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); +u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg); +void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +int vmx_get_cpl(struct kvm_vcpu *vcpu); +void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); +bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); +void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer); +void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); +void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); +void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); +void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); +void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val); +void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu); +void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg); +unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); +void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); +bool vmx_get_if_flag(struct kvm_vcpu *vcpu); +void vmx_flush_tlb_all(struct kvm_vcpu *vcpu); +void vmx_flush_tlb_current(struct kvm_vcpu *vcpu); +void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr); +void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu); +void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask); +u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu); +void vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall); +void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected); +void vmx_inject_nmi(struct kvm_vcpu *vcpu); +void vmx_inject_exception(struct kvm_vcpu *vcpu); +void vmx_cancel_injection(struct kvm_vcpu *vcpu); +int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection); +int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection); +bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); +void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); +void vmx_enable_nmi_window(struct kvm_vcpu *vcpu); +void vmx_enable_irq_window(struct kvm_vcpu *vcpu); +void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr); +void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu); +void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); +void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); +int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); +int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr); +u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); +void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code); +u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu); +u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu); +void vmx_write_tsc_offset(struct kvm_vcpu *vcpu); +void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu); +void vmx_request_immediate_exit(struct kvm_vcpu *vcpu); +void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu); +void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); +#ifdef CONFIG_X86_64 +int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, + bool *expired); +void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu); +#endif +void vmx_setup_mce(struct kvm_vcpu *vcpu); + +#endif /* __KVM_X86_VMX_X86_OPS_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 91478b769af089..2369956638deea 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -94,6 +94,7 @@ struct kvm_caps kvm_caps __read_mostly = { .supported_mce_cap = MCG_CTL_P | MCG_SER_P, + .supported_vm_types = BIT(KVM_X86_DEFAULT_VM), }; EXPORT_SYMBOL_GPL(kvm_caps); @@ -4629,9 +4630,7 @@ static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu, static bool kvm_is_vm_type_supported(unsigned long type) { - return type == KVM_X86_DEFAULT_VM || - (type == KVM_X86_SW_PROTECTED_VM && - IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled); + return type < 32 && (kvm_caps.supported_vm_types & BIT(type)); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -4832,9 +4831,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = kvm_caps.has_notify_vmexit; break; case KVM_CAP_VM_TYPES: - r = BIT(KVM_X86_DEFAULT_VM); - if (kvm_is_vm_type_supported(KVM_X86_SW_PROTECTED_VM)) - r |= BIT(KVM_X86_SW_PROTECTED_VM); + r = kvm_caps.supported_vm_types; break; default: break; @@ -4842,46 +4839,44 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } -static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr) +static int __kvm_x86_dev_get_attr(struct kvm_device_attr *attr, u64 *val) { - void __user *uaddr = (void __user*)(unsigned long)attr->addr; - - if ((u64)(unsigned long)uaddr != attr->addr) - return ERR_PTR_USR(-EFAULT); - return uaddr; -} - -static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr) -{ - u64 __user *uaddr = kvm_get_attr_addr(attr); - - if (attr->group) + if (attr->group) { + if (kvm_x86_ops.dev_get_attr) + return static_call(kvm_x86_dev_get_attr)(attr->group, attr->attr, val); return -ENXIO; - - if (IS_ERR(uaddr)) - return PTR_ERR(uaddr); + } switch (attr->attr) { case KVM_X86_XCOMP_GUEST_SUPP: - if (put_user(kvm_caps.supported_xcr0, uaddr)) - return -EFAULT; + *val = kvm_caps.supported_xcr0; return 0; default: return -ENXIO; } } +static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr) +{ + u64 __user *uaddr = u64_to_user_ptr(attr->addr); + int r; + u64 val; + + r = __kvm_x86_dev_get_attr(attr, &val); + if (r < 0) + return r; + + if (put_user(val, uaddr)) + return -EFAULT; + + return 0; +} + static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr) { - if (attr->group) - return -ENXIO; + u64 val; - switch (attr->attr) { - case KVM_X86_XCOMP_GUEST_SUPP: - return 0; - default: - return -ENXIO; - } + return __kvm_x86_dev_get_attr(attr, &val); } long kvm_arch_dev_ioctl(struct file *filp, @@ -5557,11 +5552,15 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, return 0; } -static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, - struct kvm_debugregs *dbgregs) +static int kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, + struct kvm_debugregs *dbgregs) { unsigned int i; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + memset(dbgregs, 0, sizeof(*dbgregs)); BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db)); @@ -5570,6 +5569,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, dbgregs->dr6 = vcpu->arch.dr6; dbgregs->dr7 = vcpu->arch.dr7; + return 0; } static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, @@ -5577,6 +5577,10 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, { unsigned int i; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + if (dbgregs->flags) return -EINVAL; @@ -5597,8 +5601,8 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, } -static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, - u8 *state, unsigned int size) +static int kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, + u8 *state, unsigned int size) { /* * Only copy state for features that are enabled for the guest. The @@ -5616,24 +5620,25 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, XFEATURE_MASK_FPSSE; if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, supported_xcr0, vcpu->arch.pkru); + return 0; } -static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, - struct kvm_xsave *guest_xsave) +static int kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, + struct kvm_xsave *guest_xsave) { - kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, - sizeof(guest_xsave->region)); + return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, + sizeof(guest_xsave->region)); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return 0; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu, guest_xsave->region, @@ -5641,18 +5646,23 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, &vcpu->arch.pkru); } -static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, - struct kvm_xcrs *guest_xcrs) +static int kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, + struct kvm_xcrs *guest_xcrs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { guest_xcrs->nr_xcrs = 0; - return; + return 0; } guest_xcrs->nr_xcrs = 1; guest_xcrs->flags = 0; guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK; guest_xcrs->xcrs[0].value = vcpu->arch.xcr0; + return 0; } static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, @@ -5660,6 +5670,10 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, { int i, r = 0; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -EINVAL; @@ -5712,12 +5726,9 @@ static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu, static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - u64 __user *uaddr = kvm_get_attr_addr(attr); + u64 __user *uaddr = u64_to_user_ptr(attr->addr); int r; - if (IS_ERR(uaddr)) - return PTR_ERR(uaddr); - switch (attr->attr) { case KVM_VCPU_TSC_OFFSET: r = -EFAULT; @@ -5735,13 +5746,10 @@ static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu, static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - u64 __user *uaddr = kvm_get_attr_addr(attr); + u64 __user *uaddr = u64_to_user_ptr(attr->addr); struct kvm *kvm = vcpu->kvm; int r; - if (IS_ERR(uaddr)) - return PTR_ERR(uaddr); - switch (attr->attr) { case KVM_VCPU_TSC_OFFSET: { u64 offset, tsc, ns; @@ -6048,7 +6056,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_GET_DEBUGREGS: { struct kvm_debugregs dbgregs; - kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); + r = kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, &dbgregs, @@ -6078,7 +6088,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!u.xsave) break; - kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); + r = kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave))) @@ -6107,7 +6119,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!u.xsave) break; - kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size); + r = kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, u.xsave, size)) @@ -6123,7 +6137,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!u.xcrs) break; - kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); + r = kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, u.xcrs, @@ -6267,6 +6283,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_GET_SREGS2: { + r = -EINVAL; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + goto out; + u.sregs2 = kzalloc(sizeof(struct kvm_sregs2), GFP_KERNEL); r = -ENOMEM; if (!u.sregs2) @@ -6279,6 +6300,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_SET_SREGS2: { + r = -EINVAL; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + goto out; + u.sregs2 = memdup_user(argp, sizeof(struct kvm_sregs2)); if (IS_ERR(u.sregs2)) { r = PTR_ERR(u.sregs2); @@ -9795,6 +9821,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) kvm_register_perf_callbacks(ops->handle_intel_pt_intr); + if (IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled) + kvm_caps.supported_vm_types |= BIT(KVM_X86_SW_PROTECTED_VM); + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) kvm_caps.supported_xss = 0; @@ -10051,26 +10080,15 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } -int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl) { - unsigned long nr, a0, a1, a2, a3, ret; - int op_64_bit; - - if (kvm_xen_hypercall_enabled(vcpu->kvm)) - return kvm_xen_hypercall(vcpu); - - if (kvm_hv_hypercall_enabled(vcpu)) - return kvm_hv_hypercall(vcpu); - - nr = kvm_rax_read(vcpu); - a0 = kvm_rbx_read(vcpu); - a1 = kvm_rcx_read(vcpu); - a2 = kvm_rdx_read(vcpu); - a3 = kvm_rsi_read(vcpu); + unsigned long ret; trace_kvm_hypercall(nr, a0, a1, a2, a3); - op_64_bit = is_64_bit_hypercall(vcpu); if (!op_64_bit) { nr &= 0xFFFFFFFF; a0 &= 0xFFFFFFFF; @@ -10079,7 +10097,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) a3 &= 0xFFFFFFFF; } - if (static_call(kvm_x86_get_cpl)(vcpu) != 0) { + if (cpl) { ret = -KVM_EPERM; goto out; } @@ -10140,18 +10158,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ); vcpu->arch.complete_userspace_io = complete_hypercall_exit; + /* stat is incremented on completion. */ return 0; } default: ret = -KVM_ENOSYS; break; } + out: + ++vcpu->stat.hypercalls; + return ret; +} +EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall); + +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) +{ + unsigned long nr, a0, a1, a2, a3, ret; + int op_64_bit; + int cpl; + + if (kvm_xen_hypercall_enabled(vcpu->kvm)) + return kvm_xen_hypercall(vcpu); + + if (kvm_hv_hypercall_enabled(vcpu)) + return kvm_hv_hypercall(vcpu); + + nr = kvm_rax_read(vcpu); + a0 = kvm_rbx_read(vcpu); + a1 = kvm_rcx_read(vcpu); + a2 = kvm_rdx_read(vcpu); + a3 = kvm_rsi_read(vcpu); + op_64_bit = is_64_bit_hypercall(vcpu); + cpl = static_call(kvm_x86_get_cpl)(vcpu); + + ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl); + if (nr == KVM_HC_MAP_GPA_RANGE && !ret) + /* MAP_GPA tosses the request to the user space. */ + return 0; + if (!op_64_bit) ret = (u32)ret; kvm_rax_write(vcpu, ret); - ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); @@ -11486,6 +11535,10 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); __get_regs(vcpu, regs); vcpu_put(vcpu); @@ -11527,6 +11580,10 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); __set_regs(vcpu, regs); vcpu_put(vcpu); @@ -11599,6 +11656,10 @@ static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2) int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); __get_sregs(vcpu, sregs); vcpu_put(vcpu); @@ -11866,6 +11927,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, { int ret; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); ret = __set_sregs(vcpu, sregs); vcpu_put(vcpu); @@ -11983,7 +12048,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) struct fxregs_state *fxsave; if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return 0; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; vcpu_load(vcpu); @@ -12006,7 +12071,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) struct fxregs_state *fxsave; if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return 0; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; vcpu_load(vcpu); @@ -12532,6 +12597,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return -EINVAL; kvm->arch.vm_type = type; + kvm->arch.has_private_mem = + (type == KVM_X86_SW_PROTECTED_VM); ret = kvm_page_track_init(kvm); if (ret) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index a8b71803777baa..d80a4c6b5a3882 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -24,6 +24,8 @@ struct kvm_caps { bool has_bus_lock_exit; /* notify VM exit supported? */ bool has_notify_vmexit; + /* bit mask of VM types */ + u32 supported_vm_types; u64 supported_mce_cap; u64 supported_xcr0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 48f31dcd318a08..afbc99264ffa47 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -259,7 +259,6 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER union kvm_mmu_notifier_arg { - pte_t pte; unsigned long attributes; }; @@ -273,7 +272,6 @@ struct kvm_gfn_range { bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range); #endif enum { diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index f349e08a9dfe1a..d39ebb10caeb68 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -123,15 +123,6 @@ struct mmu_notifier_ops { unsigned long address); /* - * change_pte is called in cases that pte mapping to page is changed: - * for example, when ksm remaps pte to point to a new shared page. - */ - void (*change_pte)(struct mmu_notifier *subscription, - struct mm_struct *mm, - unsigned long address, - pte_t pte); - - /* * invalidate_range_start() and invalidate_range_end() must be * paired and are called only when the mmap_lock and/or the * locks protecting the reverse maps are held. If the subsystem @@ -392,8 +383,6 @@ extern int __mmu_notifier_clear_young(struct mm_struct *mm, unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); -extern void __mmu_notifier_change_pte(struct mm_struct *mm, - unsigned long address, pte_t pte); extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r); extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r); extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, @@ -439,13 +428,6 @@ static inline int mmu_notifier_test_young(struct mm_struct *mm, return 0; } -static inline void mmu_notifier_change_pte(struct mm_struct *mm, - unsigned long address, pte_t pte) -{ - if (mm_has_notifiers(mm)) - __mmu_notifier_change_pte(mm, address, pte); -} - static inline void mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { @@ -581,26 +563,6 @@ static inline void mmu_notifier_range_init_owner( __young; \ }) -/* - * set_pte_at_notify() sets the pte _after_ running the notifier. - * This is safe to start by updating the secondary MMUs, because the primary MMU - * pte invalidate must have already happened with a ptep_clear_flush() before - * set_pte_at_notify() has been invoked. Updating the secondary MMUs first is - * required when we change both the protection of the mapping from read-only to - * read-write and the pfn (like during copy on write page faults). Otherwise the - * old page would remain mapped readonly in the secondary MMUs after the new - * page is already writable by some CPU through the primary MMU. - */ -#define set_pte_at_notify(__mm, __address, __ptep, __pte) \ -({ \ - struct mm_struct *___mm = __mm; \ - unsigned long ___address = __address; \ - pte_t ___pte = __pte; \ - \ - mmu_notifier_change_pte(___mm, ___address, ___pte); \ - set_pte_at(___mm, ___address, __ptep, ___pte); \ -}) - #else /* CONFIG_MMU_NOTIFIER */ struct mmu_notifier_range { @@ -650,11 +612,6 @@ static inline int mmu_notifier_test_young(struct mm_struct *mm, return 0; } -static inline void mmu_notifier_change_pte(struct mm_struct *mm, - unsigned long address, pte_t pte) -{ -} - static inline void mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { @@ -693,7 +650,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush #define pudp_huge_clear_flush_notify pudp_huge_clear_flush -#define set_pte_at_notify set_pte_at static inline void mmu_notifier_synchronize(void) { diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 011fba6b55522d..74e40d5d4af424 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -456,21 +456,6 @@ TRACE_EVENT(kvm_unmap_hva_range, __entry->start, __entry->end) ); -TRACE_EVENT(kvm_set_spte_hva, - TP_PROTO(unsigned long hva), - TP_ARGS(hva), - - TP_STRUCT__entry( - __field( unsigned long, hva ) - ), - - TP_fast_assign( - __entry->hva = hva; - ), - - TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva) -); - TRACE_EVENT(kvm_age_hva, TP_PROTO(unsigned long start, unsigned long end), TP_ARGS(start, end), diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index e4834d23e1d1a2..1215bc299390bb 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -18,7 +18,7 @@ #include <linux/sched/coredump.h> #include <linux/export.h> #include <linux/rmap.h> /* anon_vma_prepare */ -#include <linux/mmu_notifier.h> /* set_pte_at_notify */ +#include <linux/mmu_notifier.h> #include <linux/swap.h> /* folio_free_swap */ #include <linux/ptrace.h> /* user_enable_single_step */ #include <linux/kdebug.h> /* notifier mechanism */ @@ -195,8 +195,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, flush_cache_page(vma, addr, pte_pfn(ptep_get(pvmw.pte))); ptep_clear_flush(vma, addr, pvmw.pte); if (new_page) - set_pte_at_notify(mm, addr, pvmw.pte, - mk_pte(new_page, vma->vm_page_prot)); + set_pte_at(mm, addr, pvmw.pte, + mk_pte(new_page, vma->vm_page_prot)); folio_remove_rmap_pte(old_folio, old_page, vma); if (!folio_mapped(old_folio)) diff --git a/mm/ksm.c b/mm/ksm.c index e1034bf1c937e9..f5138f43f0d27a 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1346,7 +1346,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct folio *folio, if (pte_write(entry)) entry = pte_wrprotect(entry); - set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry); + set_pte_at(mm, pvmw.address, pvmw.pte, entry); } *orig_pte = entry; err = 0; @@ -1448,7 +1448,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, * See Documentation/mm/mmu_notifier.rst */ ptep_clear_flush(vma, addr, ptep); - set_pte_at_notify(mm, addr, ptep, newpte); + set_pte_at(mm, addr, ptep, newpte); folio = page_folio(page); folio_remove_rmap_pte(folio, page, vma); diff --git a/mm/memory.c b/mm/memory.c index eea6e4984eaefa..b5453b86ec4b7e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3357,13 +3357,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) ptep_clear_flush(vma, vmf->address, vmf->pte); folio_add_new_anon_rmap(new_folio, vma, vmf->address); folio_add_lru_vma(new_folio, vma); - /* - * We call the notify macro here because, when using secondary - * mmu page tables (such as kvm shadow page tables), we want the - * new page to be mapped directly into the secondary page table. - */ BUG_ON(unshare && pte_write(entry)); - set_pte_at_notify(mm, vmf->address, vmf->pte, entry); + set_pte_at(mm, vmf->address, vmf->pte, entry); update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); if (old_folio) { /* diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 7e0712493d1f48..aecc71972a870f 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -666,13 +666,9 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, if (flush) { flush_cache_page(vma, addr, pte_pfn(orig_pte)); ptep_clear_flush(vma, addr, ptep); - set_pte_at_notify(mm, addr, ptep, entry); - update_mmu_cache(vma, addr, ptep); - } else { - /* No need to invalidate - it was non-present before */ - set_pte_at(mm, addr, ptep, entry); - update_mmu_cache(vma, addr, ptep); } + set_pte_at(mm, addr, ptep, entry); + update_mmu_cache(vma, addr, ptep); pte_unmap_unlock(ptep, ptl); *src = MIGRATE_PFN_MIGRATE; diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index ec3b068cbbe6b1..8982e6139d074b 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -424,23 +424,6 @@ int __mmu_notifier_test_young(struct mm_struct *mm, return young; } -void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, - pte_t pte) -{ - struct mmu_notifier *subscription; - int id; - - id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(subscription, - &mm->notifier_subscriptions->list, hlist, - srcu_read_lock_held(&srcu)) { - if (subscription->ops->change_pte) - subscription->ops->change_pte(subscription, mm, address, - pte); - } - srcu_read_unlock(&srcu, id); -} - static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions, const struct mmu_notifier_range *range) { diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ed4ad591f19353..de34dda95c6700 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -120,6 +120,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test +TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test TEST_GEN_PROGS_x86_64 += x86_64/amx_test diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 3e0db283a46ad1..8acca823768700 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -93,7 +93,6 @@ enum kvm_mem_region_type { struct kvm_vm { int mode; unsigned long type; - uint8_t subtype; int kvm_fd; int fd; unsigned int pgtable_levels; @@ -200,8 +199,8 @@ enum vm_guest_mode { struct vm_shape { uint32_t type; uint8_t mode; - uint8_t subtype; - uint16_t padding; + uint8_t pad0; + uint16_t pad1; }; kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t)); @@ -890,17 +889,15 @@ static inline struct kvm_vm *vm_create_barebones(void) return ____vm_create(VM_SHAPE_DEFAULT); } -#ifdef __x86_64__ -static inline struct kvm_vm *vm_create_barebones_protected_vm(void) +static inline struct kvm_vm *vm_create_barebones_type(unsigned long type) { const struct vm_shape shape = { .mode = VM_MODE_DEFAULT, - .type = KVM_X86_SW_PROTECTED_VM, + .type = type, }; return ____vm_create(shape); } -#endif static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) { diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 81ce37ec407dd1..74a59c7ce7edda 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -23,12 +23,6 @@ extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; -enum vm_guest_x86_subtype { - VM_SUBTYPE_NONE = 0, - VM_SUBTYPE_SEV, - VM_SUBTYPE_SEV_ES, -}; - /* Forced emulation prefix, used to invoke the emulator unconditionally. */ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h index 8a1bf88474c92f..82c11c81a95632 100644 --- a/tools/testing/selftests/kvm/include/x86_64/sev.h +++ b/tools/testing/selftests/kvm/include/x86_64/sev.h @@ -31,8 +31,9 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); void sev_vm_launch_finish(struct kvm_vm *vm); -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code, +struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, struct kvm_vcpu **cpu); +void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement); kvm_static_assert(SEV_RET_SUCCESS == 0); @@ -67,20 +68,8 @@ kvm_static_assert(SEV_RET_SUCCESS == 0); __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ }) -static inline void sev_vm_init(struct kvm_vm *vm) -{ - vm->arch.sev_fd = open_sev_dev_path_or_exit(); - - vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); -} - - -static inline void sev_es_vm_init(struct kvm_vm *vm) -{ - vm->arch.sev_fd = open_sev_dev_path_or_exit(); - - vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); -} +void sev_vm_init(struct kvm_vm *vm); +void sev_es_vm_init(struct kvm_vm *vm); static inline void sev_register_encrypted_memory(struct kvm_vm *vm, struct userspace_mem_region *region) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b2262b5fad9e79..9da388100f3a35 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -276,7 +276,6 @@ struct kvm_vm *____vm_create(struct vm_shape shape) vm->mode = shape.mode; vm->type = shape.type; - vm->subtype = shape.subtype; vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits; vm->va_bits = vm_guest_mode_params[vm->mode].va_bits; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 74a4c736c9ae1e..9f87ca8b7ab602 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -578,10 +578,11 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm) sync_global_to_guest(vm, host_cpu_is_intel); sync_global_to_guest(vm, host_cpu_is_amd); - if (vm->subtype == VM_SUBTYPE_SEV) - sev_vm_init(vm); - else if (vm->subtype == VM_SUBTYPE_SEV_ES) - sev_es_vm_init(vm); + if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + struct kvm_sev_init init = { 0 }; + + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } } void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) @@ -1081,9 +1082,12 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) void kvm_init_vm_address_properties(struct kvm_vm *vm) { - if (vm->subtype == VM_SUBTYPE_SEV || vm->subtype == VM_SUBTYPE_SEV_ES) { + if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT)); vm->gpa_tag_mask = vm->arch.c_bit; + } else { + vm->arch.sev_fd = -1; } } diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c index e248d3364b9c3b..d482029b600400 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/sev.c +++ b/tools/testing/selftests/kvm/lib/x86_64/sev.c @@ -35,6 +35,32 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio } } +void sev_vm_init(struct kvm_vm *vm) +{ + if (vm->type == KVM_X86_DEFAULT_VM) { + assert(vm->arch.sev_fd == -1); + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); + } else { + struct kvm_sev_init init = { 0 }; + assert(vm->type == KVM_X86_SEV_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } +} + +void sev_es_vm_init(struct kvm_vm *vm) +{ + if (vm->type == KVM_X86_DEFAULT_VM) { + assert(vm->arch.sev_fd == -1); + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); + } else { + struct kvm_sev_init init = { 0 }; + assert(vm->type == KVM_X86_SEV_ES_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } +} + void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) { struct kvm_sev_launch_start launch_start = { @@ -87,28 +113,30 @@ void sev_vm_launch_finish(struct kvm_vm *vm) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING); } -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code, +struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, struct kvm_vcpu **cpu) { struct vm_shape shape = { - .type = VM_TYPE_DEFAULT, .mode = VM_MODE_DEFAULT, - .subtype = policy & SEV_POLICY_ES ? VM_SUBTYPE_SEV_ES : - VM_SUBTYPE_SEV, + .type = type, }; struct kvm_vm *vm; struct kvm_vcpu *cpus[1]; - uint8_t measurement[512]; vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus); *cpu = cpus[0]; + return vm; +} + +void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement) +{ sev_vm_launch(vm, policy); - /* TODO: Validate the measurement is as expected. */ + if (!measurement) + measurement = alloca(256); + sev_vm_launch_measure(vm, measurement); sev_vm_launch_finish(vm); - - return vm; } diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index bd57d991e27d85..68c899d27561d8 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -339,7 +339,7 @@ static void test_invalid_memory_region_flags(void) #ifdef __x86_64__ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) - vm = vm_create_barebones_protected_vm(); + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); else #endif vm = vm_create_barebones(); @@ -462,7 +462,7 @@ static void test_add_private_memory_region(void) pr_info("Testing ADD of KVM_MEM_GUEST_MEMFD memory regions\n"); - vm = vm_create_barebones_protected_vm(); + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail"); test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail"); @@ -471,7 +471,7 @@ static void test_add_private_memory_region(void) test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail"); close(memfd); - vm2 = vm_create_barebones_protected_vm(); + vm2 = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0); test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should fail"); @@ -499,7 +499,7 @@ static void test_add_overlapping_private_memory_regions(void) pr_info("Testing ADD of overlapping KVM_MEM_GUEST_MEMFD memory regions\n"); - vm = vm_create_barebones_protected_vm(); + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0); diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c new file mode 100644 index 00000000000000..7a4a61be119b15 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/kvm.h> +#include <linux/psp-sev.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <stdlib.h> +#include <errno.h> +#include <pthread.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "kselftest.h" + +#define SVM_SEV_FEAT_DEBUG_SWAP 32u + +/* + * Some features may have hidden dependencies, or may only work + * for certain VM types. Err on the side of safety and don't + * expect that all supported features can be passed one by one + * to KVM_SEV_INIT2. + * + * (Well, right now there's only one...) + */ +#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP + +int kvm_fd; +u64 supported_vmsa_features; +bool have_sev_es; + +static int __sev_ioctl(int vm_fd, int cmd_id, void *data) +{ + struct kvm_sev_cmd cmd = { + .id = cmd_id, + .data = (uint64_t)data, + .sev_fd = open_sev_dev_path_or_exit(), + }; + int ret; + + ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd); + TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS, + "%d failed: fw error: %d\n", + cmd_id, cmd.error); + + return ret; +} + +static void test_init2(unsigned long vm_type, struct kvm_sev_init *init) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones_type(vm_type); + ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); + TEST_ASSERT(ret == 0, + "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d", + ret, errno); + kvm_vm_free(vm); +} + +static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones_type(vm_type); + ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "KVM_SEV_INIT2 should fail, %s.", + msg); + kvm_vm_free(vm); +} + +void test_vm_types(void) +{ + test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){}); + + /* + * TODO: check that unsupported types cannot be created. Probably + * a separate selftest. + */ + if (have_sev_es) + test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){}); + + test_init2_invalid(0, &(struct kvm_sev_init){}, + "VM type is KVM_X86_DEFAULT_VM"); + if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) + test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){}, + "VM type is KVM_X86_SW_PROTECTED_VM"); +} + +void test_flags(uint32_t vm_type) +{ + int i; + + for (i = 0; i < 32; i++) + test_init2_invalid(vm_type, + &(struct kvm_sev_init){ .flags = BIT(i) }, + "invalid flag"); +} + +void test_features(uint32_t vm_type, uint64_t supported_features) +{ + int i; + + for (i = 0; i < 64; i++) { + if (!(supported_features & (1u << i))) + test_init2_invalid(vm_type, + &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }, + "unknown feature"); + else if (KNOWN_FEATURES & (1u << i)) + test_init2(vm_type, + &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }); + } +} + +int main(int argc, char *argv[]) +{ + int kvm_fd = open_kvm_dev_path_or_exit(); + bool have_sev; + + TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV, + KVM_X86_SEV_VMSA_FEATURES) == 0); + kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV, + KVM_X86_SEV_VMSA_FEATURES, + &supported_vmsa_features); + + have_sev = kvm_cpu_has(X86_FEATURE_SEV); + TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)), + "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", + kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM); + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)); + have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES); + + TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)), + "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", + kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM); + + test_vm_types(); + + test_flags(KVM_X86_SEV_VM); + if (have_sev_es) + test_flags(KVM_X86_SEV_ES_VM); + + test_features(KVM_X86_SEV_VM, 0); + if (have_sev_es) + test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c index 026779f3ed06de..7c70c0da4fb745 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c @@ -4,6 +4,7 @@ #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> +#include <math.h> #include "test_util.h" #include "kvm_util.h" @@ -13,6 +14,8 @@ #include "sev.h" +#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM) + static void guest_sev_es_code(void) { /* TODO: Check CPUID after GHCB-based hypercall support is added. */ @@ -35,13 +38,98 @@ static void guest_sev_code(void) GUEST_DONE(); } +/* Stash state passed via VMSA before any compiled code runs. */ +extern void guest_code_xsave(void); +asm("guest_code_xsave:\n" + "mov $-1, %eax\n" + "mov $-1, %edx\n" + "xsave (%rdi)\n" + "jmp guest_sev_es_code"); + +static void compare_xsave(u8 *from_host, u8 *from_guest) +{ + int i; + bool bad = false; + for (i = 0; i < 4095; i++) { + if (from_host[i] != from_guest[i]) { + printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]); + bad = true; + } + } + + if (bad) + abort(); +} + +static void test_sync_vmsa(uint32_t policy) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t gva; + void *hva; + + double x87val = M_PI; + struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; + struct kvm_sregs sregs; + struct kvm_xcrs xcrs = { + .nr_xcrs = 1, + .xcrs[0].xcr = 0, + .xcrs[0].value = XFEATURE_MASK_X87_AVX, + }; + + vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); + gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, + MEM_REGION_TEST_DATA); + hva = addr_gva2hva(vm, gva); + + vcpu_args_set(vcpu, 1, gva); + + vcpu_sregs_get(vcpu, &sregs); + sregs.cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXSAVE; + vcpu_sregs_set(vcpu, &sregs); + + vcpu_xcrs_set(vcpu, &xcrs); + asm("fninit\n" + "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n" + "fldl %3\n" + "xsave (%2)\n" + "fstp %%st\n" + : "=m"(xsave) + : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val) + : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"); + vcpu_xsave_set(vcpu, &xsave); + + vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL); + + /* This page is shared, so make it decrypted. */ + memset(hva, 0, 4096); + + vcpu_run(vcpu); + + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, + "Wanted SYSTEM_EVENT, got %s", + exit_reason_str(vcpu->run->exit_reason)); + TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM); + TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1); + TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ); + + compare_xsave((u8 *)&xsave, (u8 *)hva); + + kvm_vm_free(vm); +} + static void test_sev(void *guest_code, uint64_t policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - vm = vm_sev_create_with_one_vcpu(policy, guest_code, &vcpu); + uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; + + vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu); + + /* TODO: Validate the measurement is as expected. */ + vm_sev_launch(vm, policy, NULL); for (;;) { vcpu_run(vcpu); @@ -82,6 +170,12 @@ int main(int argc, char *argv[]) if (kvm_cpu_has(X86_FEATURE_SEV_ES)) { test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG); test_sev(guest_sev_es_code, SEV_POLICY_ES); + + if (kvm_has_cap(KVM_CAP_XCRS) && + (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) { + test_sync_vmsa(0); + test_sync_vmsa(SEV_POLICY_NO_DBG); + } } return 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 92c70c99420af1..b406ed86d6c590 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -583,8 +583,6 @@ static void kvm_null_fn(void) } #define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn) -static const union kvm_mmu_notifier_arg KVM_MMU_NOTIFIER_NO_ARG; - /* Iterate over each memslot intersecting [start, last] (inclusive) range */ #define kvm_for_each_memslot_in_hva_range(node, slots, start, last) \ for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \ @@ -670,14 +668,12 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm, static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn, unsigned long start, unsigned long end, - union kvm_mmu_notifier_arg arg, gfn_handler_t handler) { struct kvm *kvm = mmu_notifier_to_kvm(mn); const struct kvm_mmu_notifier_range range = { .start = start, .end = end, - .arg = arg, .handler = handler, .on_lock = (void *)kvm_null_fn, .flush_on_ret = true, @@ -705,48 +701,6 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn return __kvm_handle_hva_range(kvm, &range).ret; } -static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - /* - * Skipping invalid memslots is correct if and only change_pte() is - * surrounded by invalidate_range_{start,end}(), which is currently - * guaranteed by the primary MMU. If that ever changes, KVM needs to - * unmap the memslot instead of skipping the memslot to ensure that KVM - * doesn't hold references to the old PFN. - */ - WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); - - if (range->slot->flags & KVM_MEMSLOT_INVALID) - return false; - - return kvm_set_spte_gfn(kvm, range); -} - -static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address, - pte_t pte) -{ - struct kvm *kvm = mmu_notifier_to_kvm(mn); - const union kvm_mmu_notifier_arg arg = { .pte = pte }; - - trace_kvm_set_spte_hva(address); - - /* - * .change_pte() must be surrounded by .invalidate_range_{start,end}(). - * If mmu_invalidate_in_progress is zero, then no in-progress - * invalidations, including this one, found a relevant memslot at - * start(); rechecking memslots here is unnecessary. Note, a false - * positive (count elevated by a different invalidation) is sub-optimal - * but functionally ok. - */ - WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); - if (!READ_ONCE(kvm->mmu_invalidate_in_progress)) - return; - - kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn); -} - void kvm_mmu_invalidate_begin(struct kvm *kvm) { lockdep_assert_held_write(&kvm->mmu_lock); @@ -909,8 +863,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, { trace_kvm_age_hva(start, end); - return kvm_handle_hva_range(mn, start, end, KVM_MMU_NOTIFIER_NO_ARG, - kvm_age_gfn); + return kvm_handle_hva_range(mn, start, end, kvm_age_gfn); } static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, @@ -963,7 +916,6 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { .clear_flush_young = kvm_mmu_notifier_clear_flush_young, .clear_young = kvm_mmu_notifier_clear_young, .test_young = kvm_mmu_notifier_test_young, - .change_pte = kvm_mmu_notifier_change_pte, .release = kvm_mmu_notifier_release, }; |