Merge tag 'kvm-s390-next-6.5-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD

* New uvdevice secret API * New CMM selftest * cmm fix * diag 9c racy access of target cpu fix
author: Paolo Bonzini <pbonzini@redhat.com> 2023-07-01 07:00:11 -0400
committer: Paolo Bonzini <pbonzini@redhat.com> 2023-07-01 07:00:11 -0400
commit: a443e2609c01479c7c0c3367059d7b9f2e8a6697 (patch)
tree: 90c9c2e6acfdf539523301f239fbac69dff4ed0f
parent: 6995e2de6891c724bfeb2db33d7b87775f913ad1 (diff)
parent: db54dfc9f71cd2df7afd1e88535ef6099cb0333e (diff)
download: net-next-a443e2609c01479c7c0c3367059d7b9f2e8a6697.tar.gz
11 files changed, 1100 insertions, 49 deletions
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 0a077c0a205630..1e66d2cbb0965a 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -47,6 +47,10 @@ void uv_query_info(void)
 		uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len;
 		uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver;
 		uv_info.supp_att_pflags = uvcb.supp_att_pflags;
+		uv_info.supp_add_secret_req_ver = uvcb.supp_add_secret_req_ver;
+		uv_info.supp_add_secret_pcf = uvcb.supp_add_secret_pcf;
+		uv_info.supp_secret_types = uvcb.supp_secret_types;
+		uv_info.max_secrets = uvcb.max_secrets;
 	}
 
 #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 28a9ad57b6f16c..d6bb2f4f78d1f4 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -58,6 +58,9 @@
 #define UVC_CMD_SET_SHARED_ACCESS	0x1000
 #define UVC_CMD_REMOVE_SHARED_ACCESS	0x1001
 #define UVC_CMD_RETR_ATTEST		0x1020
+#define UVC_CMD_ADD_SECRET		0x1031
+#define UVC_CMD_LIST_SECRETS		0x1033
+#define UVC_CMD_LOCK_SECRETS		0x1034
 
 /* Bits in installed uv calls */
 enum uv_cmds_inst {
@@ -88,6 +91,9 @@ enum uv_cmds_inst {
 	BIT_UVC_CMD_DUMP_CPU = 26,
 	BIT_UVC_CMD_DUMP_COMPLETE = 27,
 	BIT_UVC_CMD_RETR_ATTEST = 28,
+	BIT_UVC_CMD_ADD_SECRET = 29,
+	BIT_UVC_CMD_LIST_SECRETS = 30,
+	BIT_UVC_CMD_LOCK_SECRETS = 31,
 };
 
 enum uv_feat_ind {
@@ -117,7 +123,7 @@ struct uv_cb_qui {
 	u32 reserved70[3];			/* 0x0070 */
 	u32 max_num_sec_conf;			/* 0x007c */
 	u64 max_guest_stor_addr;		/* 0x0080 */
-	u8  reserved88[158 - 136];		/* 0x0088 */
+	u8  reserved88[0x9e - 0x88];		/* 0x0088 */
 	u16 max_guest_cpu_id;			/* 0x009e */
 	u64 uv_feature_indications;		/* 0x00a0 */
 	u64 reserveda8;				/* 0x00a8 */
@@ -129,7 +135,12 @@ struct uv_cb_qui {
 	u64 reservedd8;				/* 0x00d8 */
 	u64 supp_att_req_hdr_ver;		/* 0x00e0 */
 	u64 supp_att_pflags;			/* 0x00e8 */
-	u8 reservedf0[256 - 240];		/* 0x00f0 */
+	u64 reservedf0;				/* 0x00f0 */
+	u64 supp_add_secret_req_ver;		/* 0x00f8 */
+	u64 supp_add_secret_pcf;		/* 0x0100 */
+	u64 supp_secret_types;			/* 0x0180 */
+	u16 max_secrets;			/* 0x0110 */
+	u8 reserved112[0x120 - 0x112];		/* 0x0112 */
 } __packed __aligned(8);
 
 /* Initialize Ultravisor */
@@ -292,6 +303,19 @@ struct uv_cb_dump_complete {
 	u64 reserved30[5];
 } __packed __aligned(8);
 
+/*
+ * A common UV call struct for pv guests that contains a single address
+ * Examples:
+ * Add Secret
+ * List Secrets
+ */
+struct uv_cb_guest_addr {
+	struct uv_cb_header header;
+	u64 reserved08[3];
+	u64 addr;
+	u64 reserved28[4];
+} __packed __aligned(8);
+
 static inline int __uv_call(unsigned long r1, unsigned long r2)
 {
 	int cc;
@@ -365,6 +389,10 @@ struct uv_info {
 	unsigned long conf_dump_finalize_len;
 	unsigned long supp_att_req_hdr_ver;
 	unsigned long supp_att_pflags;
+	unsigned long supp_add_secret_req_ver;
+	unsigned long supp_add_secret_pcf;
+	unsigned long supp_secret_types;
+	unsigned short max_secrets;
 };
 
 extern struct uv_info uv_info;
diff --git a/arch/s390/include/uapi/asm/uvdevice.h b/arch/s390/include/uapi/asm/uvdevice.h
index 10a5ac918e02d9..b9c2f14a6af31a 100644
--- a/arch/s390/include/uapi/asm/uvdevice.h
+++ b/arch/s390/include/uapi/asm/uvdevice.h
@@ -32,6 +32,33 @@ struct uvio_attest {
 	__u16 reserved136;				/* 0x0136 */
 };
 
+/**
+ * uvio_uvdev_info - Information of supported functions
+ * @supp_uvio_cmds - supported IOCTLs by this device
+ * @supp_uv_cmds - supported UVCs corresponding to the IOCTL
+ *
+ * UVIO request to get information about supported request types by this
+ * uvdevice and the Ultravisor.  Everything is output. Bits are in LSB0
+ * ordering.  If the bit is set in both, @supp_uvio_cmds and @supp_uv_cmds, the
+ * uvdevice and the Ultravisor support that call.
+ *
+ * Note that bit 0 (UVIO_IOCTL_UVDEV_INFO_NR) is always zero for `supp_uv_cmds`
+ * as there is no corresponding UV-call.
+ */
+struct uvio_uvdev_info {
+	/*
+	 * If bit `n` is set, this device supports the IOCTL with nr `n`.
+	 */
+	__u64 supp_uvio_cmds;
+	/*
+	 * If bit `n` is set, the Ultravisor(UV) supports the UV-call
+	 * corresponding to the IOCTL with nr `n` in the calling contextx (host
+	 * or guest).  The value is only valid if the corresponding bit in
+	 * @supp_uvio_cmds is set as well.
+	 */
+	__u64 supp_uv_cmds;
+};
+
 /*
  * The following max values define an upper length for the IOCTL in/out buffers.
  * However, they do not represent the maximum the Ultravisor allows which is
@@ -42,10 +69,34 @@ struct uvio_attest {
 #define UVIO_ATT_ARCB_MAX_LEN		0x100000
 #define UVIO_ATT_MEASUREMENT_MAX_LEN	0x8000
 #define UVIO_ATT_ADDITIONAL_MAX_LEN	0x8000
+#define UVIO_ADD_SECRET_MAX_LEN		0x100000
+#define UVIO_LIST_SECRETS_LEN		0x1000
 
 #define UVIO_DEVICE_NAME "uv"
 #define UVIO_TYPE_UVC 'u'
 
-#define UVIO_IOCTL_ATT _IOWR(UVIO_TYPE_UVC, 0x01, struct uvio_ioctl_cb)
+enum UVIO_IOCTL_NR {
+	UVIO_IOCTL_UVDEV_INFO_NR = 0x00,
+	UVIO_IOCTL_ATT_NR,
+	UVIO_IOCTL_ADD_SECRET_NR,
+	UVIO_IOCTL_LIST_SECRETS_NR,
+	UVIO_IOCTL_LOCK_SECRETS_NR,
+	/* must be the last entry */
+	UVIO_IOCTL_NUM_IOCTLS
+};
+
+#define UVIO_IOCTL(nr)		_IOWR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb)
+#define UVIO_IOCTL_UVDEV_INFO	UVIO_IOCTL(UVIO_IOCTL_UVDEV_INFO_NR)
+#define UVIO_IOCTL_ATT		UVIO_IOCTL(UVIO_IOCTL_ATT_NR)
+#define UVIO_IOCTL_ADD_SECRET	UVIO_IOCTL(UVIO_IOCTL_ADD_SECRET_NR)
+#define UVIO_IOCTL_LIST_SECRETS	UVIO_IOCTL(UVIO_IOCTL_LIST_SECRETS_NR)
+#define UVIO_IOCTL_LOCK_SECRETS	UVIO_IOCTL(UVIO_IOCTL_LOCK_SECRETS_NR)
+
+#define UVIO_SUPP_CALL(nr)	(1ULL << (nr))
+#define UVIO_SUPP_UDEV_INFO	UVIO_SUPP_CALL(UVIO_IOCTL_UDEV_INFO_NR)
+#define UVIO_SUPP_ATT		UVIO_SUPP_CALL(UVIO_IOCTL_ATT_NR)
+#define UVIO_SUPP_ADD_SECRET	UVIO_SUPP_CALL(UVIO_IOCTL_ADD_SECRET_NR)
+#define UVIO_SUPP_LIST_SECRETS	UVIO_SUPP_CALL(UVIO_IOCTL_LIST_SECRETS_NR)
+#define UVIO_SUPP_LOCK_SECRETS	UVIO_SUPP_CALL(UVIO_IOCTL_LOCK_SECRETS_NR)
 
 #endif /* __S390_ASM_UVDEVICE_H */
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index cb2ee06df286cd..273a0281a18975 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -23,12 +23,20 @@
 int __bootdata_preserved(prot_virt_guest);
 #endif
 
+/*
+ * uv_info contains both host and guest information but it's currently only
+ * expected to be used within modules if it's the KVM module or for
+ * any PV guest module.
+ *
+ * The kernel itself will write these values once in uv_query_info()
+ * and then make some of them readable via a sysfs interface.
+ */
 struct uv_info __bootdata_preserved(uv_info);
+EXPORT_SYMBOL(uv_info);
 
 #if IS_ENABLED(CONFIG_KVM)
 int __bootdata_preserved(prot_virt_host);
 EXPORT_SYMBOL(prot_virt_host);
-EXPORT_SYMBOL(uv_info);
 
 static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len)
 {
@@ -460,13 +468,13 @@ EXPORT_SYMBOL_GPL(arch_make_page_accessible);
 
 #if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
 static ssize_t uv_query_facilities(struct kobject *kobj,
-				   struct kobj_attribute *attr, char *page)
+				   struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
-			uv_info.inst_calls_list[0],
-			uv_info.inst_calls_list[1],
-			uv_info.inst_calls_list[2],
-			uv_info.inst_calls_list[3]);
+	return sysfs_emit(buf, "%lx\n%lx\n%lx\n%lx\n",
+			  uv_info.inst_calls_list[0],
+			  uv_info.inst_calls_list[1],
+			  uv_info.inst_calls_list[2],
+			  uv_info.inst_calls_list[3]);
 }
 
 static struct kobj_attribute uv_query_facilities_attr =
@@ -491,30 +499,27 @@ static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr =
 	__ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL);
 
 static ssize_t uv_query_dump_cpu_len(struct kobject *kobj,
-				     struct kobj_attribute *attr, char *page)
+				     struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%lx\n",
-			uv_info.guest_cpu_stor_len);
+	return sysfs_emit(buf, "%lx\n", uv_info.guest_cpu_stor_len);
 }
 
 static struct kobj_attribute uv_query_dump_cpu_len_attr =
 	__ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL);
 
 static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj,
-					       struct kobj_attribute *attr, char *page)
+					       struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%lx\n",
-			uv_info.conf_dump_storage_state_len);
+	return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_storage_state_len);
 }
 
 static struct kobj_attribute uv_query_dump_storage_state_len_attr =
 	__ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL);
 
 static ssize_t uv_query_dump_finalize_len(struct kobject *kobj,
-					  struct kobj_attribute *attr, char *page)
+					  struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%lx\n",
-			uv_info.conf_dump_finalize_len);
+	return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_finalize_len);
 }
 
 static struct kobj_attribute uv_query_dump_finalize_len_attr =
@@ -530,53 +535,86 @@ static struct kobj_attribute uv_query_feature_indications_attr =
 	__ATTR(feature_indications, 0444, uv_query_feature_indications, NULL);
 
 static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
-				       struct kobj_attribute *attr, char *page)
+				       struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%d\n",
-			uv_info.max_guest_cpu_id + 1);
+	return sysfs_emit(buf, "%d\n", uv_info.max_guest_cpu_id + 1);
 }
 
 static struct kobj_attribute uv_query_max_guest_cpus_attr =
 	__ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
 
 static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
-				      struct kobj_attribute *attr, char *page)
+				      struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%d\n",
-			uv_info.max_num_sec_conf);
+	return sysfs_emit(buf, "%d\n", uv_info.max_num_sec_conf);
 }
 
 static struct kobj_attribute uv_query_max_guest_vms_attr =
 	__ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
 
 static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
-				       struct kobj_attribute *attr, char *page)
+				       struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%lx\n",
-			uv_info.max_sec_stor_addr);
+	return sysfs_emit(buf, "%lx\n", uv_info.max_sec_stor_addr);
 }
 
 static struct kobj_attribute uv_query_max_guest_addr_attr =
 	__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
 
 static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj,
-					     struct kobj_attribute *attr, char *page)
+					     struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_req_hdr_ver);
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_att_req_hdr_ver);
 }
 
 static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr =
 	__ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL);
 
 static ssize_t uv_query_supp_att_pflags(struct kobject *kobj,
-					struct kobj_attribute *attr, char *page)
+					struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_pflags);
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_att_pflags);
 }
 
 static struct kobj_attribute uv_query_supp_att_pflags_attr =
 	__ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL);
 
+static ssize_t uv_query_supp_add_secret_req_ver(struct kobject *kobj,
+						struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_req_ver);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_req_ver_attr =
+	__ATTR(supp_add_secret_req_ver, 0444, uv_query_supp_add_secret_req_ver, NULL);
+
+static ssize_t uv_query_supp_add_secret_pcf(struct kobject *kobj,
+					    struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_pcf_attr =
+	__ATTR(supp_add_secret_pcf, 0444, uv_query_supp_add_secret_pcf, NULL);
+
+static ssize_t uv_query_supp_secret_types(struct kobject *kobj,
+					  struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_secret_types);
+}
+
+static struct kobj_attribute uv_query_supp_secret_types_attr =
+	__ATTR(supp_secret_types, 0444, uv_query_supp_secret_types, NULL);
+
+static ssize_t uv_query_max_secrets(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", uv_info.max_secrets);
+}
+
+static struct kobj_attribute uv_query_max_secrets_attr =
+	__ATTR(max_secrets, 0444, uv_query_max_secrets, NULL);
+
 static struct attribute *uv_query_attrs[] = {
 	&uv_query_facilities_attr.attr,
 	&uv_query_feature_indications_attr.attr,
@@ -590,6 +628,10 @@ static struct attribute *uv_query_attrs[] = {
 	&uv_query_dump_cpu_len_attr.attr,
 	&uv_query_supp_att_req_hdr_ver_attr.attr,
 	&uv_query_supp_att_pflags_attr.attr,
+	&uv_query_supp_add_secret_req_ver_attr.attr,
+	&uv_query_supp_add_secret_pcf_attr.attr,
+	&uv_query_supp_secret_types_attr.attr,
+	&uv_query_max_secrets_attr.attr,
 	NULL,
 };
 
@@ -598,18 +640,18 @@ static struct attribute_group uv_query_attr_group = {
 };
 
 static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
-				     struct kobj_attribute *attr, char *page)
+				     struct kobj_attribute *attr, char *buf)
 {
 	int val = 0;
 
 #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
 	val = prot_virt_guest;
 #endif
-	return scnprintf(page, PAGE_SIZE, "%d\n", val);
+	return sysfs_emit(buf, "%d\n", val);
 }
 
 static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
-				    struct kobj_attribute *attr, char *page)
+				    struct kobj_attribute *attr, char *buf)
 {
 	int val = 0;
 
@@ -617,7 +659,7 @@ static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
 	val = prot_virt_host;
 #endif
 
-	return scnprintf(page, PAGE_SIZE, "%d\n", val);
+	return sysfs_emit(buf, "%d\n", val);
 }
 
 static struct kobj_attribute uv_prot_virt_guest =
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 807fa9da1e721d..3c65b8258ae67a 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -166,6 +166,7 @@ static int diag9c_forwarding_overrun(void)
 static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
 {
 	struct kvm_vcpu *tcpu;
+	int tcpu_cpu;
 	int tid;
 
 	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
@@ -181,14 +182,15 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
 		goto no_yield;
 
 	/* target guest VCPU already running */
-	if (READ_ONCE(tcpu->cpu) >= 0) {
+	tcpu_cpu = READ_ONCE(tcpu->cpu);
+	if (tcpu_cpu >= 0) {
 		if (!diag9c_forwarding_hz || diag9c_forwarding_overrun())
 			goto no_yield;
 
 		/* target host CPU already running */
-		if (!vcpu_is_preempted(tcpu->cpu))
+		if (!vcpu_is_preempted(tcpu_cpu))
 			goto no_yield;
-		smp_yield_cpu(tcpu->cpu);
+		smp_yield_cpu(tcpu_cpu);
 		VCPU_EVENT(vcpu, 5,
 			   "diag time slice end directed to %d: yield forwarded",
 			   tid);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 17b81659cdb20c..67001969646484 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2156,6 +2156,10 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
 		ofs = 0;
 	}
+
+	if (cur_gfn < ms->base_gfn)
+		ofs = 0;
+
 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
 	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 8d6b765abf29bf..0333ee482eb894 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -177,7 +177,8 @@ static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
 			    sizeof(struct kvm_s390_apcb0)))
 		return -EFAULT;
 
-	bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb0));
+	bitmap_and(apcb_s, apcb_s, apcb_h,
+		   BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0));
 
 	return 0;
 }
@@ -203,7 +204,8 @@ static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
 			    sizeof(struct kvm_s390_apcb1)))
 		return -EFAULT;
 
-	bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb1));
+	bitmap_and(apcb_s, apcb_s, apcb_h,
+		   BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1));
 
 	return 0;
 }
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index 80c4e5101c9712..8a03af5ee5b300 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -96,7 +96,7 @@ config SCLP_OFB
 config S390_UV_UAPI
 	def_tristate m
 	prompt "Ultravisor userspace API"
-	depends on S390
+	depends on S390 && (KVM || PROTECTED_VIRTUALIZATION_GUEST)
 	help
 	  Selecting exposes parts of the UV interface to userspace
 	  by providing a misc character device at /dev/uv.
diff --git a/drivers/s390/char/uvdevice.c b/drivers/s390/char/uvdevice.c
index 1d40457c7b10e9..144cd2e035909b 100644
--- a/drivers/s390/char/uvdevice.c
+++ b/drivers/s390/char/uvdevice.c
@@ -32,6 +32,55 @@
 #include <asm/uvdevice.h>
 #include <asm/uv.h>
 
+#define BIT_UVIO_INTERNAL U32_MAX
+/* Mapping from IOCTL-nr to UVC-bit */
+static const u32 ioctl_nr_to_uvc_bit[] __initconst = {
+	[UVIO_IOCTL_UVDEV_INFO_NR] = BIT_UVIO_INTERNAL,
+	[UVIO_IOCTL_ATT_NR] = BIT_UVC_CMD_RETR_ATTEST,
+	[UVIO_IOCTL_ADD_SECRET_NR] = BIT_UVC_CMD_ADD_SECRET,
+	[UVIO_IOCTL_LIST_SECRETS_NR] = BIT_UVC_CMD_LIST_SECRETS,
+	[UVIO_IOCTL_LOCK_SECRETS_NR] = BIT_UVC_CMD_LOCK_SECRETS,
+};
+
+static_assert(ARRAY_SIZE(ioctl_nr_to_uvc_bit) == UVIO_IOCTL_NUM_IOCTLS);
+
+static struct uvio_uvdev_info uvdev_info = {
+	.supp_uvio_cmds = GENMASK_ULL(UVIO_IOCTL_NUM_IOCTLS - 1, 0),
+};
+
+static void __init set_supp_uv_cmds(unsigned long *supp_uv_cmds)
+{
+	int i;
+
+	for (i = 0; i < UVIO_IOCTL_NUM_IOCTLS; i++) {
+		if (ioctl_nr_to_uvc_bit[i] == BIT_UVIO_INTERNAL)
+			continue;
+		if (!test_bit_inv(ioctl_nr_to_uvc_bit[i], uv_info.inst_calls_list))
+			continue;
+		__set_bit(i, supp_uv_cmds);
+	}
+}
+
+/**
+ * uvio_uvdev_info() - get information about the uvdevice
+ *
+ * @uv_ioctl: ioctl control block
+ *
+ * Lists all IOCTLs that are supported by this uvdevice
+ */
+static int uvio_uvdev_info(struct uvio_ioctl_cb *uv_ioctl)
+{
+	void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr;
+
+	if (uv_ioctl->argument_len < sizeof(uvdev_info))
+		return -EINVAL;
+	if (copy_to_user(user_buf_arg, &uvdev_info, sizeof(uvdev_info)))
+		return -EFAULT;
+
+	uv_ioctl->uv_rc = UVC_RC_EXECUTED;
+	return 0;
+}
+
 static int uvio_build_uvcb_attest(struct uv_cb_attest *uvcb_attest, u8 *arcb,
 				  u8 *meas, u8 *add_data, struct uvio_attest *uvio_attest)
 {
@@ -185,8 +234,161 @@ out:
 	return ret;
 }
 
-static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *argp)
+/** uvio_add_secret() - perform an Add Secret UVC
+ *
+ * @uv_ioctl: ioctl control block
+ *
+ * uvio_add_secret() performs the Add Secret Ultravisor Call.
+ *
+ * The given userspace argument address and size are verified to be
+ * valid but every other check is made by the Ultravisor
+ * (UV). Therefore UV errors won't result in a negative return
+ * value. The request is then copied to kernelspace, the UV-call is
+ * performed and the results are copied back to userspace.
+ *
+ * The argument has to point to an Add Secret Request Control Block
+ * which is an encrypted and cryptographically verified request that
+ * inserts a protected guest's secrets into the Ultravisor for later
+ * use.
+ *
+ * If the Add Secret UV facility is not present, UV will return
+ * invalid command rc. This won't be fenced in the driver and does not
+ * result in a negative return value.
+ *
+ * Context: might sleep
+ *
+ * Return: 0 on success or a negative error code on error.
+ */
+static int uvio_add_secret(struct uvio_ioctl_cb *uv_ioctl)
 {
+	void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr;
+	struct uv_cb_guest_addr uvcb = {
+		.header.len = sizeof(uvcb),
+		.header.cmd = UVC_CMD_ADD_SECRET,
+	};
+	void *asrcb = NULL;
+	int ret;
+
+	if (uv_ioctl->argument_len > UVIO_ADD_SECRET_MAX_LEN)
+		return -EINVAL;
+	if (uv_ioctl->argument_len == 0)
+		return -EINVAL;
+
+	asrcb = kvzalloc(uv_ioctl->argument_len, GFP_KERNEL);
+	if (!asrcb)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(asrcb, user_buf_arg, uv_ioctl->argument_len))
+		goto out;
+
+	ret = 0;
+	uvcb.addr = (u64)asrcb;
+	uv_call_sched(0, (u64)&uvcb);
+	uv_ioctl->uv_rc = uvcb.header.rc;
+	uv_ioctl->uv_rrc = uvcb.header.rrc;
+
+out:
+	kvfree(asrcb);
+	return ret;
+}
+
+/** uvio_list_secrets() - perform a List Secret UVC
+ * @uv_ioctl: ioctl control block
+ *
+ * uvio_list_secrets() performs the List Secret Ultravisor Call. It verifies
+ * that the given userspace argument address is valid and its size is sane.
+ * Every other check is made by the Ultravisor (UV) and won't result in a
+ * negative return value. It builds the request, performs the UV-call, and
+ * copies the result to userspace.
+ *
+ * The argument specifies the location for the result of the UV-Call.
+ *
+ * If the List Secrets UV facility is not present, UV will return invalid
+ * command rc. This won't be fenced in the driver and does not result in a
+ * negative return value.
+ *
+ * Context: might sleep
+ *
+ * Return: 0 on success or a negative error code on error.
+ */
+static int uvio_list_secrets(struct uvio_ioctl_cb *uv_ioctl)
+{
+	void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr;
+	struct uv_cb_guest_addr uvcb = {
+		.header.len = sizeof(uvcb),
+		.header.cmd = UVC_CMD_LIST_SECRETS,
+	};
+	void *secrets = NULL;
+	int ret = 0;
+
+	if (uv_ioctl->argument_len != UVIO_LIST_SECRETS_LEN)
+		return -EINVAL;
+
+	secrets = kvzalloc(UVIO_LIST_SECRETS_LEN, GFP_KERNEL);
+	if (!secrets)
+		return -ENOMEM;
+
+	uvcb.addr = (u64)secrets;
+	uv_call_sched(0, (u64)&uvcb);
+	uv_ioctl->uv_rc = uvcb.header.rc;
+	uv_ioctl->uv_rrc = uvcb.header.rrc;
+
+	if (copy_to_user(user_buf_arg, secrets, UVIO_LIST_SECRETS_LEN))
+		ret = -EFAULT;
+
+	kvfree(secrets);
+	return ret;
+}
+
+/** uvio_lock_secrets() - perform a Lock Secret Store UVC
+ * @uv_ioctl: ioctl control block
+ *
+ * uvio_lock_secrets() performs the Lock Secret Store Ultravisor Call. It
+ * performs the UV-call and copies the return codes to the ioctl control block.
+ * After this call was dispatched successfully every following Add Secret UVC
+ * and Lock Secrets UVC will fail with return code 0x102.
+ *
+ * The argument address and size must be 0.
+ *
+ * If the Lock Secrets UV facility is not present, UV will return invalid
+ * command rc. This won't be fenced in the driver and does not result in a
+ * negative return value.
+ *
+ * Context: might sleep
+ *
+ * Return: 0 on success or a negative error code on error.
+ */
+static int uvio_lock_secrets(struct uvio_ioctl_cb *ioctl)
+{
+	struct uv_cb_nodata uvcb = {
+		.header.len = sizeof(uvcb),
+		.header.cmd = UVC_CMD_LOCK_SECRETS,
+	};
+
+	if (ioctl->argument_addr || ioctl->argument_len)
+		return -EINVAL;
+
+	uv_call(0, (u64)&uvcb);
+	ioctl->uv_rc = uvcb.header.rc;
+	ioctl->uv_rrc = uvcb.header.rrc;
+
+	return 0;
+}
+
+static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *argp,
+				     unsigned long cmd)
+{
+	u8 nr = _IOC_NR(cmd);
+
+	if (_IOC_DIR(cmd) != (_IOC_READ | _IOC_WRITE))
+		return -ENOIOCTLCMD;
+	if (_IOC_TYPE(cmd) != UVIO_TYPE_UVC)
+		return -ENOIOCTLCMD;
+	if (nr >= UVIO_IOCTL_NUM_IOCTLS)
+		return -ENOIOCTLCMD;
+	if (_IOC_SIZE(cmd) != sizeof(*ioctl))
+		return -ENOIOCTLCMD;
 	if (copy_from_user(ioctl, argp, sizeof(*ioctl)))
 		return -EFAULT;
 	if (ioctl->flags != 0)
@@ -194,7 +396,7 @@ static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *a
 	if (memchr_inv(ioctl->reserved14, 0, sizeof(ioctl->reserved14)))
 		return -EINVAL;
 
-	return 0;
+	return nr;
 }
 
 /*
@@ -205,14 +407,28 @@ static long uvio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	void __user *argp = (void __user *)arg;
 	struct uvio_ioctl_cb uv_ioctl = { };
 	long ret;
+	int nr;
 
-	switch (cmd) {
-	case UVIO_IOCTL_ATT:
-		ret = uvio_copy_and_check_ioctl(&uv_ioctl, argp);
-		if (ret)
-			return ret;
+	nr = uvio_copy_and_check_ioctl(&uv_ioctl, argp, cmd);
+	if (nr < 0)
+		return nr;
+
+	switch (nr) {
+	case UVIO_IOCTL_UVDEV_INFO_NR:
+		ret = uvio_uvdev_info(&uv_ioctl);
+		break;
+	case UVIO_IOCTL_ATT_NR:
 		ret = uvio_attestation(&uv_ioctl);
 		break;
+	case UVIO_IOCTL_ADD_SECRET_NR:
+		ret = uvio_add_secret(&uv_ioctl);
+		break;
+	case UVIO_IOCTL_LIST_SECRETS_NR:
+		ret = uvio_list_secrets(&uv_ioctl);
+		break;
+	case UVIO_IOCTL_LOCK_SECRETS_NR:
+		ret = uvio_lock_secrets(&uv_ioctl);
+		break;
 	default:
 		ret = -ENOIOCTLCMD;
 		break;
@@ -245,6 +461,7 @@ static void __exit uvio_dev_exit(void)
 
 static int __init uvio_dev_init(void)
 {
+	set_supp_uv_cmds((unsigned long *)&uvdev_info.supp_uv_cmds);
 	return misc_register(&uvio_dev_miscdev);
 }
 
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 4761b768b773c4..74e0a44f921649 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -164,6 +164,7 @@ TEST_GEN_PROGS_s390x = s390x/memop
 TEST_GEN_PROGS_s390x += s390x/resets
 TEST_GEN_PROGS_s390x += s390x/sync_regs_test
 TEST_GEN_PROGS_s390x += s390x/tprot
+TEST_GEN_PROGS_s390x += s390x/cmma_test
 TEST_GEN_PROGS_s390x += demand_paging_test
 TEST_GEN_PROGS_s390x += dirty_log_test
 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c
new file mode 100644
index 00000000000000..1d73e78e8fa772
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/cmma_test.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x CMMA migration
+ *
+ * Copyright IBM Corp. 2023
+ *
+ * Authors:
+ *  Nico Boehr <nrb@linux.ibm.com>
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+
+#define MAIN_PAGE_COUNT 512
+
+#define TEST_DATA_PAGE_COUNT 512
+#define TEST_DATA_MEMSLOT 1
+#define TEST_DATA_START_GFN 4096
+
+#define TEST_DATA_TWO_PAGE_COUNT 256
+#define TEST_DATA_TWO_MEMSLOT 2
+#define TEST_DATA_TWO_START_GFN 8192
+
+static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
+
+/**
+ * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
+ * so use_cmma goes on and the CMMA related ioctls do something.
+ */
+static void guest_do_one_essa(void)
+{
+	asm volatile(
+		/* load TEST_DATA_START_GFN into r1 */
+		"	llilf 1,%[start_gfn]\n"
+		/* calculate the address from the gfn */
+		"	sllg 1,1,12(0)\n"
+		/* set the first page in TEST_DATA memslot to STABLE */
+		"	.insn rrf,0xb9ab0000,2,1,1,0\n"
+		/* hypercall */
+		"	diag 0,0,0x501\n"
+		"0:	j 0b"
+		:
+		: [start_gfn] "L"(TEST_DATA_START_GFN)
+		: "r1", "r2", "memory", "cc"
+	);
+}
+
+/**
+ * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
+ * state.
+ */
+static void guest_dirty_test_data(void)
+{
+	asm volatile(
+		/* r1 = TEST_DATA_START_GFN */
+		"	xgr 1,1\n"
+		"	llilf 1,%[start_gfn]\n"
+		/* r5 = TEST_DATA_PAGE_COUNT */
+		"	lghi 5,%[page_count]\n"
+		/* r5 += r1 */
+		"2:	agfr 5,1\n"
+		/* r2 = r1 << 12 */
+		"1:	sllg 2,1,12(0)\n"
+		/* essa(r4, r2, SET_STABLE) */
+		"	.insn rrf,0xb9ab0000,4,2,1,0\n"
+		/* i++ */
+		"	agfi 1,1\n"
+		/* if r1 < r5 goto 1 */
+		"	cgrjl 1,5,1b\n"
+		/* hypercall */
+		"	diag 0,0,0x501\n"
+		"0:	j 0b"
+		:
+		: [start_gfn] "L"(TEST_DATA_START_GFN),
+		  [page_count] "L"(TEST_DATA_PAGE_COUNT)
+		:
+			/* the counter in our loop over the pages */
+			"r1",
+			/* the calculated page physical address */
+			"r2",
+			/* ESSA output register */
+			"r4",
+			/* last page */
+			"r5",
+			"cc", "memory"
+	);
+}
+
+static struct kvm_vm *create_vm(void)
+{
+	return ____vm_create(VM_MODE_DEFAULT);
+}
+
+static void create_main_memslot(struct kvm_vm *vm)
+{
+	int i;
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
+	/* set the array of memslots to zero like __vm_create does */
+	for (i = 0; i < NR_MEM_REGIONS; i++)
+		vm->memslots[i] = 0;
+}
+
+static void create_test_memslot(struct kvm_vm *vm)
+{
+	vm_userspace_mem_region_add(vm,
+				    VM_MEM_SRC_ANONYMOUS,
+				    TEST_DATA_START_GFN << vm->page_shift,
+				    TEST_DATA_MEMSLOT,
+				    TEST_DATA_PAGE_COUNT,
+				    0
+				   );
+	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void create_memslots(struct kvm_vm *vm)
+{
+	/*
+	 * Our VM has the following memory layout:
+	 * +------+---------------------------+
+	 * | GFN  | Memslot                   |
+	 * +------+---------------------------+
+	 * | 0    |                           |
+	 * | ...  | MAIN (Code, Stack, ...)   |
+	 * | 511  |                           |
+	 * +------+---------------------------+
+	 * | 4096 |                           |
+	 * | ...  | TEST_DATA                 |
+	 * | 4607 |                           |
+	 * +------+---------------------------+
+	 */
+	create_main_memslot(vm);
+	create_test_memslot(vm);
+}
+
+static void finish_vm_setup(struct kvm_vm *vm)
+{
+	struct userspace_mem_region *slot0;
+
+	kvm_vm_elf_load(vm, program_invocation_name);
+
+	slot0 = memslot2region(vm, 0);
+	ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+	kvm_arch_vm_post_create(vm);
+}
+
+static struct kvm_vm *create_vm_two_memslots(void)
+{
+	struct kvm_vm *vm;
+
+	vm = create_vm();
+
+	create_memslots(vm);
+
+	finish_vm_setup(vm);
+
+	return vm;
+}
+
+static void enable_cmma(struct kvm_vm *vm)
+{
+	int r;
+
+	r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
+	TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
+}
+
+static void enable_dirty_tracking(struct kvm_vm *vm)
+{
+	vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
+	vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+static int __enable_migration_mode(struct kvm_vm *vm)
+{
+	return __kvm_device_attr_set(vm->fd,
+				     KVM_S390_VM_MIGRATION,
+				     KVM_S390_VM_MIGRATION_START,
+				     NULL
+				    );
+}
+
+static void enable_migration_mode(struct kvm_vm *vm)
+{
+	int r = __enable_migration_mode(vm);
+
+	TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
+}
+
+static bool is_migration_mode_on(struct kvm_vm *vm)
+{
+	u64 out;
+	int r;
+
+	r = __kvm_device_attr_get(vm->fd,
+				  KVM_S390_VM_MIGRATION,
+				  KVM_S390_VM_MIGRATION_STATUS,
+				  &out
+				 );
+	TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
+	return out;
+}
+
+static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
+{
+	struct kvm_s390_cmma_log args;
+	int rc;
+
+	errno = 0;
+
+	args = (struct kvm_s390_cmma_log){
+		.start_gfn = 0,
+		.count = sizeof(cmma_value_buf),
+		.flags = flags,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+
+	*errno_out = errno;
+	return rc;
+}
+
+static void test_get_cmma_basic(void)
+{
+	struct kvm_vm *vm = create_vm_two_memslots();
+	struct kvm_vcpu *vcpu;
+	int rc, errno_out;
+
+	/* GET_CMMA_BITS without CMMA enabled should fail */
+	rc = vm_get_cmma_bits(vm, 0, &errno_out);
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_out, ENXIO);
+
+	enable_cmma(vm);
+	vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+	vcpu_run(vcpu);
+
+	/* GET_CMMA_BITS without migration mode and without peeking should fail */
+	rc = vm_get_cmma_bits(vm, 0, &errno_out);
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_out, EINVAL);
+
+	/* GET_CMMA_BITS without migration mode and with peeking should work */
+	rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
+	ASSERT_EQ(rc, 0);
+	ASSERT_EQ(errno_out, 0);
+
+	enable_dirty_tracking(vm);
+	enable_migration_mode(vm);
+
+	/* GET_CMMA_BITS with invalid flags */
+	rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_out, EINVAL);
+
+	kvm_vm_free(vm);
+}
+
+static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
+{
+	ASSERT_EQ(vcpu->run->exit_reason, 13);
+	ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+	ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+	ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+}
+
+static void test_migration_mode(void)
+{
+	struct kvm_vm *vm = create_vm();
+	struct kvm_vcpu *vcpu;
+	u64 orig_psw;
+	int rc;
+
+	/* enabling migration mode on a VM without memory should fail */
+	rc = __enable_migration_mode(vm);
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno, EINVAL);
+	TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+	errno = 0;
+
+	create_memslots(vm);
+	finish_vm_setup(vm);
+
+	enable_cmma(vm);
+	vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+	orig_psw = vcpu->run->psw_addr;
+
+	/*
+	 * Execute one essa instruction in the guest. Otherwise the guest will
+	 * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+	 */
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	/* migration mode when memslots have dirty tracking off should fail */
+	rc = __enable_migration_mode(vm);
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno, EINVAL);
+	TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+	errno = 0;
+
+	/* enable dirty tracking */
+	enable_dirty_tracking(vm);
+
+	/* enabling migration mode should work now */
+	rc = __enable_migration_mode(vm);
+	ASSERT_EQ(rc, 0);
+	TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+	errno = 0;
+
+	/* execute another ESSA instruction to see this goes fine */
+	vcpu->run->psw_addr = orig_psw;
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	/*
+	 * With migration mode on, create a new memslot with dirty tracking off.
+	 * This should turn off migration mode.
+	 */
+	TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+	vm_userspace_mem_region_add(vm,
+				    VM_MEM_SRC_ANONYMOUS,
+				    TEST_DATA_TWO_START_GFN << vm->page_shift,
+				    TEST_DATA_TWO_MEMSLOT,
+				    TEST_DATA_TWO_PAGE_COUNT,
+				    0
+				   );
+	TEST_ASSERT(!is_migration_mode_on(vm),
+		    "creating memslot without dirty tracking turns off migration mode"
+		   );
+
+	/* ESSA instructions should still execute fine */
+	vcpu->run->psw_addr = orig_psw;
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	/*
+	 * Turn on dirty tracking on the new memslot.
+	 * It should be possible to turn migration mode back on again.
+	 */
+	vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+	rc = __enable_migration_mode(vm);
+	ASSERT_EQ(rc, 0);
+	TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+	errno = 0;
+
+	/*
+	 * Turn off dirty tracking again, this time with just a flag change.
+	 * Again, migration mode should turn off.
+	 */
+	TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+	vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
+	TEST_ASSERT(!is_migration_mode_on(vm),
+		    "disabling dirty tracking should turn off migration mode"
+		   );
+
+	/* ESSA instructions should still execute fine */
+	vcpu->run->psw_addr = orig_psw;
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	kvm_vm_free(vm);
+}
+
+/**
+ * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have
+ * CMMA attributes of all pages in both memslots and nothing more dirty.
+ * This has the useful side effect of ensuring nothing is CMMA dirty after this
+ * function.
+ */
+static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
+{
+	struct kvm_s390_cmma_log args;
+
+	/*
+	 * First iteration - everything should be dirty.
+	 * Start at the main memslot...
+	 */
+	args = (struct kvm_s390_cmma_log){
+		.start_gfn = 0,
+		.count = sizeof(cmma_value_buf),
+		.flags = 0,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+	vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+	ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+	ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+	ASSERT_EQ(args.start_gfn, 0);
+
+	/* ...and then - after a hole - the TEST_DATA memslot should follow */
+	args = (struct kvm_s390_cmma_log){
+		.start_gfn = MAIN_PAGE_COUNT,
+		.count = sizeof(cmma_value_buf),
+		.flags = 0,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+	vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+	ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+	ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+	ASSERT_EQ(args.remaining, 0);
+
+	/* ...and nothing else should be there */
+	args = (struct kvm_s390_cmma_log){
+		.start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
+		.count = sizeof(cmma_value_buf),
+		.flags = 0,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+	vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+	ASSERT_EQ(args.count, 0);
+	ASSERT_EQ(args.start_gfn, 0);
+	ASSERT_EQ(args.remaining, 0);
+}
+
+/**
+ * Given a VM, assert no pages are CMMA dirty.
+ */
+static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
+{
+	struct kvm_s390_cmma_log args;
+
+	/* If we start from GFN 0 again, nothing should be dirty. */
+	args = (struct kvm_s390_cmma_log){
+		.start_gfn = 0,
+		.count = sizeof(cmma_value_buf),
+		.flags = 0,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+	vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+	if (args.count || args.remaining || args.start_gfn)
+		TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
+			  args.start_gfn,
+			  args.count,
+			  args.remaining
+			 );
+}
+
+static void test_get_inital_dirty(void)
+{
+	struct kvm_vm *vm = create_vm_two_memslots();
+	struct kvm_vcpu *vcpu;
+
+	enable_cmma(vm);
+	vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+	/*
+	 * Execute one essa instruction in the guest. Otherwise the guest will
+	 * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+	 */
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	enable_dirty_tracking(vm);
+	enable_migration_mode(vm);
+
+	assert_all_slots_cmma_dirty(vm);
+
+	/* Start from the beginning again and make sure nothing else is dirty */
+	assert_no_pages_cmma_dirty(vm);
+
+	kvm_vm_free(vm);
+}
+
+static void query_cmma_range(struct kvm_vm *vm,
+			     u64 start_gfn, u64 gfn_count,
+			     struct kvm_s390_cmma_log *res_out)
+{
+	*res_out = (struct kvm_s390_cmma_log){
+		.start_gfn = start_gfn,
+		.count = gfn_count,
+		.flags = 0,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+	vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
+}
+
+/**
+ * Assert the given cmma_log struct that was executed by query_cmma_range()
+ * indicates the first dirty gfn is at first_dirty_gfn and contains exactly
+ * dirty_gfn_count CMMA values.
+ */
+static void assert_cmma_dirty(u64 first_dirty_gfn,
+			      u64 dirty_gfn_count,
+			      const struct kvm_s390_cmma_log *res)
+{
+	ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+	ASSERT_EQ(res->count, dirty_gfn_count);
+	for (size_t i = 0; i < dirty_gfn_count; i++)
+		ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
+	ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+}
+
+static void test_get_skip_holes(void)
+{
+	size_t gfn_offset;
+	struct kvm_vm *vm = create_vm_two_memslots();
+	struct kvm_s390_cmma_log log;
+	struct kvm_vcpu *vcpu;
+	u64 orig_psw;
+
+	enable_cmma(vm);
+	vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
+
+	orig_psw = vcpu->run->psw_addr;
+
+	/*
+	 * Execute some essa instructions in the guest. Otherwise the guest will
+	 * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+	 */
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	enable_dirty_tracking(vm);
+	enable_migration_mode(vm);
+
+	/* un-dirty all pages */
+	assert_all_slots_cmma_dirty(vm);
+
+	/* Then, dirty just the TEST_DATA memslot */
+	vcpu->run->psw_addr = orig_psw;
+	vcpu_run(vcpu);
+
+	gfn_offset = TEST_DATA_START_GFN;
+	/**
+	 * Query CMMA attributes of one page, starting at page 0. Since the
+	 * main memslot was not touched by the VM, this should yield the first
+	 * page of the TEST_DATA memslot.
+	 * The dirty bitmap should now look like this:
+	 * 0: not dirty
+	 * [0x1, 0x200): dirty
+	 */
+	query_cmma_range(vm, 0, 1, &log);
+	assert_cmma_dirty(gfn_offset, 1, &log);
+	gfn_offset++;
+
+	/**
+	 * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
+	 * memslot. This should wrap back to the beginning of the TEST_DATA
+	 * memslot, page 1.
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x21): not dirty
+	 * [0x21, 0x200): dirty
+	 */
+	query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
+	assert_cmma_dirty(gfn_offset, 0x20, &log);
+	gfn_offset += 0x20;
+
+	/* Skip 32 pages */
+	gfn_offset += 0x20;
+
+	/**
+	 * After skipping 32 pages, query the next 32 (0x20) pages.
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x21): not dirty
+	 * [0x21, 0x41): dirty
+	 * [0x41, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	query_cmma_range(vm, gfn_offset, 0x20, &log);
+	assert_cmma_dirty(gfn_offset, 0x20, &log);
+	gfn_offset += 0x20;
+
+	/**
+	 * Query 1 page from the beginning of the TEST_DATA memslot. This should
+	 * yield page 0x21.
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x22): not dirty
+	 * [0x22, 0x41): dirty
+	 * [0x41, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
+	assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
+	gfn_offset++;
+
+	/**
+	 * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
+	 * This should yield pages [0x23, 0x33).
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x22): not dirty
+	 * 0x22: dirty
+	 * [0x23, 0x33): not dirty
+	 * [0x33, 0x41): dirty
+	 * [0x41, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	gfn_offset = TEST_DATA_START_GFN + 0x23;
+	query_cmma_range(vm, gfn_offset, 15, &log);
+	assert_cmma_dirty(gfn_offset, 15, &log);
+
+	/**
+	 * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
+	 * This should yield page [0x22, 0x33)
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x33): not dirty
+	 * [0x33, 0x41): dirty
+	 * [0x41, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	gfn_offset = TEST_DATA_START_GFN + 0x22;
+	query_cmma_range(vm, gfn_offset, 17, &log);
+	assert_cmma_dirty(gfn_offset, 17, &log);
+
+	/**
+	 * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
+	 * This should yield page 0x40 and nothing more, since there are more
+	 * than 16 non-dirty pages after page 0x40.
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x33): not dirty
+	 * [0x33, 0x40): dirty
+	 * [0x40, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	gfn_offset = TEST_DATA_START_GFN + 0x40;
+	query_cmma_range(vm, gfn_offset, 25, &log);
+	assert_cmma_dirty(gfn_offset, 1, &log);
+
+	/**
+	 * Query pages [0x33, 0x40).
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	gfn_offset = TEST_DATA_START_GFN + 0x33;
+	query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
+	assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
+
+	/**
+	 * Query the remaining pages [0x61, 0x200).
+	 */
+	gfn_offset = TEST_DATA_START_GFN;
+	query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
+	assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
+
+	assert_no_pages_cmma_dirty(vm);
+}
+
+struct testdef {
+	const char *name;
+	void (*test)(void);
+} testlist[] = {
+	{ "migration mode and dirty tracking", test_migration_mode },
+	{ "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
+	{ "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty },
+	{ "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
+};
+
+/**
+ * The kernel may support CMMA, but the machine may not (i.e. if running as
+ * guest-3).
+ *
+ * In this case, the CMMA capabilities are all there, but the CMMA-related
+ * ioctls fail. To find out whether the machine supports CMMA, create a
+ * temporary VM and then query the CMMA feature of the VM.
+ */
+static int machine_has_cmma(void)
+{
+	struct kvm_vm *vm = create_vm();
+	int r;
+
+	r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
+	kvm_vm_free(vm);
+
+	return r;
+}
+
+int main(int argc, char *argv[])
+{
+	int idx;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
+	TEST_REQUIRE(machine_has_cmma());
+
+	ksft_print_header();
+
+	ksft_set_plan(ARRAY_SIZE(testlist));
+
+	for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+		testlist[idx].test();
+		ksft_test_result_pass("%s\n", testlist[idx].name);
+	}
+
+	ksft_finished();	/* Print results and exit() accordingly */
+}
author	Paolo Bonzini <pbonzini@redhat.com>	2023-07-01 07:00:11 -0400
committer	Paolo Bonzini <pbonzini@redhat.com>	2023-07-01 07:00:11 -0400
commit	a443e2609c01479c7c0c3367059d7b9f2e8a6697 (patch)
tree	90c9c2e6acfdf539523301f239fbac69dff4ed0f
parent	6995e2de6891c724bfeb2db33d7b87775f913ad1 (diff)
parent	db54dfc9f71cd2df7afd1e88535ef6099cb0333e (diff)
download	net-next-a443e2609c01479c7c0c3367059d7b9f2e8a6697.tar.gz