aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2023-07-01 07:00:11 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2023-07-01 07:00:11 -0400
commita443e2609c01479c7c0c3367059d7b9f2e8a6697 (patch)
tree90c9c2e6acfdf539523301f239fbac69dff4ed0f
parent6995e2de6891c724bfeb2db33d7b87775f913ad1 (diff)
parentdb54dfc9f71cd2df7afd1e88535ef6099cb0333e (diff)
downloadnet-next-a443e2609c01479c7c0c3367059d7b9f2e8a6697.tar.gz
Merge tag 'kvm-s390-next-6.5-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
* New uvdevice secret API * New CMM selftest * cmm fix * diag 9c racy access of target cpu fix
-rw-r--r--arch/s390/boot/uv.c4
-rw-r--r--arch/s390/include/asm/uv.h32
-rw-r--r--arch/s390/include/uapi/asm/uvdevice.h53
-rw-r--r--arch/s390/kernel/uv.c108
-rw-r--r--arch/s390/kvm/diag.c8
-rw-r--r--arch/s390/kvm/kvm-s390.c4
-rw-r--r--arch/s390/kvm/vsie.c6
-rw-r--r--drivers/s390/char/Kconfig2
-rw-r--r--drivers/s390/char/uvdevice.c231
-rw-r--r--tools/testing/selftests/kvm/Makefile1
-rw-r--r--tools/testing/selftests/kvm/s390x/cmma_test.c700
11 files changed, 1100 insertions, 49 deletions
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 0a077c0a205630..1e66d2cbb0965a 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -47,6 +47,10 @@ void uv_query_info(void)
uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len;
uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver;
uv_info.supp_att_pflags = uvcb.supp_att_pflags;
+ uv_info.supp_add_secret_req_ver = uvcb.supp_add_secret_req_ver;
+ uv_info.supp_add_secret_pcf = uvcb.supp_add_secret_pcf;
+ uv_info.supp_secret_types = uvcb.supp_secret_types;
+ uv_info.max_secrets = uvcb.max_secrets;
}
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 28a9ad57b6f16c..d6bb2f4f78d1f4 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -58,6 +58,9 @@
#define UVC_CMD_SET_SHARED_ACCESS 0x1000
#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
#define UVC_CMD_RETR_ATTEST 0x1020
+#define UVC_CMD_ADD_SECRET 0x1031
+#define UVC_CMD_LIST_SECRETS 0x1033
+#define UVC_CMD_LOCK_SECRETS 0x1034
/* Bits in installed uv calls */
enum uv_cmds_inst {
@@ -88,6 +91,9 @@ enum uv_cmds_inst {
BIT_UVC_CMD_DUMP_CPU = 26,
BIT_UVC_CMD_DUMP_COMPLETE = 27,
BIT_UVC_CMD_RETR_ATTEST = 28,
+ BIT_UVC_CMD_ADD_SECRET = 29,
+ BIT_UVC_CMD_LIST_SECRETS = 30,
+ BIT_UVC_CMD_LOCK_SECRETS = 31,
};
enum uv_feat_ind {
@@ -117,7 +123,7 @@ struct uv_cb_qui {
u32 reserved70[3]; /* 0x0070 */
u32 max_num_sec_conf; /* 0x007c */
u64 max_guest_stor_addr; /* 0x0080 */
- u8 reserved88[158 - 136]; /* 0x0088 */
+ u8 reserved88[0x9e - 0x88]; /* 0x0088 */
u16 max_guest_cpu_id; /* 0x009e */
u64 uv_feature_indications; /* 0x00a0 */
u64 reserveda8; /* 0x00a8 */
@@ -129,7 +135,12 @@ struct uv_cb_qui {
u64 reservedd8; /* 0x00d8 */
u64 supp_att_req_hdr_ver; /* 0x00e0 */
u64 supp_att_pflags; /* 0x00e8 */
- u8 reservedf0[256 - 240]; /* 0x00f0 */
+ u64 reservedf0; /* 0x00f0 */
+ u64 supp_add_secret_req_ver; /* 0x00f8 */
+ u64 supp_add_secret_pcf; /* 0x0100 */
+ u64 supp_secret_types; /* 0x0180 */
+ u16 max_secrets; /* 0x0110 */
+ u8 reserved112[0x120 - 0x112]; /* 0x0112 */
} __packed __aligned(8);
/* Initialize Ultravisor */
@@ -292,6 +303,19 @@ struct uv_cb_dump_complete {
u64 reserved30[5];
} __packed __aligned(8);
+/*
+ * A common UV call struct for pv guests that contains a single address
+ * Examples:
+ * Add Secret
+ * List Secrets
+ */
+struct uv_cb_guest_addr {
+ struct uv_cb_header header;
+ u64 reserved08[3];
+ u64 addr;
+ u64 reserved28[4];
+} __packed __aligned(8);
+
static inline int __uv_call(unsigned long r1, unsigned long r2)
{
int cc;
@@ -365,6 +389,10 @@ struct uv_info {
unsigned long conf_dump_finalize_len;
unsigned long supp_att_req_hdr_ver;
unsigned long supp_att_pflags;
+ unsigned long supp_add_secret_req_ver;
+ unsigned long supp_add_secret_pcf;
+ unsigned long supp_secret_types;
+ unsigned short max_secrets;
};
extern struct uv_info uv_info;
diff --git a/arch/s390/include/uapi/asm/uvdevice.h b/arch/s390/include/uapi/asm/uvdevice.h
index 10a5ac918e02d9..b9c2f14a6af31a 100644
--- a/arch/s390/include/uapi/asm/uvdevice.h
+++ b/arch/s390/include/uapi/asm/uvdevice.h
@@ -32,6 +32,33 @@ struct uvio_attest {
__u16 reserved136; /* 0x0136 */
};
+/**
+ * uvio_uvdev_info - Information of supported functions
+ * @supp_uvio_cmds - supported IOCTLs by this device
+ * @supp_uv_cmds - supported UVCs corresponding to the IOCTL
+ *
+ * UVIO request to get information about supported request types by this
+ * uvdevice and the Ultravisor. Everything is output. Bits are in LSB0
+ * ordering. If the bit is set in both, @supp_uvio_cmds and @supp_uv_cmds, the
+ * uvdevice and the Ultravisor support that call.
+ *
+ * Note that bit 0 (UVIO_IOCTL_UVDEV_INFO_NR) is always zero for `supp_uv_cmds`
+ * as there is no corresponding UV-call.
+ */
+struct uvio_uvdev_info {
+ /*
+ * If bit `n` is set, this device supports the IOCTL with nr `n`.
+ */
+ __u64 supp_uvio_cmds;
+ /*
+ * If bit `n` is set, the Ultravisor(UV) supports the UV-call
+ * corresponding to the IOCTL with nr `n` in the calling contextx (host
+ * or guest). The value is only valid if the corresponding bit in
+ * @supp_uvio_cmds is set as well.
+ */
+ __u64 supp_uv_cmds;
+};
+
/*
* The following max values define an upper length for the IOCTL in/out buffers.
* However, they do not represent the maximum the Ultravisor allows which is
@@ -42,10 +69,34 @@ struct uvio_attest {
#define UVIO_ATT_ARCB_MAX_LEN 0x100000
#define UVIO_ATT_MEASUREMENT_MAX_LEN 0x8000
#define UVIO_ATT_ADDITIONAL_MAX_LEN 0x8000
+#define UVIO_ADD_SECRET_MAX_LEN 0x100000
+#define UVIO_LIST_SECRETS_LEN 0x1000
#define UVIO_DEVICE_NAME "uv"
#define UVIO_TYPE_UVC 'u'
-#define UVIO_IOCTL_ATT _IOWR(UVIO_TYPE_UVC, 0x01, struct uvio_ioctl_cb)
+enum UVIO_IOCTL_NR {
+ UVIO_IOCTL_UVDEV_INFO_NR = 0x00,
+ UVIO_IOCTL_ATT_NR,
+ UVIO_IOCTL_ADD_SECRET_NR,
+ UVIO_IOCTL_LIST_SECRETS_NR,
+ UVIO_IOCTL_LOCK_SECRETS_NR,
+ /* must be the last entry */
+ UVIO_IOCTL_NUM_IOCTLS
+};
+
+#define UVIO_IOCTL(nr) _IOWR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb)
+#define UVIO_IOCTL_UVDEV_INFO UVIO_IOCTL(UVIO_IOCTL_UVDEV_INFO_NR)
+#define UVIO_IOCTL_ATT UVIO_IOCTL(UVIO_IOCTL_ATT_NR)
+#define UVIO_IOCTL_ADD_SECRET UVIO_IOCTL(UVIO_IOCTL_ADD_SECRET_NR)
+#define UVIO_IOCTL_LIST_SECRETS UVIO_IOCTL(UVIO_IOCTL_LIST_SECRETS_NR)
+#define UVIO_IOCTL_LOCK_SECRETS UVIO_IOCTL(UVIO_IOCTL_LOCK_SECRETS_NR)
+
+#define UVIO_SUPP_CALL(nr) (1ULL << (nr))
+#define UVIO_SUPP_UDEV_INFO UVIO_SUPP_CALL(UVIO_IOCTL_UDEV_INFO_NR)
+#define UVIO_SUPP_ATT UVIO_SUPP_CALL(UVIO_IOCTL_ATT_NR)
+#define UVIO_SUPP_ADD_SECRET UVIO_SUPP_CALL(UVIO_IOCTL_ADD_SECRET_NR)
+#define UVIO_SUPP_LIST_SECRETS UVIO_SUPP_CALL(UVIO_IOCTL_LIST_SECRETS_NR)
+#define UVIO_SUPP_LOCK_SECRETS UVIO_SUPP_CALL(UVIO_IOCTL_LOCK_SECRETS_NR)
#endif /* __S390_ASM_UVDEVICE_H */
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index cb2ee06df286cd..273a0281a18975 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -23,12 +23,20 @@
int __bootdata_preserved(prot_virt_guest);
#endif
+/*
+ * uv_info contains both host and guest information but it's currently only
+ * expected to be used within modules if it's the KVM module or for
+ * any PV guest module.
+ *
+ * The kernel itself will write these values once in uv_query_info()
+ * and then make some of them readable via a sysfs interface.
+ */
struct uv_info __bootdata_preserved(uv_info);
+EXPORT_SYMBOL(uv_info);
#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
EXPORT_SYMBOL(prot_virt_host);
-EXPORT_SYMBOL(uv_info);
static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len)
{
@@ -460,13 +468,13 @@ EXPORT_SYMBOL_GPL(arch_make_page_accessible);
#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
static ssize_t uv_query_facilities(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
- uv_info.inst_calls_list[0],
- uv_info.inst_calls_list[1],
- uv_info.inst_calls_list[2],
- uv_info.inst_calls_list[3]);
+ return sysfs_emit(buf, "%lx\n%lx\n%lx\n%lx\n",
+ uv_info.inst_calls_list[0],
+ uv_info.inst_calls_list[1],
+ uv_info.inst_calls_list[2],
+ uv_info.inst_calls_list[3]);
}
static struct kobj_attribute uv_query_facilities_attr =
@@ -491,30 +499,27 @@ static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr =
__ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL);
static ssize_t uv_query_dump_cpu_len(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%lx\n",
- uv_info.guest_cpu_stor_len);
+ return sysfs_emit(buf, "%lx\n", uv_info.guest_cpu_stor_len);
}
static struct kobj_attribute uv_query_dump_cpu_len_attr =
__ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL);
static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%lx\n",
- uv_info.conf_dump_storage_state_len);
+ return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_storage_state_len);
}
static struct kobj_attribute uv_query_dump_storage_state_len_attr =
__ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL);
static ssize_t uv_query_dump_finalize_len(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%lx\n",
- uv_info.conf_dump_finalize_len);
+ return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_finalize_len);
}
static struct kobj_attribute uv_query_dump_finalize_len_attr =
@@ -530,53 +535,86 @@ static struct kobj_attribute uv_query_feature_indications_attr =
__ATTR(feature_indications, 0444, uv_query_feature_indications, NULL);
static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%d\n",
- uv_info.max_guest_cpu_id + 1);
+ return sysfs_emit(buf, "%d\n", uv_info.max_guest_cpu_id + 1);
}
static struct kobj_attribute uv_query_max_guest_cpus_attr =
__ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%d\n",
- uv_info.max_num_sec_conf);
+ return sysfs_emit(buf, "%d\n", uv_info.max_num_sec_conf);
}
static struct kobj_attribute uv_query_max_guest_vms_attr =
__ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%lx\n",
- uv_info.max_sec_stor_addr);
+ return sysfs_emit(buf, "%lx\n", uv_info.max_sec_stor_addr);
}
static struct kobj_attribute uv_query_max_guest_addr_attr =
__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_req_hdr_ver);
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_att_req_hdr_ver);
}
static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr =
__ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL);
static ssize_t uv_query_supp_att_pflags(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_pflags);
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_att_pflags);
}
static struct kobj_attribute uv_query_supp_att_pflags_attr =
__ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL);
+static ssize_t uv_query_supp_add_secret_req_ver(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_req_ver);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_req_ver_attr =
+ __ATTR(supp_add_secret_req_ver, 0444, uv_query_supp_add_secret_req_ver, NULL);
+
+static ssize_t uv_query_supp_add_secret_pcf(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_pcf_attr =
+ __ATTR(supp_add_secret_pcf, 0444, uv_query_supp_add_secret_pcf, NULL);
+
+static ssize_t uv_query_supp_secret_types(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_secret_types);
+}
+
+static struct kobj_attribute uv_query_supp_secret_types_attr =
+ __ATTR(supp_secret_types, 0444, uv_query_supp_secret_types, NULL);
+
+static ssize_t uv_query_max_secrets(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", uv_info.max_secrets);
+}
+
+static struct kobj_attribute uv_query_max_secrets_attr =
+ __ATTR(max_secrets, 0444, uv_query_max_secrets, NULL);
+
static struct attribute *uv_query_attrs[] = {
&uv_query_facilities_attr.attr,
&uv_query_feature_indications_attr.attr,
@@ -590,6 +628,10 @@ static struct attribute *uv_query_attrs[] = {
&uv_query_dump_cpu_len_attr.attr,
&uv_query_supp_att_req_hdr_ver_attr.attr,
&uv_query_supp_att_pflags_attr.attr,
+ &uv_query_supp_add_secret_req_ver_attr.attr,
+ &uv_query_supp_add_secret_pcf_attr.attr,
+ &uv_query_supp_secret_types_attr.attr,
+ &uv_query_max_secrets_attr.attr,
NULL,
};
@@ -598,18 +640,18 @@ static struct attribute_group uv_query_attr_group = {
};
static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
int val = 0;
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
val = prot_virt_guest;
#endif
- return scnprintf(page, PAGE_SIZE, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", val);
}
static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
int val = 0;
@@ -617,7 +659,7 @@ static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
val = prot_virt_host;
#endif
- return scnprintf(page, PAGE_SIZE, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", val);
}
static struct kobj_attribute uv_prot_virt_guest =
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 807fa9da1e721d..3c65b8258ae67a 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -166,6 +166,7 @@ static int diag9c_forwarding_overrun(void)
static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu *tcpu;
+ int tcpu_cpu;
int tid;
tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
@@ -181,14 +182,15 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
goto no_yield;
/* target guest VCPU already running */
- if (READ_ONCE(tcpu->cpu) >= 0) {
+ tcpu_cpu = READ_ONCE(tcpu->cpu);
+ if (tcpu_cpu >= 0) {
if (!diag9c_forwarding_hz || diag9c_forwarding_overrun())
goto no_yield;
/* target host CPU already running */
- if (!vcpu_is_preempted(tcpu->cpu))
+ if (!vcpu_is_preempted(tcpu_cpu))
goto no_yield;
- smp_yield_cpu(tcpu->cpu);
+ smp_yield_cpu(tcpu_cpu);
VCPU_EVENT(vcpu, 5,
"diag time slice end directed to %d: yield forwarded",
tid);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 17b81659cdb20c..67001969646484 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2156,6 +2156,10 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
ofs = 0;
}
+
+ if (cur_gfn < ms->base_gfn)
+ ofs = 0;
+
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 8d6b765abf29bf..0333ee482eb894 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -177,7 +177,8 @@ static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
sizeof(struct kvm_s390_apcb0)))
return -EFAULT;
- bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb0));
+ bitmap_and(apcb_s, apcb_s, apcb_h,
+ BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0));
return 0;
}
@@ -203,7 +204,8 @@ static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
sizeof(struct kvm_s390_apcb1)))
return -EFAULT;
- bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb1));
+ bitmap_and(apcb_s, apcb_s, apcb_h,
+ BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1));
return 0;
}
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index 80c4e5101c9712..8a03af5ee5b300 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -96,7 +96,7 @@ config SCLP_OFB
config S390_UV_UAPI
def_tristate m
prompt "Ultravisor userspace API"
- depends on S390
+ depends on S390 && (KVM || PROTECTED_VIRTUALIZATION_GUEST)
help
Selecting exposes parts of the UV interface to userspace
by providing a misc character device at /dev/uv.
diff --git a/drivers/s390/char/uvdevice.c b/drivers/s390/char/uvdevice.c
index 1d40457c7b10e9..144cd2e035909b 100644
--- a/drivers/s390/char/uvdevice.c
+++ b/drivers/s390/char/uvdevice.c
@@ -32,6 +32,55 @@
#include <asm/uvdevice.h>
#include <asm/uv.h>
+#define BIT_UVIO_INTERNAL U32_MAX
+/* Mapping from IOCTL-nr to UVC-bit */
+static const u32 ioctl_nr_to_uvc_bit[] __initconst = {
+ [UVIO_IOCTL_UVDEV_INFO_NR] = BIT_UVIO_INTERNAL,
+ [UVIO_IOCTL_ATT_NR] = BIT_UVC_CMD_RETR_ATTEST,
+ [UVIO_IOCTL_ADD_SECRET_NR] = BIT_UVC_CMD_ADD_SECRET,
+ [UVIO_IOCTL_LIST_SECRETS_NR] = BIT_UVC_CMD_LIST_SECRETS,
+ [UVIO_IOCTL_LOCK_SECRETS_NR] = BIT_UVC_CMD_LOCK_SECRETS,
+};
+
+static_assert(ARRAY_SIZE(ioctl_nr_to_uvc_bit) == UVIO_IOCTL_NUM_IOCTLS);
+
+static struct uvio_uvdev_info uvdev_info = {
+ .supp_uvio_cmds = GENMASK_ULL(UVIO_IOCTL_NUM_IOCTLS - 1, 0),
+};
+
+static void __init set_supp_uv_cmds(unsigned long *supp_uv_cmds)
+{
+ int i;
+
+ for (i = 0; i < UVIO_IOCTL_NUM_IOCTLS; i++) {
+ if (ioctl_nr_to_uvc_bit[i] == BIT_UVIO_INTERNAL)
+ continue;
+ if (!test_bit_inv(ioctl_nr_to_uvc_bit[i], uv_info.inst_calls_list))
+ continue;
+ __set_bit(i, supp_uv_cmds);
+ }
+}
+
+/**
+ * uvio_uvdev_info() - get information about the uvdevice
+ *
+ * @uv_ioctl: ioctl control block
+ *
+ * Lists all IOCTLs that are supported by this uvdevice
+ */
+static int uvio_uvdev_info(struct uvio_ioctl_cb *uv_ioctl)
+{
+ void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr;
+
+ if (uv_ioctl->argument_len < sizeof(uvdev_info))
+ return -EINVAL;
+ if (copy_to_user(user_buf_arg, &uvdev_info, sizeof(uvdev_info)))
+ return -EFAULT;
+
+ uv_ioctl->uv_rc = UVC_RC_EXECUTED;
+ return 0;
+}
+
static int uvio_build_uvcb_attest(struct uv_cb_attest *uvcb_attest, u8 *arcb,
u8 *meas, u8 *add_data, struct uvio_attest *uvio_attest)
{
@@ -185,8 +234,161 @@ out:
return ret;
}
-static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *argp)
+/** uvio_add_secret() - perform an Add Secret UVC
+ *
+ * @uv_ioctl: ioctl control block
+ *
+ * uvio_add_secret() performs the Add Secret Ultravisor Call.
+ *
+ * The given userspace argument address and size are verified to be
+ * valid but every other check is made by the Ultravisor
+ * (UV). Therefore UV errors won't result in a negative return
+ * value. The request is then copied to kernelspace, the UV-call is
+ * performed and the results are copied back to userspace.
+ *
+ * The argument has to point to an Add Secret Request Control Block
+ * which is an encrypted and cryptographically verified request that
+ * inserts a protected guest's secrets into the Ultravisor for later
+ * use.
+ *
+ * If the Add Secret UV facility is not present, UV will return
+ * invalid command rc. This won't be fenced in the driver and does not
+ * result in a negative return value.
+ *
+ * Context: might sleep
+ *
+ * Return: 0 on success or a negative error code on error.
+ */
+static int uvio_add_secret(struct uvio_ioctl_cb *uv_ioctl)
{
+ void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr;
+ struct uv_cb_guest_addr uvcb = {
+ .header.len = sizeof(uvcb),
+ .header.cmd = UVC_CMD_ADD_SECRET,
+ };
+ void *asrcb = NULL;
+ int ret;
+
+ if (uv_ioctl->argument_len > UVIO_ADD_SECRET_MAX_LEN)
+ return -EINVAL;
+ if (uv_ioctl->argument_len == 0)
+ return -EINVAL;
+
+ asrcb = kvzalloc(uv_ioctl->argument_len, GFP_KERNEL);
+ if (!asrcb)
+ return -ENOMEM;
+
+ ret = -EFAULT;
+ if (copy_from_user(asrcb, user_buf_arg, uv_ioctl->argument_len))
+ goto out;
+
+ ret = 0;
+ uvcb.addr = (u64)asrcb;
+ uv_call_sched(0, (u64)&uvcb);
+ uv_ioctl->uv_rc = uvcb.header.rc;
+ uv_ioctl->uv_rrc = uvcb.header.rrc;
+
+out:
+ kvfree(asrcb);
+ return ret;
+}
+
+/** uvio_list_secrets() - perform a List Secret UVC
+ * @uv_ioctl: ioctl control block
+ *
+ * uvio_list_secrets() performs the List Secret Ultravisor Call. It verifies
+ * that the given userspace argument address is valid and its size is sane.
+ * Every other check is made by the Ultravisor (UV) and won't result in a
+ * negative return value. It builds the request, performs the UV-call, and
+ * copies the result to userspace.
+ *
+ * The argument specifies the location for the result of the UV-Call.
+ *
+ * If the List Secrets UV facility is not present, UV will return invalid
+ * command rc. This won't be fenced in the driver and does not result in a
+ * negative return value.
+ *
+ * Context: might sleep
+ *
+ * Return: 0 on success or a negative error code on error.
+ */
+static int uvio_list_secrets(struct uvio_ioctl_cb *uv_ioctl)
+{
+ void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr;
+ struct uv_cb_guest_addr uvcb = {
+ .header.len = sizeof(uvcb),
+ .header.cmd = UVC_CMD_LIST_SECRETS,
+ };
+ void *secrets = NULL;
+ int ret = 0;
+
+ if (uv_ioctl->argument_len != UVIO_LIST_SECRETS_LEN)
+ return -EINVAL;
+
+ secrets = kvzalloc(UVIO_LIST_SECRETS_LEN, GFP_KERNEL);
+ if (!secrets)
+ return -ENOMEM;
+
+ uvcb.addr = (u64)secrets;
+ uv_call_sched(0, (u64)&uvcb);
+ uv_ioctl->uv_rc = uvcb.header.rc;
+ uv_ioctl->uv_rrc = uvcb.header.rrc;
+
+ if (copy_to_user(user_buf_arg, secrets, UVIO_LIST_SECRETS_LEN))
+ ret = -EFAULT;
+
+ kvfree(secrets);
+ return ret;
+}
+
+/** uvio_lock_secrets() - perform a Lock Secret Store UVC
+ * @uv_ioctl: ioctl control block
+ *
+ * uvio_lock_secrets() performs the Lock Secret Store Ultravisor Call. It
+ * performs the UV-call and copies the return codes to the ioctl control block.
+ * After this call was dispatched successfully every following Add Secret UVC
+ * and Lock Secrets UVC will fail with return code 0x102.
+ *
+ * The argument address and size must be 0.
+ *
+ * If the Lock Secrets UV facility is not present, UV will return invalid
+ * command rc. This won't be fenced in the driver and does not result in a
+ * negative return value.
+ *
+ * Context: might sleep
+ *
+ * Return: 0 on success or a negative error code on error.
+ */
+static int uvio_lock_secrets(struct uvio_ioctl_cb *ioctl)
+{
+ struct uv_cb_nodata uvcb = {
+ .header.len = sizeof(uvcb),
+ .header.cmd = UVC_CMD_LOCK_SECRETS,
+ };
+
+ if (ioctl->argument_addr || ioctl->argument_len)
+ return -EINVAL;
+
+ uv_call(0, (u64)&uvcb);
+ ioctl->uv_rc = uvcb.header.rc;
+ ioctl->uv_rrc = uvcb.header.rrc;
+
+ return 0;
+}
+
+static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *argp,
+ unsigned long cmd)
+{
+ u8 nr = _IOC_NR(cmd);
+
+ if (_IOC_DIR(cmd) != (_IOC_READ | _IOC_WRITE))
+ return -ENOIOCTLCMD;
+ if (_IOC_TYPE(cmd) != UVIO_TYPE_UVC)
+ return -ENOIOCTLCMD;
+ if (nr >= UVIO_IOCTL_NUM_IOCTLS)
+ return -ENOIOCTLCMD;
+ if (_IOC_SIZE(cmd) != sizeof(*ioctl))
+ return -ENOIOCTLCMD;
if (copy_from_user(ioctl, argp, sizeof(*ioctl)))
return -EFAULT;
if (ioctl->flags != 0)
@@ -194,7 +396,7 @@ static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *a
if (memchr_inv(ioctl->reserved14, 0, sizeof(ioctl->reserved14)))
return -EINVAL;
- return 0;
+ return nr;
}
/*
@@ -205,14 +407,28 @@ static long uvio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
void __user *argp = (void __user *)arg;
struct uvio_ioctl_cb uv_ioctl = { };
long ret;
+ int nr;
- switch (cmd) {
- case UVIO_IOCTL_ATT:
- ret = uvio_copy_and_check_ioctl(&uv_ioctl, argp);
- if (ret)
- return ret;
+ nr = uvio_copy_and_check_ioctl(&uv_ioctl, argp, cmd);
+ if (nr < 0)
+ return nr;
+
+ switch (nr) {
+ case UVIO_IOCTL_UVDEV_INFO_NR:
+ ret = uvio_uvdev_info(&uv_ioctl);
+ break;
+ case UVIO_IOCTL_ATT_NR:
ret = uvio_attestation(&uv_ioctl);
break;
+ case UVIO_IOCTL_ADD_SECRET_NR:
+ ret = uvio_add_secret(&uv_ioctl);
+ break;
+ case UVIO_IOCTL_LIST_SECRETS_NR:
+ ret = uvio_list_secrets(&uv_ioctl);
+ break;
+ case UVIO_IOCTL_LOCK_SECRETS_NR:
+ ret = uvio_lock_secrets(&uv_ioctl);
+ break;
default:
ret = -ENOIOCTLCMD;
break;
@@ -245,6 +461,7 @@ static void __exit uvio_dev_exit(void)
static int __init uvio_dev_init(void)
{
+ set_supp_uv_cmds((unsigned long *)&uvdev_info.supp_uv_cmds);
return misc_register(&uvio_dev_miscdev);
}
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 4761b768b773c4..74e0a44f921649 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -164,6 +164,7 @@ TEST_GEN_PROGS_s390x = s390x/memop
TEST_GEN_PROGS_s390x += s390x/resets
TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += s390x/tprot
+TEST_GEN_PROGS_s390x += s390x/cmma_test
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c
new file mode 100644
index 00000000000000..1d73e78e8fa772
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/cmma_test.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x CMMA migration
+ *
+ * Copyright IBM Corp. 2023
+ *
+ * Authors:
+ * Nico Boehr <nrb@linux.ibm.com>
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+
+#define MAIN_PAGE_COUNT 512
+
+#define TEST_DATA_PAGE_COUNT 512
+#define TEST_DATA_MEMSLOT 1
+#define TEST_DATA_START_GFN 4096
+
+#define TEST_DATA_TWO_PAGE_COUNT 256
+#define TEST_DATA_TWO_MEMSLOT 2
+#define TEST_DATA_TWO_START_GFN 8192
+
+static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
+
+/**
+ * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
+ * so use_cmma goes on and the CMMA related ioctls do something.
+ */
+static void guest_do_one_essa(void)
+{
+ asm volatile(
+ /* load TEST_DATA_START_GFN into r1 */
+ " llilf 1,%[start_gfn]\n"
+ /* calculate the address from the gfn */
+ " sllg 1,1,12(0)\n"
+ /* set the first page in TEST_DATA memslot to STABLE */
+ " .insn rrf,0xb9ab0000,2,1,1,0\n"
+ /* hypercall */
+ " diag 0,0,0x501\n"
+ "0: j 0b"
+ :
+ : [start_gfn] "L"(TEST_DATA_START_GFN)
+ : "r1", "r2", "memory", "cc"
+ );
+}
+
+/**
+ * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
+ * state.
+ */
+static void guest_dirty_test_data(void)
+{
+ asm volatile(
+ /* r1 = TEST_DATA_START_GFN */
+ " xgr 1,1\n"
+ " llilf 1,%[start_gfn]\n"
+ /* r5 = TEST_DATA_PAGE_COUNT */
+ " lghi 5,%[page_count]\n"
+ /* r5 += r1 */
+ "2: agfr 5,1\n"
+ /* r2 = r1 << 12 */
+ "1: sllg 2,1,12(0)\n"
+ /* essa(r4, r2, SET_STABLE) */
+ " .insn rrf,0xb9ab0000,4,2,1,0\n"
+ /* i++ */
+ " agfi 1,1\n"
+ /* if r1 < r5 goto 1 */
+ " cgrjl 1,5,1b\n"
+ /* hypercall */
+ " diag 0,0,0x501\n"
+ "0: j 0b"
+ :
+ : [start_gfn] "L"(TEST_DATA_START_GFN),
+ [page_count] "L"(TEST_DATA_PAGE_COUNT)
+ :
+ /* the counter in our loop over the pages */
+ "r1",
+ /* the calculated page physical address */
+ "r2",
+ /* ESSA output register */
+ "r4",
+ /* last page */
+ "r5",
+ "cc", "memory"
+ );
+}
+
+static struct kvm_vm *create_vm(void)
+{
+ return ____vm_create(VM_MODE_DEFAULT);
+}
+
+static void create_main_memslot(struct kvm_vm *vm)
+{
+ int i;
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
+ /* set the array of memslots to zero like __vm_create does */
+ for (i = 0; i < NR_MEM_REGIONS; i++)
+ vm->memslots[i] = 0;
+}
+
+static void create_test_memslot(struct kvm_vm *vm)
+{
+ vm_userspace_mem_region_add(vm,
+ VM_MEM_SRC_ANONYMOUS,
+ TEST_DATA_START_GFN << vm->page_shift,
+ TEST_DATA_MEMSLOT,
+ TEST_DATA_PAGE_COUNT,
+ 0
+ );
+ vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void create_memslots(struct kvm_vm *vm)
+{
+ /*
+ * Our VM has the following memory layout:
+ * +------+---------------------------+
+ * | GFN | Memslot |
+ * +------+---------------------------+
+ * | 0 | |
+ * | ... | MAIN (Code, Stack, ...) |
+ * | 511 | |
+ * +------+---------------------------+
+ * | 4096 | |
+ * | ... | TEST_DATA |
+ * | 4607 | |
+ * +------+---------------------------+
+ */
+ create_main_memslot(vm);
+ create_test_memslot(vm);
+}
+
+static void finish_vm_setup(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *slot0;
+
+ kvm_vm_elf_load(vm, program_invocation_name);
+
+ slot0 = memslot2region(vm, 0);
+ ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+ kvm_arch_vm_post_create(vm);
+}
+
+static struct kvm_vm *create_vm_two_memslots(void)
+{
+ struct kvm_vm *vm;
+
+ vm = create_vm();
+
+ create_memslots(vm);
+
+ finish_vm_setup(vm);
+
+ return vm;
+}
+
+static void enable_cmma(struct kvm_vm *vm)
+{
+ int r;
+
+ r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
+ TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
+}
+
+static void enable_dirty_tracking(struct kvm_vm *vm)
+{
+ vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
+ vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+static int __enable_migration_mode(struct kvm_vm *vm)
+{
+ return __kvm_device_attr_set(vm->fd,
+ KVM_S390_VM_MIGRATION,
+ KVM_S390_VM_MIGRATION_START,
+ NULL
+ );
+}
+
+static void enable_migration_mode(struct kvm_vm *vm)
+{
+ int r = __enable_migration_mode(vm);
+
+ TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
+}
+
+static bool is_migration_mode_on(struct kvm_vm *vm)
+{
+ u64 out;
+ int r;
+
+ r = __kvm_device_attr_get(vm->fd,
+ KVM_S390_VM_MIGRATION,
+ KVM_S390_VM_MIGRATION_STATUS,
+ &out
+ );
+ TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
+ return out;
+}
+
+static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
+{
+ struct kvm_s390_cmma_log args;
+ int rc;
+
+ errno = 0;
+
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = flags,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+
+ *errno_out = errno;
+ return rc;
+}
+
+static void test_get_cmma_basic(void)
+{
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_vcpu *vcpu;
+ int rc, errno_out;
+
+ /* GET_CMMA_BITS without CMMA enabled should fail */
+ rc = vm_get_cmma_bits(vm, 0, &errno_out);
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_out, ENXIO);
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+ vcpu_run(vcpu);
+
+ /* GET_CMMA_BITS without migration mode and without peeking should fail */
+ rc = vm_get_cmma_bits(vm, 0, &errno_out);
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_out, EINVAL);
+
+ /* GET_CMMA_BITS without migration mode and with peeking should work */
+ rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
+ ASSERT_EQ(rc, 0);
+ ASSERT_EQ(errno_out, 0);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ /* GET_CMMA_BITS with invalid flags */
+ rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_out, EINVAL);
+
+ kvm_vm_free(vm);
+}
+
+static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
+{
+ ASSERT_EQ(vcpu->run->exit_reason, 13);
+ ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+ ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+ ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+}
+
+static void test_migration_mode(void)
+{
+ struct kvm_vm *vm = create_vm();
+ struct kvm_vcpu *vcpu;
+ u64 orig_psw;
+ int rc;
+
+ /* enabling migration mode on a VM without memory should fail */
+ rc = __enable_migration_mode(vm);
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+ errno = 0;
+
+ create_memslots(vm);
+ finish_vm_setup(vm);
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+ orig_psw = vcpu->run->psw_addr;
+
+ /*
+ * Execute one essa instruction in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /* migration mode when memslots have dirty tracking off should fail */
+ rc = __enable_migration_mode(vm);
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+ errno = 0;
+
+ /* enable dirty tracking */
+ enable_dirty_tracking(vm);
+
+ /* enabling migration mode should work now */
+ rc = __enable_migration_mode(vm);
+ ASSERT_EQ(rc, 0);
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ errno = 0;
+
+ /* execute another ESSA instruction to see this goes fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /*
+ * With migration mode on, create a new memslot with dirty tracking off.
+ * This should turn off migration mode.
+ */
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ vm_userspace_mem_region_add(vm,
+ VM_MEM_SRC_ANONYMOUS,
+ TEST_DATA_TWO_START_GFN << vm->page_shift,
+ TEST_DATA_TWO_MEMSLOT,
+ TEST_DATA_TWO_PAGE_COUNT,
+ 0
+ );
+ TEST_ASSERT(!is_migration_mode_on(vm),
+ "creating memslot without dirty tracking turns off migration mode"
+ );
+
+ /* ESSA instructions should still execute fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /*
+ * Turn on dirty tracking on the new memslot.
+ * It should be possible to turn migration mode back on again.
+ */
+ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+ rc = __enable_migration_mode(vm);
+ ASSERT_EQ(rc, 0);
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ errno = 0;
+
+ /*
+ * Turn off dirty tracking again, this time with just a flag change.
+ * Again, migration mode should turn off.
+ */
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
+ TEST_ASSERT(!is_migration_mode_on(vm),
+ "disabling dirty tracking should turn off migration mode"
+ );
+
+ /* ESSA instructions should still execute fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+/**
+ * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have
+ * CMMA attributes of all pages in both memslots and nothing more dirty.
+ * This has the useful side effect of ensuring nothing is CMMA dirty after this
+ * function.
+ */
+static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
+{
+ struct kvm_s390_cmma_log args;
+
+ /*
+ * First iteration - everything should be dirty.
+ * Start at the main memslot...
+ */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+ ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+ ASSERT_EQ(args.start_gfn, 0);
+
+ /* ...and then - after a hole - the TEST_DATA memslot should follow */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = MAIN_PAGE_COUNT,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+ ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+ ASSERT_EQ(args.remaining, 0);
+
+ /* ...and nothing else should be there */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ ASSERT_EQ(args.count, 0);
+ ASSERT_EQ(args.start_gfn, 0);
+ ASSERT_EQ(args.remaining, 0);
+}
+
+/**
+ * Given a VM, assert no pages are CMMA dirty.
+ */
+static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
+{
+ struct kvm_s390_cmma_log args;
+
+ /* If we start from GFN 0 again, nothing should be dirty. */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ if (args.count || args.remaining || args.start_gfn)
+ TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
+ args.start_gfn,
+ args.count,
+ args.remaining
+ );
+}
+
+static void test_get_inital_dirty(void)
+{
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_vcpu *vcpu;
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+ /*
+ * Execute one essa instruction in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ assert_all_slots_cmma_dirty(vm);
+
+ /* Start from the beginning again and make sure nothing else is dirty */
+ assert_no_pages_cmma_dirty(vm);
+
+ kvm_vm_free(vm);
+}
+
+static void query_cmma_range(struct kvm_vm *vm,
+ u64 start_gfn, u64 gfn_count,
+ struct kvm_s390_cmma_log *res_out)
+{
+ *res_out = (struct kvm_s390_cmma_log){
+ .start_gfn = start_gfn,
+ .count = gfn_count,
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
+}
+
+/**
+ * Assert the given cmma_log struct that was executed by query_cmma_range()
+ * indicates the first dirty gfn is at first_dirty_gfn and contains exactly
+ * dirty_gfn_count CMMA values.
+ */
+static void assert_cmma_dirty(u64 first_dirty_gfn,
+ u64 dirty_gfn_count,
+ const struct kvm_s390_cmma_log *res)
+{
+ ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+ ASSERT_EQ(res->count, dirty_gfn_count);
+ for (size_t i = 0; i < dirty_gfn_count; i++)
+ ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
+ ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+}
+
+static void test_get_skip_holes(void)
+{
+ size_t gfn_offset;
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_s390_cmma_log log;
+ struct kvm_vcpu *vcpu;
+ u64 orig_psw;
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
+
+ orig_psw = vcpu->run->psw_addr;
+
+ /*
+ * Execute some essa instructions in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ /* un-dirty all pages */
+ assert_all_slots_cmma_dirty(vm);
+
+ /* Then, dirty just the TEST_DATA memslot */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+
+ gfn_offset = TEST_DATA_START_GFN;
+ /**
+ * Query CMMA attributes of one page, starting at page 0. Since the
+ * main memslot was not touched by the VM, this should yield the first
+ * page of the TEST_DATA memslot.
+ * The dirty bitmap should now look like this:
+ * 0: not dirty
+ * [0x1, 0x200): dirty
+ */
+ query_cmma_range(vm, 0, 1, &log);
+ assert_cmma_dirty(gfn_offset, 1, &log);
+ gfn_offset++;
+
+ /**
+ * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
+ * memslot. This should wrap back to the beginning of the TEST_DATA
+ * memslot, page 1.
+ * The dirty bitmap should now look like this:
+ * [0, 0x21): not dirty
+ * [0x21, 0x200): dirty
+ */
+ query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
+ assert_cmma_dirty(gfn_offset, 0x20, &log);
+ gfn_offset += 0x20;
+
+ /* Skip 32 pages */
+ gfn_offset += 0x20;
+
+ /**
+ * After skipping 32 pages, query the next 32 (0x20) pages.
+ * The dirty bitmap should now look like this:
+ * [0, 0x21): not dirty
+ * [0x21, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ query_cmma_range(vm, gfn_offset, 0x20, &log);
+ assert_cmma_dirty(gfn_offset, 0x20, &log);
+ gfn_offset += 0x20;
+
+ /**
+ * Query 1 page from the beginning of the TEST_DATA memslot. This should
+ * yield page 0x21.
+ * The dirty bitmap should now look like this:
+ * [0, 0x22): not dirty
+ * [0x22, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
+ assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
+ gfn_offset++;
+
+ /**
+ * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
+ * This should yield pages [0x23, 0x33).
+ * The dirty bitmap should now look like this:
+ * [0, 0x22): not dirty
+ * 0x22: dirty
+ * [0x23, 0x33): not dirty
+ * [0x33, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x23;
+ query_cmma_range(vm, gfn_offset, 15, &log);
+ assert_cmma_dirty(gfn_offset, 15, &log);
+
+ /**
+ * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
+ * This should yield page [0x22, 0x33)
+ * The dirty bitmap should now look like this:
+ * [0, 0x33): not dirty
+ * [0x33, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x22;
+ query_cmma_range(vm, gfn_offset, 17, &log);
+ assert_cmma_dirty(gfn_offset, 17, &log);
+
+ /**
+ * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
+ * This should yield page 0x40 and nothing more, since there are more
+ * than 16 non-dirty pages after page 0x40.
+ * The dirty bitmap should now look like this:
+ * [0, 0x33): not dirty
+ * [0x33, 0x40): dirty
+ * [0x40, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x40;
+ query_cmma_range(vm, gfn_offset, 25, &log);
+ assert_cmma_dirty(gfn_offset, 1, &log);
+
+ /**
+ * Query pages [0x33, 0x40).
+ * The dirty bitmap should now look like this:
+ * [0, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x33;
+ query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
+ assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
+
+ /**
+ * Query the remaining pages [0x61, 0x200).
+ */
+ gfn_offset = TEST_DATA_START_GFN;
+ query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
+ assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
+
+ assert_no_pages_cmma_dirty(vm);
+}
+
+struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "migration mode and dirty tracking", test_migration_mode },
+ { "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
+ { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty },
+ { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
+};
+
+/**
+ * The kernel may support CMMA, but the machine may not (i.e. if running as
+ * guest-3).
+ *
+ * In this case, the CMMA capabilities are all there, but the CMMA-related
+ * ioctls fail. To find out whether the machine supports CMMA, create a
+ * temporary VM and then query the CMMA feature of the VM.
+ */
+static int machine_has_cmma(void)
+{
+ struct kvm_vm *vm = create_vm();
+ int r;
+
+ r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
+ kvm_vm_free(vm);
+
+ return r;
+}
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
+ TEST_REQUIRE(machine_has_cmma());
+
+ ksft_print_header();
+
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}