author      Jean-Philippe Brucker <jean-philippe@linaro.org>    2023-06-28 12:23:32 +0100
committer   Will Deacon <will@kernel.org>                       2023-07-06 15:31:22 +0100
commit      0b5e55fc032d1c6394b8ec7fe02d842813c903df (patch)
tree        6718269edc401e625318fab4b93d898814a068d7
parent      3a36d3410e993094575e7bb5c387a297856cfb1c (diff)
vfio/pci: Clarify the MSI states
The MSI and MSI-X implementations are a bit complex, because they keep
track of capability and vector states as seen by both the guest and the
host. Add a few comments about those states and rename them to something
more accurate.

What's called phys_state at the moment represents the software state
maintained by VFIO and kvmtool, rather than the physical MSI capability,
so host_state is more correct. To be consistent, rename virt_state to
guest_state as well.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Link: https://lore.kernel.org/r/20230628112331.453904-4-jean-philippe@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
-rw-r--r--  include/kvm/vfio.h   8
-rw-r--r--  vfio/pci.c          86
2 files changed, 57 insertions(+), 37 deletions(-)
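
Before the patch itself, here is a minimal standalone sketch of the state tracking the commit message describes: one bitfield for what the guest has written to the capability (guest_state) and one for what kvmtool/VFIO has actually programmed (host_state). Only the macro and flag names come from the patch below; the flag bit values, the body of msi_update_state(), and the main() demo are illustrative assumptions, not the real include/kvm/vfio.h or vfio/pci.c.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Flag values assumed for illustration; only the names appear in the patch. */
#define VFIO_PCI_MSI_STATE_ENABLED	(1 << 0)	/* capability enabled */
#define VFIO_PCI_MSI_STATE_MASKED	(1 << 1)	/* vector(s) masked */
#define VFIO_PCI_MSI_STATE_EMPTY	(1 << 2)	/* no vector registered via SET_IRQS yet */

#define msi_is_enabled(state)	((state) & VFIO_PCI_MSI_STATE_ENABLED)
#define msi_is_masked(state)	((state) & VFIO_PCI_MSI_STATE_MASKED)
#define msi_is_empty(state)	((state) & VFIO_PCI_MSI_STATE_EMPTY)

/* Assumed implementation: set or clear one state bit depending on val. */
#define msi_update_state(state, val, bit) \
	((state) = (val) ? ((state) | (bit)) : ((state) & ~(bit)))

#define msi_set_enabled(state, val)	msi_update_state(state, val, VFIO_PCI_MSI_STATE_ENABLED)
#define msi_set_masked(state, val)	msi_update_state(state, val, VFIO_PCI_MSI_STATE_MASKED)
#define msi_set_empty(state, val)	msi_update_state(state, val, VFIO_PCI_MSI_STATE_EMPTY)

int main(void)
{
	uint8_t guest_state = 0;	/* capability state as written by the guest */
	uint8_t host_state = 0;		/* state actually programmed through VFIO */

	/* Guest enables MSI-X but keeps the whole function masked... */
	msi_set_enabled(guest_state, true);
	msi_set_masked(guest_state, true);

	/* ...while kvmtool has enabled the physical capability with no vectors yet. */
	msi_set_enabled(host_state, true);
	msi_set_empty(host_state, true);

	printf("guest: enabled=%d masked=%d  host: enabled=%d empty=%d\n",
	       !!msi_is_enabled(guest_state), !!msi_is_masked(guest_state),
	       !!msi_is_enabled(host_state), !!msi_is_empty(host_state));
	return 0;
}
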
diff --git a/include/kvm/vfio.h b/include/kvm/vfio.h
index 764ab9b9..ac7b6226 100644
--- a/include/kvm/vfio.h
+++ b/include/kvm/vfio.h
@@ -36,8 +36,8 @@ struct vfio_pci_msi_entry {
struct msix_table config;
int gsi;
int eventfd;
- u8 phys_state;
- u8 virt_state;
+ u8 guest_state;
+ u8 host_state;
};
struct vfio_pci_msix_table {
@@ -57,8 +57,8 @@ struct vfio_pci_msix_pba {
/* Common data for MSI and MSI-X */
struct vfio_pci_msi_common {
off_t pos;
- u8 virt_state;
- u8 phys_state;
+ u8 guest_state;
+ u8 host_state;
struct mutex mutex;
struct vfio_irq_info info;
struct vfio_irq_set *irq_set;
diff --git a/vfio/pci.c b/vfio/pci.c
index a10b5528..0bcd60e6 100644
--- a/vfio/pci.c
+++ b/vfio/pci.c
@@ -28,8 +28,31 @@ static void set_vfio_irq_eventd_payload(union vfio_irq_eventfd *evfd, int fd)
memcpy(&evfd->irq.data, &fd, sizeof(fd));
}
+/*
+ * To support MSI and MSI-X with common code, track the host and guest states of
+ * the MSI/MSI-X capability, and of individual vectors.
+ *
+ * Both MSI and MSI-X capabilities are enabled and disabled through registers.
+ * Vectors cannot be individually disabled.
+ */
#define msi_is_enabled(state) ((state) & VFIO_PCI_MSI_STATE_ENABLED)
+
+/*
+ * MSI-X: the control register allows masking all vectors, and the table allows
+ * masking each vector individually.
+ *
+ * MSI: if the capability supports Per-Vector Masking, the Mask Bits register
+ * allows masking each vector individually. Otherwise there is no masking for
+ * MSI.
+ */
#define msi_is_masked(state) ((state) & VFIO_PCI_MSI_STATE_MASKED)
+
+/*
+ * A capability is empty when no vector has been registered with SET_IRQS
+ * yet. It's an optimization specific to kvmtool to avoid issuing lots of
+ * SET_IRQS ioctls when the guest configures the MSI-X table while the
+ * capability is masked.
+ */
#define msi_is_empty(state) ((state) & VFIO_PCI_MSI_STATE_EMPTY)
#define msi_update_state(state, val, bit) \
@@ -62,7 +85,7 @@ static int vfio_pci_enable_msis(struct kvm *kvm, struct vfio_device *vdev,
},
};
- if (!msi_is_enabled(msis->virt_state))
+ if (!msi_is_enabled(msis->guest_state))
return 0;
if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX)
@@ -78,9 +101,9 @@ static int vfio_pci_enable_msis(struct kvm *kvm, struct vfio_device *vdev,
/*
* Initial registration of the full range. This enables the physical
- * MSI/MSI-X capability, which might have desired side effects. For
- * instance when assigning virtio legacy devices, enabling the MSI
- * capability modifies the config space layout!
+ * MSI/MSI-X capability, which might have side effects. For instance
+ * when assigning virtio legacy devices, enabling the MSI capability
+ * modifies the config space layout!
*
* As an optimization, only update MSIs when guest unmasks the
* capability. This greatly reduces the initialization time for Linux
@@ -88,13 +111,10 @@ static int vfio_pci_enable_msis(struct kvm *kvm, struct vfio_device *vdev,
* masked, then fills individual vectors, then unmasks the whole
* function. So we only do one VFIO ioctl when enabling for the first
* time, and then one when unmasking.
- *
- * phys_state is empty when it is enabled but no vector has been
- * registered via SET_IRQS yet.
*/
- if (!msi_is_enabled(msis->phys_state) ||
- (!msi_is_masked(msis->virt_state) &&
- msi_is_empty(msis->phys_state))) {
+ if (!msi_is_enabled(msis->host_state) ||
+ (!msi_is_masked(msis->guest_state) &&
+ msi_is_empty(msis->host_state))) {
bool empty = true;
for (i = 0; i < msis->nr_entries; i++) {
@@ -111,14 +131,14 @@ static int vfio_pci_enable_msis(struct kvm *kvm, struct vfio_device *vdev,
return ret;
}
- msi_set_enabled(msis->phys_state, true);
- msi_set_empty(msis->phys_state, empty);
+ msi_set_enabled(msis->host_state, true);
+ msi_set_empty(msis->host_state, empty);
return 0;
}
- if (msi_is_masked(msis->virt_state)) {
- /* TODO: if phys_state is not empty nor masked, mask all vectors */
+ if (msi_is_masked(msis->guest_state)) {
+ /* TODO: if host_state is not empty nor masked, mask all vectors */
return 0;
}
@@ -141,8 +161,8 @@ static int vfio_pci_enable_msis(struct kvm *kvm, struct vfio_device *vdev,
eventfds[i] = fd;
- if (msi_is_empty(msis->phys_state) && fd >= 0)
- msi_set_empty(msis->phys_state, false);
+ if (msi_is_empty(msis->host_state) && fd >= 0)
+ msi_set_empty(msis->host_state, false);
}
return ret;
@@ -162,7 +182,7 @@ static int vfio_pci_disable_msis(struct kvm *kvm, struct vfio_device *vdev,
.count = 0,
};
- if (!msi_is_enabled(msis->phys_state))
+ if (!msi_is_enabled(msis->host_state))
return 0;
ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
@@ -171,8 +191,8 @@ static int vfio_pci_disable_msis(struct kvm *kvm, struct vfio_device *vdev,
return ret;
}
- msi_set_enabled(msis->phys_state, false);
- msi_set_empty(msis->phys_state, true);
+ msi_set_enabled(msis->host_state, false);
+ msi_set_empty(msis->host_state, true);
/*
* When MSI or MSIX is disabled, this might be called when
@@ -223,12 +243,12 @@ static int vfio_pci_update_msi_entry(struct kvm *kvm, struct vfio_device *vdev,
* the eventfd in a local handler, in order to serve Pending Bit reads
* to the guest.
*
- * So entry->phys_state is masked when there is no active irqfd route.
+ * So entry->host_state is masked when there is no active irqfd route.
*/
- if (msi_is_masked(entry->virt_state) == msi_is_masked(entry->phys_state))
+ if (msi_is_masked(entry->guest_state) == msi_is_masked(entry->host_state))
return 0;
- if (msi_is_masked(entry->phys_state)) {
+ if (msi_is_masked(entry->host_state)) {
ret = irq__add_irqfd(kvm, entry->gsi, entry->eventfd, -1);
if (ret < 0) {
vfio_dev_err(vdev, "cannot setup irqfd");
@@ -238,7 +258,7 @@ static int vfio_pci_update_msi_entry(struct kvm *kvm, struct vfio_device *vdev,
irq__del_irqfd(kvm, entry->gsi, entry->eventfd);
}
- msi_set_masked(entry->phys_state, msi_is_masked(entry->virt_state));
+ msi_set_masked(entry->host_state, msi_is_masked(entry->guest_state));
return 0;
}
@@ -311,7 +331,7 @@ static void vfio_pci_msix_table_access(struct kvm_cpu *vcpu, u64 addr, u8 *data,
if (field + len <= PCI_MSIX_ENTRY_VECTOR_CTRL)
goto out_unlock;
- msi_set_masked(entry->virt_state, entry->config.ctrl &
+ msi_set_masked(entry->guest_state, entry->config.ctrl &
PCI_MSIX_ENTRY_CTRL_MASKBIT);
if (vfio_pci_update_msi_entry(kvm, vdev, entry) < 0)
@@ -346,9 +366,9 @@ static void vfio_pci_msix_cap_write(struct kvm *kvm,
mutex_lock(&pdev->msix.mutex);
- msi_set_masked(pdev->msix.virt_state, flags & PCI_MSIX_FLAGS_MASKALL);
+ msi_set_masked(pdev->msix.guest_state, flags & PCI_MSIX_FLAGS_MASKALL);
enable = flags & PCI_MSIX_FLAGS_ENABLE;
- msi_set_enabled(pdev->msix.virt_state, enable);
+ msi_set_enabled(pdev->msix.guest_state, enable);
if (enable && vfio_pci_enable_msis(kvm, vdev, true))
vfio_dev_err(vdev, "cannot enable MSIX");
@@ -382,7 +402,7 @@ static int vfio_pci_msi_vector_write(struct kvm *kvm, struct vfio_device *vdev,
/* Set mask to current state */
for (i = 0; i < pdev->msi.nr_entries; i++) {
entry = &pdev->msi.entries[i];
- mask |= !!msi_is_masked(entry->virt_state) << i;
+ mask |= !!msi_is_masked(entry->guest_state) << i;
}
/* Update mask following the intersection of access and register */
@@ -397,8 +417,8 @@ static int vfio_pci_msi_vector_write(struct kvm *kvm, struct vfio_device *vdev,
bool masked = mask & (1 << i);
entry = &pdev->msi.entries[i];
- if (masked != msi_is_masked(entry->virt_state)) {
- msi_set_masked(entry->virt_state, masked);
+ if (masked != msi_is_masked(entry->guest_state)) {
+ msi_set_masked(entry->guest_state, masked);
vfio_pci_update_msi_entry(kvm, vdev, entry);
}
}
@@ -430,9 +450,9 @@ static void vfio_pci_msi_cap_write(struct kvm *kvm, struct vfio_device *vdev,
ctrl = *(u8 *)(data + PCI_MSI_FLAGS - off);
- msi_set_enabled(pdev->msi.virt_state, ctrl & PCI_MSI_FLAGS_ENABLE);
+ msi_set_enabled(pdev->msi.guest_state, ctrl & PCI_MSI_FLAGS_ENABLE);
- if (!msi_is_enabled(pdev->msi.virt_state)) {
+ if (!msi_is_enabled(pdev->msi.guest_state)) {
vfio_pci_disable_msis(kvm, vdev, false);
goto out_unlock;
}
@@ -1182,8 +1202,8 @@ static int vfio_pci_init_msis(struct kvm *kvm, struct vfio_device *vdev,
entry = &msis->entries[i];
entry->gsi = -1;
entry->eventfd = -1;
- msi_set_masked(entry->virt_state, false);
- msi_set_masked(entry->phys_state, true);
+ msi_set_masked(entry->guest_state, false);
+ msi_set_masked(entry->host_state, true);
eventfds[i] = -1;
}
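
As a reading aid for the enable path in the patch above, the condition in vfio_pci_enable_msis() that triggers the full-range VFIO_DEVICE_SET_IRQS ioctl can be restated as a small predicate. This is a hedged sketch: the helper name needs_full_set_irqs() is illustrative, and it reuses the state macros from the sketch near the top of this page rather than kvmtool's real definitions.

/*
 * When does kvmtool push the whole vector table in one SET_IRQS ioctl,
 * rather than programming vectors one by one?
 */
static bool needs_full_set_irqs(uint8_t guest_state, uint8_t host_state)
{
	/* First enable: the physical capability hasn't been switched on yet. */
	if (!msi_is_enabled(host_state))
		return true;

	/*
	 * Guest unmasked the capability while no vector has been registered:
	 * register the full range now instead of one ioctl per vector.
	 */
	if (!msi_is_masked(guest_state) && msi_is_empty(host_state))
		return true;

	return false;
}

This mirrors the optimization described in the patch: Linux masks the capability, fills the individual vectors, then unmasks the whole function, so kvmtool ends up issuing one VFIO ioctl on the initial enable and one more on the final unmask.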