author	Linus Torvalds <torvalds@linux-foundation.org>	2023-06-30 15:22:09 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2023-06-30 15:22:09 -0700
commit	b25f62ccb490680a8cee755ac4528909395e0711 (patch)
tree	149d2d35f6efbca401e7a7b5a3b161a2c00bd0ec
parent	9070577ae9d6065e447d422bdf85a09f89eaa9e8 (diff)
parent	ff598081e5b9d0bdd6874bfe340811bbb75b35e4 (diff)
Merge tag 'vfio-v6.5-rc1' of https://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:

 - Adjust log levels for common messages (Oleksandr Natalenko, Alex Williamson)

 - Support for dynamic MSI-X allocation (Reinette Chatre)

 - Enable and report PCIe AtomicOp Completer capabilities (Alex Williamson)

 - Cleanup Kconfigs for vfio bus drivers (Alex Williamson)

 - Add support for CDX bus based devices (Nipun Gupta)

 - Fix race with concurrent mdev initialization (Eric Farman)

* tag 'vfio-v6.5-rc1' of https://github.com/awilliam/linux-vfio:
  vfio/mdev: Move the compat_class initialization to module init
  vfio/cdx: add support for CDX bus
  vfio/fsl: Create Kconfig sub-menu
  vfio/platform: Cleanup Kconfig
  vfio/pci: Cleanup Kconfig
  vfio/pci-core: Add capability for AtomicOp completer support
  vfio/pci: Also demote hiding standard cap messages
  vfio/pci: Clear VFIO_IRQ_INFO_NORESIZE for MSI-X
  vfio/pci: Support dynamic MSI-X
  vfio/pci: Probe and store ability to support dynamic MSI-X
  vfio/pci: Use bitfield for struct vfio_pci_core_device flags
  vfio/pci: Update stale comment
  vfio/pci: Remove interrupt context counter
  vfio/pci: Use xarray for interrupt context storage
  vfio/pci: Move to single error path
  vfio/pci: Prepare for dynamic interrupt context storage
  vfio/pci: Remove negative check on unsigned vector
  vfio/pci: Consolidate irq cleanup on MSI/MSI-X disable
  vfio/pci: demote hiding ecap messages to debug level
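The user-visible additions are uAPI: a new VFIO_DEVICE_FLAGS_CDX device flag, a PCIe AtomicOp completer capability, and relaxed VFIO_IRQ_INFO_NORESIZE semantics for MSI-X. As a minimal, hypothetical sketch (it assumes a device fd already obtained through the usual VFIO group or iommufd cdev path, and 6.5-level uapi headers), userspace can recognize a vfio-cdx device like this:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* device_fd: an already-open VFIO device fd (assumed, not shown here) */
static void show_device_kind(int device_fd)
{
	struct vfio_device_info info;

	memset(&info, 0, sizeof(info));
	info.argsz = sizeof(info);

	if (ioctl(device_fd, VFIO_DEVICE_GET_INFO, &info))
		return;

	if (info.flags & VFIO_DEVICE_FLAGS_CDX)
		printf("vfio-cdx device: %u regions, %u irqs\n",
		       info.num_regions, info.num_irqs);
}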
-rw-r--r--	MAINTAINERS	7
-rw-r--r--	drivers/vfio/Kconfig	1
-rw-r--r--	drivers/vfio/Makefile	5
-rw-r--r--	drivers/vfio/cdx/Kconfig	17
-rw-r--r--	drivers/vfio/cdx/Makefile	8
-rw-r--r--	drivers/vfio/cdx/main.c	234
-rw-r--r--	drivers/vfio/cdx/private.h	28
-rw-r--r--	drivers/vfio/fsl-mc/Kconfig	6
-rw-r--r--	drivers/vfio/mdev/mdev_core.c	23
-rw-r--r--	drivers/vfio/pci/Kconfig	8
-rw-r--r--	drivers/vfio/pci/hisilicon/Kconfig	4
-rw-r--r--	drivers/vfio/pci/mlx5/Kconfig	2
-rw-r--r--	drivers/vfio/pci/vfio_pci_config.c	8
-rw-r--r--	drivers/vfio/pci/vfio_pci_core.c	46
-rw-r--r--	drivers/vfio/pci/vfio_pci_intrs.c	305
-rw-r--r--	drivers/vfio/platform/Kconfig	18
-rw-r--r--	drivers/vfio/platform/Makefile	9
-rw-r--r--	drivers/vfio/platform/reset/Kconfig	2
-rw-r--r--	include/linux/cdx/cdx_bus.h	1
-rw-r--r--	include/linux/mod_devicetable.h	6
-rw-r--r--	include/linux/vfio_pci_core.h	26
-rw-r--r--	include/uapi/linux/vfio.h	18
-rw-r--r--	scripts/mod/devicetable-offsets.c	1
-rw-r--r--	scripts/mod/file2alias.c	17
24 files changed, 654 insertions, 146 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index d2326e8437487e..910a3bd814b73e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22254,6 +22254,13 @@ F: Documentation/filesystems/vfat.rst
F: fs/fat/
F: tools/testing/selftests/filesystems/fat/
+VFIO CDX DRIVER
+M: Nipun Gupta <nipun.gupta@amd.com>
+M: Nikhil Agarwal <nikhil.agarwal@amd.com>
+L: kvm@vger.kernel.org
+S: Maintained
+F: drivers/vfio/cdx/*
+
VFIO DRIVER
M: Alex Williamson <alex.williamson@redhat.com>
L: kvm@vger.kernel.org
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 89e06c981e435d..aba36f5be4ecea 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -57,6 +57,7 @@ source "drivers/vfio/pci/Kconfig"
source "drivers/vfio/platform/Kconfig"
source "drivers/vfio/mdev/Kconfig"
source "drivers/vfio/fsl-mc/Kconfig"
+source "drivers/vfio/cdx/Kconfig"
endif
source "virt/lib/Kconfig"
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 70e7dcb302efd2..66f418aef5a9a7 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -10,7 +10,8 @@ vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o
obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
-obj-$(CONFIG_VFIO_PCI) += pci/
-obj-$(CONFIG_VFIO_PLATFORM) += platform/
+obj-$(CONFIG_VFIO_PCI_CORE) += pci/
+obj-$(CONFIG_VFIO_PLATFORM_BASE) += platform/
obj-$(CONFIG_VFIO_MDEV) += mdev/
obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/
+obj-$(CONFIG_VFIO_CDX) += cdx/
diff --git a/drivers/vfio/cdx/Kconfig b/drivers/vfio/cdx/Kconfig
new file mode 100644
index 00000000000000..e6de0a0caa3226
--- /dev/null
+++ b/drivers/vfio/cdx/Kconfig
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# VFIO CDX configuration
+#
+# Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+#
+
+config VFIO_CDX
+ tristate "VFIO support for CDX bus devices"
+ depends on CDX_BUS
+ select EVENTFD
+ help
+ Driver to enable VFIO support for the devices on CDX bus.
+ This is required to make use of CDX devices present in
+ the system using the VFIO framework.
+
+ If you don't know what to do here, say N.
diff --git a/drivers/vfio/cdx/Makefile b/drivers/vfio/cdx/Makefile
new file mode 100644
index 00000000000000..cd4a2e6fe6094b
--- /dev/null
+++ b/drivers/vfio/cdx/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+#
+
+obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o
+
+vfio-cdx-objs := main.o
diff --git a/drivers/vfio/cdx/main.c b/drivers/vfio/cdx/main.c
new file mode 100644
index 00000000000000..c376a69d2db2e0
--- /dev/null
+++ b/drivers/vfio/cdx/main.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+ */
+
+#include <linux/vfio.h>
+#include <linux/cdx/cdx_bus.h>
+
+#include "private.h"
+
+static int vfio_cdx_open_device(struct vfio_device *core_vdev)
+{
+ struct vfio_cdx_device *vdev =
+ container_of(core_vdev, struct vfio_cdx_device, vdev);
+ struct cdx_device *cdx_dev = to_cdx_device(core_vdev->dev);
+ int count = cdx_dev->res_count;
+ int i;
+
+ vdev->regions = kcalloc(count, sizeof(struct vfio_cdx_region),
+ GFP_KERNEL_ACCOUNT);
+ if (!vdev->regions)
+ return -ENOMEM;
+
+ for (i = 0; i < count; i++) {
+ struct resource *res = &cdx_dev->res[i];
+
+ vdev->regions[i].addr = res->start;
+ vdev->regions[i].size = resource_size(res);
+ vdev->regions[i].type = res->flags;
+ /*
+ * Only regions addressed with PAGE granularity may be
+ * MMAP'ed securely.
+ */
+ if (!(vdev->regions[i].addr & ~PAGE_MASK) &&
+ !(vdev->regions[i].size & ~PAGE_MASK))
+ vdev->regions[i].flags |=
+ VFIO_REGION_INFO_FLAG_MMAP;
+ vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
+ if (!(cdx_dev->res[i].flags & IORESOURCE_READONLY))
+ vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
+ }
+
+ return 0;
+}
+
+static void vfio_cdx_close_device(struct vfio_device *core_vdev)
+{
+ struct vfio_cdx_device *vdev =
+ container_of(core_vdev, struct vfio_cdx_device, vdev);
+
+ kfree(vdev->regions);
+ cdx_dev_reset(core_vdev->dev);
+}
+
+static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev,
+ struct vfio_device_info __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_device_info, num_irqs);
+ struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
+ struct vfio_device_info info;
+
+ if (copy_from_user(&info, arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ info.flags = VFIO_DEVICE_FLAGS_CDX;
+ info.flags |= VFIO_DEVICE_FLAGS_RESET;
+
+ info.num_regions = cdx_dev->res_count;
+ info.num_irqs = 0;
+
+ return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
+}
+
+static int vfio_cdx_ioctl_get_region_info(struct vfio_cdx_device *vdev,
+ struct vfio_region_info __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_region_info, offset);
+ struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
+ struct vfio_region_info info;
+
+ if (copy_from_user(&info, arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ if (info.index >= cdx_dev->res_count)
+ return -EINVAL;
+
+ /* map offset to the physical address */
+ info.offset = vfio_cdx_index_to_offset(info.index);
+ info.size = vdev->regions[info.index].size;
+ info.flags = vdev->regions[info.index].flags;
+
+ return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
+}
+
+static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
+ unsigned int cmd, unsigned long arg)
+{
+ struct vfio_cdx_device *vdev =
+ container_of(core_vdev, struct vfio_cdx_device, vdev);
+ void __user *uarg = (void __user *)arg;
+
+ switch (cmd) {
+ case VFIO_DEVICE_GET_INFO:
+ return vfio_cdx_ioctl_get_info(vdev, uarg);
+ case VFIO_DEVICE_GET_REGION_INFO:
+ return vfio_cdx_ioctl_get_region_info(vdev, uarg);
+ case VFIO_DEVICE_RESET:
+ return cdx_dev_reset(core_vdev->dev);
+ default:
+ return -ENOTTY;
+ }
+}
+
+static int vfio_cdx_mmap_mmio(struct vfio_cdx_region region,
+ struct vm_area_struct *vma)
+{
+ u64 size = vma->vm_end - vma->vm_start;
+ u64 pgoff, base;
+
+ pgoff = vma->vm_pgoff &
+ ((1U << (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+ base = pgoff << PAGE_SHIFT;
+
+ if (base + size > region.size)
+ return -EINVAL;
+
+ vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff;
+ vma->vm_page_prot = pgprot_device(vma->vm_page_prot);
+
+ return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ size, vma->vm_page_prot);
+}
+
+static int vfio_cdx_mmap(struct vfio_device *core_vdev,
+ struct vm_area_struct *vma)
+{
+ struct vfio_cdx_device *vdev =
+ container_of(core_vdev, struct vfio_cdx_device, vdev);
+ struct cdx_device *cdx_dev = to_cdx_device(core_vdev->dev);
+ unsigned int index;
+
+ index = vma->vm_pgoff >> (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT);
+
+ if (index >= cdx_dev->res_count)
+ return -EINVAL;
+
+ if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
+ return -EINVAL;
+
+ if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) &&
+ (vma->vm_flags & VM_READ))
+ return -EPERM;
+
+ if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) &&
+ (vma->vm_flags & VM_WRITE))
+ return -EPERM;
+
+ return vfio_cdx_mmap_mmio(vdev->regions[index], vma);
+}
+
+static const struct vfio_device_ops vfio_cdx_ops = {
+ .name = "vfio-cdx",
+ .open_device = vfio_cdx_open_device,
+ .close_device = vfio_cdx_close_device,
+ .ioctl = vfio_cdx_ioctl,
+ .mmap = vfio_cdx_mmap,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
+};
+
+static int vfio_cdx_probe(struct cdx_device *cdx_dev)
+{
+ struct vfio_cdx_device *vdev;
+ struct device *dev = &cdx_dev->dev;
+ int ret;
+
+ vdev = vfio_alloc_device(vfio_cdx_device, vdev, dev,
+ &vfio_cdx_ops);
+ if (IS_ERR(vdev))
+ return PTR_ERR(vdev);
+
+ ret = vfio_register_group_dev(&vdev->vdev);
+ if (ret)
+ goto out_uninit;
+
+ dev_set_drvdata(dev, vdev);
+ return 0;
+
+out_uninit:
+ vfio_put_device(&vdev->vdev);
+ return ret;
+}
+
+static int vfio_cdx_remove(struct cdx_device *cdx_dev)
+{
+ struct device *dev = &cdx_dev->dev;
+ struct vfio_cdx_device *vdev = dev_get_drvdata(dev);
+
+ vfio_unregister_group_dev(&vdev->vdev);
+ vfio_put_device(&vdev->vdev);
+
+ return 0;
+}
+
+static const struct cdx_device_id vfio_cdx_table[] = {
+ { CDX_DEVICE_DRIVER_OVERRIDE(CDX_ANY_ID, CDX_ANY_ID,
+ CDX_ID_F_VFIO_DRIVER_OVERRIDE) }, /* match all by default */
+ {}
+};
+
+MODULE_DEVICE_TABLE(cdx, vfio_cdx_table);
+
+static struct cdx_driver vfio_cdx_driver = {
+ .probe = vfio_cdx_probe,
+ .remove = vfio_cdx_remove,
+ .match_id_table = vfio_cdx_table,
+ .driver = {
+ .name = "vfio-cdx",
+ .owner = THIS_MODULE,
+ },
+ .driver_managed_dma = true,
+};
+
+module_driver(vfio_cdx_driver, cdx_driver_register, cdx_driver_unregister);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("VFIO for CDX devices - User Level meta-driver");
diff --git a/drivers/vfio/cdx/private.h b/drivers/vfio/cdx/private.h
new file mode 100644
index 00000000000000..8bdc117ea88edd
--- /dev/null
+++ b/drivers/vfio/cdx/private.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+ */
+
+#ifndef VFIO_CDX_PRIVATE_H
+#define VFIO_CDX_PRIVATE_H
+
+#define VFIO_CDX_OFFSET_SHIFT 40
+
+static inline u64 vfio_cdx_index_to_offset(u32 index)
+{
+ return ((u64)(index) << VFIO_CDX_OFFSET_SHIFT);
+}
+
+struct vfio_cdx_region {
+ u32 flags;
+ u32 type;
+ u64 addr;
+ resource_size_t size;
+};
+
+struct vfio_cdx_device {
+ struct vfio_device vdev;
+ struct vfio_cdx_region *regions;
+};
+
+#endif /* VFIO_CDX_PRIVATE_H */
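For illustration only, a hypothetical userspace helper showing how these offsets are consumed: the driver reports region i at offset i << VFIO_CDX_OFFSET_SHIFT through VFIO_DEVICE_GET_REGION_INFO, and that offset is what mmap() on the device fd expects (device_fd is assumed to be an open vfio-cdx device fd):

#include <sys/mman.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static void *map_cdx_region(int device_fd, unsigned int index)
{
	struct vfio_region_info info = { .argsz = sizeof(info), .index = index };

	if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info))
		return MAP_FAILED;

	/* Only page-aligned regions are marked mappable by the driver */
	if (!(info.flags & VFIO_REGION_INFO_FLAG_MMAP))
		return MAP_FAILED;

	/* info.offset is index << VFIO_CDX_OFFSET_SHIFT, as encoded above */
	return mmap(NULL, info.size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    device_fd, info.offset);
}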
diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig
index 597d338c5c8a2c..7d1d690348f07c 100644
--- a/drivers/vfio/fsl-mc/Kconfig
+++ b/drivers/vfio/fsl-mc/Kconfig
@@ -1,6 +1,8 @@
+menu "VFIO support for FSL_MC bus devices"
+ depends on FSL_MC_BUS
+
config VFIO_FSL_MC
tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
- depends on FSL_MC_BUS
select EVENTFD
help
Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
@@ -8,3 +10,5 @@ config VFIO_FSL_MC
fsl-mc bus devices using the VFIO framework.
If you don't know what to do here, say N.
+
+endmenu
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 58f91b3bd670cc..ed4737de452896 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -72,12 +72,6 @@ int mdev_register_parent(struct mdev_parent *parent, struct device *dev,
parent->nr_types = nr_types;
atomic_set(&parent->available_instances, mdev_driver->max_instances);
- if (!mdev_bus_compat_class) {
- mdev_bus_compat_class = class_compat_register("mdev_bus");
- if (!mdev_bus_compat_class)
- return -ENOMEM;
- }
-
ret = parent_create_sysfs_files(parent);
if (ret)
return ret;
@@ -251,13 +245,24 @@ int mdev_device_remove(struct mdev_device *mdev)
static int __init mdev_init(void)
{
- return bus_register(&mdev_bus_type);
+ int ret;
+
+ ret = bus_register(&mdev_bus_type);
+ if (ret)
+ return ret;
+
+ mdev_bus_compat_class = class_compat_register("mdev_bus");
+ if (!mdev_bus_compat_class) {
+ bus_unregister(&mdev_bus_type);
+ return -ENOMEM;
+ }
+
+ return 0;
}
static void __exit mdev_exit(void)
{
- if (mdev_bus_compat_class)
- class_compat_unregister(mdev_bus_compat_class);
+ class_compat_unregister(mdev_bus_compat_class);
bus_unregister(&mdev_bus_type);
}
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index f9d0c908e738c3..86bb7835cf3c69 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
-if PCI && MMU
+menu "VFIO support for PCI devices"
+ depends on PCI && MMU
+
config VFIO_PCI_CORE
tristate
select VFIO_VIRQFD
@@ -7,9 +9,11 @@ config VFIO_PCI_CORE
config VFIO_PCI_MMAP
def_bool y if !S390
+ depends on VFIO_PCI_CORE
config VFIO_PCI_INTX
def_bool y if !S390
+ depends on VFIO_PCI_CORE
config VFIO_PCI
tristate "Generic VFIO support for any PCI device"
@@ -59,4 +63,4 @@ source "drivers/vfio/pci/mlx5/Kconfig"
source "drivers/vfio/pci/hisilicon/Kconfig"
-endif
+endmenu
diff --git a/drivers/vfio/pci/hisilicon/Kconfig b/drivers/vfio/pci/hisilicon/Kconfig
index 5daa0f45d2f99b..cbf1c32f6ebff7 100644
--- a/drivers/vfio/pci/hisilicon/Kconfig
+++ b/drivers/vfio/pci/hisilicon/Kconfig
@@ -1,13 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
config HISI_ACC_VFIO_PCI
- tristate "VFIO PCI support for HiSilicon ACC devices"
+ tristate "VFIO support for HiSilicon ACC PCI devices"
depends on ARM64 || (COMPILE_TEST && 64BIT)
- depends on VFIO_PCI_CORE
depends on PCI_MSI
depends on CRYPTO_DEV_HISI_QM
depends on CRYPTO_DEV_HISI_HPRE
depends on CRYPTO_DEV_HISI_SEC2
depends on CRYPTO_DEV_HISI_ZIP
+ select VFIO_PCI_CORE
help
This provides generic PCI support for HiSilicon ACC devices
using the VFIO framework.
diff --git a/drivers/vfio/pci/mlx5/Kconfig b/drivers/vfio/pci/mlx5/Kconfig
index 29ba9c504a7560..7088edc4fb28d8 100644
--- a/drivers/vfio/pci/mlx5/Kconfig
+++ b/drivers/vfio/pci/mlx5/Kconfig
@@ -2,7 +2,7 @@
config MLX5_VFIO_PCI
tristate "VFIO support for MLX5 PCI devices"
depends on MLX5_CORE
- depends on VFIO_PCI_CORE
+ select VFIO_PCI_CORE
help
This provides migration support for MLX5 devices using the VFIO
framework.
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 948cdd464f4e61..7e2e62ab0869cf 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1566,8 +1566,8 @@ static int vfio_cap_init(struct vfio_pci_core_device *vdev)
}
if (!len) {
- pci_info(pdev, "%s: hiding cap %#x@%#x\n", __func__,
- cap, pos);
+ pci_dbg(pdev, "%s: hiding cap %#x@%#x\n", __func__,
+ cap, pos);
*prev = next;
pos = next;
continue;
@@ -1643,8 +1643,8 @@ static int vfio_ecap_init(struct vfio_pci_core_device *vdev)
}
if (!len) {
- pci_info(pdev, "%s: hiding ecap %#x@%#x\n",
- __func__, ecap, epos);
+ pci_dbg(pdev, "%s: hiding ecap %#x@%#x\n",
+ __func__, ecap, epos);
/* If not the first in the chain, we can skip over it */
if (prev) {
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index a5ab416cf476c3..20d7b69ea6ff1f 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -530,8 +530,11 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
vdev->msix_bar = table & PCI_MSIX_TABLE_BIR;
vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET;
vdev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * 16;
- } else
+ vdev->has_dyn_msix = pci_msix_can_alloc_dyn(pdev);
+ } else {
vdev->msix_bar = 0xFF;
+ vdev->has_dyn_msix = false;
+ }
if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
vdev->has_vga = true;
@@ -882,6 +885,37 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
}
EXPORT_SYMBOL_GPL(vfio_pci_core_register_dev_region);
+static int vfio_pci_info_atomic_cap(struct vfio_pci_core_device *vdev,
+ struct vfio_info_cap *caps)
+{
+ struct vfio_device_info_cap_pci_atomic_comp cap = {
+ .header.id = VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP,
+ .header.version = 1
+ };
+ struct pci_dev *pdev = pci_physfn(vdev->pdev);
+ u32 devcap2;
+
+ pcie_capability_read_dword(pdev, PCI_EXP_DEVCAP2, &devcap2);
+
+ if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
+ !pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32))
+ cap.flags |= VFIO_PCI_ATOMIC_COMP32;
+
+ if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
+ !pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64))
+ cap.flags |= VFIO_PCI_ATOMIC_COMP64;
+
+ if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP128) &&
+ !pci_enable_atomic_ops_to_root(pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP128))
+ cap.flags |= VFIO_PCI_ATOMIC_COMP128;
+
+ if (!cap.flags)
+ return -ENODEV;
+
+ return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
struct vfio_device_info __user *arg)
{
@@ -920,6 +954,13 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
return ret;
}
+ ret = vfio_pci_info_atomic_cap(vdev, &caps);
+ if (ret && ret != -ENODEV) {
+ pci_warn(vdev->pdev,
+ "Failed to setup AtomicOps info capability\n");
+ return ret;
+ }
+
if (caps.size) {
info.flags |= VFIO_DEVICE_FLAGS_CAPS;
if (info.argsz < sizeof(info) + caps.size) {
@@ -1111,7 +1152,7 @@ static int vfio_pci_ioctl_get_irq_info(struct vfio_pci_core_device *vdev,
if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
info.flags |=
(VFIO_IRQ_INFO_MASKABLE | VFIO_IRQ_INFO_AUTOMASKED);
- else
+ else if (info.index != VFIO_PCI_MSIX_IRQ_INDEX || !vdev->has_dyn_msix)
info.flags |= VFIO_IRQ_INFO_NORESIZE;
return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
@@ -2102,6 +2143,7 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
INIT_LIST_HEAD(&vdev->vma_list);
INIT_LIST_HEAD(&vdev->sriov_pfs_item);
init_rwsem(&vdev->memory_lock);
+ xa_init(&vdev->ctx);
return 0;
}
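On the userspace side, the new capability is found by walking the vfio_device_info capability chain; a hedged sketch (the helper name is made up, device_fd is assumed):

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Returns the VFIO_PCI_ATOMIC_COMP{32,64,128} bits, or 0 if unsupported */
static __u32 pci_atomic_comp_flags(int device_fd)
{
	struct vfio_device_info hdr = { .argsz = sizeof(hdr) };
	struct vfio_device_info *info;
	struct vfio_info_cap_header *cap;
	__u32 flags = 0;

	if (ioctl(device_fd, VFIO_DEVICE_GET_INFO, &hdr) ||
	    !(hdr.flags & VFIO_DEVICE_FLAGS_CAPS))
		return 0;

	info = calloc(1, hdr.argsz);	/* kernel reported the size it needs */
	if (!info)
		return 0;
	info->argsz = hdr.argsz;

	if (!ioctl(device_fd, VFIO_DEVICE_GET_INFO, info) && info->cap_offset) {
		cap = (struct vfio_info_cap_header *)
			((char *)info + info->cap_offset);
		while (1) {
			if (cap->id == VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP) {
				flags = ((struct vfio_device_info_cap_pci_atomic_comp *)cap)->flags;
				break;
			}
			if (!cap->next)
				break;
			cap = (struct vfio_info_cap_header *)
				((char *)info + cap->next);
		}
	}

	free(info);
	return flags;
}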
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index bffb0741518b95..cbb4bcbfbf83d9 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -48,6 +48,39 @@ static bool is_irq_none(struct vfio_pci_core_device *vdev)
vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
}
+static
+struct vfio_pci_irq_ctx *vfio_irq_ctx_get(struct vfio_pci_core_device *vdev,
+ unsigned long index)
+{
+ return xa_load(&vdev->ctx, index);
+}
+
+static void vfio_irq_ctx_free(struct vfio_pci_core_device *vdev,
+ struct vfio_pci_irq_ctx *ctx, unsigned long index)
+{
+ xa_erase(&vdev->ctx, index);
+ kfree(ctx);
+}
+
+static struct vfio_pci_irq_ctx *
+vfio_irq_ctx_alloc(struct vfio_pci_core_device *vdev, unsigned long index)
+{
+ struct vfio_pci_irq_ctx *ctx;
+ int ret;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
+ if (!ctx)
+ return NULL;
+
+ ret = xa_insert(&vdev->ctx, index, ctx, GFP_KERNEL_ACCOUNT);
+ if (ret) {
+ kfree(ctx);
+ return NULL;
+ }
+
+ return ctx;
+}
+
/*
* INTx
*/
@@ -55,14 +88,21 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
{
struct vfio_pci_core_device *vdev = opaque;
- if (likely(is_intx(vdev) && !vdev->virq_disabled))
- eventfd_signal(vdev->ctx[0].trigger, 1);
+ if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
+ struct vfio_pci_irq_ctx *ctx;
+
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ if (WARN_ON_ONCE(!ctx))
+ return;
+ eventfd_signal(ctx->trigger, 1);
+ }
}
/* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
+ struct vfio_pci_irq_ctx *ctx;
unsigned long flags;
bool masked_changed = false;
@@ -77,7 +117,14 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
if (unlikely(!is_intx(vdev))) {
if (vdev->pci_2_3)
pci_intx(pdev, 0);
- } else if (!vdev->ctx[0].masked) {
+ goto out_unlock;
+ }
+
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ if (WARN_ON_ONCE(!ctx))
+ goto out_unlock;
+
+ if (!ctx->masked) {
/*
* Can't use check_and_mask here because we always want to
* mask, not just when something is pending.
@@ -87,10 +134,11 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
else
disable_irq_nosync(pdev->irq);
- vdev->ctx[0].masked = true;
+ ctx->masked = true;
masked_changed = true;
}
+out_unlock:
spin_unlock_irqrestore(&vdev->irqlock, flags);
return masked_changed;
}
@@ -105,6 +153,7 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
{
struct vfio_pci_core_device *vdev = opaque;
struct pci_dev *pdev = vdev->pdev;
+ struct vfio_pci_irq_ctx *ctx;
unsigned long flags;
int ret = 0;
@@ -117,7 +166,14 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
if (unlikely(!is_intx(vdev))) {
if (vdev->pci_2_3)
pci_intx(pdev, 1);
- } else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
+ goto out_unlock;
+ }
+
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ if (WARN_ON_ONCE(!ctx))
+ goto out_unlock;
+
+ if (ctx->masked && !vdev->virq_disabled) {
/*
* A pending interrupt here would immediately trigger,
* but we can avoid that overhead by just re-sending
@@ -129,9 +185,10 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
} else
enable_irq(pdev->irq);
- vdev->ctx[0].masked = (ret > 0);
+ ctx->masked = (ret > 0);
}
+out_unlock:
spin_unlock_irqrestore(&vdev->irqlock, flags);
return ret;
@@ -146,18 +203,23 @@ void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
{
struct vfio_pci_core_device *vdev = dev_id;
+ struct vfio_pci_irq_ctx *ctx;
unsigned long flags;
int ret = IRQ_NONE;
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ if (WARN_ON_ONCE(!ctx))
+ return ret;
+
spin_lock_irqsave(&vdev->irqlock, flags);
if (!vdev->pci_2_3) {
disable_irq_nosync(vdev->pdev->irq);
- vdev->ctx[0].masked = true;
+ ctx->masked = true;
ret = IRQ_HANDLED;
- } else if (!vdev->ctx[0].masked && /* may be shared */
+ } else if (!ctx->masked && /* may be shared */
pci_check_and_mask_intx(vdev->pdev)) {
- vdev->ctx[0].masked = true;
+ ctx->masked = true;
ret = IRQ_HANDLED;
}
@@ -171,27 +233,27 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
{
+ struct vfio_pci_irq_ctx *ctx;
+
if (!is_irq_none(vdev))
return -EINVAL;
if (!vdev->pdev->irq)
return -ENODEV;
- vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT);
- if (!vdev->ctx)
+ ctx = vfio_irq_ctx_alloc(vdev, 0);
+ if (!ctx)
return -ENOMEM;
- vdev->num_ctx = 1;
-
/*
* If the virtual interrupt is masked, restore it. Devices
* supporting DisINTx can be masked at the hardware level
* here, non-PCI-2.3 devices will have to wait until the
* interrupt is enabled.
*/
- vdev->ctx[0].masked = vdev->virq_disabled;
+ ctx->masked = vdev->virq_disabled;
if (vdev->pci_2_3)
- pci_intx(vdev->pdev, !vdev->ctx[0].masked);
+ pci_intx(vdev->pdev, !ctx->masked);
vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
@@ -202,41 +264,46 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
{
struct pci_dev *pdev = vdev->pdev;
unsigned long irqflags = IRQF_SHARED;
+ struct vfio_pci_irq_ctx *ctx;
struct eventfd_ctx *trigger;
unsigned long flags;
int ret;
- if (vdev->ctx[0].trigger) {
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ if (WARN_ON_ONCE(!ctx))
+ return -EINVAL;
+
+ if (ctx->trigger) {
free_irq(pdev->irq, vdev);
- kfree(vdev->ctx[0].name);
- eventfd_ctx_put(vdev->ctx[0].trigger);
- vdev->ctx[0].trigger = NULL;
+ kfree(ctx->name);
+ eventfd_ctx_put(ctx->trigger);
+ ctx->trigger = NULL;
}
if (fd < 0) /* Disable only */
return 0;
- vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
- pci_name(pdev));
- if (!vdev->ctx[0].name)
+ ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
+ pci_name(pdev));
+ if (!ctx->name)
return -ENOMEM;
trigger = eventfd_ctx_fdget(fd);
if (IS_ERR(trigger)) {
- kfree(vdev->ctx[0].name);
+ kfree(ctx->name);
return PTR_ERR(trigger);
}
- vdev->ctx[0].trigger = trigger;
+ ctx->trigger = trigger;
if (!vdev->pci_2_3)
irqflags = 0;
ret = request_irq(pdev->irq, vfio_intx_handler,
- irqflags, vdev->ctx[0].name, vdev);
+ irqflags, ctx->name, vdev);
if (ret) {
- vdev->ctx[0].trigger = NULL;
- kfree(vdev->ctx[0].name);
+ ctx->trigger = NULL;
+ kfree(ctx->name);
eventfd_ctx_put(trigger);
return ret;
}
@@ -246,7 +313,7 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
* disable_irq won't.
*/
spin_lock_irqsave(&vdev->irqlock, flags);
- if (!vdev->pci_2_3 && vdev->ctx[0].masked)
+ if (!vdev->pci_2_3 && ctx->masked)
disable_irq_nosync(pdev->irq);
spin_unlock_irqrestore(&vdev->irqlock, flags);
@@ -255,12 +322,17 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
{
- vfio_virqfd_disable(&vdev->ctx[0].unmask);
- vfio_virqfd_disable(&vdev->ctx[0].mask);
+ struct vfio_pci_irq_ctx *ctx;
+
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ WARN_ON_ONCE(!ctx);
+ if (ctx) {
+ vfio_virqfd_disable(&ctx->unmask);
+ vfio_virqfd_disable(&ctx->mask);
+ }
vfio_intx_set_signal(vdev, -1);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
- vdev->num_ctx = 0;
- kfree(vdev->ctx);
+ vfio_irq_ctx_free(vdev, ctx, 0);
}
/*
@@ -284,11 +356,6 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
if (!is_irq_none(vdev))
return -EINVAL;
- vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx),
- GFP_KERNEL_ACCOUNT);
- if (!vdev->ctx)
- return -ENOMEM;
-
/* return the number of supported vectors if we can't get all: */
cmd = vfio_pci_memory_lock_and_enable(vdev);
ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
@@ -296,12 +363,10 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
if (ret > 0)
pci_free_irq_vectors(pdev);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
- kfree(vdev->ctx);
return ret;
}
vfio_pci_memory_unlock_and_restore(vdev, cmd);
- vdev->num_ctx = nvec;
vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
VFIO_PCI_MSI_IRQ_INDEX;
@@ -316,53 +381,91 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
return 0;
}
-static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
- int vector, int fd, bool msix)
+/*
+ * vfio_msi_alloc_irq() returns the Linux IRQ number of an MSI or MSI-X device
+ * interrupt vector. If a Linux IRQ number is not available then a new
+ * interrupt is allocated if dynamic MSI-X is supported.
+ *
+ * Where is vfio_msi_free_irq()? Allocated interrupts are maintained,
+ * essentially forming a cache that subsequent allocations can draw from.
+ * Interrupts are freed using pci_free_irq_vectors() when MSI/MSI-X is
+ * disabled.
+ */
+static int vfio_msi_alloc_irq(struct vfio_pci_core_device *vdev,
+ unsigned int vector, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
- struct eventfd_ctx *trigger;
- int irq, ret;
+ struct msi_map map;
+ int irq;
u16 cmd;
- if (vector < 0 || vector >= vdev->num_ctx)
+ irq = pci_irq_vector(pdev, vector);
+ if (WARN_ON_ONCE(irq == 0))
return -EINVAL;
+ if (irq > 0 || !msix || !vdev->has_dyn_msix)
+ return irq;
- irq = pci_irq_vector(pdev, vector);
+ cmd = vfio_pci_memory_lock_and_enable(vdev);
+ map = pci_msix_alloc_irq_at(pdev, vector, NULL);
+ vfio_pci_memory_unlock_and_restore(vdev, cmd);
- if (vdev->ctx[vector].trigger) {
- irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
+ return map.index < 0 ? map.index : map.virq;
+}
+static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
+ unsigned int vector, int fd, bool msix)
+{
+ struct pci_dev *pdev = vdev->pdev;
+ struct vfio_pci_irq_ctx *ctx;
+ struct eventfd_ctx *trigger;
+ int irq = -EINVAL, ret;
+ u16 cmd;
+
+ ctx = vfio_irq_ctx_get(vdev, vector);
+
+ if (ctx) {
+ irq_bypass_unregister_producer(&ctx->producer);
+ irq = pci_irq_vector(pdev, vector);
cmd = vfio_pci_memory_lock_and_enable(vdev);
- free_irq(irq, vdev->ctx[vector].trigger);
+ free_irq(irq, ctx->trigger);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
-
- kfree(vdev->ctx[vector].name);
- eventfd_ctx_put(vdev->ctx[vector].trigger);
- vdev->ctx[vector].trigger = NULL;
+ /* Interrupt stays allocated, will be freed at MSI-X disable. */
+ kfree(ctx->name);
+ eventfd_ctx_put(ctx->trigger);
+ vfio_irq_ctx_free(vdev, ctx, vector);
}
if (fd < 0)
return 0;
- vdev->ctx[vector].name = kasprintf(GFP_KERNEL_ACCOUNT,
- "vfio-msi%s[%d](%s)",
- msix ? "x" : "", vector,
- pci_name(pdev));
- if (!vdev->ctx[vector].name)
+ if (irq == -EINVAL) {
+ /* Interrupt stays allocated, will be freed at MSI-X disable. */
+ irq = vfio_msi_alloc_irq(vdev, vector, msix);
+ if (irq < 0)
+ return irq;
+ }
+
+ ctx = vfio_irq_ctx_alloc(vdev, vector);
+ if (!ctx)
return -ENOMEM;
+ ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
+ msix ? "x" : "", vector, pci_name(pdev));
+ if (!ctx->name) {
+ ret = -ENOMEM;
+ goto out_free_ctx;
+ }
+
trigger = eventfd_ctx_fdget(fd);
if (IS_ERR(trigger)) {
- kfree(vdev->ctx[vector].name);
- return PTR_ERR(trigger);
+ ret = PTR_ERR(trigger);
+ goto out_free_name;
}
/*
- * The MSIx vector table resides in device memory which may be cleared
- * via backdoor resets. We don't allow direct access to the vector
- * table so even if a userspace driver attempts to save/restore around
- * such a reset it would be unsuccessful. To avoid this, restore the
- * cached value of the message prior to enabling.
+ * If the vector was previously allocated, refresh the on-device
+ * message data before enabling in case it had been cleared or
+ * corrupted (e.g. due to backdoor resets) since writing.
*/
cmd = vfio_pci_memory_lock_and_enable(vdev);
if (msix) {
@@ -372,37 +475,39 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
pci_write_msi_msg(irq, &msg);
}
- ret = request_irq(irq, vfio_msihandler, 0,
- vdev->ctx[vector].name, trigger);
+ ret = request_irq(irq, vfio_msihandler, 0, ctx->name, trigger);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
- if (ret) {
- kfree(vdev->ctx[vector].name);
- eventfd_ctx_put(trigger);
- return ret;
- }
+ if (ret)
+ goto out_put_eventfd_ctx;
- vdev->ctx[vector].producer.token = trigger;
- vdev->ctx[vector].producer.irq = irq;
- ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
+ ctx->producer.token = trigger;
+ ctx->producer.irq = irq;
+ ret = irq_bypass_register_producer(&ctx->producer);
if (unlikely(ret)) {
dev_info(&pdev->dev,
"irq bypass producer (token %p) registration fails: %d\n",
- vdev->ctx[vector].producer.token, ret);
+ ctx->producer.token, ret);
- vdev->ctx[vector].producer.token = NULL;
+ ctx->producer.token = NULL;
}
- vdev->ctx[vector].trigger = trigger;
+ ctx->trigger = trigger;
return 0;
+
+out_put_eventfd_ctx:
+ eventfd_ctx_put(trigger);
+out_free_name:
+ kfree(ctx->name);
+out_free_ctx:
+ vfio_irq_ctx_free(vdev, ctx, vector);
+ return ret;
}
static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
unsigned count, int32_t *fds, bool msix)
{
- int i, j, ret = 0;
-
- if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
- return -EINVAL;
+ unsigned int i, j;
+ int ret = 0;
for (i = 0, j = start; i < count && !ret; i++, j++) {
int fd = fds ? fds[i] : -1;
@@ -410,8 +515,8 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
}
if (ret) {
- for (--j; j >= (int)start; j--)
- vfio_msi_set_vector_signal(vdev, j, -1, msix);
+ for (i = start; i < j; i++)
+ vfio_msi_set_vector_signal(vdev, i, -1, msix);
}
return ret;
@@ -420,16 +525,16 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
- int i;
+ struct vfio_pci_irq_ctx *ctx;
+ unsigned long i;
u16 cmd;
- for (i = 0; i < vdev->num_ctx; i++) {
- vfio_virqfd_disable(&vdev->ctx[i].unmask);
- vfio_virqfd_disable(&vdev->ctx[i].mask);
+ xa_for_each(&vdev->ctx, i, ctx) {
+ vfio_virqfd_disable(&ctx->unmask);
+ vfio_virqfd_disable(&ctx->mask);
+ vfio_msi_set_vector_signal(vdev, i, -1, msix);
}
- vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
-
cmd = vfio_pci_memory_lock_and_enable(vdev);
pci_free_irq_vectors(pdev);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
@@ -442,8 +547,6 @@ static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
pci_intx(pdev, 0);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
- vdev->num_ctx = 0;
- kfree(vdev->ctx);
}
/*
@@ -463,14 +566,18 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
if (unmask)
vfio_pci_intx_unmask(vdev);
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
int32_t fd = *(int32_t *)data;
+
+ if (WARN_ON_ONCE(!ctx))
+ return -EINVAL;
if (fd >= 0)
return vfio_virqfd_enable((void *) vdev,
vfio_pci_intx_unmask_handler,
vfio_send_intx_eventfd, NULL,
- &vdev->ctx[0].unmask, fd);
+ &ctx->unmask, fd);
- vfio_virqfd_disable(&vdev->ctx[0].unmask);
+ vfio_virqfd_disable(&ctx->unmask);
}
return 0;
@@ -543,7 +650,8 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
unsigned index, unsigned start,
unsigned count, uint32_t flags, void *data)
{
- int i;
+ struct vfio_pci_irq_ctx *ctx;
+ unsigned int i;
bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
@@ -573,18 +681,19 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
return ret;
}
- if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
+ if (!irq_is(vdev, index))
return -EINVAL;
for (i = start; i < start + count; i++) {
- if (!vdev->ctx[i].trigger)
+ ctx = vfio_irq_ctx_get(vdev, i);
+ if (!ctx)
continue;
if (flags & VFIO_IRQ_SET_DATA_NONE) {
- eventfd_signal(vdev->ctx[i].trigger, 1);
+ eventfd_signal(ctx->trigger, 1);
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
uint8_t *bools = data;
if (bools[i - start])
- eventfd_signal(vdev->ctx[i].trigger, 1);
+ eventfd_signal(ctx->trigger, 1);
}
}
return 0;
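With VFIO_IRQ_INFO_NORESIZE now clear for MSI-X on devices that support dynamic allocation, userspace can enable vectors one at a time instead of tearing down and re-allocating the whole set. A rough sketch, assuming device_fd and an eventfd efd already exist:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int enable_one_msix_vector(int device_fd, unsigned int vector, int efd)
{
	struct vfio_irq_info irq = { .argsz = sizeof(irq),
				     .index = VFIO_PCI_MSIX_IRQ_INDEX };
	struct vfio_irq_set *set;
	size_t argsz = sizeof(*set) + sizeof(int32_t);
	int32_t fd = efd;
	int ret;

	if (ioctl(device_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq))
		return -1;
	if (irq.flags & VFIO_IRQ_INFO_NORESIZE)
		return -1;	/* no dynamic MSI-X: resize the whole set instead */

	set = calloc(1, argsz);
	if (!set)
		return -1;
	set->argsz = argsz;
	set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	set->start = vector;	/* enable just this vector */
	set->count = 1;
	memcpy(set->data, &fd, sizeof(fd));

	ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
	free(set);
	return ret;
}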
diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig
index 331a5920f5abd2..88fcde51f02468 100644
--- a/drivers/vfio/platform/Kconfig
+++ b/drivers/vfio/platform/Kconfig
@@ -1,8 +1,14 @@
# SPDX-License-Identifier: GPL-2.0-only
-config VFIO_PLATFORM
- tristate "VFIO support for platform devices"
+menu "VFIO support for platform devices"
depends on ARM || ARM64 || COMPILE_TEST
+
+config VFIO_PLATFORM_BASE
+ tristate
select VFIO_VIRQFD
+
+config VFIO_PLATFORM
+ tristate "Generic VFIO support for any platform device"
+ select VFIO_PLATFORM_BASE
help
Support for platform devices with VFIO. This is required to make
use of platform devices present on the system using the VFIO
@@ -10,10 +16,10 @@ config VFIO_PLATFORM
If you don't know what to do here, say N.
-if VFIO_PLATFORM
config VFIO_AMBA
tristate "VFIO support for AMBA devices"
depends on ARM_AMBA || COMPILE_TEST
+ select VFIO_PLATFORM_BASE
help
Support for ARM AMBA devices with VFIO. This is required to make
use of ARM AMBA devices present on the system using the VFIO
@@ -21,5 +27,9 @@ config VFIO_AMBA
If you don't know what to do here, say N.
+menu "VFIO platform reset drivers"
+ depends on VFIO_PLATFORM_BASE
+
source "drivers/vfio/platform/reset/Kconfig"
-endif
+endmenu
+endmenu
diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile
index 3f3a24e7c4ef8e..ee4fb6a82ca8a3 100644
--- a/drivers/vfio/platform/Makefile
+++ b/drivers/vfio/platform/Makefile
@@ -1,13 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
vfio-platform-base-y := vfio_platform_common.o vfio_platform_irq.o
-vfio-platform-y := vfio_platform.o
+obj-$(CONFIG_VFIO_PLATFORM_BASE) += vfio-platform-base.o
+obj-$(CONFIG_VFIO_PLATFORM_BASE) += reset/
+vfio-platform-y := vfio_platform.o
obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o
-obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform-base.o
-obj-$(CONFIG_VFIO_PLATFORM) += reset/
vfio-amba-y := vfio_amba.o
-
obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o
-obj-$(CONFIG_VFIO_AMBA) += vfio-platform-base.o
-obj-$(CONFIG_VFIO_AMBA) += reset/
diff --git a/drivers/vfio/platform/reset/Kconfig b/drivers/vfio/platform/reset/Kconfig
index 12f5f3d803876c..dcc08dc145a5df 100644
--- a/drivers/vfio/platform/reset/Kconfig
+++ b/drivers/vfio/platform/reset/Kconfig
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
+if VFIO_PLATFORM
config VFIO_PLATFORM_CALXEDAXGMAC_RESET
tristate "VFIO support for calxeda xgmac reset"
help
@@ -21,3 +22,4 @@ config VFIO_PLATFORM_BCMFLEXRM_RESET
Enables the VFIO platform driver to handle reset for Broadcom FlexRM
If you don't know what to do here, say N.
+endif
diff --git a/include/linux/cdx/cdx_bus.h b/include/linux/cdx/cdx_bus.h
index 35ef41d8a61a0a..bead71b7bc7380 100644
--- a/include/linux/cdx/cdx_bus.h
+++ b/include/linux/cdx/cdx_bus.h
@@ -14,7 +14,6 @@
#include <linux/mod_devicetable.h>
#define MAX_CDX_DEV_RESOURCES 4
-#define CDX_ANY_ID (0xFFFF)
#define CDX_CONTROLLER_ID_SHIFT 4
#define CDX_BUS_NUM_MASK 0xF
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index ccaaeda792c07a..ccf017353bb642 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -912,6 +912,12 @@ struct ishtp_device_id {
kernel_ulong_t driver_data;
};
+#define CDX_ANY_ID (0xFFFF)
+
+enum {
+ CDX_ID_F_VFIO_DRIVER_OVERRIDE = 1,
+};
+
/**
* struct cdx_device_id - CDX device identifier
* @vendor: Vendor ID
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 367fd79226a306..562e8754869da6 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -59,8 +59,7 @@ struct vfio_pci_core_device {
struct perm_bits *msi_perm;
spinlock_t irqlock;
struct mutex igate;
- struct vfio_pci_irq_ctx *ctx;
- int num_ctx;
+ struct xarray ctx;
int irq_type;
int num_regions;
struct vfio_pci_region *region;
@@ -69,17 +68,18 @@ struct vfio_pci_core_device {
u16 msix_size;
u32 msix_offset;
u32 rbar[7];
- bool pci_2_3;
- bool virq_disabled;
- bool reset_works;
- bool extended_caps;
- bool bardirty;
- bool has_vga;
- bool needs_reset;
- bool nointx;
- bool needs_pm_restore;
- bool pm_intx_masked;
- bool pm_runtime_engaged;
+ bool has_dyn_msix:1;
+ bool pci_2_3:1;
+ bool virq_disabled:1;
+ bool reset_works:1;
+ bool extended_caps:1;
+ bool bardirty:1;
+ bool has_vga:1;
+ bool needs_reset:1;
+ bool nointx:1;
+ bool needs_pm_restore:1;
+ bool pm_intx_masked:1;
+ bool pm_runtime_engaged:1;
struct pci_saved_state *pci_saved_state;
struct pci_saved_state *pm_save;
int ioeventfds_nr;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index b71276bd7f9160..20c804bdc09c75 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -213,6 +213,7 @@ struct vfio_device_info {
#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */
#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */
#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */
+#define VFIO_DEVICE_FLAGS_CDX (1 << 8) /* vfio-cdx device */
__u32 num_regions; /* Max region index + 1 */
__u32 num_irqs; /* Max IRQ index + 1 */
__u32 cap_offset; /* Offset within info struct of first cap */
@@ -240,6 +241,20 @@ struct vfio_device_info {
#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3
#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4
+/*
+ * The following VFIO_DEVICE_INFO capability reports support for PCIe AtomicOp
+ * completion to the root bus with supported widths provided via flags.
+ */
+#define VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP 5
+struct vfio_device_info_cap_pci_atomic_comp {
+ struct vfio_info_cap_header header;
+ __u32 flags;
+#define VFIO_PCI_ATOMIC_COMP32 (1 << 0)
+#define VFIO_PCI_ATOMIC_COMP64 (1 << 1)
+#define VFIO_PCI_ATOMIC_COMP128 (1 << 2)
+ __u32 reserved;
+};
+
/**
* VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
* struct vfio_region_info)
@@ -511,6 +526,9 @@ struct vfio_region_info_cap_nvlink2_lnkspd {
* then add and unmask vectors, it's up to userspace to make the decision
* whether to allocate the maximum supported number of vectors or tear
* down setup and incrementally increase the vectors as each is enabled.
+ * Absence of the NORESIZE flag indicates that vectors can be enabled
+ * and disabled dynamically without impacting other vectors within the
+ * index.
*/
struct vfio_irq_info {
__u32 argsz;
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index 62dc988df84dea..abe65f8968dd59 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -265,6 +265,7 @@ int main(void)
DEVID(cdx_device_id);
DEVID_FIELD(cdx_device_id, vendor);
DEVID_FIELD(cdx_device_id, device);
+ DEVID_FIELD(cdx_device_id, override_only);
return 0;
}
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 28da34ba4359d2..38120f932b0dc1 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -1458,8 +1458,23 @@ static int do_cdx_entry(const char *filename, void *symval,
{
DEF_FIELD(symval, cdx_device_id, vendor);
DEF_FIELD(symval, cdx_device_id, device);
+ DEF_FIELD(symval, cdx_device_id, override_only);
- sprintf(alias, "cdx:v%08Xd%08Xd", vendor, device);
+ switch (override_only) {
+ case 0:
+ strcpy(alias, "cdx:");
+ break;
+ case CDX_ID_F_VFIO_DRIVER_OVERRIDE:
+ strcpy(alias, "vfio_cdx:");
+ break;
+ default:
+ warn("Unknown CDX driver_override alias %08X\n",
+ override_only);
+ return 0;
+ }
+
+ ADD(alias, "v", vendor != CDX_ANY_ID, vendor);
+ ADD(alias, "d", device != CDX_ANY_ID, device);
return 1;
}