 include/kvm/pci.h |  14
 pci.c             | 250
 vfio/pci.c        |  12
 3 files changed, 227 insertions(+), 49 deletions(-)
diff --git a/include/kvm/pci.h b/include/kvm/pci.h
index 73e06d76..bf81323d 100644
--- a/include/kvm/pci.h
+++ b/include/kvm/pci.h
@@ -11,6 +11,17 @@
#include "kvm/msi.h"
#include "kvm/fdt.h"
+#define pci_dev_err(pci_hdr, fmt, ...) \
+ pr_err("[%04x:%04x] " fmt, pci_hdr->vendor_id, pci_hdr->device_id, ##__VA_ARGS__)
+#define pci_dev_warn(pci_hdr, fmt, ...) \
+ pr_warning("[%04x:%04x] " fmt, pci_hdr->vendor_id, pci_hdr->device_id, ##__VA_ARGS__)
+#define pci_dev_info(pci_hdr, fmt, ...) \
+ pr_info("[%04x:%04x] " fmt, pci_hdr->vendor_id, pci_hdr->device_id, ##__VA_ARGS__)
+#define pci_dev_dbg(pci_hdr, fmt, ...) \
+ pr_debug("[%04x:%04x] " fmt, pci_hdr->vendor_id, pci_hdr->device_id, ##__VA_ARGS__)
+#define pci_dev_die(pci_hdr, fmt, ...) \
+ die("[%04x:%04x] " fmt, pci_hdr->vendor_id, pci_hdr->device_id, ##__VA_ARGS__)
+
/*
* PCI Configuration Mechanism #1 I/O ports. See Section 3.7.4.1.
* ("Configuration Mechanism #1") of the PCI Local Bus Specification 2.1 for
@@ -142,7 +153,8 @@ struct pci_device_header {
};
/* Private to lkvm */
- u32 bar_size[6];
+ u32 bar_size[6];
+ bool bar_active[6];
bar_activate_fn_t bar_activate_fn;
bar_deactivate_fn_t bar_deactivate_fn;
void *data;
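
The pci_dev_* macros prefix every message with the device's PCI vendor and device ID, so log lines from different devices can be told apart. A minimal usage sketch (the 1af4:1000 virtio IDs and the failing call are illustrative, not part of this patch):

	/* With pci_hdr->vendor_id == 0x1af4 and device_id == 0x1000, this
	 * prints: "[1af4:1000] Error activating emulation for BAR 2". */
	pci_dev_warn(pci_hdr, "Error activating emulation for BAR %d", 2);
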
diff --git a/pci.c b/pci.c
index 96239160..2e2c0270 100644
--- a/pci.c
+++ b/pci.c
@@ -71,6 +71,11 @@ static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_nu
return pci__bar_size(pci_hdr, bar_num);
}
+static bool pci_bar_is_active(struct pci_device_header *pci_hdr, int bar_num)
+{
+ return pci_hdr->bar_active[bar_num];
+}
+
static void *pci_config_address_ptr(u16 port)
{
unsigned long offset;
@@ -163,6 +168,46 @@ static struct ioport_operations pci_config_data_ops = {
.io_out = pci_config_data_out,
};
+static int pci_activate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr,
+ int bar_num)
+{
+ int r = 0;
+
+ if (pci_bar_is_active(pci_hdr, bar_num))
+ goto out;
+
+ r = pci_hdr->bar_activate_fn(kvm, pci_hdr, bar_num, pci_hdr->data);
+ if (r < 0) {
+ pci_dev_warn(pci_hdr, "Error activating emulation for BAR %d",
+ bar_num);
+ goto out;
+ }
+ pci_hdr->bar_active[bar_num] = true;
+
+out:
+ return r;
+}
+
+static int pci_deactivate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr,
+ int bar_num)
+{
+ int r = 0;
+
+ if (!pci_bar_is_active(pci_hdr, bar_num))
+ goto out;
+
+ r = pci_hdr->bar_deactivate_fn(kvm, pci_hdr, bar_num, pci_hdr->data);
+ if (r < 0) {
+ pci_dev_warn(pci_hdr, "Error deactivating emulation for BAR %d",
+ bar_num);
+ goto out;
+ }
+ pci_hdr->bar_active[bar_num] = false;
+
+out:
+ return r;
+}
+
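
pci_activate_bar() and pci_deactivate_bar() make the transitions idempotent: the bar_active[] flag ensures a device's callbacks run only on real state changes, so callers can toggle unconditionally. A hypothetical device callback, with the signature inferred from the invocations above (struct my_dev and my_dev_mmio_handler are illustrative names, not from this patch):

	static int my_dev_bar_activate(struct kvm *kvm,
				       struct pci_device_header *pci_hdr,
				       int bar_num, void *data)
	{
		struct my_dev *mdev = data;	/* passed via pci_hdr->data */
		u32 start = pci__bar_address(pci_hdr, bar_num);
		u32 size  = pci__bar_size(pci_hdr, bar_num);

		/* Register an MMIO region; the matching deactivate callback
		 * would undo this with kvm__deregister_mmio(). */
		return kvm__register_mmio(kvm, start, size, false,
					  my_dev_mmio_handler, mdev);
	}
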
static void pci_config_command_wr(struct kvm *kvm,
struct pci_device_header *pci_hdr,
u16 new_command)
@@ -179,26 +224,167 @@ static void pci_config_command_wr(struct kvm *kvm,
if (toggle_io && pci__bar_is_io(pci_hdr, i)) {
if (__pci__io_space_enabled(new_command))
- pci_hdr->bar_activate_fn(kvm, pci_hdr, i,
- pci_hdr->data);
+ pci_activate_bar(kvm, pci_hdr, i);
else
- pci_hdr->bar_deactivate_fn(kvm, pci_hdr, i,
- pci_hdr->data);
+ pci_deactivate_bar(kvm, pci_hdr, i);
}
if (toggle_mem && pci__bar_is_memory(pci_hdr, i)) {
if (__pci__memory_space_enabled(new_command))
- pci_hdr->bar_activate_fn(kvm, pci_hdr, i,
- pci_hdr->data);
+ pci_activate_bar(kvm, pci_hdr, i);
else
- pci_hdr->bar_deactivate_fn(kvm, pci_hdr, i,
- pci_hdr->data);
+ pci_deactivate_bar(kvm, pci_hdr, i);
}
}
pci_hdr->command = new_command;
}
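
For reference, toggle_io and toggle_mem (computed in the unchanged part of this function) track transitions of the COMMAND register's I/O Space and Memory Space enable bits. The helpers presumably reduce to the standard bit tests (a sketch assuming the PCI_COMMAND_* definitions from linux/pci_regs.h; their actual bodies are not part of this diff):

	static bool __pci__io_space_enabled(u16 command)
	{
		return command & PCI_COMMAND_IO;	/* bit 0 */
	}

	static bool __pci__memory_space_enabled(u16 command)
	{
		return command & PCI_COMMAND_MEMORY;	/* bit 1 */
	}
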
+static int pci_toggle_bar_regions(bool activate, struct kvm *kvm, u32 start, u32 size)
+{
+ struct device_header *dev_hdr;
+ struct pci_device_header *tmp_hdr;
+ u32 tmp_start, tmp_size;
+ int i, r;
+
+ dev_hdr = device__first_dev(DEVICE_BUS_PCI);
+ while (dev_hdr) {
+ tmp_hdr = dev_hdr->data;
+ for (i = 0; i < 6; i++) {
+ if (!pci_bar_is_implemented(tmp_hdr, i))
+ continue;
+
+ tmp_start = pci__bar_address(tmp_hdr, i);
+ tmp_size = pci__bar_size(tmp_hdr, i);
+ if (tmp_start + tmp_size <= start ||
+ tmp_start >= start + size)
+ continue;
+
+ if (activate)
+ r = pci_activate_bar(kvm, tmp_hdr, i);
+ else
+ r = pci_deactivate_bar(kvm, tmp_hdr, i);
+ if (r < 0)
+ return r;
+ }
+ dev_hdr = device__next_dev(dev_hdr);
+ }
+
+ return 0;
+}
+
+static inline int pci_activate_bar_regions(struct kvm *kvm, u32 start, u32 size)
+{
+ return pci_toggle_bar_regions(true, kvm, start, size);
+}
+
+static inline int pci_deactivate_bar_regions(struct kvm *kvm, u32 start, u32 size)
+{
+ return pci_toggle_bar_regions(false, kvm, start, size);
+}
+
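
The overlap test in pci_toggle_bar_regions() treats BARs as half-open intervals [start, start + size): a region is skipped only when one interval ends at or before the other begins. Restated as a standalone predicate (equivalent to the negated condition above; like the original, it ignores u32 wraparound):

	static bool regions_overlap(u32 a_start, u32 a_size,
				    u32 b_start, u32 b_size)
	{
		return a_start < b_start + b_size &&
		       b_start < a_start + a_size;
	}

For example, a BAR at 0x1000 with size 0x100 overlaps a region at 0x10c0 of size 0x100, since 0x1000 < 0x10c0 + 0x100 and 0x10c0 < 0x1000 + 0x100.
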
+static void pci_config_bar_wr(struct kvm *kvm,
+ struct pci_device_header *pci_hdr, int bar_num,
+ u32 value)
+{
+ u32 old_addr, new_addr, bar_size;
+ u32 mask;
+ int r;
+
+ if (pci__bar_is_io(pci_hdr, bar_num))
+ mask = (u32)PCI_BASE_ADDRESS_IO_MASK;
+ else
+ mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
+
+ /*
+ * If the kernel masks the BAR, it will expect to find the size of the
+ * BAR there next time it reads from it. After the kernel reads the
+ * size, it will write the address back.
+ *
+ * According to the PCI local bus specification REV 3.0: The number of
+ * upper bits that a device actually implements depends on how much of
+ * the address space the device will respond to. A device that wants a 1
+ * MB memory address space (using a 32-bit base address register) would
+ * build the top 12 bits of the address register, hardwiring the other
+ * bits to 0.
+ *
+ * Furthermore, software can determine how much address space the device
+ * requires by writing a value of all 1's to the register and then
+ * reading the value back. The device will return 0's in all don't-care
+ * address bits, effectively specifying the address space required.
+ *
+ * Software computes the size of the address space with the formula
+ * S = ~B + 1, where S is the memory size and B is the value read from
+ * the BAR. This means that the BAR value that kvmtool should return is
+ * B = ~(S - 1).
+ */
+ if (value == 0xffffffff) {
+ value = ~(pci__bar_size(pci_hdr, bar_num) - 1);
+ /* Preserve the special bits. */
+ value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask);
+ pci_hdr->bar[bar_num] = value;
+ return;
+ }
+
+ value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask);
+
+ /* Don't toggle emulation when access to the region type is disabled. */
+ if (pci__bar_is_io(pci_hdr, bar_num) &&
+ !pci__io_space_enabled(pci_hdr)) {
+ pci_hdr->bar[bar_num] = value;
+ return;
+ }
+
+ if (pci__bar_is_memory(pci_hdr, bar_num) &&
+ !pci__memory_space_enabled(pci_hdr)) {
+ pci_hdr->bar[bar_num] = value;
+ return;
+ }
+
+ /*
+ * BAR reassignment can be done while device access is enabled and
+ * memory regions for different devices can overlap as long as no access
+ * is made to the overlapping memory regions. To implement BAR
+ * reassignment, we deactivate emulation for the region described by the
+ * BAR value that the guest is changing, deactivate emulation for the
+ * regions that overlap with the new one (by scanning through all PCI
+ * devices), activate emulation for the new BAR value, and finally
+ * re-activate emulation for all device regions that were overlapping
+ * with the old value.
+ */
+ old_addr = pci__bar_address(pci_hdr, bar_num);
+ new_addr = __pci__bar_address(value);
+ bar_size = pci__bar_size(pci_hdr, bar_num);
+
+ r = pci_deactivate_bar(kvm, pci_hdr, bar_num);
+ if (r < 0)
+ return;
+
+ r = pci_deactivate_bar_regions(kvm, new_addr, bar_size);
+ if (r < 0) {
+ /*
+ * We cannot update the BAR because of an overlapping region
+ * that failed to deactivate emulation, so keep the old BAR
+ * value and re-activate emulation for it.
+ */
+ pci_activate_bar(kvm, pci_hdr, bar_num);
+ return;
+ }
+
+ pci_hdr->bar[bar_num] = value;
+ r = pci_activate_bar(kvm, pci_hdr, bar_num);
+ if (r < 0) {
+ /*
+ * New region cannot be emulated, re-enable the regions that
+ * were overlapping.
+ */
+ pci_activate_bar_regions(kvm, new_addr, bar_size);
+ return;
+ }
+
+ pci_activate_bar_regions(kvm, old_addr, bar_size);
+}
+
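
A worked example of the sizing handshake implemented above: for a 1 MB memory BAR, kvmtool answers the all-ones write with B = ~(S - 1) = ~0x000fffff = 0xfff00000 (before the special bits are merged back in), and the guest recovers the size as S = ~B + 1. A minimal self-check, ignoring the preserved low bits:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t size = 0x100000;	/* 1 MB BAR */
		uint32_t bar  = ~(size - 1);	/* 0xfff00000 */

		assert(~bar + 1 == size);	/* guest-side size probe */
		return 0;
	}
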
void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size)
{
void *base;
@@ -206,7 +392,6 @@ void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data,
struct pci_device_header *pci_hdr;
u8 dev_num = addr.device_number;
u32 value = 0;
- u32 mask;
if (!pci_device_exists(addr.bus_number, dev_num, 0))
return;
@@ -231,46 +416,13 @@ void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data,
}
bar = (offset - PCI_BAR_OFFSET(0)) / sizeof(u32);
-
- /*
- * If the kernel masks the BAR, it will expect to find the size of the
- * BAR there next time it reads from it. After the kernel reads the
- * size, it will write the address back.
- */
if (bar < 6) {
- if (pci__bar_is_io(pci_hdr, bar))
- mask = (u32)PCI_BASE_ADDRESS_IO_MASK;
- else
- mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
- /*
- * According to the PCI local bus specification REV 3.0:
- * The number of upper bits that a device actually implements
- * depends on how much of the address space the device will
- * respond to. A device that wants a 1 MB memory address space
- * (using a 32-bit base address register) would build the top
- * 12 bits of the address register, hardwiring the other bits
- * to 0.
- *
- * Furthermore, software can determine how much address space
- * the device requires by writing a value of all 1's to the
- * register and then reading the value back. The device will
- * return 0's in all don't-care address bits, effectively
- * specifying the address space required.
- *
- * Software computes the size of the address space with the
- * formula S = ~B + 1, where S is the memory size and B is the
- * value read from the BAR. This means that the BAR value that
- * kvmtool should return is B = ~(S - 1).
- */
memcpy(&value, data, size);
- if (value == 0xffffffff)
- value = ~(pci__bar_size(pci_hdr, bar) - 1);
- /* Preserve the special bits. */
- value = (value & mask) | (pci_hdr->bar[bar] & ~mask);
- memcpy(base + offset, &value, size);
- } else {
- memcpy(base + offset, data, size);
+ pci_config_bar_wr(kvm, pci_hdr, bar, value);
+ return;
}
+
+ memcpy(base + offset, data, size);
}
void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size)
@@ -336,16 +488,18 @@ int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr
if (!pci_bar_is_implemented(pci_hdr, i))
continue;
+ assert(!pci_bar_is_active(pci_hdr, i));
+
if (pci__bar_is_io(pci_hdr, i) &&
pci__io_space_enabled(pci_hdr)) {
- r = bar_activate_fn(kvm, pci_hdr, i, data);
+ r = pci_activate_bar(kvm, pci_hdr, i);
if (r < 0)
return r;
}
if (pci__bar_is_memory(pci_hdr, i) &&
pci__memory_space_enabled(pci_hdr)) {
- r = bar_activate_fn(kvm, pci_hdr, i, data);
+ r = pci_activate_bar(kvm, pci_hdr, i);
if (r < 0)
return r;
}
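
Putting the pieces of pci_config_bar_wr() together, a guest write that moves a BAR while decoding is enabled runs this sequence (an illustrative trace, not code from the patch; error paths omitted):

	/* Guest moves BAR 0 of device A from 0x1000 to 0x2000, size 0x100. */
	pci_deactivate_bar(kvm, A, 0);			/* unmap the old region */
	pci_deactivate_bar_regions(kvm, 0x2000, 0x100);	/* silence overlaps at the new address */
	A->bar[0] = new_value;				/* commit the BAR */
	pci_activate_bar(kvm, A, 0);			/* map the new region */
	pci_activate_bar_regions(kvm, 0x1000, 0x100);	/* re-arm devices that overlapped the old address */
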
diff --git a/vfio/pci.c b/vfio/pci.c
index 34f19792..49ecd12a 100644
--- a/vfio/pci.c
+++ b/vfio/pci.c
@@ -467,6 +467,7 @@ static int vfio_pci_bar_activate(struct kvm *kvm,
struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
struct vfio_pci_msix_table *table = &pdev->msix_table;
struct vfio_region *region;
+ u32 bar_addr;
bool has_msix;
int ret;
@@ -475,7 +476,14 @@ static int vfio_pci_bar_activate(struct kvm *kvm,
region = &vdev->regions[bar_num];
has_msix = pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX;
+ bar_addr = pci__bar_address(pci_hdr, bar_num);
+ if (pci__bar_is_io(pci_hdr, bar_num))
+ region->port_base = bar_addr;
+ else
+ region->guest_phys_addr = bar_addr;
+
if (has_msix && (u32)bar_num == table->bar) {
+ table->guest_phys_addr = region->guest_phys_addr;
ret = kvm__register_mmio(kvm, table->guest_phys_addr,
table->size, false,
vfio_pci_msix_table_access, pdev);
@@ -490,6 +498,10 @@ static int vfio_pci_bar_activate(struct kvm *kvm,
}
if (has_msix && (u32)bar_num == pba->bar) {
+ if (pba->bar == table->bar)
+ pba->guest_phys_addr = table->guest_phys_addr + table->size;
+ else
+ pba->guest_phys_addr = region->guest_phys_addr;
ret = kvm__register_mmio(kvm, pba->guest_phys_addr,
pba->size, false,
vfio_pci_msix_pba_access, pdev);
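
The vfio changes make re-activation pick up the BAR's current value: vfio_pci_bar_activate() now derives the region's port base or guest physical address from pci__bar_address() rather than the address assigned at boot, and when the MSI-X PBA shares a BAR with the MSI-X table it is mapped directly after the table. A sketch of the resulting layout (addresses are illustrative):

	/* Shared BAR at 0xd0000000 holding an MSI-X table of 0x1000 bytes:
	 *
	 *   table->guest_phys_addr = 0xd0000000
	 *   pba->guest_phys_addr   = 0xd0000000 + 0x1000 = 0xd0001000
	 *
	 * With separate BARs, the PBA instead starts at its own BAR's
	 * pci__bar_address() value. */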