#include "kvm/kvm.h" #include "kvm/kvm-cpu.h" #include "kvm/rbtree-interval.h" #include "kvm/mutex.h" #include #include #include #include #include #include #include #include #define mmio_node(n) rb_entry(n, struct mmio_mapping, node) static DEFINE_MUTEX(mmio_lock); struct mmio_mapping { struct rb_int_node node; mmio_handler_fn mmio_fn; void *ptr; u32 refcount; bool remove; }; static struct rb_root mmio_tree = RB_ROOT; static struct rb_root pio_tree = RB_ROOT; static struct mmio_mapping *mmio_search(struct rb_root *root, u64 addr, u64 len) { struct rb_int_node *node; /* If len is zero or if there's an overflow, the MMIO op is invalid. */ if (addr + len <= addr) return NULL; node = rb_int_search_range(root, addr, addr + len); if (node == NULL) return NULL; return mmio_node(node); } /* Find lowest match, Check for overlap */ static struct mmio_mapping *mmio_search_single(struct rb_root *root, u64 addr) { struct rb_int_node *node; node = rb_int_search_single(root, addr); if (node == NULL) return NULL; return mmio_node(node); } static int mmio_insert(struct rb_root *root, struct mmio_mapping *data) { return rb_int_insert(root, &data->node); } static void mmio_remove(struct rb_root *root, struct mmio_mapping *data) { rb_int_erase(root, &data->node); } static const char *to_direction(u8 is_write) { if (is_write) return "write"; return "read"; } static struct mmio_mapping *mmio_get(struct rb_root *root, u64 phys_addr, u32 len) { struct mmio_mapping *mmio; mutex_lock(&mmio_lock); mmio = mmio_search(root, phys_addr, len); if (mmio) mmio->refcount++; mutex_unlock(&mmio_lock); return mmio; } /* Called with mmio_lock held. */ static void mmio_deregister(struct kvm *kvm, struct rb_root *root, struct mmio_mapping *mmio) { struct kvm_coalesced_mmio_zone zone = (struct kvm_coalesced_mmio_zone) { .addr = rb_int_start(&mmio->node), .size = 1, }; ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone); mmio_remove(root, mmio); free(mmio); } static void mmio_put(struct kvm *kvm, struct rb_root *root, struct mmio_mapping *mmio) { mutex_lock(&mmio_lock); mmio->refcount--; if (mmio->remove && mmio->refcount == 0) mmio_deregister(kvm, root, mmio); mutex_unlock(&mmio_lock); } static bool trap_is_mmio(unsigned int flags) { return (flags & IOTRAP_BUS_MASK) == DEVICE_BUS_MMIO; } int kvm__register_iotrap(struct kvm *kvm, u64 phys_addr, u64 phys_addr_len, mmio_handler_fn mmio_fn, void *ptr, unsigned int flags) { struct mmio_mapping *mmio; struct kvm_coalesced_mmio_zone zone; int ret; mmio = malloc(sizeof(*mmio)); if (mmio == NULL) return -ENOMEM; *mmio = (struct mmio_mapping) { .node = RB_INT_INIT(phys_addr, phys_addr + phys_addr_len), .mmio_fn = mmio_fn, .ptr = ptr, /* * Start from 0 because kvm__deregister_mmio() doesn't decrement * the reference count. 
*/ .refcount = 0, .remove = false, }; if (trap_is_mmio(flags) && (flags & IOTRAP_COALESCE)) { zone = (struct kvm_coalesced_mmio_zone) { .addr = phys_addr, .size = phys_addr_len, }; ret = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone); if (ret < 0) { free(mmio); return -errno; } } mutex_lock(&mmio_lock); if (trap_is_mmio(flags)) ret = mmio_insert(&mmio_tree, mmio); else ret = mmio_insert(&pio_tree, mmio); mutex_unlock(&mmio_lock); return ret; } bool kvm__deregister_iotrap(struct kvm *kvm, u64 phys_addr, unsigned int flags) { struct mmio_mapping *mmio; struct rb_root *tree; if (trap_is_mmio(flags)) tree = &mmio_tree; else tree = &pio_tree; mutex_lock(&mmio_lock); mmio = mmio_search_single(tree, phys_addr); if (mmio == NULL) { mutex_unlock(&mmio_lock); return false; } /* * The PCI emulation code calls this function when memory access is * disabled for a device, or when a BAR has a new address assigned. PCI * emulation doesn't use any locks and as a result we can end up in a * situation where we have called mmio_get() to do emulation on one VCPU * thread (let's call it VCPU0), and several other VCPU threads have * called kvm__deregister_mmio(). In this case, if we decrement refcount * kvm__deregister_mmio() (either directly, or by calling mmio_put()), * refcount will reach 0 and we will free the mmio node before VCPU0 has * called mmio_put(). This will trigger use-after-free errors on VCPU0. */ if (mmio->refcount == 0) mmio_deregister(kvm, tree, mmio); else mmio->remove = true; mutex_unlock(&mmio_lock); return true; } bool kvm__emulate_mmio(struct kvm_cpu *vcpu, u64 phys_addr, u8 *data, u32 len, u8 is_write) { struct mmio_mapping *mmio; mmio = mmio_get(&mmio_tree, phys_addr, len); if (!mmio) { if (vcpu->kvm->cfg.mmio_debug) fprintf(stderr, "Warning: Ignoring MMIO %s at %016llx (length %u)\n", to_direction(is_write), (unsigned long long)phys_addr, len); goto out; } mmio->mmio_fn(vcpu, phys_addr, data, len, is_write, mmio->ptr); mmio_put(vcpu->kvm, &mmio_tree, mmio); out: return true; } bool kvm__emulate_io(struct kvm_cpu *vcpu, u16 port, void *data, int direction, int size, u32 count) { struct mmio_mapping *mmio; bool is_write = direction == KVM_EXIT_IO_OUT; mmio = mmio_get(&pio_tree, port, size); if (!mmio) { if (vcpu->kvm->cfg.ioport_debug) { fprintf(stderr, "IO error: %s port=%x, size=%d, count=%u\n", to_direction(direction), port, size, count); return false; } return true; } while (count--) { mmio->mmio_fn(vcpu, port, data, size, is_write, mmio->ptr); data += size; } mmio_put(vcpu->kvm, &pio_tree, mmio); return true; }
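
/*
 * Usage sketch (editor's addition, guarded by #if 0 so it is not compiled
 * into this file): a hypothetical device model traps a 4 KiB MMIO window and
 * backs it with a flat register file. The handler signature matches
 * mmio_handler_fn as invoked by kvm__emulate_mmio() above; the foo_device
 * type, foo_* functions, and FOO_MMIO_BASE address are invented for
 * illustration. Port I/O traps use the same API, with a port number as the
 * address and the bus bits in 'flags' selecting the port I/O bus instead of
 * DEVICE_BUS_MMIO.
 */
#if 0
#include <string.h>

#define FOO_MMIO_BASE	0xd0000000UL	/* hypothetical guest-physical base */

struct foo_device {
	u8 regs[0x1000];
};

static void foo_mmio_handler(struct kvm_cpu *vcpu, u64 addr, u8 *data,
			     u32 len, u8 is_write, void *ptr)
{
	struct foo_device *foo = ptr;
	u64 offset = addr - FOO_MMIO_BASE;

	/*
	 * For simplicity, assume the access stays inside the window;
	 * mmio_search() only checks that the ranges overlap.
	 */
	if (is_write)
		memcpy(&foo->regs[offset], data, len);	/* guest store */
	else
		memcpy(data, &foo->regs[offset], len);	/* guest load */
}

static int foo_device_init(struct kvm *kvm, struct foo_device *foo)
{
	/*
	 * IOTRAP_COALESCE is only honoured on the MMIO bus, see
	 * kvm__register_iotrap(); KVM then batches writes to this zone in a
	 * ring and delivers them later instead of exiting on every access.
	 */
	return kvm__register_iotrap(kvm, FOO_MMIO_BASE, sizeof(foo->regs),
				    foo_mmio_handler, foo,
				    DEVICE_BUS_MMIO | IOTRAP_COALESCE);
}

static void foo_device_exit(struct kvm *kvm)
{
	/* Safe against in-flight emulation: removal is deferred via refcount. */
	kvm__deregister_iotrap(kvm, FOO_MMIO_BASE, DEVICE_BUS_MMIO);
}
#endif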