From 06f45fe96fcd81531b0bcb2a6115da563ae6dbd6 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 19 Feb 2021 16:40:28 +0100 Subject: xen/events: add per-xenbus device event statistics and settings Add syfs nodes for each xenbus device showing event statistics (number of events and spurious events, number of associated event channels) and for setting a spurious event threshold in case a frontend is sending too many events without being rogue on purpose. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210219154030.10892-7-jgross@suse.com Signed-off-by: Boris Ostrovsky --- Documentation/ABI/testing/sysfs-devices-xenbus | 41 ++++++++++++++++ drivers/xen/events/events_base.c | 27 ++++++++++- drivers/xen/xenbus/xenbus_probe.c | 66 ++++++++++++++++++++++++++ include/xen/xenbus.h | 7 +++ 4 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-devices-xenbus diff --git a/Documentation/ABI/testing/sysfs-devices-xenbus b/Documentation/ABI/testing/sysfs-devices-xenbus new file mode 100644 index 00000000000000..fd796cb4f31573 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-xenbus @@ -0,0 +1,41 @@ +What: /sys/devices/*/xenbus/event_channels +Date: February 2021 +Contact: Xen Developers mailing list +Description: + Number of Xen event channels associated with a kernel based + paravirtualized device frontend or backend. + +What: /sys/devices/*/xenbus/events +Date: February 2021 +Contact: Xen Developers mailing list +Description: + Total number of Xen events received for a Xen pv device + frontend or backend. + +What: /sys/devices/*/xenbus/jiffies_eoi_delayed +Date: February 2021 +Contact: Xen Developers mailing list +Description: + Summed up time in jiffies the EOI of an interrupt for a Xen + pv device has been delayed in order to avoid stalls due to + event storms. This value rising is a first sign for a rogue + other end of the pv device. + +What: /sys/devices/*/xenbus/spurious_events +Date: February 2021 +Contact: Xen Developers mailing list +Description: + Number of events received for a Xen pv device which did not + require any action. Too many spurious events in a row will + trigger delayed EOI processing. + +What: /sys/devices/*/xenbus/spurious_threshold +Date: February 2021 +Contact: Xen Developers mailing list +Description: + Controls the tolerated number of subsequent spurious events + before delayed EOI processing is triggered for a Xen pv + device. Default is 1. This can be modified in case the other + end of the pv device is issuing spurious events on a regular + basis and is known not to be malicious on purpose. Raising + the value for such cases can improve pv device performance. diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b249f2d6b0ccf6..adb7260e94b232 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -323,6 +323,8 @@ static int xen_irq_info_evtchn_setup(unsigned irq, ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0); info->u.interdomain = dev; + if (dev) + atomic_inc(&dev->event_channels); return ret; } @@ -568,18 +570,28 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) return; if (spurious) { + struct xenbus_device *dev = info->u.interdomain; + unsigned int threshold = 1; + + if (dev && dev->spurious_threshold) + threshold = dev->spurious_threshold; + if ((1 << info->spurious_cnt) < (HZ << 2)) { if (info->spurious_cnt != 0xFF) info->spurious_cnt++; } - if (info->spurious_cnt > 1) { - delay = 1 << (info->spurious_cnt - 2); + if (info->spurious_cnt > threshold) { + delay = 1 << (info->spurious_cnt - 1 - threshold); if (delay > HZ) delay = HZ; if (!info->eoi_time) info->eoi_cpu = smp_processor_id(); info->eoi_time = get_jiffies_64() + delay; + if (dev) + atomic_add(delay, &dev->jiffies_eoi_delayed); } + if (dev) + atomic_inc(&dev->spurious_events); } else { info->spurious_cnt = 0; } @@ -908,6 +920,7 @@ static void __unbind_from_irq(unsigned int irq) if (VALID_EVTCHN(evtchn)) { unsigned int cpu = cpu_from_irq(irq); + struct xenbus_device *dev; xen_evtchn_close(evtchn); @@ -918,6 +931,11 @@ static void __unbind_from_irq(unsigned int irq) case IRQT_IPI: per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1; break; + case IRQT_EVTCHN: + dev = info->u.interdomain; + if (dev) + atomic_dec(&dev->event_channels); + break; default: break; } @@ -1581,6 +1599,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) { int irq; struct irq_info *info; + struct xenbus_device *dev; irq = get_evtchn_to_irq(port); if (irq == -1) @@ -1610,6 +1629,10 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) info = info_for_irq(irq); + dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL; + if (dev) + atomic_inc(&dev->events); + if (ctrl->defer_eoi) { info->eoi_cpu = smp_processor_id(); info->irq_epoch = __this_cpu_read(irq_epoch); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 8a75092bb148b6..97f0d234482df2 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -206,6 +206,65 @@ void xenbus_otherend_changed(struct xenbus_watch *watch, } EXPORT_SYMBOL_GPL(xenbus_otherend_changed); +#define XENBUS_SHOW_STAT(name) \ +static ssize_t show_##name(struct device *_dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct xenbus_device *dev = to_xenbus_device(_dev); \ + \ + return sprintf(buf, "%d\n", atomic_read(&dev->name)); \ +} \ +static DEVICE_ATTR(name, 0444, show_##name, NULL) + +XENBUS_SHOW_STAT(event_channels); +XENBUS_SHOW_STAT(events); +XENBUS_SHOW_STAT(spurious_events); +XENBUS_SHOW_STAT(jiffies_eoi_delayed); + +static ssize_t show_spurious_threshold(struct device *_dev, + struct device_attribute *attr, + char *buf) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + + return sprintf(buf, "%d\n", dev->spurious_threshold); +} + +static ssize_t set_spurious_threshold(struct device *_dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + unsigned int val; + ssize_t ret; + + ret = kstrtouint(buf, 0, &val); + if (ret) + return ret; + + dev->spurious_threshold = val; + + return count; +} + +static DEVICE_ATTR(spurious_threshold, 0644, show_spurious_threshold, + set_spurious_threshold); + +static struct attribute *xenbus_attrs[] = { + &dev_attr_event_channels.attr, + &dev_attr_events.attr, + &dev_attr_spurious_events.attr, + &dev_attr_jiffies_eoi_delayed.attr, + &dev_attr_spurious_threshold.attr, + NULL +}; + +static const struct attribute_group xenbus_group = { + .name = "xenbus", + .attrs = xenbus_attrs, +}; + int xenbus_dev_probe(struct device *_dev) { struct xenbus_device *dev = to_xenbus_device(_dev); @@ -253,6 +312,11 @@ int xenbus_dev_probe(struct device *_dev) return err; } + dev->spurious_threshold = 1; + if (sysfs_create_group(&dev->dev.kobj, &xenbus_group)) + dev_warn(&dev->dev, "sysfs_create_group on %s failed.\n", + dev->nodename); + return 0; fail_put: module_put(drv->driver.owner); @@ -269,6 +333,8 @@ int xenbus_dev_remove(struct device *_dev) DPRINTK("%s", dev->nodename); + sysfs_remove_group(&dev->dev.kobj, &xenbus_group); + free_otherend_watch(dev); if (drv->remove) { diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index bf3cfc7c35d0b5..0b1386073d49bf 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -88,6 +88,13 @@ struct xenbus_device { struct completion down; struct work_struct work; struct semaphore reclaim_sem; + + /* Event channel based statistics and settings. */ + atomic_t event_channels; + atomic_t events; + atomic_t spurious_events; + atomic_t jiffies_eoi_delayed; + unsigned int spurious_threshold; }; static inline struct xenbus_device *to_xenbus_device(struct device *dev) -- cgit 1.2.3-korg From d56699594046d54f32936a1eec337a62c15f931a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 19 Feb 2021 16:40:29 +0100 Subject: xen/evtchn: use smp barriers for user event ring The ring buffer for user events is local to the given kernel instance, so smp barriers are fine for ensuring consistency. Reported-by: Andrew Cooper Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Link: https://lore.kernel.org/r/20210219154030.10892-8-jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/evtchn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index a7a85719a8c8ab..421382c73d88ee 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -173,7 +173,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) if ((u->ring_prod - u->ring_cons) < u->ring_size) { *evtchn_ring_entry(u, u->ring_prod) = evtchn->port; - wmb(); /* Ensure ring contents visible */ + smp_wmb(); /* Ensure ring contents visible */ if (u->ring_cons == u->ring_prod++) { wake_up_interruptible(&u->evtchn_wait); kill_fasync(&u->evtchn_async_queue, @@ -245,7 +245,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf, } rc = -EFAULT; - rmb(); /* Ensure that we see the port before we copy it. */ + smp_rmb(); /* Ensure that we see the port before we copy it. */ if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) || ((bytes2 != 0) && copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) -- cgit 1.2.3-korg From 6977c0b560f190d0d4786f99d9c120126fe654f2 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 19 Feb 2021 16:40:30 +0100 Subject: xen/evtchn: use READ/WRITE_ONCE() for accessing ring indices For avoiding read- and write-tearing by the compiler use READ_ONCE() and WRITE_ONCE() for accessing the ring indices in evtchn.c. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210219154030.10892-9-jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/evtchn.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 421382c73d88ee..c99415a7005136 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -162,6 +162,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) { struct user_evtchn *evtchn = data; struct per_user_data *u = evtchn->user; + unsigned int prod, cons; WARN(!evtchn->enabled, "Interrupt for port %u, but apparently not enabled; per-user %p\n", @@ -171,10 +172,14 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) spin_lock(&u->ring_prod_lock); - if ((u->ring_prod - u->ring_cons) < u->ring_size) { - *evtchn_ring_entry(u, u->ring_prod) = evtchn->port; + prod = READ_ONCE(u->ring_prod); + cons = READ_ONCE(u->ring_cons); + + if ((prod - cons) < u->ring_size) { + *evtchn_ring_entry(u, prod) = evtchn->port; smp_wmb(); /* Ensure ring contents visible */ - if (u->ring_cons == u->ring_prod++) { + WRITE_ONCE(u->ring_prod, prod + 1); + if (cons == prod) { wake_up_interruptible(&u->evtchn_wait); kill_fasync(&u->evtchn_async_queue, SIGIO, POLL_IN); @@ -210,8 +215,8 @@ static ssize_t evtchn_read(struct file *file, char __user *buf, if (u->ring_overflow) goto unlock_out; - c = u->ring_cons; - p = u->ring_prod; + c = READ_ONCE(u->ring_cons); + p = READ_ONCE(u->ring_prod); if (c != p) break; @@ -221,7 +226,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf, return -EAGAIN; rc = wait_event_interruptible(u->evtchn_wait, - u->ring_cons != u->ring_prod); + READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod)); if (rc) return rc; } @@ -251,7 +256,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf, copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) goto unlock_out; - u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t); + WRITE_ONCE(u->ring_cons, c + (bytes1 + bytes2) / sizeof(evtchn_port_t)); rc = bytes1 + bytes2; unlock_out: @@ -552,7 +557,9 @@ static long evtchn_ioctl(struct file *file, /* Initialise the ring to empty. Clear errors. */ mutex_lock(&u->ring_cons_mutex); spin_lock_irq(&u->ring_prod_lock); - u->ring_cons = u->ring_prod = u->ring_overflow = 0; + WRITE_ONCE(u->ring_cons, 0); + WRITE_ONCE(u->ring_prod, 0); + u->ring_overflow = 0; spin_unlock_irq(&u->ring_prod_lock); mutex_unlock(&u->ring_cons_mutex); rc = 0; @@ -595,7 +602,7 @@ static __poll_t evtchn_poll(struct file *file, poll_table *wait) struct per_user_data *u = file->private_data; poll_wait(file, &u->evtchn_wait, wait); - if (u->ring_cons != u->ring_prod) + if (READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod)) mask |= EPOLLIN | EPOLLRDNORM; if (u->ring_overflow) mask = EPOLLERR; -- cgit 1.2.3-korg From 43135df0d7f0a66c75143a1e95ed70a2005ca329 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Feb 2021 15:46:18 -0800 Subject: xen: Replace lkml.org links with lore As started by commit 05a5f51ca566 ("Documentation: Replace lkml.org links with lore"), replace lkml.org links with lore to better use a single source that's more likely to stay available long-term. Signed-off-by: Kees Cook Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210210234618.2734785-1-keescook@chromium.org Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-acpi-processor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index ce8ffb595a4684..df7cab870be5a5 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -3,7 +3,8 @@ * Copyright 2012 by Oracle Inc * Author: Konrad Rzeszutek Wilk * - * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249 + * This code borrows ideas from + * https://lore.kernel.org/lkml/1322673664-14642-6-git-send-email-konrad.wilk@oracle.com * so many thanks go to Kevin Tian * and Yu Ke . */ -- cgit 1.2.3-korg From 53f131c284e83c29c227c0938926a82b2ed4d7ba Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 23 Feb 2021 17:26:21 +0100 Subject: xen-front-pgdir-shbuf: don't record wrong grant handle upon error In order for subsequent unmapping to not mistakenly unmap handle 0, record a perceived always-invalid one instead. Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Reviewed-by: Oleksandr Andrushchenko Link: https://lore.kernel.org/r/82414b0f-1b63-5509-7c1d-5bcc8239a3de@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-front-pgdir-shbuf.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xen-front-pgdir-shbuf.c b/drivers/xen/xen-front-pgdir-shbuf.c index 48a658dc7ccfce..81b6e13fa5ecb1 100644 --- a/drivers/xen/xen-front-pgdir-shbuf.c +++ b/drivers/xen/xen-front-pgdir-shbuf.c @@ -305,11 +305,18 @@ static int backend_map(struct xen_front_pgdir_shbuf *buf) /* Save handles even if error, so we can unmap. */ for (cur_page = 0; cur_page < buf->num_pages; cur_page++) { - buf->backend_map_handles[cur_page] = map_ops[cur_page].handle; - if (unlikely(map_ops[cur_page].status != GNTST_okay)) + if (likely(map_ops[cur_page].status == GNTST_okay)) { + buf->backend_map_handles[cur_page] = + map_ops[cur_page].handle; + } else { + buf->backend_map_handles[cur_page] = + INVALID_GRANT_HANDLE; + if (!ret) + ret = -ENXIO; dev_err(&buf->xb_dev->dev, "Failed to map page %d: %d\n", cur_page, map_ops[cur_page].status); + } } if (ret) { -- cgit 1.2.3-korg