aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJean-Philippe Brucker <jean-philippe.brucker@arm.com>2019-04-04 14:20:49 +0100
committerWill Deacon <will.deacon@arm.com>2019-04-26 14:57:59 +0100
commit2790307c3d6d2aad838e1eef76ec0aa21b110698 (patch)
treec74a0e0ba62f83d29ef7852d4b79ab53e58c3e96
parenta839180edd2b52be9fb3aa2b51b44e2fafcd65a8 (diff)
downloadkvmtool-2790307c3d6d2aad838e1eef76ec0aa21b110698.tar.gz
disk/aio: Add wait() disk operation
Add a call into the disk layer to synchronize the AIO queue. Wait for all pending requests to complete. This will be necessary when resetting a virtqueue. The wait() operation isn't the same as flush(). A VIRTIO_BLK_T_FLUSH request ensures that any write request *that completed before the FLUSH is sent* is committed to permanent storage (e.g. written back from a write cache). But it doesn't do anything for requests that are still pending when the FLUSH is sent. Avoid introducing a mutex on the io_submit() and io_getevents() paths, because it can lead to 30% throughput drop on heavy FIO jobs. Instead manage an inflight counter using compare-and-swap operations, which is simple enough as the caller doesn't submit new requests while it waits for the AIO queue to drain. The __sync_fetch_and_* operations are a bit rough since they use full barriers, but that didn't seem to introduce a performance regression. Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com>
-rw-r--r--disk/aio.c82
-rw-r--r--disk/blk.c1
-rw-r--r--disk/core.c8
-rw-r--r--disk/raw.c2
-rw-r--r--include/kvm/disk-image.h9
5 files changed, 70 insertions, 32 deletions
diff --git a/disk/aio.c b/disk/aio.c
index 277ddf7c..a7418c8c 100644
--- a/disk/aio.c
+++ b/disk/aio.c
@@ -2,45 +2,31 @@
#include <pthread.h>
#include <sys/eventfd.h>
+#include "kvm/brlock.h"
#include "kvm/disk-image.h"
#include "kvm/kvm.h"
#include "linux/list.h"
#define AIO_MAX 256
-static int aio_pwritev(io_context_t ctx, struct iocb *iocb, int fd,
- const struct iovec *iov, int iovcnt, off_t offset,
- int ev, void *param)
+static int aio_submit(struct disk_image *disk, int nr, struct iocb **ios)
{
- struct iocb *ios[1] = { iocb };
int ret;
- io_prep_pwritev(iocb, fd, iov, iovcnt, offset);
- io_set_eventfd(iocb, ev);
- iocb->data = param;
-
+ __sync_fetch_and_add(&disk->aio_inflight, nr);
+ /*
+ * A wmb() is needed here, to ensure disk_aio_thread() sees this
+ * increase after receiving the events. It is included in the
+ * __sync_fetch_and_add (as a full barrier).
+ */
restart:
- ret = io_submit(ctx, 1, ios);
+ ret = io_submit(disk->ctx, nr, ios);
if (ret == -EAGAIN)
goto restart;
- return ret;
-}
-
-static int aio_preadv(io_context_t ctx, struct iocb *iocb, int fd,
- const struct iovec *iov, int iovcnt, off_t offset,
- int ev, void *param)
-{
- struct iocb *ios[1] = { iocb };
- int ret;
+ else if (ret <= 0)
+ /* disk_aio_thread() is never going to see those */
+ __sync_fetch_and_sub(&disk->aio_inflight, nr);
- io_prep_preadv(iocb, fd, iov, iovcnt, offset);
- io_set_eventfd(iocb, ev);
- iocb->data = param;
-
-restart:
- ret = io_submit(ctx, 1, ios);
- if (ret == -EAGAIN)
- goto restart;
return ret;
}
@@ -48,22 +34,49 @@ ssize_t raw_image__read_async(struct disk_image *disk, u64 sector,
const struct iovec *iov, int iovcount,
void *param)
{
- u64 offset = sector << SECTOR_SHIFT;
struct iocb iocb;
+ u64 offset = sector << SECTOR_SHIFT;
+ struct iocb *ios[1] = { &iocb };
- return aio_preadv(disk->ctx, &iocb, disk->fd, iov, iovcount,
- offset, disk->evt, param);
+ io_prep_preadv(&iocb, disk->fd, iov, iovcount, offset);
+ io_set_eventfd(&iocb, disk->evt);
+ iocb.data = param;
+
+ return aio_submit(disk, 1, ios);
}
ssize_t raw_image__write_async(struct disk_image *disk, u64 sector,
const struct iovec *iov, int iovcount,
void *param)
{
- u64 offset = sector << SECTOR_SHIFT;
struct iocb iocb;
+ u64 offset = sector << SECTOR_SHIFT;
+ struct iocb *ios[1] = { &iocb };
+
+ io_prep_pwritev(&iocb, disk->fd, iov, iovcount, offset);
+ io_set_eventfd(&iocb, disk->evt);
+ iocb.data = param;
+
+ return aio_submit(disk, 1, ios);
+}
- return aio_pwritev(disk->ctx, &iocb, disk->fd, iov, iovcount,
- offset, disk->evt, param);
+/*
+ * When this function returns there are no in-flight I/O. Caller ensures that
+ * io_submit() isn't called concurrently.
+ *
+ * Returns an inaccurate number of I/O that was in-flight when the function was
+ * called.
+ */
+int raw_image__wait(struct disk_image *disk)
+{
+ u64 inflight = disk->aio_inflight;
+
+ while (disk->aio_inflight) {
+ usleep(100);
+ barrier();
+ }
+
+ return inflight;
}
static int disk_aio_get_events(struct disk_image *disk)
@@ -76,6 +89,11 @@ static int disk_aio_get_events(struct disk_image *disk)
nr = io_getevents(disk->ctx, 1, ARRAY_SIZE(event), event, &notime);
for (i = 0; i < nr; i++)
disk->disk_req_cb(event[i].data, event[i].res);
+
+ /* Pairs with wmb() in aio_submit() */
+ rmb();
+ __sync_fetch_and_sub(&disk->aio_inflight, nr);
+
} while (nr > 0);
return 0;
diff --git a/disk/blk.c b/disk/blk.c
index 48922e02..b4c9fba3 100644
--- a/disk/blk.c
+++ b/disk/blk.c
@@ -9,6 +9,7 @@
static struct disk_image_operations blk_dev_ops = {
.read = raw_image__read,
.write = raw_image__write,
+ .wait = raw_image__wait,
.async = true,
};
diff --git a/disk/core.c b/disk/core.c
index 89880703..8d95c98e 100644
--- a/disk/core.c
+++ b/disk/core.c
@@ -201,6 +201,14 @@ error:
return err;
}
+int disk_image__wait(struct disk_image *disk)
+{
+ if (disk->ops->wait)
+ return disk->ops->wait(disk);
+
+ return 0;
+}
+
int disk_image__flush(struct disk_image *disk)
{
if (disk->ops->flush)
diff --git a/disk/raw.c b/disk/raw.c
index e869d6cc..54b4e740 100644
--- a/disk/raw.c
+++ b/disk/raw.c
@@ -67,6 +67,7 @@ int raw_image__close(struct disk_image *disk)
static struct disk_image_operations raw_image_regular_ops = {
.read = raw_image__read,
.write = raw_image__write,
+ .wait = raw_image__wait,
.async = true,
};
@@ -78,6 +79,7 @@ struct disk_image_operations ro_ops = {
struct disk_image_operations ro_ops_nowrite = {
.read = raw_image__read,
+ .wait = raw_image__wait,
.async = true,
};
diff --git a/include/kvm/disk-image.h b/include/kvm/disk-image.h
index 2275e234..27d4f7da 100644
--- a/include/kvm/disk-image.h
+++ b/include/kvm/disk-image.h
@@ -41,6 +41,7 @@ struct disk_image_operations {
ssize_t (*write)(struct disk_image *disk, u64 sector, const struct iovec *iov,
int iovcount, void *param);
int (*flush)(struct disk_image *disk);
+ int (*wait)(struct disk_image *disk);
int (*close)(struct disk_image *disk);
bool async;
};
@@ -70,6 +71,7 @@ struct disk_image {
io_context_t ctx;
int evt;
pthread_t thread;
+ u64 aio_inflight;
#endif /* CONFIG_HAS_AIO */
const char *wwpn;
const char *tpgt;
@@ -81,6 +83,7 @@ int disk_image__init(struct kvm *kvm);
int disk_image__exit(struct kvm *kvm);
struct disk_image *disk_image__new(int fd, u64 size, struct disk_image_operations *ops, int mmap);
int disk_image__flush(struct disk_image *disk);
+int disk_image__wait(struct disk_image *disk);
ssize_t disk_image__read(struct disk_image *disk, u64 sector, const struct iovec *iov,
int iovcount, void *param);
ssize_t disk_image__write(struct disk_image *disk, u64 sector, const struct iovec *iov,
@@ -108,6 +111,7 @@ ssize_t raw_image__read_async(struct disk_image *disk, u64 sector,
const struct iovec *iov, int iovcount, void *param);
ssize_t raw_image__write_async(struct disk_image *disk, u64 sector,
const struct iovec *iov, int iovcount, void *param);
+int raw_image__wait(struct disk_image *disk);
#define raw_image__read raw_image__read_async
#define raw_image__write raw_image__write_async
@@ -121,6 +125,11 @@ static inline int disk_aio_setup(struct disk_image *disk)
static inline void disk_aio_destroy(struct disk_image *disk)
{
}
+
+static inline int raw_image__wait(struct disk_image *disk)
+{
+ return 0;
+}
#define raw_image__read raw_image__read_sync
#define raw_image__write raw_image__write_sync
#endif /* CONFIG_HAS_AIO */