authorShachaf Ben-Kiki <shachaf@gmail.com>2023-07-02 16:12:21 -0700
committerShachaf Ben-Kiki <shachaf@gmail.com>2024-03-02 16:39:13 -0800
commitf03becf5d7c1bc3fd9a426ba8b013346847f8b9e (patch)
tree49826fc81cf17efa52ecb765182c59db304be3a1
parent2b353a85abed9d19201bd947f3efca6a630e476a (diff)
downloadliburing-f03becf5d7c1bc3fd9a426ba8b013346847f8b9e.tar.gz
Fix memory ordering/atomic access
A few memory accesses don't seem correct to me, and a few seem slightly
stricter than necessary.

Here are the rules in userspace, I think:

    sq.ktail is read by the kernel
    sq.khead is written by the kernel
    cq.ktail is written by the kernel
    cq.khead is read by the kernel

Locations that are written concurrently by the kernel must be loaded
atomically; if they specify (to the user) that some memory is now
available, they must be loaded with acquire semantics before the memory
is read.

Similarly, locations that are read concurrently by the kernel must be
written atomically; if they specify (to the kernel) that some memory is
now available, they must be stored with release semantics after the
memory is written to.

Note that without SQPOLL, sq is only accessed by the kernel in response
to a system call, so no synchronization is necessary.

Note that in practice, on most CPU architectures, a regular aligned
load/store is already atomic; in that situation READ_ONCE/WRITE_ONCE is
primarily necessary for the compiler, which would otherwise be allowed
to do invalid things like load the lower half of an integer and then
the upper half.

Signed-off-by: Shachaf Ben-Kiki <shachaf@gmail.com>
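For reference, the helpers this patch leans on map onto C11 atomics roughly as
sketched below. This is in the spirit of liburing's barrier.h rather than a
verbatim copy of it; the exact definitions there differ by compiler and C/C++
mode.

    #include <stdatomic.h>

    /* Atomicity only, no ordering: enough when the other side merely
     * advances an index without publishing data through it. */
    #define IO_URING_READ_ONCE(var)                                     \
            atomic_load_explicit((_Atomic __typeof__(var) *)&(var),     \
                                 memory_order_relaxed)
    #define IO_URING_WRITE_ONCE(var, val)                               \
            atomic_store_explicit((_Atomic __typeof__(var) *)&(var),    \
                                  (val), memory_order_relaxed)

    /* Publish/consume: order surrounding memory accesses against the
     * index update. */
    #define io_uring_smp_store_release(p, v)                            \
            atomic_store_explicit((_Atomic __typeof__(*(p)) *)(p), (v), \
                                  memory_order_release)
    #define io_uring_smp_load_acquire(p)                                \
            atomic_load_explicit((_Atomic __typeof__(*(p)) *)(p),       \
                                 memory_order_acquire)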
-rw-r--r--	src/include/liburing.h	 6
-rw-r--r--	src/queue.c	20
-rw-r--r--	test/helpers.c	20
3 files changed, 20 insertions, 26 deletions
diff --git a/src/include/liburing.h b/src/include/liburing.h
index dd6b1009..982a8241 100644
--- a/src/include/liburing.h
+++ b/src/include/liburing.h
@@ -1217,7 +1217,7 @@ IOURINGINLINE void io_uring_prep_ftruncate(struct io_uring_sqe *sqe,
*/
IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring)
{
- unsigned khead = *ring->sq.khead;
+ unsigned khead;
/*
* Without a barrier, we could miss an update and think the SQ wasn't
@@ -1226,6 +1226,8 @@ IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring)
*/
if (ring->flags & IORING_SETUP_SQPOLL)
khead = io_uring_smp_load_acquire(ring->sq.khead);
+ else
+ khead = *ring->sq.khead;
/* always use real head, to avoid losing sync for short submit */
return ring->sq.sqe_tail - khead;
@@ -1412,7 +1414,7 @@ IOURINGINLINE struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
if (ring->flags & IORING_SETUP_SQE128)
shift = 1;
if (!(ring->flags & IORING_SETUP_SQPOLL))
- head = IO_URING_READ_ONCE(*sq->khead);
+ head = *sq->khead;
else
head = io_uring_smp_load_acquire(sq->khead);
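Assembled from the two hunks above, io_uring_sq_ready() ends up reading
roughly as follows; the in-tree comment is paraphrased here, the control flow
is exactly what the hunks show.

    IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring)
    {
            unsigned khead;

            /*
             * With SQPOLL the kernel updates sq.khead concurrently, so a
             * load-acquire is needed; otherwise khead only moves as a result
             * of our own system calls and a plain load is enough.
             */
            if (ring->flags & IORING_SETUP_SQPOLL)
                    khead = io_uring_smp_load_acquire(ring->sq.khead);
            else
                    khead = *ring->sq.khead;

            /* always use real head, to avoid losing sync for short submit */
            return ring->sq.sqe_tail - khead;
    }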
diff --git a/src/queue.c b/src/queue.c
index b784b10c..c4360614 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -212,22 +212,18 @@ static unsigned __io_uring_flush_sq(struct io_uring *ring)
* Ensure kernel sees the SQE updates before the tail update.
*/
if (!(ring->flags & IORING_SETUP_SQPOLL))
- IO_URING_WRITE_ONCE(*sq->ktail, tail);
+ *sq->ktail = tail;
else
io_uring_smp_store_release(sq->ktail, tail);
}
/*
- * This _may_ look problematic, as we're not supposed to be reading
- * SQ->head without acquire semantics. When we're in SQPOLL mode, the
- * kernel submitter could be updating this right now. For non-SQPOLL,
- * task itself does it, and there's no potential race. But even for
- * SQPOLL, the load is going to be potentially out-of-date the very
- * instant it's done, regardless or whether or not it's done
- * atomically. Worst case, we're going to be over-estimating what
- * we can submit. The point is, we need to be able to deal with this
- * situation regardless of any perceived atomicity.
- */
- return tail - *sq->khead;
+ * This load needs to be atomic, since sq->khead is written concurrently
+ * by the kernel, but it doesn't need to be load_acquire, since the
+ * kernel doesn't store to the submission queue; it advances khead just
+ * to indicate that it's finished reading the submission queue entries
+ * so they're available for us to write to.
+ */
+ return tail - IO_URING_READ_ONCE(*sq->khead);
}
/*
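To make the release/acquire pairing concrete, here is a standalone C11 sketch
(illustrative names, not liburing code): the producer publishes entries with a
release store of the tail, while the head only needs an atomic-but-relaxed
load, which is the role IO_URING_READ_ONCE plays in __io_uring_flush_sq()
above.

    #include <stdatomic.h>

    #define RING_ENTRIES 16u

    struct toy_ring {
            unsigned entries[RING_ENTRIES];
            _Atomic unsigned ktail; /* written here, read by the other side */
            _Atomic unsigned khead; /* written by the other side, read here */
    };

    /* Fill an entry, then publish it: the release store of ktail orders the
     * entry write before the tail update, pairing with the other side's
     * load-acquire of ktail. */
    static void toy_produce(struct toy_ring *r, unsigned v)
    {
            unsigned tail = atomic_load_explicit(&r->ktail,
                                                 memory_order_relaxed);

            r->entries[tail & (RING_ENTRIES - 1)] = v;
            atomic_store_explicit(&r->ktail, tail + 1, memory_order_release);
    }

    /* khead only tells us which entries the other side has finished reading;
     * it performs no stores to them that our later writes must be ordered
     * after, so an atomic relaxed load suffices -- the same argument the new
     * comment makes for IO_URING_READ_ONCE(*sq->khead). */
    static unsigned toy_pending(struct toy_ring *r, unsigned tail)
    {
            return tail - atomic_load_explicit(&r->khead,
                                               memory_order_relaxed);
    }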
diff --git a/test/helpers.c b/test/helpers.c
index 24fd4ce3..779347f6 100644
--- a/test/helpers.c
+++ b/test/helpers.c
@@ -286,22 +286,18 @@ unsigned __io_uring_flush_sq(struct io_uring *ring)
* Ensure kernel sees the SQE updates before the tail update.
*/
if (!(ring->flags & IORING_SETUP_SQPOLL))
- IO_URING_WRITE_ONCE(*sq->ktail, tail);
+ *sq->ktail = tail;
else
io_uring_smp_store_release(sq->ktail, tail);
}
/*
- * This _may_ look problematic, as we're not supposed to be reading
- * SQ->head without acquire semantics. When we're in SQPOLL mode, the
- * kernel submitter could be updating this right now. For non-SQPOLL,
- * task itself does it, and there's no potential race. But even for
- * SQPOLL, the load is going to be potentially out-of-date the very
- * instant it's done, regardless or whether or not it's done
- * atomically. Worst case, we're going to be over-estimating what
- * we can submit. The point is, we need to be able to deal with this
- * situation regardless of any perceived atomicity.
- */
- return tail - *sq->khead;
+ * This load needs to be atomic, since sq->khead is written concurrently
+ * by the kernel, but it doesn't need to be load_acquire, since the
+ * kernel doesn't store to the submission queue; it advances khead just
+ * to indicate that it's finished reading the submission queue entries
+ * so they're available for us to write to.
+ */
+ return tail - IO_URING_READ_ONCE(*sq->khead);
}
/*