From 59fbc409e71649f558fb4578cdbfac67acb824dc Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 24 Aug 2023 23:53:27 +0100 Subject: io_uring: optimise extra io_get_cqe null check If the cached cqe check passes in io_get_cqe*() it already means that the cqe we return is valid and non-zero, however the compiler is unable to optimise null checks like in io_fill_cqe_req(). Do a bit of trickery, return success/fail boolean from io_get_cqe*() and store cqe in the cqe parameter. That makes it do the right thing, erasing the check together with the introduced indirection. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/322ea4d3377d3d4efd8ae90ab8ed28a99f518210.1692916914.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 7 +++---- io_uring/io_uring.h | 20 +++++++++----------- 2 files changed, 12 insertions(+), 15 deletions(-) (limited to 'io_uring') diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index de05831eeca71..cfc2dc8c4b2fd 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -683,10 +683,10 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx) io_cq_lock(ctx); while (!list_empty(&ctx->cq_overflow_list)) { - struct io_uring_cqe *cqe = io_get_cqe_overflow(ctx, true); + struct io_uring_cqe *cqe; struct io_overflow_cqe *ocqe; - if (!cqe) + if (!io_get_cqe_overflow(ctx, &cqe, true)) break; ocqe = list_first_entry(&ctx->cq_overflow_list, struct io_overflow_cqe, list); @@ -862,8 +862,7 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, * submission (by quite a lot). Increment the overflow count in * the ring. */ - cqe = io_get_cqe(ctx); - if (likely(cqe)) { + if (likely(io_get_cqe(ctx, &cqe))) { trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0); WRITE_ONCE(cqe->user_data, user_data); diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 9c80d20fe18f1..2960e35b32a57 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -109,28 +109,27 @@ static inline void io_req_task_work_add(struct io_kiocb *req) #define io_for_each_link(pos, head) \ for (pos = (head); pos; pos = pos->link) -static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx, - bool overflow) +static inline bool io_get_cqe_overflow(struct io_ring_ctx *ctx, + struct io_uring_cqe **ret, + bool overflow) { - struct io_uring_cqe *cqe; - io_lockdep_assert_cq_locked(ctx); if (unlikely(ctx->cqe_cached >= ctx->cqe_sentinel)) { if (unlikely(!io_cqe_cache_refill(ctx, overflow))) - return NULL; + return false; } - cqe = ctx->cqe_cached; + *ret = ctx->cqe_cached; ctx->cached_cq_tail++; ctx->cqe_cached++; if (ctx->flags & IORING_SETUP_CQE32) ctx->cqe_cached++; - return cqe; + return true; } -static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx) +static inline bool io_get_cqe(struct io_ring_ctx *ctx, struct io_uring_cqe **ret) { - return io_get_cqe_overflow(ctx, false); + return io_get_cqe_overflow(ctx, ret, false); } static inline bool io_fill_cqe_req(struct io_ring_ctx *ctx, struct io_kiocb *req) @@ -142,8 +141,7 @@ static inline bool io_fill_cqe_req(struct io_ring_ctx *ctx, struct io_kiocb *req * submission (by quite a lot). Increment the overflow count in * the ring. */ - cqe = io_get_cqe(ctx); - if (unlikely(!cqe)) + if (unlikely(!io_get_cqe(ctx, &cqe))) return false; if (trace_io_uring_complete_enabled()) -- cgit 1.2.3-korg