aboutsummaryrefslogtreecommitdiffstats
path: root/io_uring/io_uring.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2024-02-02 10:20:05 -0700
committerJens Axboe <axboe@kernel.dk>2024-02-08 13:27:06 -0700
commitaf5d68f8892f8ee8f137648b79ceb2abc153a19b (patch)
treed76f98bfd630ee568e19f16aa2646de2b64dc1df /io_uring/io_uring.c
parent2708af1adc11700c6c3ce4109e3b133079a36a78 (diff)
downloadlinux-af5d68f8892f8ee8f137648b79ceb2abc153a19b.tar.gz
io_uring/sqpoll: manage task_work privately
Decouple from task_work running, and cap the number of entries we process at the time. If we exceed that number, push remaining entries to a retry list that we'll process first next time. We cap the number of entries to process at 8, which is fairly random. We just want to get enough per-ctx batching here, while not processing endlessly. Since we manually run PF_IO_WORKER related task_work anyway as the task never exits to userspace, with this we no longer need to add an actual task_work item to the per-process list. Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring/io_uring.c')
-rw-r--r--io_uring/io_uring.c46
1 files changed, 36 insertions, 10 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index bfd2f0fff153c..f1e3646c727c1 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1173,7 +1173,14 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
percpu_ref_put(&ctx->refs);
}
-static void handle_tw_list(struct llist_node *node, unsigned int *count)
+/*
+ * Run queued task_work, returning the number of entries processed in *count.
+ * If more entries than max_entries are available, stop processing once this
+ * is reached and return the rest of the list.
+ */
+struct llist_node *io_handle_tw_list(struct llist_node *node,
+ unsigned int *count,
+ unsigned int max_entries)
{
struct io_ring_ctx *ctx = NULL;
struct io_tw_state ts = { };
@@ -1200,9 +1207,10 @@ static void handle_tw_list(struct llist_node *node, unsigned int *count)
ctx = NULL;
cond_resched();
}
- } while (node);
+ } while (node && *count < max_entries);
ctx_flush_and_put(ctx, &ts);
+ return node;
}
/**
@@ -1247,27 +1255,41 @@ static __cold void io_fallback_tw(struct io_uring_task *tctx, bool sync)
}
}
-void tctx_task_work(struct callback_head *cb)
+struct llist_node *tctx_task_work_run(struct io_uring_task *tctx,
+ unsigned int max_entries,
+ unsigned int *count)
{
- struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
- task_work);
struct llist_node *node;
- unsigned int count = 0;
if (unlikely(current->flags & PF_EXITING)) {
io_fallback_tw(tctx, true);
- return;
+ return NULL;
}
node = llist_del_all(&tctx->task_list);
- if (node)
- handle_tw_list(llist_reverse_order(node), &count);
+ if (node) {
+ node = llist_reverse_order(node);
+ node = io_handle_tw_list(node, count, max_entries);
+ }
/* relaxed read is enough as only the task itself sets ->in_cancel */
if (unlikely(atomic_read(&tctx->in_cancel)))
io_uring_drop_tctx_refs(current);
- trace_io_uring_task_work_run(tctx, count);
+ trace_io_uring_task_work_run(tctx, *count);
+ return node;
+}
+
+void tctx_task_work(struct callback_head *cb)
+{
+ struct io_uring_task *tctx;
+ struct llist_node *ret;
+ unsigned int count = 0;
+
+ tctx = container_of(cb, struct io_uring_task, task_work);
+ ret = tctx_task_work_run(tctx, UINT_MAX, &count);
+ /* can't happen */
+ WARN_ON_ONCE(ret);
}
static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
@@ -1350,6 +1372,10 @@ static void io_req_normal_work_add(struct io_kiocb *req)
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
+ /* SQPOLL doesn't need the task_work added, it'll run it itself */
+ if (ctx->flags & IORING_SETUP_SQPOLL)
+ return;
+
if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
return;