Diffstat (limited to 'io_uring')
-rw-r--r--  io_uring/io_uring.c   81
-rw-r--r--  io_uring/openclose.c   6
2 files changed, 57 insertions, 30 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index e8096d502a7cb..93db3e4e7b688 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1948,6 +1948,14 @@ fail:
ret = io_issue_sqe(req, issue_flags);
if (ret != -EAGAIN)
break;
+
+ /*
+ * If REQ_F_NOWAIT is set, then don't wait or retry with
+ * poll. -EAGAIN is final for that case.
+ */
+ if (req->flags & REQ_F_NOWAIT)
+ break;
+
/*
* We can get EAGAIN for iopolled IO even though we're
* forcing a sync submission from here, since we can't
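
With the hunk above, a request flagged REQ_F_NOWAIT that still gets -EAGAIN from the io-wq worker is completed with -EAGAIN instead of being retried with poll. From userspace that flag typically comes from RWF_NOWAIT reads/writes or files opened O_NONBLOCK, so the error becomes final and the application chooses the fallback. A minimal liburing sketch of such a fallback, assuming a valid fd and trimming most error handling (read_try_nowait is an illustrative helper, not part of any API):

#define _GNU_SOURCE
#include <liburing.h>
#include <sys/uio.h>	/* RWF_NOWAIT */
#include <unistd.h>
#include <errno.h>

/* Illustrative helper: try a non-blocking read via io_uring, fall back to a
 * plain blocking pread() if the completion carries -EAGAIN. */
static int read_try_nowait(int fd, void *buf, unsigned len)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	if (io_uring_queue_init(4, &ring, 0) < 0)
		return -1;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf, len, 0);
	sqe->rw_flags = RWF_NOWAIT;	/* request ends up REQ_F_NOWAIT in the kernel */

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	ret = cqe->res;
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);

	/* with the change above, -EAGAIN is final for NOWAIT requests */
	if (ret == -EAGAIN)
		ret = pread(fd, buf, len, 0);
	return ret;
}
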
@@ -2485,10 +2493,21 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx)
return 0;
}
+static bool current_pending_io(void)
+{
+ struct io_uring_task *tctx = current->io_uring;
+
+ if (!tctx)
+ return false;
+ return percpu_counter_read_positive(&tctx->inflight);
+}
+
/* when returns >0, the caller should retry */
static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq)
{
+ int io_wait, ret;
+
if (unlikely(READ_ONCE(ctx->check_cq)))
return 1;
if (unlikely(!llist_empty(&ctx->work_llist)))
@@ -2499,11 +2518,22 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
return -EINTR;
if (unlikely(io_should_wake(iowq)))
return 0;
+
+ /*
+ * Mark us as being in io_wait if we have pending requests, so cpufreq
+ * can take into account that the task is waiting for IO - turns out
+ * to be important for low QD IO.
+ */
+ io_wait = current->in_iowait;
+ if (current_pending_io())
+ current->in_iowait = 1;
+ ret = 0;
if (iowq->timeout == KTIME_MAX)
schedule();
else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
- return -ETIME;
- return 0;
+ ret = -ETIME;
+ current->in_iowait = io_wait;
+ return ret;
}
/*
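
The wait-path hunk above only changes accounting: current_pending_io() reads the per-task inflight counter, the waiter is marked in_iowait only while it actually has requests outstanding, and the previous value is restored after the schedule. The beneficiary named in the comment is cpufreq's iowait handling for low queue depth IO. A rough liburing sketch of the QD=1 submit-and-wait pattern that blocks in io_uring_enter() and therefore goes through io_cqring_wait_schedule(); the helper name copy_loop and its parameters are illustrative:

#include <liburing.h>

/* Illustrative helper: queue-depth-1 read loop; the task sleeps inside
 * io_uring_enter() between submissions, i.e. in io_cqring_wait_schedule(). */
static int copy_loop(struct io_uring *ring, int fd, char *buf,
		     unsigned blk, int blocks)
{
	for (int i = 0; i < blocks; i++) {
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
		struct io_uring_cqe *cqe;

		io_uring_prep_read(sqe, fd, buf, blk, (__u64)i * blk);

		/* submit a single request and block until it completes */
		io_uring_submit_and_wait(ring, 1);
		io_uring_wait_cqe(ring, &cqe);	/* reaps the already-posted CQE */
		if (cqe->res < 0)
			return cqe->res;
		io_uring_cqe_seen(ring, cqe);
	}
	return 0;
}
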
@@ -3418,8 +3448,6 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
- const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
- struct vm_unmapped_area_info info;
void *ptr;
/*
@@ -3434,32 +3462,29 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
if (IS_ERR(ptr))
return -ENOMEM;
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.low_limit = max(PAGE_SIZE, mmap_min_addr);
- info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
+ /*
+ * Some architectures have strong cache aliasing requirements.
+ * For such architectures we need a coherent mapping which aliases
+ * kernel memory *and* userspace memory. To achieve that:
+ * - use a NULL file pointer to reference physical memory, and
+ * - use the kernel virtual address of the shared io_uring context
+ * (instead of the userspace-provided address, which has to be 0UL
+ * anyway).
+ * - use the same pgoff which the get_unmapped_area() uses to
+ * calculate the page colouring.
+ * For architectures without such aliasing requirements, the
+ * architecture will return any suitable mapping because addr is 0.
+ */
+ filp = NULL;
+ flags |= MAP_SHARED;
+ pgoff = 0; /* has been translated to ptr above */
#ifdef SHM_COLOUR
- info.align_mask = PAGE_MASK & (SHM_COLOUR - 1UL);
+ addr = (uintptr_t) ptr;
+ pgoff = addr >> PAGE_SHIFT;
#else
- info.align_mask = PAGE_MASK & (SHMLBA - 1UL);
+ addr = 0UL;
#endif
- info.align_offset = (unsigned long) ptr;
-
- /*
- * A failed mmap() very likely causes application failure,
- * so fall back to the bottom-up function here. This scenario
- * can happen with large stack limits and large mmap()
- * allocations.
- */
- addr = vm_unmapped_area(&info);
- if (offset_in_page(addr)) {
- info.flags = 0;
- info.low_limit = TASK_UNMAPPED_BASE;
- info.high_limit = mmap_end;
- addr = vm_unmapped_area(&info);
- }
-
- return addr;
+ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
}
#else /* !CONFIG_MMU */
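
The rewritten io_uring_mmu_get_unmapped_area() no longer fills in a vm_unmapped_area_info by hand; it hands a NULL file, MAP_SHARED, addr (the kernel address of the ring on SHM_COLOUR architectures, otherwise 0) and a matching pgoff to mm->get_unmapped_area(), so the architecture's own colouring rules apply. This path runs when userspace mmap()s the ring fd with a NULL hint address, roughly as in the raw-syscall sketch below (sizes computed as documented in io_uring(7); setup_rings is an illustrative helper and IORING_FEAT_SINGLE_MMAP is ignored for brevity):

#define _GNU_SOURCE
#include <linux/io_uring.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

/* Illustrative helper: create a ring and map the SQ ring, CQ ring and SQE
 * array with a NULL hint address, which is what reaches
 * io_uring_mmu_get_unmapped_area() on MMU kernels. */
static int setup_rings(unsigned entries)
{
	struct io_uring_params p;
	void *sq_ring, *cq_ring, *sqes;
	int fd;

	memset(&p, 0, sizeof(p));
	fd = syscall(__NR_io_uring_setup, entries, &p);
	if (fd < 0)
		return -1;

	sq_ring = mmap(NULL, p.sq_off.array + p.sq_entries * sizeof(__u32),
		       PROT_READ | PROT_WRITE, MAP_SHARED, fd, IORING_OFF_SQ_RING);
	cq_ring = mmap(NULL, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe),
		       PROT_READ | PROT_WRITE, MAP_SHARED, fd, IORING_OFF_CQ_RING);
	sqes = mmap(NULL, p.sq_entries * sizeof(struct io_uring_sqe),
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, IORING_OFF_SQES);
	if (sq_ring == MAP_FAILED || cq_ring == MAP_FAILED || sqes == MAP_FAILED)
		return -1;
	return fd;
}

On architectures without SHM_COLOUR the caller sees no difference: addr stays 0 and the architecture returns any suitable mapping, as the kernel comment notes.
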
@@ -3859,7 +3884,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
ctx->syscall_iopoll = 1;
ctx->compat = in_compat_syscall();
- if (!capable(CAP_IPC_LOCK))
+ if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK))
ctx->user = get_uid(current_user());
/*
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index 10ca57f5bd249..e3fae26e025dd 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -35,9 +35,11 @@ static bool io_openat_force_async(struct io_open *open)
{
/*
* Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
- * it'll always -EAGAIN
+ * it'll always -EAGAIN. Note that we test for __O_TMPFILE because
+ * O_TMPFILE includes O_DIRECTORY, which isn't a flag we need to force
+ * async for.
*/
- return open->how.flags & (O_TRUNC | O_CREAT | O_TMPFILE);
+ return open->how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE);
}
static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
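
The openclose.c hunk relies on the uapi definition O_TMPFILE == (__O_TMPFILE | O_DIRECTORY): masking with O_TMPFILE would also catch plain O_DIRECTORY opens, which do not need to be forced async. A small userspace check against the kernel uapi headers that mirrors both masks (force_async_old/force_async_new are illustrative copies of the predicate, not kernel functions):

#include <stdio.h>
#include <linux/fcntl.h>	/* kernel uapi: __O_TMPFILE does NOT include O_DIRECTORY */

/* Illustrative copies of the predicate: old mask vs. new mask. */
static int force_async_old(int flags)
{
	return (flags & (O_TRUNC | O_CREAT | O_TMPFILE)) != 0;
}

static int force_async_new(int flags)
{
	return (flags & (O_TRUNC | O_CREAT | __O_TMPFILE)) != 0;
}

int main(void)
{
	int dir_open = O_RDONLY | O_DIRECTORY;	/* plain directory open, no tmpfile */

	printf("O_TMPFILE includes O_DIRECTORY: %d\n", (O_TMPFILE & O_DIRECTORY) != 0);
	printf("old mask forces async for O_DIRECTORY: %d\n", force_async_old(dir_open));
	printf("new mask forces async for O_DIRECTORY: %d\n", force_async_new(dir_open));
	return 0;
}

Note that glibc's own <fcntl.h> may define __O_TMPFILE differently, which is why the sketch sticks to the kernel uapi header.
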