aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorAndrea Arcangeli <aarcange@redhat.com>2021-12-15 14:14:55 -0500
committerAndrea Arcangeli <aarcange@redhat.com>2023-11-11 22:03:35 -0500
commit62e813f749a51ad2adfc1c90b0e013251bb9e47d (patch)
tree66b12f7a66407c73b106ce8230f78373ae45cea8
parent009bd71ae5894040a043435dbddeb4864f4971db (diff)
downloadaa-62e813f749a51ad2adfc1c90b0e013251bb9e47d.tar.gz
mm: gup: FOLL_NOUNSHARE: optimize follow_page
follow_page would be suboptimal if it triggered COR faults for page migration, KSM and other activities, so optimize it with FOLL_NOUNSHARE that acts just like FOLL_WRITE as far as the GUP unsharing logic is concerned. This commit is based on prototype patches by David. Signed-off-by: David Hildenbrand <david@redhat.com> Co-developed-by: David Hildenbrand <david@redhat.com> Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
-rw-r--r--include/linux/mm.h7
-rw-r--r--mm/gup.c17
2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2ae7c3998cec9e..8eb0a555ce3262 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2875,6 +2875,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */
#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */
+#define FOLL_NOUNSHARE 0x100000 /* gup: don't trigger a COR fault */
/*
* FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
@@ -2929,6 +2930,12 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
* releasing pages: get_user_pages*() pages must be released via put_page(),
* while pin_user_pages*() pages must be released via unpin_user_page().
*
+ * FOLL_NOUNSHARE should be set when no COR fault is to be triggered upon
* taking a read-only reference on a shared anonymous page, because we are
* sure that user space cannot use that reference for reading the page
* after the page is eventually unmapped. FOLL_NOUNSHARE is implicitly set
* for the follow_page() API.
+ *
* Please see Documentation/core-api/pin_user_pages.rst for more information.
*/
diff --git a/mm/gup.c b/mm/gup.c
index e6ef47726bb1c4..ed3b6b801beb1b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -73,7 +73,7 @@ static __always_inline bool __gup_must_unshare(unsigned int flags,
struct page *page,
bool is_head, bool irq_safe)
{
- if (flags & FOLL_WRITE)
+ if (flags & (FOLL_WRITE|FOLL_NOUNSHARE))
return false;
/* mmu notifier doesn't need unshare */
if (!(flags & (FOLL_GET|FOLL_PIN)))
@@ -930,6 +930,11 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
* When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches
* the device's dev_pagemap metadata to avoid repeating expensive lookups.
*
+ * When getting an anonymous page and the caller has to trigger a Copy
+ * On Read (COR) fault, -EMLINK is returned. The caller should trigger
* a fault with FAULT_FLAG_UNSHARE set. With FOLL_NOUNSHARE set, the lookup
* will never require a COR fault and consequently never return -EMLINK.
+ *
* On output, the @ctx->page_mask is set according to the size of the page.
*
* Return: the mapped (struct page *), %NULL if no mapping exists, or
@@ -985,6 +990,14 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
if (vma_is_secretmem(vma))
return NULL;
+ /*
+ * Don't require unsharing in case we stumble over a read-only
+ * mapped, shared anonymous page: this is an internal API only
+ * and callers don't actually use it for exposing page content
+ * to user space.
+ */
+ foll_flags |= FOLL_NOUNSHARE;
+
page = follow_page_mask(vma, address, foll_flags, &ctx);
if (ctx.pgmap)
put_dev_pagemap(ctx.pgmap);
@@ -1080,6 +1093,8 @@ static int faultin_page(struct vm_area_struct *vma,
fault_flags |= FAULT_FLAG_UNSHARE;
/* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */
VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE);
+ /* If FOLL_NOUNSHARE was set, then "unshare" must not be */
+ VM_BUG_ON(*flags & FOLL_NOUNSHARE);
}
ret = handle_mm_fault(vma, address, fault_flags, NULL);