diff options
author | Andrea Arcangeli <aarcange@redhat.com> | 2021-12-15 14:14:55 -0500 |
---|---|---|
committer | Andrea Arcangeli <aarcange@redhat.com> | 2023-11-11 22:03:35 -0500 |
commit | 62e813f749a51ad2adfc1c90b0e013251bb9e47d (patch) | |
tree | 66b12f7a66407c73b106ce8230f78373ae45cea8 | |
parent | 009bd71ae5894040a043435dbddeb4864f4971db (diff) | |
download | aa-62e813f749a51ad2adfc1c90b0e013251bb9e47d.tar.gz |
mm: gup: FOLL_NOUNSHARE: optimize follow_page
follow_page would be suboptimal if it triggered COR faults for page
migration, KSM and other activities, so optimize it with
FOLL_NOUNSHARE, which acts just like FOLL_WRITE as far as the
GUP unsharing logic is concerned.
This commit is based on prototype patches by David.
Signed-off-by: David Hildenbrand <david@redhat.com>
Co-developed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
-rw-r--r-- | include/linux/mm.h | 7 | ||||
-rw-r--r-- | mm/gup.c | 17 |
2 files changed, 23 insertions, 1 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index 2ae7c3998cec9e..8eb0a555ce3262 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2875,6 +2875,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ #define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ #define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ +#define FOLL_NOUNSHARE 0x100000 /* gup: don't trigger a COR fault */ /* * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each @@ -2929,6 +2930,12 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, * releasing pages: get_user_pages*() pages must be released via put_page(), * while pin_user_pages*() pages must be released via unpin_user_page(). * + * FOLL_NOUNSHARE should be set when no COR fault should be triggered when + * eventually taking a read-only reference on a shared anonymous page, because + * we are sure that user space cannot use that reference for reading the page + * after eventually unmapping the page. FOLL_NOUNSHARE is implicitly set for the + * follow_page() API. + * * Please see Documentation/core-api/pin_user_pages.rst for more information. */ diff --git a/mm/gup.c b/mm/gup.c index e6ef47726bb1c4..ed3b6b801beb1b 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -73,7 +73,7 @@ static __always_inline bool __gup_must_unshare(unsigned int flags, struct page *page, bool is_head, bool irq_safe) { - if (flags & FOLL_WRITE) + if (flags & (FOLL_WRITE|FOLL_NOUNSHARE)) return false; /* mmu notifier doesn't need unshare */ if (!(flags & (FOLL_GET|FOLL_PIN))) @@ -930,6 +930,11 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches * the device's dev_pagemap metadata to avoid repeating expensive lookups. 
* + * When getting an anonymous page and the caller has to trigger a Copy + On Read (COR) fault, -EMLINK is returned. The caller should trigger + a fault with FAULT_FLAG_UNSHARE set. With FOLL_NOUNSHARE set, this + function will never require a COR fault and consequently will not + return -EMLINK. + * + * On output, the @ctx->page_mask is set according to the size of the page. * * Return: the mapped (struct page *), %NULL if no mapping exists, or @@ -985,6 +990,14 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, if (vma_is_secretmem(vma)) return NULL; + /* + * Don't require unsharing in case we stumble over a read-only + * mapped, shared anonymous page: this is an internal API only + * and callers don't actually use it for exposing page content + * to user space. + */ + foll_flags |= FOLL_NOUNSHARE; + page = follow_page_mask(vma, address, foll_flags, &ctx); if (ctx.pgmap) put_dev_pagemap(ctx.pgmap); @@ -1080,6 +1093,8 @@ static int faultin_page(struct vm_area_struct *vma, fault_flags |= FAULT_FLAG_UNSHARE; /* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */ VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE); + /* If FOLL_NOUNSHARE was set, then "unshare" must not be */ + VM_BUG_ON(*flags & FOLL_NOUNSHARE); } ret = handle_mm_fault(vma, address, fault_flags, NULL); |