diff options
author | Zefan Li <lizefan@huawei.com> | 2016-10-26 20:59:39 +0800 |
---|---|---|
committer | Zefan Li <lizefan@huawei.com> | 2016-10-26 20:59:39 +0800 |
commit | 71d933fddc013a6cde5dd00beb888806ddd824c4 (patch) | |
tree | 0f3c1d138372c463f0ca4eb51b2020b8d5f9f8e3 | |
parent | bc45e503d0005b27c79bc8209d908265c62ca9c8 (diff) | |
download | linux-3.4.y-queue-71d933fddc013a6cde5dd00beb888806ddd824c4.tar.gz |
Add two fixes
-rw-r--r-- | patches/mm-remove-gup_flags-foll_write-games-from-__get_user_pages.patch | 140 |
-rw-r--r-- | patches/net-fix-use-after-free-in-the-recvmmsg-exit-path.patch | 89 |
-rw-r--r-- | patches/series | 2 |
3 files changed, 231 insertions, 0 deletions
diff --git a/patches/mm-remove-gup_flags-foll_write-games-from-__get_user_pages.patch b/patches/mm-remove-gup_flags-foll_write-games-from-__get_user_pages.patch new file mode 100644 index 0000000..859c53a --- /dev/null +++ b/patches/mm-remove-gup_flags-foll_write-games-from-__get_user_pages.patch @@ -0,0 +1,140 @@ +From: Michal Hocko <mhocko@suse.com> +Date: Sun, 16 Oct 2016 11:55:00 +0200 +Subject: mm, gup: close FOLL MAP_PRIVATE race + +commit 19be0eaffa3ac7d8eb6784ad9bdbc7d67ed8e619 upstream. + +faultin_page drops FOLL_WRITE after the page fault handler did the CoW +and then we retry follow_page_mask to get our CoWed page. This is racy, +however because the page might have been unmapped by that time and so +we would have to do a page fault again, this time without CoW. This +would cause the page cache corruption for FOLL_FORCE on MAP_PRIVATE +read only mappings with obvious consequences. + +This is an ancient bug that was actually already fixed once by Linus +eleven years ago in commit 4ceb5db9757a ("Fix get_user_pages() race +for write access") but that was then undone due to problems on s390 +by commit f33ea7f404e5 ("fix get_user_pages bug") because s390 didn't +have proper dirty pte tracking until abf09bed3cce ("s390/mm: implement +software dirty bits"). This wasn't a problem at the time as pointed out +by Hugh Dickins because madvise relied on mmap_sem for write up until +0a27a14a6292 ("mm: madvise avoid exclusive mmap_sem") but since then we +can race with madvise which can unmap the fresh COWed page or with KSM +and corrupt the content of the shared page. + +This patch is based on the Linus' approach to not clear FOLL_WRITE after +the CoW page fault (aka VM_FAULT_WRITE) but instead introduces FOLL_COW +to note this fact. The flag is then rechecked during follow_pfn_pte to +enforce the page fault again if we do not see the CoWed page. Linus was +suggesting to check pte_dirty again as s390 is OK now. 
But that would +make backporting to some old kernels harder. So instead let's just make +sure that vm_normal_page sees a pure anonymous page. + +This would guarantee we are seeing a real CoW page. Introduce +can_follow_write_pte which checks both pte_write and falls back to +PageAnon on forced write faults which passed CoW already. Thanks to Hugh +to point out that a special care has to be taken for KSM pages because +our COWed page might have been merged with a KSM one and keep its +PageAnon flag. + +Fixes: 0a27a14a6292 ("mm: madvise avoid exclusive mmap_sem") +Reported-by: Phil "not Paul" Oester <kernel@linuxace.com> +Disclosed-by: Andy Lutomirski <luto@kernel.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Michal Hocko <mhocko@suse.com> +[bwh: Backported to 3.2: + - Adjust filename, context, indentation + - The 'no_page' exit path in follow_page() is different, so open-code the + cleanup + - Delete a now-unused label] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + include/linux/mm.h | 1 + + mm/memory.c | 39 ++++++++++++++++++++++++++++----------- + 2 files changed, 29 insertions(+), 11 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1525,6 +1525,7 @@ struct page *follow_page(struct vm_area_ + #define FOLL_MLOCK 0x40 /* mark page as mlocked */ + #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ + #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ ++#define FOLL_COW 0x4000 /* internal GUP flag */ + + typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, + void *data); +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1447,6 +1447,24 @@ int zap_vma_ptes(struct vm_area_struct * + } + EXPORT_SYMBOL_GPL(zap_vma_ptes); + ++static inline bool can_follow_write_pte(pte_t pte, struct page *page, ++ unsigned int flags) ++{ ++ if (pte_write(pte)) ++ return true; ++ ++ /* ++ * Make sure that we are really following 
CoWed page. We do not really ++ * have to care about exclusiveness of the page because we only want ++ * to ensure that once COWed page hasn't disappeared in the meantime ++ * or it hasn't been merged to a KSM page. ++ */ ++ if ((flags & FOLL_FORCE) && (flags & FOLL_COW)) ++ return page && PageAnon(page) && !PageKsm(page); ++ ++ return false; ++} ++ + /** + * follow_page - look up a page descriptor from a user-virtual address + * @vma: vm_area_struct mapping @address +@@ -1529,10 +1547,13 @@ split_fallthrough: + pte = *ptep; + if (!pte_present(pte)) + goto no_page; +- if ((flags & FOLL_WRITE) && !pte_write(pte)) +- goto unlock; + + page = vm_normal_page(vma, address, pte); ++ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, page, flags)) { ++ pte_unmap_unlock(ptep, ptl); ++ return NULL; ++ } ++ + if (unlikely(!page)) { + if ((flags & FOLL_DUMP) || + !is_zero_pfn(pte_pfn(pte))) +@@ -1575,7 +1596,7 @@ split_fallthrough: + unlock_page(page); + } + } +-unlock: ++ + pte_unmap_unlock(ptep, ptl); + out: + return page; +@@ -1809,17 +1830,13 @@ int __get_user_pages(struct task_struct + * The VM_FAULT_WRITE bit tells us that + * do_wp_page has broken COW when necessary, + * even if maybe_mkwrite decided not to set +- * pte_write. We can thus safely do subsequent +- * page lookups as if they were reads. But only +- * do so when looping for pte_write is futile: +- * in some cases userspace may also be wanting +- * to write to the gotten user page, which a +- * read fault here might prevent (a readonly +- * page might get reCOWed by userspace write). ++ * pte_write. We cannot simply drop FOLL_WRITE ++ * here because the COWed page might be gone by ++ * the time we do the subsequent page lookups. 
+ */ + if ((ret & VM_FAULT_WRITE) && + !(vma->vm_flags & VM_WRITE)) +- foll_flags &= ~FOLL_WRITE; ++ foll_flags |= FOLL_COW; + + cond_resched(); + } diff --git a/patches/net-fix-use-after-free-in-the-recvmmsg-exit-path.patch b/patches/net-fix-use-after-free-in-the-recvmmsg-exit-path.patch new file mode 100644 index 0000000..93a5c65 --- /dev/null +++ b/patches/net-fix-use-after-free-in-the-recvmmsg-exit-path.patch @@ -0,0 +1,89 @@ +From 34b88a68f26a75e4fded796f1a49c40f82234b7d Mon Sep 17 00:00:00 2001 +From: Arnaldo Carvalho de Melo <acme@redhat.com> +Date: Mon, 14 Mar 2016 09:56:35 -0300 +Subject: net: Fix use after free in the recvmmsg exit path + +commit 34b88a68f26a75e4fded796f1a49c40f82234b7d upstream. + +The syzkaller fuzzer hit the following use-after-free: + + Call Trace: + [<ffffffff8175ea0e>] __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:295 + [<ffffffff851cc31a>] __sys_recvmmsg+0x6fa/0x7f0 net/socket.c:2261 + [< inline >] SYSC_recvmmsg net/socket.c:2281 + [<ffffffff851cc57f>] SyS_recvmmsg+0x16f/0x180 net/socket.c:2270 + [<ffffffff86332bb6>] entry_SYSCALL_64_fastpath+0x16/0x7a + arch/x86/entry/entry_64.S:185 + +And, as Dmitry rightly assessed, that is because we can drop the +reference and then touch it when the underlying recvmsg calls return +some packets and then hit an error, which will make recvmmsg to set +sock->sk->sk_err, oops, fix it. + +Reported-and-Tested-by: Dmitry Vyukov <dvyukov@google.com> +Cc: Alexander Potapenko <glider@google.com> +Cc: Eric Dumazet <edumazet@google.com> +Cc: Kostya Serebryany <kcc@google.com> +Cc: Sasha Levin <sasha.levin@oracle.com> +Fixes: a2e2725541fa ("net: Introduce recvmmsg socket syscall") +http://lkml.kernel.org/r/20160122211644.GC2470@redhat.com +Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + net/socket.c | 38 +++++++++++++++++++------------------- + 1 file changed, 19 insertions(+), 19 deletions(-) + +--- a/net/socket.c ++++ b/net/socket.c +@@ -2332,31 +2332,31 @@ int __sys_recvmmsg(int fd, struct mmsghd + break; + } + +-out_put: +- fput_light(sock->file, fput_needed); +- + if (err == 0) +- return datagrams; ++ goto out_put; ++ ++ if (datagrams == 0) { ++ datagrams = err; ++ goto out_put; ++ } + +- if (datagrams != 0) { ++ /* ++ * We may return less entries than requested (vlen) if the ++ * sock is non block and there aren't enough datagrams... ++ */ ++ if (err != -EAGAIN) { + /* +- * We may return less entries than requested (vlen) if the +- * sock is non block and there aren't enough datagrams... ++ * ... or if recvmsg returns an error after we ++ * received some datagrams, where we record the ++ * error to return on the next call or if the ++ * app asks about it using getsockopt(SO_ERROR). + */ +- if (err != -EAGAIN) { +- /* +- * ... or if recvmsg returns an error after we +- * received some datagrams, where we record the +- * error to return on the next call or if the +- * app asks about it using getsockopt(SO_ERROR). 
+- */ +- sock->sk->sk_err = -err; +- } +- +- return datagrams; ++ sock->sk->sk_err = -err; + } ++out_put: ++ fput_light(sock->file, fput_needed); + +- return err; ++ return datagrams; + } + + SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, diff --git a/patches/series b/patches/series index 5147543..6779316 100644 --- a/patches/series +++ b/patches/series @@ -127,3 +127,5 @@ xen-pciback-save-the-number-of-msi-x-entries-to-be-copied-later.patch ser_gigaset-remove-unnecessary-kfree-calls-from-release-method.patch ser_gigaset-use-container_of-instead-of-detour.patch net-core-revert-net-fix-__netdev_update_features-return.-and-add-comment.patch +mm-remove-gup_flags-foll_write-games-from-__get_user_pages.patch +net-fix-use-after-free-in-the-recvmmsg-exit-path.patch |