summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZefan Li <lizefan@huawei.com>2016-10-26 20:59:39 +0800
committerZefan Li <lizefan@huawei.com>2016-10-26 20:59:39 +0800
commit71d933fddc013a6cde5dd00beb888806ddd824c4 (patch)
tree0f3c1d138372c463f0ca4eb51b2020b8d5f9f8e3
parentbc45e503d0005b27c79bc8209d908265c62ca9c8 (diff)
downloadlinux-3.4.y-queue-71d933fddc013a6cde5dd00beb888806ddd824c4.tar.gz
Add two fixes
-rw-r--r--patches/mm-remove-gup_flags-foll_write-games-from-__get_user_pages.patch140
-rw-r--r--patches/net-fix-use-after-free-in-the-recvmmsg-exit-path.patch89
-rw-r--r--patches/series2
3 files changed, 231 insertions, 0 deletions
diff --git a/patches/mm-remove-gup_flags-foll_write-games-from-__get_user_pages.patch b/patches/mm-remove-gup_flags-foll_write-games-from-__get_user_pages.patch
new file mode 100644
index 0000000..859c53a
--- /dev/null
+++ b/patches/mm-remove-gup_flags-foll_write-games-from-__get_user_pages.patch
@@ -0,0 +1,140 @@
+From: Michal Hocko <mhocko@suse.com>
+Date: Sun, 16 Oct 2016 11:55:00 +0200
+Subject: mm, gup: close FOLL MAP_PRIVATE race
+
+commit 19be0eaffa3ac7d8eb6784ad9bdbc7d67ed8e619 upstream.
+
+faultin_page drops FOLL_WRITE after the page fault handler did the CoW
+and then we retry follow_page_mask to get our CoWed page. This is racy,
+however, because the page might have been unmapped by that time and so
+we would have to do a page fault again, this time without CoW. This
+would cause the page cache corruption for FOLL_FORCE on MAP_PRIVATE
+read only mappings with obvious consequences.
+
+This is an ancient bug that was actually already fixed once by Linus
+eleven years ago in commit 4ceb5db9757a ("Fix get_user_pages() race
+for write access") but that was then undone due to problems on s390
+by commit f33ea7f404e5 ("fix get_user_pages bug") because s390 didn't
+have proper dirty pte tracking until abf09bed3cce ("s390/mm: implement
+software dirty bits"). This wasn't a problem at the time as pointed out
+by Hugh Dickins because madvise relied on mmap_sem for write up until
+0a27a14a6292 ("mm: madvise avoid exclusive mmap_sem") but since then we
+can race with madvise which can unmap the fresh COWed page or with KSM
+and corrupt the content of the shared page.
+
+This patch is based on the Linus' approach to not clear FOLL_WRITE after
+the CoW page fault (aka VM_FAULT_WRITE) but instead introduces FOLL_COW
+to note this fact. The flag is then rechecked during follow_pfn_pte to
+enforce the page fault again if we do not see the CoWed page. Linus was
+suggesting to check pte_dirty again as s390 is OK now. But that would
+make backporting to some old kernels harder. So instead let's just make
+sure that vm_normal_page sees a pure anonymous page.
+
+This would guarantee we are seeing a real CoW page. Introduce
+can_follow_write_pte which checks both pte_write and falls back to
+PageAnon on forced write faults which passed CoW already. Thanks to Hugh
+to point out that a special care has to be taken for KSM pages because
+our COWed page might have been merged with a KSM one and keep its
+PageAnon flag.
+
+Fixes: 0a27a14a6292 ("mm: madvise avoid exclusive mmap_sem")
+Reported-by: Phil "not Paul" Oester <kernel@linuxace.com>
+Disclosed-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+[bwh: Backported to 3.2:
+ - Adjust filename, context, indentation
+ - The 'no_page' exit path in follow_page() is different, so open-code the
+ cleanup
+ - Delete a now-unused label]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Zefan Li <lizefan@huawei.com>
+---
+ include/linux/mm.h | 1 +
+ mm/memory.c | 39 ++++++++++++++++++++++++++++-----------
+ 2 files changed, 29 insertions(+), 11 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1525,6 +1525,7 @@ struct page *follow_page(struct vm_area_
+ #define FOLL_MLOCK 0x40 /* mark page as mlocked */
+ #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */
+ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
++#define FOLL_COW 0x4000 /* internal GUP flag */
+
+ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
+ void *data);
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1447,6 +1447,24 @@ int zap_vma_ptes(struct vm_area_struct *
+ }
+ EXPORT_SYMBOL_GPL(zap_vma_ptes);
+
++static inline bool can_follow_write_pte(pte_t pte, struct page *page,
++ unsigned int flags)
++{
++ if (pte_write(pte))
++ return true;
++
++ /*
++ * Make sure that we are really following CoWed page. We do not really
++ * have to care about exclusiveness of the page because we only want
++ * to ensure that once COWed page hasn't disappeared in the meantime
++ * or it hasn't been merged to a KSM page.
++ */
++ if ((flags & FOLL_FORCE) && (flags & FOLL_COW))
++ return page && PageAnon(page) && !PageKsm(page);
++
++ return false;
++}
++
+ /**
+ * follow_page - look up a page descriptor from a user-virtual address
+ * @vma: vm_area_struct mapping @address
+@@ -1529,10 +1547,13 @@ split_fallthrough:
+ pte = *ptep;
+ if (!pte_present(pte))
+ goto no_page;
+- if ((flags & FOLL_WRITE) && !pte_write(pte))
+- goto unlock;
+
+ page = vm_normal_page(vma, address, pte);
++ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, page, flags)) {
++ pte_unmap_unlock(ptep, ptl);
++ return NULL;
++ }
++
+ if (unlikely(!page)) {
+ if ((flags & FOLL_DUMP) ||
+ !is_zero_pfn(pte_pfn(pte)))
+@@ -1575,7 +1596,7 @@ split_fallthrough:
+ unlock_page(page);
+ }
+ }
+-unlock:
++
+ pte_unmap_unlock(ptep, ptl);
+ out:
+ return page;
+@@ -1809,17 +1830,13 @@ int __get_user_pages(struct task_struct
+ * The VM_FAULT_WRITE bit tells us that
+ * do_wp_page has broken COW when necessary,
+ * even if maybe_mkwrite decided not to set
+- * pte_write. We can thus safely do subsequent
+- * page lookups as if they were reads. But only
+- * do so when looping for pte_write is futile:
+- * in some cases userspace may also be wanting
+- * to write to the gotten user page, which a
+- * read fault here might prevent (a readonly
+- * page might get reCOWed by userspace write).
++ * pte_write. We cannot simply drop FOLL_WRITE
++ * here because the COWed page might be gone by
++ * the time we do the subsequent page lookups.
+ */
+ if ((ret & VM_FAULT_WRITE) &&
+ !(vma->vm_flags & VM_WRITE))
+- foll_flags &= ~FOLL_WRITE;
++ foll_flags |= FOLL_COW;
+
+ cond_resched();
+ }
diff --git a/patches/net-fix-use-after-free-in-the-recvmmsg-exit-path.patch b/patches/net-fix-use-after-free-in-the-recvmmsg-exit-path.patch
new file mode 100644
index 0000000..93a5c65
--- /dev/null
+++ b/patches/net-fix-use-after-free-in-the-recvmmsg-exit-path.patch
@@ -0,0 +1,89 @@
+From 34b88a68f26a75e4fded796f1a49c40f82234b7d Mon Sep 17 00:00:00 2001
+From: Arnaldo Carvalho de Melo <acme@redhat.com>
+Date: Mon, 14 Mar 2016 09:56:35 -0300
+Subject: net: Fix use after free in the recvmmsg exit path
+
+commit 34b88a68f26a75e4fded796f1a49c40f82234b7d upstream.
+
+The syzkaller fuzzer hit the following use-after-free:
+
+ Call Trace:
+ [<ffffffff8175ea0e>] __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:295
+ [<ffffffff851cc31a>] __sys_recvmmsg+0x6fa/0x7f0 net/socket.c:2261
+ [< inline >] SYSC_recvmmsg net/socket.c:2281
+ [<ffffffff851cc57f>] SyS_recvmmsg+0x16f/0x180 net/socket.c:2270
+ [<ffffffff86332bb6>] entry_SYSCALL_64_fastpath+0x16/0x7a
+ arch/x86/entry/entry_64.S:185
+
+And, as Dmitry rightly assessed, that is because we can drop the
+reference and then touch it when the underlying recvmsg calls return
+some packets and then hit an error, which will make recvmmsg to set
+sock->sk->sk_err, oops, fix it.
+
+Reported-and-Tested-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Kostya Serebryany <kcc@google.com>
+Cc: Sasha Levin <sasha.levin@oracle.com>
+Fixes: a2e2725541fa ("net: Introduce recvmmsg socket syscall")
+http://lkml.kernel.org/r/20160122211644.GC2470@redhat.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Zefan Li <lizefan@huawei.com>
+---
+ net/socket.c | 38 +++++++++++++++++++-------------------
+ 1 file changed, 19 insertions(+), 19 deletions(-)
+
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -2332,31 +2332,31 @@ int __sys_recvmmsg(int fd, struct mmsghd
+ break;
+ }
+
+-out_put:
+- fput_light(sock->file, fput_needed);
+-
+ if (err == 0)
+- return datagrams;
++ goto out_put;
++
++ if (datagrams == 0) {
++ datagrams = err;
++ goto out_put;
++ }
+
+- if (datagrams != 0) {
++ /*
++ * We may return less entries than requested (vlen) if the
++ * sock is non block and there aren't enough datagrams...
++ */
++ if (err != -EAGAIN) {
+ /*
+- * We may return less entries than requested (vlen) if the
+- * sock is non block and there aren't enough datagrams...
++ * ... or if recvmsg returns an error after we
++ * received some datagrams, where we record the
++ * error to return on the next call or if the
++ * app asks about it using getsockopt(SO_ERROR).
+ */
+- if (err != -EAGAIN) {
+- /*
+- * ... or if recvmsg returns an error after we
+- * received some datagrams, where we record the
+- * error to return on the next call or if the
+- * app asks about it using getsockopt(SO_ERROR).
+- */
+- sock->sk->sk_err = -err;
+- }
+-
+- return datagrams;
++ sock->sk->sk_err = -err;
+ }
++out_put:
++ fput_light(sock->file, fput_needed);
+
+- return err;
++ return datagrams;
+ }
+
+ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
diff --git a/patches/series b/patches/series
index 5147543..6779316 100644
--- a/patches/series
+++ b/patches/series
@@ -127,3 +127,5 @@ xen-pciback-save-the-number-of-msi-x-entries-to-be-copied-later.patch
ser_gigaset-remove-unnecessary-kfree-calls-from-release-method.patch
ser_gigaset-use-container_of-instead-of-detour.patch
net-core-revert-net-fix-__netdev_update_features-return.-and-add-comment.patch
+mm-remove-gup_flags-foll_write-games-from-__get_user_pages.patch
+net-fix-use-after-free-in-the-recvmmsg-exit-path.patch