aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorClaudio Imbrenda <imbrenda@linux.ibm.com>2020-03-12 15:29:05 +1100
committerChristian Borntraeger <borntraeger@de.ibm.com>2020-03-12 08:19:51 -0400
commitcb89195caf34ddf14dc688ae062bbed4f236230f (patch)
treef8e84441ce23e87b829cd29f0b4f6f188280d338
parent79f83c54416e741da744f66396b2f813f6a808c5 (diff)
downloadlinux-53_pv.tar.gz
mm/gup/writeback: add callbacks for inaccessible pages53_pv
With the introduction of protected KVM guests on s390 there is now a concept of inaccessible pages. These pages need to be made accessible before the host can access them. While cpu accesses will trigger a fault that can be resolved, I/O accesses will just fail. We need to add a callback into architecture code for places that will do I/O, namely when writeback is started or when a page reference is taken. This is not only to enable paging, file backing etc, it is also necessary to protect the host against a malicious user space. For example a bad QEMU could simply start direct I/O on such protected memory. We do not want userspace to be able to trigger I/O errors and thus the logic is "whenever somebody accesses that page (gup) or does I/O, make sure that this page can be accessed". When the guest tries to access that page we will wait in the page fault handler for writeback to have finished and for the page_ref to be the expected value. On s390x the function is not supposed to fail, so it is ok to use a WARN_ON on failure. If we ever need some more finegrained handling we can tackle this when we know the details. Link: http://lkml.kernel.org/r/20200306132537.783769-3-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> Acked-by: Will Deacon <will@kernel.org> Reviewed-by: David Hildenbrand <david@redhat.com> Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com> Reviewed-by: John Hubbard <jhubbard@nvidia.com> Cc: Jan Kara <jack@suse.cz> Cc: Matthew Wilcox <willy@infradead.org> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Christoph Hellwig <hch@infradead.org> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Chinner <david@fromorbit.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Shuah Khan <shuah@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> [borntraeger@de.ibm.com: backport] Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
-rw-r--r--include/linux/gfp.h6
-rw-r--r--mm/gup.c27
-rw-r--r--mm/page-writeback.c9
3 files changed, 38 insertions, 4 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index ff1c96b8ae927c..047d40b26ffe0d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -485,6 +485,12 @@ static inline void arch_free_page(struct page *page, int order) { }
#ifndef HAVE_ARCH_ALLOC_PAGE
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
+#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+static inline int arch_make_page_accessible(struct page *page)
+{
+ return 0;
+}
+#endif
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
diff --git a/mm/gup.c b/mm/gup.c
index 98f13ab37bacc1..7dc19e30360e70 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -193,6 +193,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
struct page *page;
spinlock_t *ptl;
pte_t *ptep, pte;
+ int ret;
retry:
if (unlikely(pmd_bad(*pmd)))
@@ -246,8 +247,6 @@ retry:
if (is_zero_pfn(pte_pfn(pte))) {
page = pte_page(pte);
} else {
- int ret;
-
ret = follow_pfn_pte(vma, address, ptep, flags);
page = ERR_PTR(ret);
goto out;
@@ -255,7 +254,6 @@ retry:
}
if (flags & FOLL_SPLIT && PageTransCompound(page)) {
- int ret;
get_page(page);
pte_unmap_unlock(ptep, ptl);
lock_page(page);
@@ -273,6 +271,18 @@ retry:
goto out;
}
}
+ /*
+ * We need to make the page accessible if and only if we are going
+ * to access its content (the FOLL_GET case).
+ */
+ if (flags & FOLL_GET) {
+ ret = arch_make_page_accessible(page);
+ if (ret) {
+ put_page(page);
+ page = ERR_PTR(ret);
+ goto out;
+ }
+ }
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
@@ -1863,6 +1873,17 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
VM_BUG_ON_PAGE(compound_head(page) != head, page);
+ /*
+ * We need to make the page accessible if and only if we are
+ * going to access its content (the FOLL_GET case).
+ */
+ if (flags & FOLL_GET) {
+ ret = arch_make_page_accessible(page);
+ if (ret) {
+ put_page(page);
+ goto pte_unmap;
+ }
+ }
SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1804f64ff43c1c..88c775bbad61c8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2760,7 +2760,7 @@ int test_clear_page_writeback(struct page *page)
int __test_set_page_writeback(struct page *page, bool keep_write)
{
struct address_space *mapping = page_mapping(page);
- int ret;
+ int ret, access_ret;
lock_page_memcg(page);
if (mapping && mapping_use_writeback_tags(mapping)) {
@@ -2803,6 +2803,13 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
}
unlock_page_memcg(page);
+ access_ret = arch_make_page_accessible(page);
+ /*
+ * If writeback has been triggered on a page that cannot be made
+ * accessible, it is too late to recover here.
+ */
+ VM_BUG_ON_PAGE(access_ret != 0, page);
+
return ret;
}