aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoropeneuler-ci-bot <80474298@qq.com>2022-09-21 02:06:41 +0000
committerGitee <noreply@gitee.com>2022-09-21 02:06:41 +0000
commiteb51ccd7a347cc14920bbb6851aaf8d1d7409470 (patch)
tree94dab592a9dc76e6c04c0e5dfcfdf880a5cb0ada
parente23d90254dad36ec0ed38a26be09d95938f92c69 (diff)
parent9846ae4c121a47845a963f23f29ac026c6c5f684 (diff)
downloadopenEuler-kernel-eb51ccd7a347cc14920bbb6851aaf8d1d7409470.tar.gz
!114 Add page table check for openEuler-22.09
Merge Pull Request from: @zzmine This patchset incorporates the page table check functionality supported in the linux community into openEuler 22.09. The patchset includes: 1. 核心功能及x86支持 d283d422c6c4 x86: mm: add x86_64 support for page table check df4e817b7108 mm: page table check 08d5b29eac7d mm: ptep_clear() page table helper 1eba86c096e3 mm: change page type prior to adding page table entry 2. bugfix补丁 80110bbfbba6 mm/page_table_check: check entries at pmd levels e59a47b8a453 mm/khugepaged: unify collapse pmd clear, flush and free 64d8b9e14512 mm/page_table_check: use unsigned long for page counters and cleanup fb5222aae64f mm/debug_vm_pgtable: remove pte entry from the page table 【5.10不涉及】 3. arm64支持 42b2547137f5 arm64/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK 2e7dc2b632a3 mm: remove __HAVE_ARCH_PTEP_CLEAR in pgtable.h de8c8e52836d mm: page_table_check: add hooks to public helpers e5a554014618 mm: page_table_check: move pxx_user_accessible_page into x86 92fb05242a1b mm: page_table_check: using PxD_SIZE instead of PxD_PAGE_SIZE 4. bugfix补丁 ed928a3402d8 arm64/mm: fix page table check compile error for CONFIG_PGTABLE_LEVELS=2 Intel Kernel Issue openEuler5.10内核支持页表检查功能(page table check) Test Build and boot kernel successfully. Build with PAGE_TABLE_CHECK=y and boot with page_table_check=on kernel parameter. Link:https://gitee.com/openeuler/kernel/pulls/114 Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com> Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
-rw-r--r--Documentation/vm/arch_pgtable_helpers.rst6
-rw-r--r--Documentation/vm/index.rst1
-rw-r--r--Documentation/vm/page_table_check.rst56
-rw-r--r--MAINTAINERS9
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/include/asm/pgtable.h61
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/pgtable.h36
-rw-r--r--include/linux/page_table_check.h162
-rw-r--r--include/linux/pgtable.h13
-rw-r--r--mm/Kconfig.debug24
-rw-r--r--mm/Makefile1
-rw-r--r--mm/debug_vm_pgtable.c2
-rw-r--r--mm/hugetlb.c4
-rw-r--r--mm/khugepaged.c48
-rw-r--r--mm/memory.c5
-rw-r--r--mm/migrate.c5
-rw-r--r--mm/page_alloc.c4
-rw-r--r--mm/page_ext.c4
-rw-r--r--mm/page_table_check.c248
-rw-r--r--mm/swapfile.c4
22 files changed, 653 insertions, 45 deletions
diff --git a/Documentation/vm/arch_pgtable_helpers.rst b/Documentation/vm/arch_pgtable_helpers.rst
index 552567d863b865..fbe06ec7537027 100644
--- a/Documentation/vm/arch_pgtable_helpers.rst
+++ b/Documentation/vm/arch_pgtable_helpers.rst
@@ -66,9 +66,11 @@ PTE Page Table Helpers
+---------------------------+--------------------------------------------------+
| pte_mknotpresent | Invalidates a mapped PTE |
+---------------------------+--------------------------------------------------+
-| ptep_get_and_clear | Clears a PTE |
+| ptep_clear | Clears a PTE |
+---------------------------+--------------------------------------------------+
-| ptep_get_and_clear_full | Clears a PTE |
+| ptep_get_and_clear | Clears and returns PTE |
++---------------------------+--------------------------------------------------+
+| ptep_get_and_clear_full | Clears and returns PTE (batched PTE unmap) |
+---------------------------+--------------------------------------------------+
| ptep_test_and_clear_young | Clears young from a PTE |
+---------------------------+--------------------------------------------------+
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
index 6f5ffef4b716a9..43bb54d897d9ee 100644
--- a/Documentation/vm/index.rst
+++ b/Documentation/vm/index.rst
@@ -31,6 +31,7 @@ algorithms. If you are looking for advice on simply allocating memory, see the
page_migration
page_frags
page_owner
+ page_table_check
remap_file_pages
slub
split_page_table_lock
diff --git a/Documentation/vm/page_table_check.rst b/Documentation/vm/page_table_check.rst
new file mode 100644
index 00000000000000..81f521ff7ea707
--- /dev/null
+++ b/Documentation/vm/page_table_check.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _page_table_check:
+
+================
+Page Table Check
+================
+
+Introduction
+============
+
+Page table check allows hardening the kernel by ensuring that some types of
+memory corruption are prevented.
+
+Page table check performs extra verifications at the time when new pages become
+accessible from the userspace by getting their page table entries (PTEs, PMDs,
+etc.) added into the table.
+
+In case of detected corruption, the kernel is crashed. There is a small
+performance and memory overhead associated with the page table check. Therefore,
+it is disabled by default, but can be optionally enabled on systems where the
+extra hardening outweighs the performance costs. Also, because page table check
+is synchronous, it can help with debugging double map memory corruption issues,
+by crashing the kernel at the time the wrong mapping occurs instead of later,
+which is often the case with memory corruption bugs.
+
+Double mapping detection logic
+==============================
+
++-------------------+-------------------+-------------------+------------------+
+| Current Mapping | New mapping | Permissions | Rule |
++===================+===================+===================+==================+
+| Anonymous | Anonymous | Read | Allow |
++-------------------+-------------------+-------------------+------------------+
+| Anonymous | Anonymous | Read / Write | Prohibit |
++-------------------+-------------------+-------------------+------------------+
+| Anonymous | Named | Any | Prohibit |
++-------------------+-------------------+-------------------+------------------+
+| Named | Anonymous | Any | Prohibit |
++-------------------+-------------------+-------------------+------------------+
+| Named | Named | Any | Allow |
++-------------------+-------------------+-------------------+------------------+
+
+Enabling Page Table Check
+=========================
+
+Build kernel with:
+
+- PAGE_TABLE_CHECK=y
+ Note, it can only be enabled on platforms where ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ is available.
+
+- Boot with 'page_table_check=on' kernel parameter.
+
+Optionally, build kernel with PAGE_TABLE_CHECK_ENFORCED in order to have page
+table check enabled without an extra kernel parameter.
diff --git a/MAINTAINERS b/MAINTAINERS
index a9ae6acec16cf9..e228dc4d2e2425 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13270,6 +13270,15 @@ F: include/net/page_pool.h
F: include/trace/events/page_pool.h
F: net/core/page_pool.c
+PAGE TABLE CHECK
+M: Pasha Tatashin <pasha.tatashin@soleen.com>
+M: Andrew Morton <akpm@linux-foundation.org>
+L: linux-mm@kvack.org
+S: Maintained
+F: Documentation/vm/page_table_check.rst
+F: include/linux/page_table_check.h
+F: mm/page_table_check.c
+
PANASONIC LAPTOP ACPI EXTRAS DRIVER
M: Harald Welte <laforge@gnumonks.org>
L: platform-driver-x86@vger.kernel.org
diff --git a/arch/Kconfig b/arch/Kconfig
index 47c626138d5b03..a271278c6fbc28 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1119,6 +1119,9 @@ config ARCH_WANT_LD_ORPHAN_WARN
by the linker, since the locations of such sections can change between linker
versions.
+config ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ bool
+
config ARCH_SPLIT_ARG64
bool
help
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8f47907b4033a5..0e09962f667c66 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -78,6 +78,7 @@ config ARM64
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index ab2443900f4eae..e9ef7384989654 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -35,6 +35,7 @@
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
+#include <linux/page_table_check.h>
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
@@ -98,6 +99,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
+#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
#define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
#define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
@@ -314,8 +316,8 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
__func__, pte_val(old_pte), pte_val(pte));
}
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
{
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
@@ -329,6 +331,13 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
set_pte(ptep, pte);
}
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ page_table_check_pte_set(mm, addr, ptep, pte);
+ return __set_pte_at(mm, addr, ptep, pte);
+}
+
/*
* Huge pte definitions.
*/
@@ -424,6 +433,8 @@ static inline int pmd_trans_huge(pmd_t pmd)
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
+#define pmd_user(pmd) pte_user(pmd_pte(pmd))
+#define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
@@ -470,8 +481,19 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
-#define set_pud_at(mm, addr, pudp, pud) set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud))
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ page_table_check_pmd_set(mm, addr, pmdp, pmd);
+ return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
+}
+
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(mm, addr, pudp, pud);
+ return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
+}
#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
#define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
@@ -609,6 +631,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define pud_present(pud) pte_present(pud_pte(pud))
#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
#define pud_valid(pud) pte_valid(pud_pte(pud))
+#define pud_user(pud) pte_user(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
{
@@ -781,6 +804,23 @@ static inline int pgd_devmap(pgd_t pgd)
}
#endif
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte));
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_present(pud) && pud_user(pud);
+}
+#endif
+
/*
* Atomic pte/pmd modifications.
*/
@@ -842,7 +882,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
- return __pte(xchg_relaxed(&pte_val(*ptep), 0));
+ pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
+
+ page_table_check_pte_clear(mm, address, pte);
+
+ return pte;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -850,7 +894,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
- return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
+ pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
+
+ page_table_check_pmd_clear(mm, address, pmd);
+
+ return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -884,6 +932,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
}
#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b5ca6f54b32329..ba48644da7cddf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -94,6 +94,7 @@ config X86
select ARCH_STACKWALK
select ARCH_SUPPORTS_ACPI
select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_PAGE_TABLE_CHECK if X86_64
select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_QUEUED_RWLOCKS
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 788b3a0120dcd9..028d82290dbc01 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -26,6 +26,7 @@
#include <asm/pkru.h>
#include <asm/fpu/api.h>
#include <asm-generic/pgtable_uffd.h>
+#include <linux/page_table_check.h>
extern pgd_t early_top_pgt[PTRS_PER_PGD];
bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
@@ -1006,18 +1007,21 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
+ page_table_check_pte_set(mm, addr, ptep, pte);
set_pte(ptep, pte);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(mm, addr, pmdp, pmd);
set_pmd(pmdp, pmd);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
+ page_table_check_pud_set(mm, addr, pudp, pud);
native_set_pud(pudp, pud);
}
@@ -1048,6 +1052,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t pte = native_ptep_get_and_clear(ptep);
+ page_table_check_pte_clear(mm, addr, pte);
return pte;
}
@@ -1063,6 +1068,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
* care about updates and native needs no locking
*/
pte = native_local_ptep_get_and_clear(ptep);
+ page_table_check_pte_clear(mm, addr, pte);
} else {
pte = ptep_get_and_clear(mm, addr, ptep);
}
@@ -1109,14 +1115,22 @@ static inline int pmd_write(pmd_t pmd)
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
{
- return native_pmdp_get_and_clear(pmdp);
+ pmd_t pmd = native_pmdp_get_and_clear(pmdp);
+
+ page_table_check_pmd_clear(mm, addr, pmd);
+
+ return pmd;
}
#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
- return native_pudp_get_and_clear(pudp);
+ pud_t pud = native_pudp_get_and_clear(pudp);
+
+ page_table_check_pud_clear(mm, addr, pud);
+
+ return pud;
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@ -1137,6 +1151,7 @@ static inline int pud_write(pud_t pud)
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
if (IS_ENABLED(CONFIG_SMP)) {
return xchg(pmdp, pmd);
} else {
@@ -1403,6 +1418,23 @@ static inline bool arch_faults_on_old_pte(void)
return false;
}
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER);
+}
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_H */
diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h
new file mode 100644
index 00000000000000..e387791a04a6fc
--- /dev/null
+++ b/include/linux/page_table_check.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LINUX_PAGE_TABLE_CHECK_H
+#define __LINUX_PAGE_TABLE_CHECK_H
+
+#ifdef CONFIG_PAGE_TABLE_CHECK
+#include <linux/jump_label.h>
+
+extern struct static_key_true page_table_check_disabled;
+extern struct page_ext_operations page_table_check_ops;
+
+void __page_table_check_zero(struct page *page, unsigned int order);
+void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t pte);
+void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
+ pmd_t pmd);
+void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
+ pud_t pud);
+void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd);
+void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud);
+void __page_table_check_pte_clear_range(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t pmd);
+
+static inline void page_table_check_alloc(struct page *page, unsigned int order)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_zero(page, order);
+}
+
+static inline void page_table_check_free(struct page *page, unsigned int order)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_zero(page, order);
+}
+
+static inline void page_table_check_pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t pte)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pte_clear(mm, addr, pte);
+}
+
+static inline void page_table_check_pmd_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t pmd)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pmd_clear(mm, addr, pmd);
+}
+
+static inline void page_table_check_pud_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t pud)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pud_clear(mm, addr, pud);
+}
+
+static inline void page_table_check_pte_set(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pte_set(mm, addr, ptep, pte);
+}
+
+static inline void page_table_check_pmd_set(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp,
+ pmd_t pmd)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pmd_set(mm, addr, pmdp, pmd);
+}
+
+static inline void page_table_check_pud_set(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp,
+ pud_t pud)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pud_set(mm, addr, pudp, pud);
+}
+
+static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t pmd)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pte_clear_range(mm, addr, pmd);
+}
+
+#else
+
+static inline void page_table_check_alloc(struct page *page, unsigned int order)
+{
+}
+
+static inline void page_table_check_free(struct page *page, unsigned int order)
+{
+}
+
+static inline void page_table_check_pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t pte)
+{
+}
+
+static inline void page_table_check_pmd_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t pmd)
+{
+}
+
+static inline void page_table_check_pud_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t pud)
+{
+}
+
+static inline void page_table_check_pte_set(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte)
+{
+}
+
+static inline void page_table_check_pmd_set(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp,
+ pmd_t pmd)
+{
+}
+
+static inline void page_table_check_pud_set(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp,
+ pud_t pud)
+{
+}
+
+static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t pmd)
+{
+}
+
+#endif /* CONFIG_PAGE_TABLE_CHECK */
+#endif /* __LINUX_PAGE_TABLE_CHECK_H */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index f924468d84ec41..84b92ff884c150 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -12,6 +12,7 @@
#include <linux/bug.h>
#include <linux/errno.h>
#include <asm-generic/pgtable_uffd.h>
+#include <linux/page_table_check.h>
#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
@@ -248,10 +249,17 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
{
pte_t pte = *ptep;
pte_clear(mm, address, ptep);
+ page_table_check_pte_clear(mm, address, pte);
return pte;
}
#endif
+static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ ptep_get_and_clear(mm, addr, ptep);
+}
+
#ifndef __HAVE_ARCH_PTEP_GET
static inline pte_t ptep_get(pte_t *ptep)
{
@@ -266,7 +274,10 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
pmd_t *pmdp)
{
pmd_t pmd = *pmdp;
+
pmd_clear(pmdp);
+ page_table_check_pmd_clear(mm, address, pmd);
+
return pmd;
}
#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
@@ -278,6 +289,8 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
pud_t pud = *pudp;
pud_clear(pudp);
+ page_table_check_pud_clear(mm, address, pud);
+
return pud;
}
#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 864f129f193704..b04d3a4ddfcef5 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -62,6 +62,30 @@ config PAGE_OWNER
If unsure, say N.
+config PAGE_TABLE_CHECK
+ bool "Check for invalid mappings in user page tables"
+ depends on ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ select PAGE_EXTENSION
+ help
+ Check that anonymous page is not being mapped twice with read write
+ permissions. Check that anonymous and file pages are not being
+ erroneously shared. Since the checking is performed at the time
+ entries are added and removed to user page tables, leaking, corruption
+ and double mapping problems are detected synchronously.
+
+ If unsure say "n".
+
+config PAGE_TABLE_CHECK_ENFORCED
+ bool "Enforce the page table checking by default"
+ depends on PAGE_TABLE_CHECK
+ help
+ Always enable page table checking. By default the page table checking
+ is disabled, and can be optionally enabled via page_table_check=on
+ kernel parameter. This config enforces that page table check is always
+ enabled.
+
+ If unsure say "n".
+
config PAGE_POISONING
bool "Poison pages after freeing"
select PAGE_POISONING_NO_SANITY if HIBERNATION
diff --git a/mm/Makefile b/mm/Makefile
index aad7866abe8cc3..366d9f62bfea42 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
obj-$(CONFIG_CMA) += cma.o
obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
+obj-$(CONFIG_PAGE_TABLE_CHECK) += page_table_check.o
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 11d3b46ba18704..63a438791e7b3d 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -614,7 +614,7 @@ static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
#endif
set_pte_at(mm, vaddr, ptep, pte);
barrier();
- pte_clear(mm, vaddr, ptep);
+ ptep_clear(mm, vaddr, ptep);
pte = ptep_get(ptep);
WARN_ON(!pte_none(pte));
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1985d8a7a57d29..ad807d65d8154e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4693,10 +4693,10 @@ retry_avoidcopy:
/* Break COW */
huge_ptep_clear_flush(vma, haddr, ptep);
mmu_notifier_invalidate_range(mm, range.start, range.end);
- set_huge_pte_at(mm, haddr, ptep,
- make_huge_pte(vma, new_page, 1));
page_remove_rmap(old_page, true);
hugepage_add_new_anon_rmap(new_page, vma, haddr);
+ set_huge_pte_at(mm, haddr, ptep,
+ make_huge_pte(vma, new_page, 1));
SetHPageMigratable(new_page);
/* Make the old page be freed below */
new_page = old_page;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index aaef16aa8945cb..faeae65e4b1a30 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -16,6 +16,7 @@
#include <linux/hashtable.h>
#include <linux/userfaultfd_k.h>
#include <linux/page_idle.h>
+#include <linux/page_table_check.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
@@ -754,11 +755,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
* ptl mostly unnecessary.
*/
spin_lock(ptl);
- /*
- * paravirt calls inside pte_clear here are
- * superfluous.
- */
- pte_clear(vma->vm_mm, address, _pte);
+ ptep_clear(vma->vm_mm, address, _pte);
spin_unlock(ptl);
}
} else {
@@ -772,11 +769,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
* inside page_remove_rmap().
*/
spin_lock(ptl);
- /*
- * paravirt calls inside pte_clear here are
- * superfluous.
- */
- pte_clear(vma->vm_mm, address, _pte);
+ ptep_clear(vma->vm_mm, address, _pte);
reliable_page_counter(src_page, vma->vm_mm, -1);
page_remove_rmap(src_page, false);
spin_unlock(ptl);
@@ -1435,6 +1428,21 @@ static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
return 0;
}
+static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ spinlock_t *ptl;
+ pmd_t pmd;
+
+ mmap_assert_write_locked(mm);
+ ptl = pmd_lock(vma->vm_mm, pmdp);
+ pmd = pmdp_collapse_flush(vma, addr, pmdp);
+ spin_unlock(ptl);
+ mm_dec_nr_ptes(mm);
+ page_table_check_pte_clear_range(mm, addr, pmd);
+ pte_free(mm, pmd_pgtable(pmd));
+}
+
/**
* Try to collapse a pte-mapped THP for mm at address haddr.
*
@@ -1448,7 +1456,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
struct vm_area_struct *vma = find_vma(mm, haddr);
struct page *hpage;
pte_t *start_pte, *pte;
- pmd_t *pmd, _pmd;
+ pmd_t *pmd;
spinlock_t *ptl;
int count = 0;
int i;
@@ -1525,11 +1533,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
}
/* step 4: collapse pmd */
- ptl = pmd_lock(vma->vm_mm, pmd);
- _pmd = pmdp_collapse_flush(vma, haddr, pmd);
- spin_unlock(ptl);
- mm_dec_nr_ptes(mm);
- pte_free(mm, pmd_pgtable(_pmd));
+ collapse_and_free_pmd(mm, vma, haddr, pmd);
drop_hpage:
unlock_page(hpage);
@@ -1569,7 +1573,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
struct vm_area_struct *vma;
struct mm_struct *mm;
unsigned long addr;
- pmd_t *pmd, _pmd;
+ pmd_t *pmd;
i_mmap_lock_write(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
@@ -1608,14 +1612,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* reverse order. Trylock is a way to avoid deadlock.
*/
if (mmap_write_trylock(mm)) {
- if (!khugepaged_test_exit(mm)) {
- spinlock_t *ptl = pmd_lock(mm, pmd);
- /* assume page table is clear */
- _pmd = pmdp_collapse_flush(vma, addr, pmd);
- spin_unlock(ptl);
- mm_dec_nr_ptes(mm);
- pte_free(mm, pmd_pgtable(_pmd));
- }
+ if (!khugepaged_test_exit(mm))
+ collapse_and_free_pmd(mm, vma, addr, pmd);
mmap_write_unlock(mm);
} else {
/* Try again later */
diff --git a/mm/memory.c b/mm/memory.c
index f1d28ad88944ed..133deca6742924 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3537,8 +3537,6 @@ skip_uswap:
pte = pte_mkuffd_wp(pte);
pte = pte_wrprotect(pte);
}
- set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
- arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
vmf->orig_pte = pte;
/* ksm created a completely new copy */
@@ -3549,6 +3547,9 @@ skip_uswap:
do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
}
+ set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
+ arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
+
swap_free(entry);
if (mem_cgroup_swap_full(page) ||
(vma->vm_flags & VM_LOCKED) || PageMlocked(page))
diff --git a/mm/migrate.c b/mm/migrate.c
index 1f78410a10635a..ba6cb49e1ab467 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -259,21 +259,20 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
if (PageHuge(new)) {
pte = pte_mkhuge(pte);
pte = arch_make_huge_pte(pte, vma, new, 0);
- set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
if (PageAnon(new))
hugepage_add_anon_rmap(new, vma, pvmw.address);
else
page_dup_rmap(new, true);
+ set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
} else
#endif
{
- set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
-
reliable_page_counter(new, vma->vm_mm, 1);
if (PageAnon(new))
page_add_anon_rmap(new, vma, pvmw.address, false);
else
page_add_file_rmap(new, false);
+ set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
}
if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
mlock_vma_page(new);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 24116b7828f525..60b2351ede77d5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -62,6 +62,7 @@
#include <linux/sched/rt.h>
#include <linux/sched/mm.h>
#include <linux/page_owner.h>
+#include <linux/page_table_check.h>
#include <linux/kthread.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
@@ -1221,6 +1222,7 @@ __always_inline bool free_pages_prepare(struct page *page,
if (memcg_kmem_enabled() && PageMemcgKmem(page))
__memcg_kmem_uncharge_page(page, order);
reset_page_owner(page, order);
+ page_table_check_free(page, order);
return false;
}
@@ -1258,6 +1260,7 @@ __always_inline bool free_pages_prepare(struct page *page,
page_cpupid_reset_last(page);
page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
reset_page_owner(page, order);
+ page_table_check_free(page, order);
if (!PageHighMem(page)) {
debug_check_no_locks_freed(page_address(page),
@@ -2282,6 +2285,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
kasan_alloc_pages(page, order);
kernel_poison_pages(page, 1 << order, 1);
set_page_owner(page, order, gfp_flags);
+ page_table_check_alloc(page, order);
}
void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 8e59da0f4367ac..d541923a24063e 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -8,6 +8,7 @@
#include <linux/kmemleak.h>
#include <linux/page_owner.h>
#include <linux/page_idle.h>
+#include <linux/page_table_check.h>
/*
* struct page extension
@@ -75,6 +76,9 @@ static struct page_ext_operations *page_ext_ops[] = {
#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
&page_idle_ops,
#endif
+#ifdef CONFIG_PAGE_TABLE_CHECK
+ &page_table_check_ops,
+#endif
};
unsigned long page_ext_size = sizeof(struct page_ext);
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
new file mode 100644
index 00000000000000..d2d3b8c2e3e936
--- /dev/null
+++ b/mm/page_table_check.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/mm.h>
+#include <linux/page_table_check.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "page_table_check: " fmt
+
+struct page_table_check {
+ atomic_t anon_map_count;
+ atomic_t file_map_count;
+};
+
+static bool __page_table_check_enabled __initdata =
+ IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED);
+
+DEFINE_STATIC_KEY_TRUE(page_table_check_disabled);
+EXPORT_SYMBOL(page_table_check_disabled);
+
+static int __init early_page_table_check_param(char *buf)
+{
+ if (!buf)
+ return -EINVAL;
+
+ if (strcmp(buf, "on") == 0)
+ __page_table_check_enabled = true;
+ else if (strcmp(buf, "off") == 0)
+ __page_table_check_enabled = false;
+
+ return 0;
+}
+
+early_param("page_table_check", early_page_table_check_param);
+
+static bool __init need_page_table_check(void)
+{
+ return __page_table_check_enabled;
+}
+
+static void __init init_page_table_check(void)
+{
+ if (!__page_table_check_enabled)
+ return;
+ static_branch_disable(&page_table_check_disabled);
+}
+
+struct page_ext_operations page_table_check_ops = {
+ .size = sizeof(struct page_table_check),
+ .need = need_page_table_check,
+ .init = init_page_table_check,
+};
+
+static struct page_table_check *get_page_table_check(struct page_ext *page_ext)
+{
+ BUG_ON(!page_ext);
+ return (void *)(page_ext) + page_table_check_ops.offset;
+}
+
+/*
+ * An entry is removed from the page table: decrement the counters for that
+ * page; verify it is of the correct type and counters do not become negative.
+ */
+static void page_table_check_clear(struct mm_struct *mm, unsigned long addr,
+ unsigned long pfn, unsigned long pgcnt)
+{
+ struct page_ext *page_ext;
+ struct page *page;
+ unsigned long i;
+ bool anon;
+
+ if (!pfn_valid(pfn))
+ return;
+
+ page = pfn_to_page(pfn);
+ page_ext = lookup_page_ext(page);
+ anon = PageAnon(page);
+
+ for (i = 0; i < pgcnt; i++) {
+ struct page_table_check *ptc = get_page_table_check(page_ext);
+
+ if (anon) {
+ BUG_ON(atomic_read(&ptc->file_map_count));
+ BUG_ON(atomic_dec_return(&ptc->anon_map_count) < 0);
+ } else {
+ BUG_ON(atomic_read(&ptc->anon_map_count));
+ BUG_ON(atomic_dec_return(&ptc->file_map_count) < 0);
+ }
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+/*
+ * A new entry is added to the page table: increment the counters for that
+ * page; verify it is of the correct type and is not being mapped with a
+ * different type to a different process.
+ */
+static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
+ unsigned long pfn, unsigned long pgcnt,
+ bool rw)
+{
+ struct page_ext *page_ext;
+ struct page *page;
+ unsigned long i;
+ bool anon;
+
+ if (!pfn_valid(pfn))
+ return;
+
+ page = pfn_to_page(pfn);
+ page_ext = lookup_page_ext(page);
+ anon = PageAnon(page);
+
+ for (i = 0; i < pgcnt; i++) {
+ struct page_table_check *ptc = get_page_table_check(page_ext);
+
+ if (anon) {
+ BUG_ON(atomic_read(&ptc->file_map_count));
+ BUG_ON(atomic_inc_return(&ptc->anon_map_count) > 1 && rw);
+ } else {
+ BUG_ON(atomic_read(&ptc->anon_map_count));
+ BUG_ON(atomic_inc_return(&ptc->file_map_count) < 0);
+ }
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+/*
+ * The page is on the free list or is being allocated: verify that the counters
+ * are zero and crash if they are not.
+ */
+void __page_table_check_zero(struct page *page, unsigned int order)
+{
+ struct page_ext *page_ext = lookup_page_ext(page);
+ unsigned long i;
+
+ BUG_ON(!page_ext);
+ for (i = 0; i < (1ul << order); i++) {
+ struct page_table_check *ptc = get_page_table_check(page_ext);
+
+ BUG_ON(atomic_read(&ptc->anon_map_count));
+ BUG_ON(atomic_read(&ptc->file_map_count));
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t pte)
+{
+ if (&init_mm == mm)
+ return;
+
+ if (pte_user_accessible_page(pte)) {
+ page_table_check_clear(mm, addr, pte_pfn(pte),
+ PAGE_SIZE >> PAGE_SHIFT);
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pte_clear);
+
+void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
+ pmd_t pmd)
+{
+ if (&init_mm == mm)
+ return;
+
+ if (pmd_user_accessible_page(pmd)) {
+ page_table_check_clear(mm, addr, pmd_pfn(pmd),
+ PMD_SIZE >> PAGE_SHIFT);
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pmd_clear);
+
+void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
+ pud_t pud)
+{
+ if (&init_mm == mm)
+ return;
+
+ if (pud_user_accessible_page(pud)) {
+ page_table_check_clear(mm, addr, pud_pfn(pud),
+ PUD_SIZE >> PAGE_SHIFT);
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pud_clear);
+
+void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ if (&init_mm == mm)
+ return;
+
+ __page_table_check_pte_clear(mm, addr, *ptep);
+ if (pte_user_accessible_page(pte)) {
+ page_table_check_set(mm, addr, pte_pfn(pte),
+ PAGE_SIZE >> PAGE_SHIFT,
+ pte_write(pte));
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pte_set);
+
+void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ if (&init_mm == mm)
+ return;
+
+ __page_table_check_pmd_clear(mm, addr, *pmdp);
+ if (pmd_user_accessible_page(pmd)) {
+ page_table_check_set(mm, addr, pmd_pfn(pmd),
+ PMD_SIZE >> PAGE_SHIFT,
+ pmd_write(pmd));
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pmd_set);
+
+void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+ if (&init_mm == mm)
+ return;
+
+ __page_table_check_pud_clear(mm, addr, *pudp);
+ if (pud_user_accessible_page(pud)) {
+ page_table_check_set(mm, addr, pud_pfn(pud),
+ PUD_SIZE >> PAGE_SHIFT,
+ pud_write(pud));
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pud_set);
+
+void __page_table_check_pte_clear_range(struct mm_struct *mm,
+					unsigned long addr, pmd_t pmd)
+{
+	if (&init_mm == mm)
+		return;
+
+	if (!pmd_bad(pmd) && !pmd_leaf(pmd)) {
+		pte_t *ptep = pte_offset_map(&pmd, addr);
+		unsigned long i;
+
+		for (i = 0; i < PTRS_PER_PTE; i++) {
+			__page_table_check_pte_clear(mm, addr, *ptep);
+			addr += PAGE_SIZE;
+			ptep++;
+		}
+		/* Unmap only once the walk no longer reads *ptep. */
+		pte_unmap(ptep - PTRS_PER_PTE);
+	}
+}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 7faa30f460e40c..fdeb2c4e6ae672 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1933,8 +1933,6 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
get_page(page);
- set_pte_at(vma->vm_mm, addr, pte,
- pte_mkold(mk_pte(page, vma->vm_page_prot)));
reliable_page_counter(page, vma->vm_mm, 1);
if (page == swapcache) {
@@ -1943,6 +1941,8 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
page_add_new_anon_rmap(page, vma, addr, false);
lru_cache_add_inactive_or_unevictable(page, vma);
}
+ set_pte_at(vma->vm_mm, addr, pte,
+ pte_mkold(mk_pte(page, vma->vm_page_prot)));
swap_free(entry);
out:
pte_unmap_unlock(pte, ptl);