diff options
author | openeuler-ci-bot <80474298@qq.com> | 2022-09-21 02:06:41 +0000 |
---|---|---|
committer | Gitee <noreply@gitee.com> | 2022-09-21 02:06:41 +0000 |
commit | eb51ccd7a347cc14920bbb6851aaf8d1d7409470 (patch) | |
tree | 94dab592a9dc76e6c04c0e5dfcfdf880a5cb0ada | |
parent | e23d90254dad36ec0ed38a26be09d95938f92c69 (diff) | |
parent | 9846ae4c121a47845a963f23f29ac026c6c5f684 (diff) | |
download | openEuler-kernel-eb51ccd7a347cc14920bbb6851aaf8d1d7409470.tar.gz |
!114 Add page table check for openEuler-22.09
Merge Pull Request from: @zzmine
This patchset incorporates the page table check functionality supported in the linux community into openEuler 22.09.
The patchset includes:
1. 核心功能及x86支持
d283d422c6c4 x86: mm: add x86_64 support for page table check
df4e817b7108 mm: page table check
08d5b29eac7d mm: ptep_clear() page table helper
1eba86c096e3 mm: change page type prior to adding page table entry
2. bugfix补丁
80110bbfbba6 mm/page_table_check: check entries at pmd levels
e59a47b8a453 mm/khugepaged: unify collapse pmd clear, flush and free
64d8b9e14512 mm/page_table_check: use unsigned long for page counters
and cleanup
fb5222aae64f mm/debug_vm_pgtable: remove pte entry from the page
table 【5.10不涉及】
3. arm64支持
42b2547137f5 arm64/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK
2e7dc2b632a3 mm: remove __HAVE_ARCH_PTEP_CLEAR in pgtable.h
de8c8e52836d mm: page_table_check: add hooks to public helpers
e5a554014618 mm: page_table_check: move pxx_user_accessible_page into x86
92fb05242a1b mm: page_table_check: using PxD_SIZE instead of PxD_PAGE_SIZE
4. bugfix补丁
ed928a3402d8 arm64/mm: fix page table check compile error for
CONFIG_PGTABLE_LEVELS=2
Intel Kernel Issue
openEuler5.10内核支持页表检查功能(page table check)
Test
Build and boot kernel successfully.
Build with PAGE_TABLE_CHECK=y and boot with page_table_check=on kernel parameter.
Link:https://gitee.com/openeuler/kernel/pulls/114
Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
-rw-r--r-- | Documentation/vm/arch_pgtable_helpers.rst | 6 | ||||
-rw-r--r-- | Documentation/vm/index.rst | 1 | ||||
-rw-r--r-- | Documentation/vm/page_table_check.rst | 56 | ||||
-rw-r--r-- | MAINTAINERS | 9 | ||||
-rw-r--r-- | arch/Kconfig | 3 | ||||
-rw-r--r-- | arch/arm64/Kconfig | 1 | ||||
-rw-r--r-- | arch/arm64/include/asm/pgtable.h | 61 | ||||
-rw-r--r-- | arch/x86/Kconfig | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable.h | 36 | ||||
-rw-r--r-- | include/linux/page_table_check.h | 162 | ||||
-rw-r--r-- | include/linux/pgtable.h | 13 | ||||
-rw-r--r-- | mm/Kconfig.debug | 24 | ||||
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/debug_vm_pgtable.c | 2 | ||||
-rw-r--r-- | mm/hugetlb.c | 4 | ||||
-rw-r--r-- | mm/khugepaged.c | 48 | ||||
-rw-r--r-- | mm/memory.c | 5 | ||||
-rw-r--r-- | mm/migrate.c | 5 | ||||
-rw-r--r-- | mm/page_alloc.c | 4 | ||||
-rw-r--r-- | mm/page_ext.c | 4 | ||||
-rw-r--r-- | mm/page_table_check.c | 248 | ||||
-rw-r--r-- | mm/swapfile.c | 4 |
22 files changed, 653 insertions, 45 deletions
diff --git a/Documentation/vm/arch_pgtable_helpers.rst b/Documentation/vm/arch_pgtable_helpers.rst index 552567d863b865..fbe06ec7537027 100644 --- a/Documentation/vm/arch_pgtable_helpers.rst +++ b/Documentation/vm/arch_pgtable_helpers.rst @@ -66,9 +66,11 @@ PTE Page Table Helpers +---------------------------+--------------------------------------------------+ | pte_mknotpresent | Invalidates a mapped PTE | +---------------------------+--------------------------------------------------+ -| ptep_get_and_clear | Clears a PTE | +| ptep_clear | Clears a PTE | +---------------------------+--------------------------------------------------+ -| ptep_get_and_clear_full | Clears a PTE | +| ptep_get_and_clear | Clears and returns PTE | ++---------------------------+--------------------------------------------------+ +| ptep_get_and_clear_full | Clears and returns PTE (batched PTE unmap) | +---------------------------+--------------------------------------------------+ | ptep_test_and_clear_young | Clears young from a PTE | +---------------------------+--------------------------------------------------+ diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst index 6f5ffef4b716a9..43bb54d897d9ee 100644 --- a/Documentation/vm/index.rst +++ b/Documentation/vm/index.rst @@ -31,6 +31,7 @@ algorithms. If you are looking for advice on simply allocating memory, see the page_migration page_frags page_owner + page_table_check remap_file_pages slub split_page_table_lock diff --git a/Documentation/vm/page_table_check.rst b/Documentation/vm/page_table_check.rst new file mode 100644 index 00000000000000..81f521ff7ea707 --- /dev/null +++ b/Documentation/vm/page_table_check.rst @@ -0,0 +1,56 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _page_table_check: + +================ +Page Table Check +================ + +Introduction +============ + +Page table check allows to hardern the kernel by ensuring that some types of +the memory corruptions are prevented. + +Page table check performs extra verifications at the time when new pages become +accessible from the userspace by getting their page table entries (PTEs PMDs +etc.) added into the table. + +In case of detected corruption, the kernel is crashed. There is a small +performance and memory overhead associated with the page table check. Therefore, +it is disabled by default, but can be optionally enabled on systems where the +extra hardening outweighs the performance costs. Also, because page table check +is synchronous, it can help with debugging double map memory corruption issues, +by crashing kernel at the time wrong mapping occurs instead of later which is +often the case with memory corruptions bugs. + +Double mapping detection logic +============================== + ++-------------------+-------------------+-------------------+------------------+ +| Current Mapping | New mapping | Permissions | Rule | ++===================+===================+===================+==================+ +| Anonymous | Anonymous | Read | Allow | ++-------------------+-------------------+-------------------+------------------+ +| Anonymous | Anonymous | Read / Write | Prohibit | ++-------------------+-------------------+-------------------+------------------+ +| Anonymous | Named | Any | Prohibit | ++-------------------+-------------------+-------------------+------------------+ +| Named | Anonymous | Any | Prohibit | ++-------------------+-------------------+-------------------+------------------+ +| Named | Named | Any | Allow | ++-------------------+-------------------+-------------------+------------------+ + +Enabling Page Table Check +========================= + +Build kernel with: + +- PAGE_TABLE_CHECK=y + Note, it can only be enabled on platforms where ARCH_SUPPORTS_PAGE_TABLE_CHECK + is available. + +- Boot with 'page_table_check=on' kernel parameter. + +Optionally, build kernel with PAGE_TABLE_CHECK_ENFORCED in order to have page +table support without extra kernel parameter. diff --git a/MAINTAINERS b/MAINTAINERS index a9ae6acec16cf9..e228dc4d2e2425 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13270,6 +13270,15 @@ F: include/net/page_pool.h F: include/trace/events/page_pool.h F: net/core/page_pool.c +PAGE TABLE CHECK +M: Pasha Tatashin <pasha.tatashin@soleen.com> +M: Andrew Morton <akpm@linux-foundation.org> +L: linux-mm@kvack.org +S: Maintained +F: Documentation/vm/page_table_check.rst +F: include/linux/page_table_check.h +F: mm/page_table_check.c + PANASONIC LAPTOP ACPI EXTRAS DRIVER M: Harald Welte <laforge@gnumonks.org> L: platform-driver-x86@vger.kernel.org diff --git a/arch/Kconfig b/arch/Kconfig index 47c626138d5b03..a271278c6fbc28 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1119,6 +1119,9 @@ config ARCH_WANT_LD_ORPHAN_WARN by the linker, since the locations of such sections can change between linker versions. +config ARCH_SUPPORTS_PAGE_TABLE_CHECK + bool + config ARCH_SPLIT_ARG64 bool help diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8f47907b4033a5..0e09962f667c66 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -78,6 +78,7 @@ config ARM64 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_PAGE_TABLE_CHECK select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index ab2443900f4eae..e9ef7384989654 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -35,6 +35,7 @@ #include <linux/mmdebug.h> #include <linux/mm_types.h> #include <linux/sched.h> +#include <linux/page_table_check.h> #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE @@ -98,6 +99,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) +#define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) #define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP)) @@ -314,8 +316,8 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, __func__, pte_val(old_pte), pte_val(pte)); } -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte); @@ -329,6 +331,13 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pte); } +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + page_table_check_pte_set(mm, addr, ptep, pte); + return __set_pte_at(mm, addr, ptep, pte); +} + /* * Huge pte definitions. */ @@ -424,6 +433,8 @@ static inline int pmd_trans_huge(pmd_t pmd) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) +#define pmd_user(pmd) pte_user(pmd_pte(pmd)) +#define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) @@ -470,8 +481,19 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) -#define set_pud_at(mm, addr, pudp, pud) set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud)) +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + page_table_check_pmd_set(mm, addr, pmdp, pmd); + return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)); +} + +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud) +{ + page_table_check_pud_set(mm, addr, pudp, pud); + return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud)); +} #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d)) #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys) @@ -609,6 +631,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pud_present(pud) pte_present(pud_pte(pud)) #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) #define pud_valid(pud) pte_valid(pud_pte(pud)) +#define pud_user(pud) pte_user(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { @@ -781,6 +804,23 @@ static inline int pgd_devmap(pgd_t pgd) } #endif +#ifdef CONFIG_PAGE_TABLE_CHECK +static inline bool pte_user_accessible_page(pte_t pte) +{ + return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte)); +} + +static inline bool pmd_user_accessible_page(pmd_t pmd) +{ + return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); +} + +static inline bool pud_user_accessible_page(pud_t pud) +{ + return pud_present(pud) && pud_user(pud); +} +#endif + /* * Atomic pte/pmd modifications. */ @@ -842,7 +882,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - return __pte(xchg_relaxed(&pte_val(*ptep), 0)); + pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0)); + + page_table_check_pte_clear(mm, address, pte); + + return pte; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -850,7 +894,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp)); + pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0)); + + page_table_check_pmd_clear(mm, address, pmd); + + return pmd; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -884,6 +932,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { + page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd); return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd))); } #endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b5ca6f54b32329..ba48644da7cddf 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -94,6 +94,7 @@ config X86 select ARCH_STACKWALK select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_PAGE_TABLE_CHECK if X86_64 select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 788b3a0120dcd9..028d82290dbc01 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -26,6 +26,7 @@ #include <asm/pkru.h> #include <asm/fpu/api.h> #include <asm-generic/pgtable_uffd.h> +#include <linux/page_table_check.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); @@ -1006,18 +1007,21 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { + page_table_check_pte_set(mm, addr, ptep, pte); set_pte(ptep, pte); } static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { + page_table_check_pmd_set(mm, addr, pmdp, pmd); set_pmd(pmdp, pmd); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { + page_table_check_pud_set(mm, addr, pudp, pud); native_set_pud(pudp, pud); } @@ -1048,6 +1052,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); + page_table_check_pte_clear(mm, addr, pte); return pte; } @@ -1063,6 +1068,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, * care about updates and native needs no locking */ pte = native_local_ptep_get_and_clear(ptep); + page_table_check_pte_clear(mm, addr, pte); } else { pte = ptep_get_and_clear(mm, addr, ptep); } @@ -1109,14 +1115,22 @@ static inline int pmd_write(pmd_t pmd) static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - return native_pmdp_get_and_clear(pmdp); + pmd_t pmd = native_pmdp_get_and_clear(pmdp); + + page_table_check_pmd_clear(mm, addr, pmd); + + return pmd; } #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - return native_pudp_get_and_clear(pudp); + pud_t pud = native_pudp_get_and_clear(pudp); + + page_table_check_pud_clear(mm, addr, pud); + + return pud; } #define __HAVE_ARCH_PMDP_SET_WRPROTECT @@ -1137,6 +1151,7 @@ static inline int pud_write(pud_t pud) static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { + page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd); if (IS_ENABLED(CONFIG_SMP)) { return xchg(pmdp, pmd); } else { @@ -1403,6 +1418,23 @@ static inline bool arch_faults_on_old_pte(void) return false; } +#ifdef CONFIG_PAGE_TABLE_CHECK +static inline bool pte_user_accessible_page(pte_t pte) +{ + return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER); +} + +static inline bool pmd_user_accessible_page(pmd_t pmd) +{ + return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER); +} + +static inline bool pud_user_accessible_page(pud_t pud) +{ + return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER); +} +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_H */ diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h new file mode 100644 index 00000000000000..e387791a04a6fc --- /dev/null +++ b/include/linux/page_table_check.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LINUX_PAGE_TABLE_CHECK_H +#define __LINUX_PAGE_TABLE_CHECK_H + +#ifdef CONFIG_PAGE_TABLE_CHECK +#include <linux/jump_label.h> + +extern struct static_key_true page_table_check_disabled; +extern struct page_ext_operations page_table_check_ops; + +void __page_table_check_zero(struct page *page, unsigned int order); +void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t pte); +void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr, + pmd_t pmd); +void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr, + pud_t pud); +void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd); +void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud); +void __page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd); + +static inline void page_table_check_alloc(struct page *page, unsigned int order) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_zero(page, order); +} + +static inline void page_table_check_free(struct page *page, unsigned int order) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_zero(page, order); +} + +static inline void page_table_check_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t pte) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pte_clear(mm, addr, pte); +} + +static inline void page_table_check_pmd_clear(struct mm_struct *mm, + unsigned long addr, pmd_t pmd) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pmd_clear(mm, addr, pmd); +} + +static inline void page_table_check_pud_clear(struct mm_struct *mm, + unsigned long addr, pud_t pud) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pud_clear(mm, addr, pud); +} + +static inline void page_table_check_pte_set(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + pte_t pte) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pte_set(mm, addr, ptep, pte); +} + +static inline void page_table_check_pmd_set(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp, + pmd_t pmd) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pmd_set(mm, addr, pmdp, pmd); +} + +static inline void page_table_check_pud_set(struct mm_struct *mm, + unsigned long addr, pud_t *pudp, + pud_t pud) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pud_set(mm, addr, pudp, pud); +} + +static inline void page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pte_clear_range(mm, addr, pmd); +} + +#else + +static inline void page_table_check_alloc(struct page *page, unsigned int order) +{ +} + +static inline void page_table_check_free(struct page *page, unsigned int order) +{ +} + +static inline void page_table_check_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t pte) +{ +} + +static inline void page_table_check_pmd_clear(struct mm_struct *mm, + unsigned long addr, pmd_t pmd) +{ +} + +static inline void page_table_check_pud_clear(struct mm_struct *mm, + unsigned long addr, pud_t pud) +{ +} + +static inline void page_table_check_pte_set(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + pte_t pte) +{ +} + +static inline void page_table_check_pmd_set(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp, + pmd_t pmd) +{ +} + +static inline void page_table_check_pud_set(struct mm_struct *mm, + unsigned long addr, pud_t *pudp, + pud_t pud) +{ +} + +static inline void page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ +} + +#endif /* CONFIG_PAGE_TABLE_CHECK */ +#endif /* __LINUX_PAGE_TABLE_CHECK_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index f924468d84ec41..84b92ff884c150 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -12,6 +12,7 @@ #include <linux/bug.h> #include <linux/errno.h> #include <asm-generic/pgtable_uffd.h> +#include <linux/page_table_check.h> #if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \ defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS @@ -248,10 +249,17 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, { pte_t pte = *ptep; pte_clear(mm, address, ptep); + page_table_check_pte_clear(mm, address, pte); return pte; } #endif +static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + ptep_get_and_clear(mm, addr, ptep); +} + #ifndef __HAVE_ARCH_PTEP_GET static inline pte_t ptep_get(pte_t *ptep) { @@ -266,7 +274,10 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, pmd_t *pmdp) { pmd_t pmd = *pmdp; + pmd_clear(pmdp); + page_table_check_pmd_clear(mm, address, pmd); + return pmd; } #endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */ @@ -278,6 +289,8 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, pud_t pud = *pudp; pud_clear(pudp); + page_table_check_pud_clear(mm, address, pud); + return pud; } #endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */ diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 864f129f193704..b04d3a4ddfcef5 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -62,6 +62,30 @@ config PAGE_OWNER If unsure, say N. +config PAGE_TABLE_CHECK + bool "Check for invalid mappings in user page tables" + depends on ARCH_SUPPORTS_PAGE_TABLE_CHECK + select PAGE_EXTENSION + help + Check that anonymous page is not being mapped twice with read write + permissions. Check that anonymous and file pages are not being + erroneously shared. Since the checking is performed at the time + entries are added and removed to user page tables, leaking, corruption + and double mapping problems are detected synchronously. + + If unsure say "n". + +config PAGE_TABLE_CHECK_ENFORCED + bool "Enforce the page table checking by default" + depends on PAGE_TABLE_CHECK + help + Always enable page table checking. By default the page table checking + is disabled, and can be optionally enabled via page_table_check=on + kernel parameter. This config enforces that page table check is always + enabled. + + If unsure say "n". + config PAGE_POISONING bool "Poison pages after freeing" select PAGE_POISONING_NO_SANITY if HIBERNATION diff --git a/mm/Makefile b/mm/Makefile index aad7866abe8cc3..366d9f62bfea42 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -111,6 +111,7 @@ obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o obj-$(CONFIG_CMA) += cma.o obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o +obj-$(CONFIG_PAGE_TABLE_CHECK) += page_table_check.o obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 11d3b46ba18704..63a438791e7b3d 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -614,7 +614,7 @@ static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep, #endif set_pte_at(mm, vaddr, ptep, pte); barrier(); - pte_clear(mm, vaddr, ptep); + ptep_clear(mm, vaddr, ptep); pte = ptep_get(ptep); WARN_ON(!pte_none(pte)); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1985d8a7a57d29..ad807d65d8154e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4693,10 +4693,10 @@ retry_avoidcopy: /* Break COW */ huge_ptep_clear_flush(vma, haddr, ptep); mmu_notifier_invalidate_range(mm, range.start, range.end); - set_huge_pte_at(mm, haddr, ptep, - make_huge_pte(vma, new_page, 1)); page_remove_rmap(old_page, true); hugepage_add_new_anon_rmap(new_page, vma, haddr); + set_huge_pte_at(mm, haddr, ptep, + make_huge_pte(vma, new_page, 1)); SetHPageMigratable(new_page); /* Make the old page be freed below */ new_page = old_page; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index aaef16aa8945cb..faeae65e4b1a30 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -16,6 +16,7 @@ #include <linux/hashtable.h> #include <linux/userfaultfd_k.h> #include <linux/page_idle.h> +#include <linux/page_table_check.h> #include <linux/swapops.h> #include <linux/shmem_fs.h> @@ -754,11 +755,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, * ptl mostly unnecessary. */ spin_lock(ptl); - /* - * paravirt calls inside pte_clear here are - * superfluous. - */ - pte_clear(vma->vm_mm, address, _pte); + ptep_clear(vma->vm_mm, address, _pte); spin_unlock(ptl); } } else { @@ -772,11 +769,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, * inside page_remove_rmap(). */ spin_lock(ptl); - /* - * paravirt calls inside pte_clear here are - * superfluous. - */ - pte_clear(vma->vm_mm, address, _pte); + ptep_clear(vma->vm_mm, address, _pte); reliable_page_counter(src_page, vma->vm_mm, -1); page_remove_rmap(src_page, false); spin_unlock(ptl); @@ -1435,6 +1428,21 @@ static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm, return 0; } +static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + spinlock_t *ptl; + pmd_t pmd; + + mmap_assert_write_locked(mm); + ptl = pmd_lock(vma->vm_mm, pmdp); + pmd = pmdp_collapse_flush(vma, addr, pmdp); + spin_unlock(ptl); + mm_dec_nr_ptes(mm); + page_table_check_pte_clear_range(mm, addr, pmd); + pte_free(mm, pmd_pgtable(pmd)); +} + /** * Try to collapse a pte-mapped THP for mm at address haddr. * @@ -1448,7 +1456,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) struct vm_area_struct *vma = find_vma(mm, haddr); struct page *hpage; pte_t *start_pte, *pte; - pmd_t *pmd, _pmd; + pmd_t *pmd; spinlock_t *ptl; int count = 0; int i; @@ -1525,11 +1533,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) } /* step 4: collapse pmd */ - ptl = pmd_lock(vma->vm_mm, pmd); - _pmd = pmdp_collapse_flush(vma, haddr, pmd); - spin_unlock(ptl); - mm_dec_nr_ptes(mm); - pte_free(mm, pmd_pgtable(_pmd)); + collapse_and_free_pmd(mm, vma, haddr, pmd); drop_hpage: unlock_page(hpage); @@ -1569,7 +1573,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) struct vm_area_struct *vma; struct mm_struct *mm; unsigned long addr; - pmd_t *pmd, _pmd; + pmd_t *pmd; i_mmap_lock_write(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { @@ -1608,14 +1612,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * reverse order. Trylock is a way to avoid deadlock. */ if (mmap_write_trylock(mm)) { - if (!khugepaged_test_exit(mm)) { - spinlock_t *ptl = pmd_lock(mm, pmd); - /* assume page table is clear */ - _pmd = pmdp_collapse_flush(vma, addr, pmd); - spin_unlock(ptl); - mm_dec_nr_ptes(mm); - pte_free(mm, pmd_pgtable(_pmd)); - } + if (!khugepaged_test_exit(mm)) + collapse_and_free_pmd(mm, vma, addr, pmd); mmap_write_unlock(mm); } else { /* Try again later */ diff --git a/mm/memory.c b/mm/memory.c index f1d28ad88944ed..133deca6742924 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3537,8 +3537,6 @@ skip_uswap: pte = pte_mkuffd_wp(pte); pte = pte_wrprotect(pte); } - set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); - arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); vmf->orig_pte = pte; /* ksm created a completely new copy */ @@ -3549,6 +3547,9 @@ skip_uswap: do_page_add_anon_rmap(page, vma, vmf->address, exclusive); } + set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); + arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); + swap_free(entry); if (mem_cgroup_swap_full(page) || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) diff --git a/mm/migrate.c b/mm/migrate.c index 1f78410a10635a..ba6cb49e1ab467 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -259,21 +259,20 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, if (PageHuge(new)) { pte = pte_mkhuge(pte); pte = arch_make_huge_pte(pte, vma, new, 0); - set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); if (PageAnon(new)) hugepage_add_anon_rmap(new, vma, pvmw.address); else page_dup_rmap(new, true); + set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); } else #endif { - set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); - reliable_page_counter(new, vma->vm_mm, 1); if (PageAnon(new)) page_add_anon_rmap(new, vma, pvmw.address, false); else page_add_file_rmap(new, false); + set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); } if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new)) mlock_vma_page(new); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 24116b7828f525..60b2351ede77d5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -62,6 +62,7 @@ #include <linux/sched/rt.h> #include <linux/sched/mm.h> #include <linux/page_owner.h> +#include <linux/page_table_check.h> #include <linux/kthread.h> #include <linux/memcontrol.h> #include <linux/ftrace.h> @@ -1221,6 +1222,7 @@ __always_inline bool free_pages_prepare(struct page *page, if (memcg_kmem_enabled() && PageMemcgKmem(page)) __memcg_kmem_uncharge_page(page, order); reset_page_owner(page, order); + page_table_check_free(page, order); return false; } @@ -1258,6 +1260,7 @@ __always_inline bool free_pages_prepare(struct page *page, page_cpupid_reset_last(page); page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; reset_page_owner(page, order); + page_table_check_free(page, order); if (!PageHighMem(page)) { debug_check_no_locks_freed(page_address(page), @@ -2282,6 +2285,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, kasan_alloc_pages(page, order); kernel_poison_pages(page, 1 << order, 1); set_page_owner(page, order, gfp_flags); + page_table_check_alloc(page, order); } void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, diff --git a/mm/page_ext.c b/mm/page_ext.c index 8e59da0f4367ac..d541923a24063e 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -8,6 +8,7 @@ #include <linux/kmemleak.h> #include <linux/page_owner.h> #include <linux/page_idle.h> +#include <linux/page_table_check.h> /* * struct page extension @@ -75,6 +76,9 @@ static struct page_ext_operations *page_ext_ops[] = { #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) &page_idle_ops, #endif +#ifdef CONFIG_PAGE_TABLE_CHECK + &page_table_check_ops, +#endif }; unsigned long page_ext_size = sizeof(struct page_ext); diff --git a/mm/page_table_check.c b/mm/page_table_check.c new file mode 100644 index 00000000000000..d2d3b8c2e3e936 --- /dev/null +++ b/mm/page_table_check.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/mm.h> +#include <linux/page_table_check.h> + +#undef pr_fmt +#define pr_fmt(fmt) "page_table_check: " fmt + +struct page_table_check { + atomic_t anon_map_count; + atomic_t file_map_count; +}; + +static bool __page_table_check_enabled __initdata = + IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED); + +DEFINE_STATIC_KEY_TRUE(page_table_check_disabled); +EXPORT_SYMBOL(page_table_check_disabled); + +static int __init early_page_table_check_param(char *buf) +{ + if (!buf) + return -EINVAL; + + if (strcmp(buf, "on") == 0) + __page_table_check_enabled = true; + else if (strcmp(buf, "off") == 0) + __page_table_check_enabled = false; + + return 0; +} + +early_param("page_table_check", early_page_table_check_param); + +static bool __init need_page_table_check(void) +{ + return __page_table_check_enabled; +} + +static void __init init_page_table_check(void) +{ + if (!__page_table_check_enabled) + return; + static_branch_disable(&page_table_check_disabled); +} + +struct page_ext_operations page_table_check_ops = { + .size = sizeof(struct page_table_check), + .need = need_page_table_check, + .init = init_page_table_check, +}; + +static struct page_table_check *get_page_table_check(struct page_ext *page_ext) +{ + BUG_ON(!page_ext); + return (void *)(page_ext) + page_table_check_ops.offset; +} + +/* + * An enty is removed from the page table, decrement the counters for that page + * verify that it is of correct type and counters do not become negative. + */ +static void page_table_check_clear(struct mm_struct *mm, unsigned long addr, + unsigned long pfn, unsigned long pgcnt) +{ + struct page_ext *page_ext; + struct page *page; + unsigned long i; + bool anon; + + if (!pfn_valid(pfn)) + return; + + page = pfn_to_page(pfn); + page_ext = lookup_page_ext(page); + anon = PageAnon(page); + + for (i = 0; i < pgcnt; i++) { + struct page_table_check *ptc = get_page_table_check(page_ext); + + if (anon) { + BUG_ON(atomic_read(&ptc->file_map_count)); + BUG_ON(atomic_dec_return(&ptc->anon_map_count) < 0); + } else { + BUG_ON(atomic_read(&ptc->anon_map_count)); + BUG_ON(atomic_dec_return(&ptc->file_map_count) < 0); + } + page_ext = page_ext_next(page_ext); + } +} + +/* + * A new enty is added to the page table, increment the counters for that page + * verify that it is of correct type and is not being mapped with a different + * type to a different process. + */ +static void page_table_check_set(struct mm_struct *mm, unsigned long addr, + unsigned long pfn, unsigned long pgcnt, + bool rw) +{ + struct page_ext *page_ext; + struct page *page; + unsigned long i; + bool anon; + + if (!pfn_valid(pfn)) + return; + + page = pfn_to_page(pfn); + page_ext = lookup_page_ext(page); + anon = PageAnon(page); + + for (i = 0; i < pgcnt; i++) { + struct page_table_check *ptc = get_page_table_check(page_ext); + + if (anon) { + BUG_ON(atomic_read(&ptc->file_map_count)); + BUG_ON(atomic_inc_return(&ptc->anon_map_count) > 1 && rw); + } else { + BUG_ON(atomic_read(&ptc->anon_map_count)); + BUG_ON(atomic_inc_return(&ptc->file_map_count) < 0); + } + page_ext = page_ext_next(page_ext); + } +} + +/* + * page is on free list, or is being allocated, verify that counters are zeroes + * crash if they are not. + */ +void __page_table_check_zero(struct page *page, unsigned int order) +{ + struct page_ext *page_ext = lookup_page_ext(page); + unsigned long i; + + BUG_ON(!page_ext); + for (i = 0; i < (1ul << order); i++) { + struct page_table_check *ptc = get_page_table_check(page_ext); + + BUG_ON(atomic_read(&ptc->anon_map_count)); + BUG_ON(atomic_read(&ptc->file_map_count)); + page_ext = page_ext_next(page_ext); + } +} + +void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t pte) +{ + if (&init_mm == mm) + return; + + if (pte_user_accessible_page(pte)) { + page_table_check_clear(mm, addr, pte_pfn(pte), + PAGE_SIZE >> PAGE_SHIFT); + } +} +EXPORT_SYMBOL(__page_table_check_pte_clear); + +void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr, + pmd_t pmd) +{ + if (&init_mm == mm) + return; + + if (pmd_user_accessible_page(pmd)) { + page_table_check_clear(mm, addr, pmd_pfn(pmd), + PMD_SIZE >> PAGE_SHIFT); + } +} +EXPORT_SYMBOL(__page_table_check_pmd_clear); + +void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr, + pud_t pud) +{ + if (&init_mm == mm) + return; + + if (pud_user_accessible_page(pud)) { + page_table_check_clear(mm, addr, pud_pfn(pud), + PUD_SIZE >> PAGE_SHIFT); + } +} +EXPORT_SYMBOL(__page_table_check_pud_clear); + +void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + if (&init_mm == mm) + return; + + __page_table_check_pte_clear(mm, addr, *ptep); + if (pte_user_accessible_page(pte)) { + page_table_check_set(mm, addr, pte_pfn(pte), + PAGE_SIZE >> PAGE_SHIFT, + pte_write(pte)); + } +} +EXPORT_SYMBOL(__page_table_check_pte_set); + +void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + if (&init_mm == mm) + return; + + __page_table_check_pmd_clear(mm, addr, *pmdp); + if (pmd_user_accessible_page(pmd)) { + page_table_check_set(mm, addr, pmd_pfn(pmd), + PMD_SIZE >> PAGE_SHIFT, + pmd_write(pmd)); + } +} +EXPORT_SYMBOL(__page_table_check_pmd_set); + +void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud) +{ + if (&init_mm == mm) + return; + + __page_table_check_pud_clear(mm, addr, *pudp); + if (pud_user_accessible_page(pud)) { + page_table_check_set(mm, addr, pud_pfn(pud), + PUD_SIZE >> PAGE_SHIFT, + pud_write(pud)); + } +} +EXPORT_SYMBOL(__page_table_check_pud_set); + +void __page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ + if (&init_mm == mm) + return; + + if (!pmd_bad(pmd) && !pmd_leaf(pmd)) { + pte_t *ptep = pte_offset_map(&pmd, addr); + unsigned long i; + + pte_unmap(ptep); + for (i = 0; i < PTRS_PER_PTE; i++) { + __page_table_check_pte_clear(mm, addr, *ptep); + addr += PAGE_SIZE; + ptep++; + } + } +} diff --git a/mm/swapfile.c b/mm/swapfile.c index 7faa30f460e40c..fdeb2c4e6ae672 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1933,8 +1933,6 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, dec_mm_counter(vma->vm_mm, MM_SWAPENTS); inc_mm_counter(vma->vm_mm, MM_ANONPAGES); get_page(page); - set_pte_at(vma->vm_mm, addr, pte, - pte_mkold(mk_pte(page, vma->vm_page_prot))); reliable_page_counter(page, vma->vm_mm, 1); if (page == swapcache) { @@ -1943,6 +1941,8 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, page_add_new_anon_rmap(page, vma, addr, false); lru_cache_add_inactive_or_unevictable(page, vma); } + set_pte_at(vma->vm_mm, addr, pte, + pte_mkold(mk_pte(page, vma->vm_page_prot))); swap_free(entry); out: pte_unmap_unlock(pte, ptl); |