From: Zachary Amsden Virtualization aware Linux kernels may need to redefine functions which write to hardware page tables at the sub-architecture layer. Previously, this was done by encapsulation in a split mach-xxx/pgtable-{2|3}level-ops.h file, but having 8 pgtable header files is simply unacceptable. This goes some ways towards cleaning that up by deprecating the 2/3 level subarch functions. This is accomplished by using __HAVE_ARCH_FUNC macros, and allowing one sub-arch file, pgtable-ops.h, which gets included before any functions which write to hardware page tables, allowing the sub-architecture to override any or all definitions it needs. Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton --- include/asm-i386/mach-default/pgtable-2level-ops.h | 15 ----- include/asm-i386/mach-default/pgtable-3level-ops.h | 37 -------------- include/asm-i386/mach-default/pgtable-ops.h | 34 ------------- include/asm-i386/pgtable-2level.h | 21 ++++++++ include/asm-i386/pgtable-3level.h | 54 +++++++++++++++++++++ include/asm-i386/pgtable.h | 49 +++++++++++++++++-- 6 files changed, 119 insertions(+), 91 deletions(-) diff -puN include/asm-i386/mach-default/pgtable-2level-ops.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion include/asm-i386/mach-default/pgtable-2level-ops.h --- devel/include/asm-i386/mach-default/pgtable-2level-ops.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion 2005-08-17 18:18:30.000000000 -0700 +++ devel-akpm/include/asm-i386/mach-default/pgtable-2level-ops.h 2005-08-17 18:18:30.000000000 -0700 @@ -1,15 +0,0 @@ -#ifndef _MACH_PGTABLE_LEVEL_OPS_H -#define _MACH_PGTABLE_LEVEL_OPS_H - -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. 
- */ -#define set_pte(pteptr, pteval) (*(pteptr) = pteval) -#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) -#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) - -#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0)) - -#endif /* _PGTABLE_OPS_H */ diff -puN include/asm-i386/mach-default/pgtable-3level-ops.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion include/asm-i386/mach-default/pgtable-3level-ops.h --- devel/include/asm-i386/mach-default/pgtable-3level-ops.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion 2005-08-17 18:18:30.000000000 -0700 +++ devel-akpm/include/asm-i386/mach-default/pgtable-3level-ops.h 2005-08-17 18:18:30.000000000 -0700 @@ -1,37 +0,0 @@ -#ifndef _MACH_PGTABLE_LEVEL_OPS_H -#define _MACH_PGTABLE_LEVEL_OPS_H - -/* Rules for using set_pte: the pte being assigned *must* be - * either not present or in a state where the hardware will - * not attempt to update the pte. In places where this is - * not possible, use pte_get_and_clear to obtain the old pte - * value and then use set_pte to update it. 
-ben - */ -static inline void set_pte(pte_t *ptep, pte_t pte) -{ - ptep->pte_high = pte.pte_high; - smp_wmb(); - ptep->pte_low = pte.pte_low; -} - -#define __HAVE_ARCH_SET_PTE_ATOMIC -#define set_pte_atomic(pteptr,pteval) \ - set_64bit((unsigned long long *)(pteptr),pte_val(pteval)) -#define set_pmd(pmdptr,pmdval) \ - set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval)) -#define set_pud(pudptr,pudval) \ - (*(pudptr) = (pudval)) - -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - pte_t res; - - /* xchg acts as a barrier before the setting of the high bits */ - res.pte_low = xchg(&ptep->pte_low, 0); - res.pte_high = ptep->pte_high; - ptep->pte_high = 0; - - return res; -} - -#endif diff -puN include/asm-i386/mach-default/pgtable-ops.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion include/asm-i386/mach-default/pgtable-ops.h --- devel/include/asm-i386/mach-default/pgtable-ops.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion 2005-08-17 18:18:30.000000000 -0700 +++ devel-akpm/include/asm-i386/mach-default/pgtable-ops.h 2005-08-17 18:18:30.000000000 -0700 @@ -25,31 +25,6 @@ #ifndef _PGTABLE_OPS_H #define _PGTABLE_OPS_H -#ifdef CONFIG_X86_PAE -# include <pgtable-3level-ops.h> -#else -# include <pgtable-2level-ops.h> -#endif - -static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ - if (!pte_dirty(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); -} - -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ - if (!pte_young(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); -} - -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - clear_bit(_PAGE_BIT_RW, &ptep->pte_low); -} - /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * @@ -65,13 +40,4 @@ static inline void clone_pgd_range(pgd_t memcpy(dst, src, count *
sizeof(pgd_t)); } -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ - do { \ - if (__dirty) { \ - (__ptep)->pte_low = (__entry).pte_low; \ - flush_tlb_page(__vma, __address); \ - } \ - } while (0) - #endif /* _PGTABLE_OPS_H */ diff -puN include/asm-i386/pgtable-2level.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion include/asm-i386/pgtable-2level.h --- devel/include/asm-i386/pgtable-2level.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion 2005-08-17 18:18:30.000000000 -0700 +++ devel-akpm/include/asm-i386/pgtable-2level.h 2005-08-17 18:18:30.000000000 -0700 @@ -55,4 +55,25 @@ static inline int pte_exec_kernel(pte_t #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +#ifndef __HAVE_ARCH_SET_PTE +#define __HAVE_ARCH_SET_PTE +#define set_pte(pteptr, pteval) (*(pteptr) = pteval) +#endif +#define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval) + +#ifndef __HAVE_ARCH_SET_PMD +#define __HAVE_ARCH_SET_PMD +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) +#endif + +#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0)) +#endif + #endif /* _I386_PGTABLE_2LEVEL_H */ diff -puN include/asm-i386/pgtable-3level.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion include/asm-i386/pgtable-3level.h --- devel/include/asm-i386/pgtable-3level.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion 2005-08-17 18:18:30.000000000 -0700 +++ devel-akpm/include/asm-i386/pgtable-3level.h 2005-08-17 18:18:30.000000000 -0700 @@ -123,4 +123,58 @@ static inline pmd_t pfn_pmd(unsigned lon #define __pmd_free_tlb(tlb, x) do { } while (0) +/* + * Sub-arch 
is allowed to override these, so check for definition first. + * New functions which write to hardware page table entries should go here. + */ + +/* Rules for using set_pte: the pte being assigned *must* be + * either not present or in a state where the hardware will + * not attempt to update the pte. In places where this is + * not possible, use pte_get_and_clear to obtain the old pte + * value and then use set_pte to update it. -ben + */ +#ifndef __HAVE_ARCH_SET_PTE +#define __HAVE_ARCH_SET_PTE +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} +#endif + +#ifndef __HAVE_ARCH_SET_PTE_ATOMIC +#define __HAVE_ARCH_SET_PTE_ATOMIC +#define set_pte_atomic(pteptr,pteval) \ + set_64bit((unsigned long long *)(pteptr),pte_val(pteval)) +#endif + +#ifndef __HAVE_ARCH_SET_PMD +#define __HAVE_ARCH_SET_PMD +#define set_pmd(pmdptr,pmdval) \ + set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval)) +#endif + +#ifndef __HAVE_ARCH_SET_PUD +#define __HAVE_ARCH_SET_PUD +#define set_pud(pudptr,pudval) \ + (*(pudptr) = (pudval)) +#endif + +#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_t res; + + /* xchg acts as a barrier before the setting of the high bits */ + res.pte_low = xchg(&ptep->pte_low, 0); + res.pte_high = ptep->pte_high; + ptep->pte_high = 0; + + return res; +} +#endif + #endif /* _I386_PGTABLE_3LEVEL_H */ diff -puN include/asm-i386/pgtable.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion include/asm-i386/pgtable.h --- devel/include/asm-i386/pgtable.h~i386-virtualization-attempt-to-clean-up-pgtable-code-motion 2005-08-17 18:18:30.000000000 -0700 +++ devel-akpm/include/asm-i386/pgtable.h 2005-08-17 18:18:30.000000000 -0700 @@ -236,12 +236,55 @@ static inline pte_t pte_mkyoung(pte_t pt static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= 
_PAGE_RW; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PRESENT | _PAGE_PSE; return pte; } +#include <pgtable-ops.h> #ifdef CONFIG_X86_PAE # include <asm/pgtable-3level.h> #else # include <asm/pgtable-2level.h> #endif -#include <pgtable-ops.h> + +/* + * We give sub-architectures a chance to override functions which write to page + * tables, thus we check for existing definitions first. + */ +#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ + if (!pte_dirty(*ptep)) + return 0; + return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); +} +#endif + +#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ + if (!pte_young(*ptep)) + return 0; + return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); +} +#endif + +#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + clear_bit(_PAGE_BIT_RW, &ptep->pte_low); +} +#endif + +#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ + do { \ + if (__dirty) { \ + (__ptep)->pte_low = (__entry).pte_low; \ + flush_tlb_page(__vma, __address); \ + } \ + } while (0) +#endif #define set_pte_at(mm,addr,pteptr,pteval) set_pte(pteptr,pteval) @@ -401,11 +444,7 @@ extern void noexec_setup(const char *str #define GET_IOSPACE(pfn) 0 #define GET_PFN(pfn) (pfn) -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL -#define __HAVE_ARCH_PTEP_SET_WRPROTECT #define __HAVE_ARCH_PTE_SAME #include <asm-generic/pgtable.h> _