reslabify-pgds-and-pmds-2: allocate i386 pgds and pmds from slab caches

What the hunks below do:

- init.c: pgtable_cache_init() is no longer PAE-only.  It always creates
  the "pgd" cache (constructor pgd_ctor; destructor pgd_dtor only when
  PTRS_PER_PMD == 1) and creates the "pmd" cache (constructor pmd_ctor)
  when PTRS_PER_PMD > 1.  The old "pae_pgd" cache is removed.
- pgtable.c: the separate PAE / non-PAE pgd_alloc()/pgd_free() pairs are
  merged into one pair built on pgd_cache and pmd_cache.  When
  PTRS_PER_PMD == 1 the constructor/destructor link and unlink the pgd's
  page on pgd_list under pgd_lock (taken with irqsave).
- pageattr.c: set_pmd_pte() returns early when PTRS_PER_PMD > 1 and
  otherwise walks pgd_list under pgd_lock instead of walking
  init_mm.mmlist under mmlist_lock.
- pgtable.h / pgtable-3level.h: declarations follow the above; the no-op
  pgtable_cache_init() macro and the pae_pgd_cachep extern are removed.

NOTE(review): this copy was recovered from a version whose line breaks
had been collapsed and whose #include targets had been stripped.  Hunk
line counts and the diffstat were used to place every blank line; they
all balance.  Indentation is restored per kernel style, and trailing
whitespace on context lines could not be recovered -- apply with
"patch -l" if a context line fails to match.  The #include targets
(<linux/highmem.h>, <linux/slab.h>, <linux/pagemap.h>, <linux/spinlock.h>,
<linux/list.h>, <asm/system.h>, <asm/pgtable.h>, <asm/bitops.h>,
<asm/pgtable-3level.h>, <asm/pgtable-2level.h>) are restored from the
2.6 tree -- TODO confirm against the target tree before applying.

---

 25-akpm/arch/i386/mm/init.c               |   30 ++++---
 25-akpm/arch/i386/mm/pageattr.c           |   25 +++---
 25-akpm/arch/i386/mm/pgtable.c            |  122 ++++++++++++++++++++----------
 25-akpm/include/asm-i386/pgtable-3level.h |    2
 25-akpm/include/asm-i386/pgtable.h        |   30 +++----
 5 files changed, 134 insertions(+), 75 deletions(-)

diff -puN arch/i386/mm/init.c~reslabify-pgds-and-pmds-2 arch/i386/mm/init.c
--- 25/arch/i386/mm/init.c~reslabify-pgds-and-pmds-2	2004-04-05 18:47:01.040810568 -0700
+++ 25-akpm/arch/i386/mm/init.c	2004-04-05 18:47:01.050809048 -0700
@@ -523,20 +523,30 @@ void __init mem_init(void)
 #endif
 }
 
-#ifdef CONFIG_X86_PAE
-struct kmem_cache_s *pae_pgd_cachep;
+kmem_cache_t *pgd_cache;
+kmem_cache_t *pmd_cache;
 
 void __init pgtable_cache_init(void)
 {
-	/*
-	 * PAE pgds must be 16-byte aligned:
-	 */
-	pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0,
-		SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL);
-	if (!pae_pgd_cachep)
-		panic("init_pae(): Cannot alloc pae_pgd SLAB cache");
+	if (PTRS_PER_PMD > 1) {
+		pmd_cache = kmem_cache_create("pmd",
+					PTRS_PER_PMD*sizeof(pmd_t),
+					PTRS_PER_PMD*sizeof(pmd_t),
+					0,
+					pmd_ctor,
+					NULL);
+		if (!pmd_cache)
+			panic("pgtable_cache_init(): cannot create pmd cache");
+	}
+	pgd_cache = kmem_cache_create("pgd",
+				PTRS_PER_PGD*sizeof(pgd_t),
+				PTRS_PER_PGD*sizeof(pgd_t),
+				0,
+				pgd_ctor,
+				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+	if (!pgd_cache)
+		panic("pgtable_cache_init(): Cannot create pgd cache");
 }
-#endif
 
 /*
  * This function cannot be __init, since exceptions don't work in that
diff -puN arch/i386/mm/pageattr.c~reslabify-pgds-and-pmds-2 arch/i386/mm/pageattr.c
--- 25/arch/i386/mm/pageattr.c~reslabify-pgds-and-pmds-2	2004-04-05 18:47:01.042810264 -0700
+++ 25-akpm/arch/i386/mm/pageattr.c	2004-04-05 18:47:01.050809048 -0700
@@ -67,19 +67,22 @@ static void flush_kernel_map(void *dummy
 
 static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
+	struct page *page;
+	unsigned long flags;
+
 	set_pte_atomic(kpte, pte); 	/* change init_mm */
-#ifndef CONFIG_X86_PAE
-	{
-		struct list_head *l;
-		spin_lock(&mmlist_lock);
-		list_for_each(l, &init_mm.mmlist) {
-			struct mm_struct *mm = list_entry(l, struct mm_struct, mmlist);
-			pmd_t *pmd = pmd_offset(pgd_offset(mm, address), address);
-			set_pte_atomic((pte_t *)pmd, pte);
-		}
-		spin_unlock(&mmlist_lock);
+	if (PTRS_PER_PMD > 1)
+		return;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	for (page = pgd_list; page; page = (struct page *)page->index) {
+		pgd_t *pgd;
+		pmd_t *pmd;
+		pgd = (pgd_t *)page_address(page) + pgd_index(address);
+		pmd = pmd_offset(pgd, address);
+		set_pte_atomic((pte_t *)pmd, pte);
 	}
-#endif
+	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 /*
diff -puN arch/i386/mm/pgtable.c~reslabify-pgds-and-pmds-2 arch/i386/mm/pgtable.c
--- 25/arch/i386/mm/pgtable.c~reslabify-pgds-and-pmds-2	2004-04-05 18:47:01.043810112 -0700
+++ 25-akpm/arch/i386/mm/pgtable.c	2004-04-05 18:47:01.051808896 -0700
@@ -12,6 +12,7 @@
 #include <linux/highmem.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
+#include <linux/spinlock.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -151,61 +152,108 @@ struct page *pte_alloc_one(struct mm_str
 	return pte;
 }
 
-#ifdef CONFIG_X86_PAE
+void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
+{
+	memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
+}
 
-pgd_t *pgd_alloc(struct mm_struct *mm)
+/*
+ * List of all pgd's needed for non-PAE so it can invalidate entries
+ * in both cached and uncached pgd's; not needed for PAE since the
+ * kernel pmd is shared. If PAE were not to share the pmd a similar
+ * tactic would be needed. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ * If the locking proves to be non-performant, a ticketing scheme with
+ * checks at dup_mmap(), exec(), and other mmlist addition points
+ * could be used. The locking scheme was chosen on the basis of
+ * manfred's recommendations and having no core impact whatsoever.
+ * -- wli
+ */
+spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED;
+struct page *pgd_list;
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+	page->index = (unsigned long)pgd_list;
+	if (pgd_list)
+		pgd_list->private = (unsigned long)&page->index;
+	pgd_list = page;
+	page->private = (unsigned long)&pgd_list;
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+	struct page *next, **pprev, *page = virt_to_page(pgd);
+	next = (struct page *)page->index;
+	pprev = (struct page **)page->private;
+	*pprev = next;
+	if (next)
+		next->private = (unsigned long)pprev;
+}
+
+void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
 {
-	int i;
-	pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL);
+	unsigned long flags;
 
-	if (pgd) {
-		for (i = 0; i < USER_PTRS_PER_PGD; i++) {
-			unsigned long pmd = __get_free_page(GFP_KERNEL);
-			if (!pmd)
-				goto out_oom;
-			clear_page(pmd);
-			set_pgd(pgd + i, __pgd(1 + __pa(pmd)));
-		}
-		memcpy(pgd + USER_PTRS_PER_PGD,
+	if (PTRS_PER_PMD == 1)
+		spin_lock_irqsave(&pgd_lock, flags);
+
+	memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
 			swapper_pg_dir + USER_PTRS_PER_PGD,
 			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
-	}
-	return pgd;
-out_oom:
-	for (i--; i >= 0; i--)
-		free_page((unsigned long)__va(pgd_val(pgd[i])-1));
-	kmem_cache_free(pae_pgd_cachep, pgd);
-	return NULL;
+
+	if (PTRS_PER_PMD > 1)
+		return;
+
+	pgd_list_add(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
 }
 
-void pgd_free(pgd_t *pgd)
+/* never called when PTRS_PER_PMD > 1 */
+void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
 {
-	int i;
+	unsigned long flags; /* can be called from interrupt context */
 
-	for (i = 0; i < USER_PTRS_PER_PGD; i++)
-		free_page((unsigned long)__va(pgd_val(pgd[i])-1));
-	kmem_cache_free(pae_pgd_cachep, pgd);
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-#else
-
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+	int i;
+	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
 
-	if (pgd) {
-		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
-		memcpy(pgd + USER_PTRS_PER_PGD,
-			swapper_pg_dir + USER_PTRS_PER_PGD,
-			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+	if (PTRS_PER_PMD == 1 || !pgd)
+		return pgd;
+
+	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+		if (!pmd)
+			goto out_oom;
+		set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd))));
 	}
 	return pgd;
+
+out_oom:
+	for (i--; i >= 0; i--)
+		kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+	kmem_cache_free(pgd_cache, pgd);
+	return NULL;
 }
 
 void pgd_free(pgd_t *pgd)
 {
-	free_page((unsigned long)pgd);
-}
-
-#endif /* CONFIG_X86_PAE */
+	int i;
 
+	/* in the PAE case user pgd entries are overwritten before usage */
+	if (PTRS_PER_PMD > 1)
+		for (i = 0; i < USER_PTRS_PER_PGD; ++i)
+			kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+	/* in the non-PAE case, clear_page_tables() clears user pgd entries */
+	kmem_cache_free(pgd_cache, pgd);
+}
diff -puN include/asm-i386/pgtable-3level.h~reslabify-pgds-and-pmds-2 include/asm-i386/pgtable-3level.h
--- 25/include/asm-i386/pgtable-3level.h~reslabify-pgds-and-pmds-2	2004-04-05 18:47:01.045809808 -0700
+++ 25-akpm/include/asm-i386/pgtable-3level.h	2004-04-05 18:47:01.052808744 -0700
@@ -123,6 +123,4 @@ static inline pmd_t pfn_pmd(unsigned lon
 #define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) })
 #define PTE_FILE_MAX_BITS       32
 
-extern struct kmem_cache_s *pae_pgd_cachep;
-
 #endif /* _I386_PGTABLE_3LEVEL_H */
diff -puN include/asm-i386/pgtable.h~reslabify-pgds-and-pmds-2 include/asm-i386/pgtable.h
--- 25/include/asm-i386/pgtable.h~reslabify-pgds-and-pmds-2	2004-04-05 18:47:01.046809656 -0700
+++ 25-akpm/include/asm-i386/pgtable.h	2004-04-05 18:47:01.052808744 -0700
@@ -21,15 +21,27 @@
 #include <asm/bitops.h>
 #endif
 
-extern pgd_t swapper_pg_dir[1024];
-extern void paging_init(void);
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
 
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
  */
-extern unsigned long empty_zero_page[1024];
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+extern unsigned long empty_zero_page[1024];
+extern pgd_t swapper_pg_dir[1024];
+extern kmem_cache_t *pgd_cache;
+extern kmem_cache_t *pmd_cache;
+extern spinlock_t pgd_lock;
+extern struct page *pgd_list;
+
+void pmd_ctor(void *, kmem_cache_t *, unsigned long);
+void pgd_ctor(void *, kmem_cache_t *, unsigned long);
+void pgd_dtor(void *, kmem_cache_t *, unsigned long);
+void pgtable_cache_init(void);
+void paging_init(void);
 
 #endif /* !__ASSEMBLY__ */
 
@@ -41,20 +53,8 @@ extern unsigned long empty_zero_page[102
 #ifndef __ASSEMBLY__
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
-
-/*
- * Need to initialise the X86 PAE caches
- */
-extern void pgtable_cache_init(void);
-
 #else
 # include <asm/pgtable-2level.h>
-
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()	do { } while (0)
-
 #endif
 #endif
 
_