[ppc64] Fix {pte,pmd}_free vs. hash_page race by relaying actual deallocation with RCU, from Ben Herrenschmidt

PTE_FREELIST_SIZE must subtract the batch header from PAGE_SIZE before
dividing by the pointer size; otherwise the computed capacity is far larger
than what fits in the single page backing a batch.

---

 arch/ppc64/mm/init.c        |   51 ++++++++++++++++++++++++++++++++++++++++
 include/asm-ppc64/pgalloc.h |   64 +++++++++++++++++++++++++++++++++++++++++++++-----
 include/asm-ppc64/tlb.h     |    4 +++
 3 files changed, 113 insertions(+), 6 deletions(-)

diff -puN arch/ppc64/mm/init.c~ppc64-hash_page_race arch/ppc64/mm/init.c
--- 25/arch/ppc64/mm/init.c~ppc64-hash_page_race	2004-01-13 23:23:05.000000000 -0800
+++ 25-akpm/arch/ppc64/mm/init.c	2004-01-13 23:23:05.000000000 -0800
@@ -94,6 +94,57 @@ unsigned long __max_memory;
  * include/asm-ppc64/tlb.h file -- tgall
  */
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+unsigned long pte_freelist_forced_free;
+
+static void pte_free_smp_sync(void *arg)
+{
+	/* Do nothing, just ensure we sync with all CPUs */
+}
+
+/* This is only called when we are critically out of memory
+ * (and fail to get a page in pte_free_tlb).
+ */
+void pte_free_now(struct page *ptepage)
+{
+	pte_freelist_forced_free++;
+
+	smp_call_function(pte_free_smp_sync, NULL, 0, 1);
+
+	pte_free(ptepage);
+}
+
+/* RCU callback: free every PTE page queued in the batch, then the page
+ * that held the batch itself.
+ */
+static void pte_free_rcu_callback(void *arg)
+{
+	struct pte_freelist_batch *batch = arg;
+	unsigned int i;
+
+	for (i = 0; i < batch->index; i++)
+		pte_free(batch->pages[i]);
+	free_page((unsigned long)batch);
+}
+
+/* Hand a batch over to RCU; pte_free_rcu_callback frees its pages later. */
+void pte_free_submit(struct pte_freelist_batch *batch)
+{
+	INIT_RCU_HEAD(&batch->rcu);
+	call_rcu(&batch->rcu, pte_free_rcu_callback, batch);
+}
+
+/* Submit this CPU's partially filled batch, if any; called by tlb_flush. */
+void pte_free_finish(void)
+{
+	/* This is safe as we are holding page_table_lock */
+	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+
+	if (*batchp == NULL)
+		return;
+	pte_free_submit(*batchp);
+	*batchp = NULL;
+}
 
 void show_mem(void)
 {
diff -puN include/asm-ppc64/pgalloc.h~ppc64-hash_page_race include/asm-ppc64/pgalloc.h
--- 25/include/asm-ppc64/pgalloc.h~ppc64-hash_page_race	2004-01-13 23:23:05.000000000 -0800
+++ 25-akpm/include/asm-ppc64/pgalloc.h	2004-01-13 23:23:05.000000000 -0800
@@ -3,7 +3,10 @@
 
 #include
 #include
+#include
+#include
 #include
+#include
 
 extern kmem_cache_t *zero_cache;
 
@@ -40,8 +43,6 @@ pmd_free(pmd_t *pmd)
 	kmem_cache_free(zero_cache, pmd);
 }
 
-#define __pmd_free_tlb(tlb, pmd)	pmd_free(pmd)
-
 #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte)
 #define pmd_populate(mm, pmd, pte_page) \
 	pmd_populate_kernel(mm, pmd, page_address(pte_page))
@@ -62,15 +63,66 @@ pte_alloc_one(struct mm_struct *mm, unsi
 
 	return NULL;
 }
-	
-static inline void
-pte_free_kernel(pte_t *pte)
+
+static inline void pte_free_kernel(pte_t *pte)
 {
 	kmem_cache_free(zero_cache, pte);
 }
 
 #define pte_free(pte_page)	pte_free_kernel(page_address(pte_page))
-#define __pte_free_tlb(tlb, pte)	pte_free(pte)
+
+/* A batch of PTE pages whose freeing is deferred through call_rcu().  The
+ * batch lives in a single page: this header followed by the page pointers.
+ */
+struct pte_freelist_batch
+{
+	struct rcu_head	rcu;
+	unsigned int	index;
+	struct page *	pages[0];
+};
+
+/* Number of page pointers that fit in the batch page after the header. */
+#define PTE_FREELIST_SIZE	((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \
+				  sizeof(struct page *))
+
+extern void pte_free_now(struct page *ptepage);
+extern void pte_free_submit(struct pte_freelist_batch *batch);
+
+DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+
+/* Free a PTE page.  When the mm has fewer than two users or its cpu_vm_mask
+ * holds only this CPU, free it at once.  Otherwise queue it on this CPU's
+ * batch, which is submitted to RCU as soon as it is full.  If no page can
+ * be allocated for a new batch, fall back to pte_free_now().
+ */
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
+{
+	/* This is safe as we are holding page_table_lock */
+	cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
+	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+
+	if (atomic_read(&tlb->mm->mm_users) < 2 ||
+	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
+		pte_free(ptepage);
+		return;
+	}
+
+	if (*batchp == NULL) {
+		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
+		if (*batchp == NULL) {
+			pte_free_now(ptepage);
+			return;
+		}
+		(*batchp)->index = 0;
+	}
+	(*batchp)->pages[(*batchp)->index++] = ptepage;
+	if ((*batchp)->index == PTE_FREELIST_SIZE) {
+		pte_free_submit(*batchp);
+		*batchp = NULL;
+	}
+}
+
+#define __pmd_free_tlb(tlb, pmd)	__pte_free_tlb(tlb, virt_to_page(pmd))
 
 #define check_pgt_cache()	do { } while (0)
 
diff -puN include/asm-ppc64/tlb.h~ppc64-hash_page_race include/asm-ppc64/tlb.h
--- 25/include/asm-ppc64/tlb.h~ppc64-hash_page_race	2004-01-13 23:23:05.000000000 -0800
+++ 25-akpm/include/asm-ppc64/tlb.h	2004-01-13 23:23:05.000000000 -0800
@@ -74,6 +74,8 @@ static inline void __tlb_remove_tlb_entr
 	batch->index = i;
 }
 
+extern void pte_free_finish(void);
+
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	int cpu = smp_processor_id();
@@ -86,6 +88,8 @@ static inline void tlb_flush(struct mmu_
 
 	flush_hash_range(tlb->mm->context, batch->index, local);
 	batch->index = 0;
+
+	pte_free_finish();
 }
 
 #endif /* _PPC64_TLB_H */

_