From: Manfred Spraul

Manfred's latest page unmapping debug patch.


 arch/i386/Kconfig             |    8 ++
 arch/i386/kernel/cpu/common.c |    8 ++
 arch/i386/mm/fault.c          |    3 
 arch/i386/mm/pageattr.c       |   80 ++++++++++++-----------
 include/asm-i386/cacheflush.h |    7 ++
 include/linux/slab.h          |    2 
 mm/page_alloc.c               |   15 +++-
 mm/slab.c                     |  145 +++++++++++++++++++++++++++++++++++++++++-
 8 files changed, 225 insertions(+), 43 deletions(-)

diff -puN arch/i386/Kconfig~unmap-page-debugging-2 arch/i386/Kconfig
--- 25/arch/i386/Kconfig~unmap-page-debugging-2	2003-06-06 20:49:28.000000000 -0700
+++ 25-akpm/arch/i386/Kconfig	2003-06-06 20:49:28.000000000 -0700
@@ -1559,6 +1559,14 @@ config SPINLINE
 	  itself (as ".text.lock.filename"). This can be helpful for finding
 	  the callers of locks.
 
+config DEBUG_PAGEALLOC
+	bool "Page alloc debugging"
+	depends on DEBUG_SLAB
+	help
+	  Unmap pages from the kernel linear mapping after free_pages().
+	  This results in a large slowdown, but helps to find certain types
+	  of memory corruptions.
+
 config DEBUG_HIGHMEM
 	bool "Highmem debugging"
 	depends on DEBUG_KERNEL && HIGHMEM

diff -puN arch/i386/kernel/cpu/common.c~unmap-page-debugging-2 arch/i386/kernel/cpu/common.c
--- 25/arch/i386/kernel/cpu/common.c~unmap-page-debugging-2	2003-06-06 20:49:28.000000000 -0700
+++ 25-akpm/arch/i386/kernel/cpu/common.c	2003-06-06 20:49:28.000000000 -0700
@@ -430,6 +430,14 @@ void __init early_cpu_init(void)
 	rise_init_cpu();
 	nexgen_init_cpu();
 	umc_init_cpu();
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/* pse is not compatible with on-the-fly unmapping,
+	 * disable it even if the cpus claim to support it.
+	 */
+	clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+	disable_pse = 1;
+#endif
 }
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already

diff -puN arch/i386/mm/fault.c~unmap-page-debugging-2 arch/i386/mm/fault.c
--- 25/arch/i386/mm/fault.c~unmap-page-debugging-2	2003-06-06 20:49:28.000000000 -0700
+++ 25-akpm/arch/i386/mm/fault.c	2003-06-06 20:49:28.000000000 -0700
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -270,7 +271,9 @@ no_context:
 	}
 #endif
 	die("Oops", regs, error_code);
+	ptrinfo(address);
 	bust_spinlocks(0);
+for(;;);
 	do_exit(SIGKILL);
 
 	/*

diff -puN arch/i386/mm/pageattr.c~unmap-page-debugging-2 arch/i386/mm/pageattr.c
--- 25/arch/i386/mm/pageattr.c~unmap-page-debugging-2	2003-06-06 20:49:28.000000000 -0700
+++ 25-akpm/arch/i386/mm/pageattr.c	2003-06-06 20:49:28.000000000 -0700
@@ -13,6 +13,10 @@
 #include
 #include
 
+static spinlock_t cpa_lock = SPIN_LOCK_UNLOCKED;
+static struct list_head df_list = LIST_HEAD_INIT(df_list);
+
+
 static inline pte_t *lookup_address(unsigned long address)
 {
 	pgd_t *pgd = pgd_offset_k(address);
@@ -31,10 +35,15 @@ static struct page *split_large_page(uns
 {
 	int i;
 	unsigned long addr;
-	struct page *base = alloc_pages(GFP_KERNEL, 0);
+	struct page *base;
 	pte_t *pbase;
+
+	spin_unlock_irq(&cpa_lock);
+	base = alloc_pages(GFP_KERNEL, 0);
+	spin_lock_irq(&cpa_lock);
 	if (!base)
 		return NULL;
+
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
@@ -90,7 +99,7 @@ static inline void revert_page(struct pa
 }
 
 static int
-__change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage)
+__change_page_attr(struct page *page, pgprot_t prot)
 {
 	pte_t *kpte;
 	unsigned long address;
@@ -126,7 +135,7 @@ __change_page_attr(struct page *page, pg
 	}
 
 	if (cpu_has_pse && (atomic_read(&kpte_page->count) == 1)) {
-		*oldpage = kpte_page;
+		list_add(&kpte_page->list, &df_list);
 		revert_page(kpte_page, address);
 	}
 	return 0;
@@ -137,12 +146,6 @@ static inline void flush_map(void)
 	on_each_cpu(flush_kernel_map, NULL, 1, 1);
 }
 
-struct deferred_page {
-	struct deferred_page *next;
-	struct page *fpage;
-};
-static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
-
 /*
  * Change the page attributes of an page in the linear mapping.
  *
@@ -159,47 +162,52 @@ static struct deferred_page *df_list; /*
 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 {
 	int err = 0;
-	struct page *fpage;
 	int i;
+	unsigned long flags;
 
-	down_write(&init_mm.mmap_sem);
+	spin_lock_irqsave(&cpa_lock, flags);
 	for (i = 0; i < numpages; i++, page++) {
-		fpage = NULL;
-		err = __change_page_attr(page, prot, &fpage);
+		err = __change_page_attr(page, prot);
 		if (err)
 			break;
-		if (fpage) {
-			struct deferred_page *df;
-			df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL);
-			if (!df) {
-				flush_map();
-				__free_page(fpage);
-			} else {
-				df->next = df_list;
-				df->fpage = fpage;
-				df_list = df;
-			}
-		}
 	}
-	up_write(&init_mm.mmap_sem);
+	spin_unlock_irqrestore(&cpa_lock, flags);
 	return err;
 }
 
 void global_flush_tlb(void)
 {
-	struct deferred_page *df, *next_df;
+	LIST_HEAD(l);
+	struct list_head* n;
 
-	down_read(&init_mm.mmap_sem);
-	df = xchg(&df_list, NULL);
-	up_read(&init_mm.mmap_sem);
+	BUG_ON(irqs_disabled());
+
+	spin_lock_irq(&cpa_lock);
+	list_splice_init(&df_list, &l);
+	spin_unlock_irq(&cpa_lock);
 	flush_map();
-	for (; df; df = next_df) {
-		next_df = df->next;
-		if (df->fpage)
-			__free_page(df->fpage);
-		kfree(df);
-	}
+	n = l.next;
+	while (n != &l) {
+		struct page *pg = list_entry(n, struct page, list);
+		n = n->next;
+		__free_page(pg);
+	}
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	/* the return value is ignored - the calls cannot fail,
+	 * large pages are disabled at boot time.
+	 */
+	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+	/* we should perform an IPI and flush all tlbs,
+	 * but that can deadlock->flush only current cpu.
+	 */
+	__flush_tlb_all();
+}
+EXPORT_SYMBOL(kernel_map_pages);
+#endif
+
 EXPORT_SYMBOL(change_page_attr);
 EXPORT_SYMBOL(global_flush_tlb);

diff -puN include/asm-i386/cacheflush.h~unmap-page-debugging-2 include/asm-i386/cacheflush.h
--- 25/include/asm-i386/cacheflush.h~unmap-page-debugging-2	2003-06-06 20:49:28.000000000 -0700
+++ 25-akpm/include/asm-i386/cacheflush.h	2003-06-06 20:49:28.000000000 -0700
@@ -17,4 +17,11 @@
 void global_flush_tlb(void);
 int change_page_attr(struct page *page, int numpages, pgprot_t prot);
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+/* internal debugging function */
+void kernel_map_pages(struct page *page, int numpages, int enable);
+#else
+static inline void kernel_map_pages(struct page *page, int numpages, int enable) { }
+#endif
+
 #endif /* _I386_CACHEFLUSH_H */

diff -puN include/linux/slab.h~unmap-page-debugging-2 include/linux/slab.h
--- 25/include/linux/slab.h~unmap-page-debugging-2	2003-06-06 20:49:28.000000000 -0700
+++ 25-akpm/include/linux/slab.h	2003-06-06 20:49:28.000000000 -0700
@@ -80,6 +80,8 @@ extern kmem_cache_t *signal_cachep;
 extern kmem_cache_t *sighand_cachep;
 extern kmem_cache_t *bio_cachep;
 
+void ptrinfo(unsigned long addr);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SLAB_H */

diff -puN mm/page_alloc.c~unmap-page-debugging-2 mm/page_alloc.c
--- 25/mm/page_alloc.c~unmap-page-debugging-2	2003-06-06 20:49:28.000000000 -0700
+++ 25-akpm/mm/page_alloc.c	2003-06-06 20:49:28.000000000 -0700
@@ -32,6 +32,8 @@
 #include
 #include
 
+#include
+
 DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
 DECLARE_BITMAP(memblk_online_map, MAX_NR_MEMBLKS);
 struct pglist_data *pgdat_list;
@@ -265,6 +267,7 @@ void __free_pages_ok(struct page *page,
 	mod_page_state(pgfree, 1 << order);
 	free_pages_check(__FUNCTION__, page);
 	list_add(&page->list, &list);
+	kernel_map_pages(page, 1<pageset[get_cpu()].pcp[cold];
@@ -556,7 +560,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 		    (!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		min += z->pages_low * sysctl_lower_zone_protection;
 	}
@@ -579,7 +583,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 		    (!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		min += local_min * sysctl_lower_zone_protection;
 	}
@@ -594,7 +598,7 @@ rebalance:
 
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		goto nopage;
 	}
@@ -622,7 +626,7 @@ rebalance:
 		    (!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		min += z->pages_low * sysctl_lower_zone_protection;
 	}
@@ -653,6 +657,9 @@ nopage:
 			current->comm, order, gfp_mask);
 	}
 	return NULL;
+got_pg:
+	kernel_map_pages(page, 1 << order, 1);
+	return page;
 }
 
 /*

diff -puN mm/slab.c~unmap-page-debugging-2 mm/slab.c
--- 25/mm/slab.c~unmap-page-debugging-2	2003-06-06 20:49:28.000000000 -0700
+++ 25-akpm/mm/slab.c	2003-06-06 20:50:02.000000000 -0700
@@ -89,7 +89,11 @@
 #include
 #include
 #include
+#include
+
 #include
+#include
+#include
 
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
@@ -773,6 +777,44 @@ static inline void kmem_freepages (kmem_
 }
 
 #if DEBUG
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned long caller)
+{
+	int size = cachep->objsize;
+	if (cachep->flags & SLAB_RED_ZONE) {
+		addr++;
+		size -= 2*BYTES_PER_WORD;
+	}
+	if (cachep->flags & SLAB_STORE_USER) {
+		size -= BYTES_PER_WORD;
+	}
+	if (size < 5*sizeof(unsigned long))
+		return;
+
+	*addr++=0x12345678;
+	*addr++=caller;
+	*addr++=smp_processor_id();
+	size -= 3*sizeof(unsigned long);
+	{
+		unsigned long *sptr = &caller;
+		unsigned long svalue;
+
+		while (((long) sptr & (THREAD_SIZE-1)) != 0) {
+			svalue = *sptr++;
+			if (kernel_text_address(svalue)) {
+				*addr++=svalue;
+				size -= sizeof(unsigned long);
+				if (size <= sizeof(unsigned long))
+					break;
+			}
+		}
+
+	}
+	*addr++=0x87654321;
+}
+#endif
+
 static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val)
 {
 	int size = cachep->objsize;
@@ -787,6 +829,8 @@ static void poison_obj(kmem_cache_t *cac
 	*(unsigned char *)(addr+size-1) = POISON_END;
 }
 
+#ifndef CONFIG_DEBUG_PAGEALLOC
+
 static void *scan_poisoned_obj(unsigned char* addr, unsigned int size)
 {
 	unsigned char *end;
@@ -853,6 +897,7 @@ static void check_poison_obj(kmem_cache_
 	}
 }
 #endif
+#endif
 
 /* Destroy all the objs in a slab, and release the mem back to the system.
  * Before calling the slab must have been unlinked from the cache.
@@ -866,8 +911,14 @@ static void slab_destroy (kmem_cache_t *
 		void *objp = slabp->s_mem + cachep->objsize * i;
 		int objlen = cachep->objsize;
 
-		if (cachep->flags & SLAB_POISON)
+		if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+			if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
+				kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE,1);
+#else
 			check_poison_obj(cachep, objp);
+#endif
+		}
 		if (cachep->flags & SLAB_STORE_USER)
 			objlen -= BYTES_PER_WORD;
 
@@ -960,6 +1011,10 @@ kmem_cache_create (const char *name, siz
 	}
 
 #if FORCED_DEBUG
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	if (size < PAGE_SIZE-3*BYTES_PER_WORD && size > 128)
+		size = PAGE_SIZE-3*BYTES_PER_WORD;
+#endif
 	/*
 	 * Enable redzoning and last user accounting, except
 	 * - for caches with forced alignment: redzoning would violate the
@@ -1411,6 +1466,8 @@ static void cache_init_objs (kmem_cache_
 				slab_error(cachep, "constructor overwrote the"
 							" start of an object");
 		}
+		if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
+			kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0);
 #else
 		if (cachep->ctor)
 			cachep->ctor(objp, cachep, ctor_flags);
@@ -1607,9 +1664,16 @@ static inline void *cache_free_debugchec
 		else
 			cachep->dtor(objp, cachep, 0);
 	}
-	if (cachep->flags & SLAB_POISON)
+	if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+		store_stackinfo(cachep, objp, POISON_AFTER);
+		if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
+			kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0);
+#else
 		poison_obj(cachep, objp, POISON_AFTER);
 #endif
+	}
+#endif
 	return objp;
 }
 
@@ -1624,6 +1688,7 @@ static inline void check_slabp(kmem_cach
 	for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
 		entries++;
 		BUG_ON(entries > cachep->num);
+		BUG_ON(i < 0 || i >= cachep->num);
 	}
 	BUG_ON(entries != cachep->num - slabp->inuse);
 #endif
@@ -1753,8 +1818,15 @@ cache_alloc_debugcheck_after(kmem_cache_
 
 	if (!objp)
 		return objp;
-	if (cachep->flags & SLAB_POISON)
+	if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+		if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
+			kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 1);
+		poison_obj(cachep, objp, POISON_AFTER);
+#else
 		check_poison_obj(cachep, objp);
+#endif
+	}
 	if (cachep->flags & SLAB_STORE_USER) {
 		objlen -= BYTES_PER_WORD;
 		*((void **)(objp+objlen)) = caller;
@@ -2631,3 +2703,70 @@ unsigned int ksize(const void *objp)
 
 	return size;
 }
+void ptrinfo(unsigned long addr)
+{
+	struct page *page;
+
+	printk("Dumping data about address %p.\n", (void*)addr);
+	if (!virt_addr_valid((void*)addr)) {
+		printk("virt addr invalid.\n");
+		return;
+	}
+	do {
+		pgd_t *pgd = pgd_offset_k(addr);
+		pmd_t *pmd;
+		if (pgd_none(*pgd)) {
+			printk("No pgd.\n");
+			break;
+		}
+		pmd = pmd_offset(pgd, addr);
+		if (pmd_none(*pmd)) {
+			printk("No pmd.\n");
+			break;
+		}
+#ifdef CONFIG_X86
+		if (pmd_large(*pmd)) {
+			printk("Large page.\n");
+			break;
+		}
+#endif
+		printk("normal page, pte_val 0x%llx\n",
+			(unsigned long long)pte_val(*pte_offset_kernel(pmd, addr)));
+	} while(0);
+
+	page = virt_to_page((void*)addr);
+	printk("struct page at %p, flags %lxh.\n", page, page->flags);
+	if (PageSlab(page)) {
+		kmem_cache_t *c;
+		struct slab *s;
+		unsigned long flags;
+		int objnr;
+		void *objp;
+
+		c = GET_PAGE_CACHE(page);
+		printk("belongs to cache %s.\n",c->name);
+
+		spin_lock_irqsave(&c->spinlock, flags);
+		s = GET_PAGE_SLAB(page);
+		printk("slabp %p with %d inuse objects (from %d).\n",
+			s, s->inuse, c->num);
+		check_slabp(c,s);
+
+		objnr = (addr-(unsigned long)s->s_mem)/c->objsize;
+		objp = s->s_mem+c->objsize*objnr;
+		printk("points into object no %d, starting at %p, len %d.\n",
+			objnr, objp, c->objsize);
+		if (objnr >= c->num) {
+			printk("Bad obj number.\n");
+		} else {
+			kernel_map_pages(virt_to_page(objp), c->objsize/PAGE_SIZE, 1);
+
+			printk("redzone: %lxh/%lxh/%lxh.\n",
+				((unsigned long*)objp)[0],
+				((unsigned long*)(objp+c->objsize))[-2],
+				((unsigned long*)(objp+c->objsize))[-1]);
+		}
+		spin_unlock_irqrestore(&c->spinlock, flags);
+
+	}
+}
_
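
For reference, a minimal sketch (not part of the patch) of the kind of use-after-free this option is meant to catch.  The module and function names below are hypothetical; the point is that an order-1 allocation is freed through __free_pages_ok(), which now unmaps the pages via kernel_map_pages(), so the stale read faults immediately instead of silently returning freed data, and the new ptrinfo() call in the i386 oops path dumps information about the bad address.

/*
 * Hypothetical test module, not part of the patch: with
 * CONFIG_DEBUG_PAGEALLOC enabled, free_pages() of an order-1 block goes
 * through __free_pages_ok(), which unmaps the pages from the kernel
 * linear mapping, so the read below oopses at once.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mm.h>

static int __init pagealloc_uaf_init(void)
{
	unsigned long *p = (unsigned long *)__get_free_pages(GFP_KERNEL, 1);

	if (!p)
		return -ENOMEM;

	p[0] = 0x12345678;
	free_pages((unsigned long)p, 1);

	/* use after free: immediate page fault with CONFIG_DEBUG_PAGEALLOC */
	printk(KERN_ERR "stale value: %lx\n", p[0]);
	return 0;
}

static void __exit pagealloc_uaf_exit(void)
{
}

module_init(pagealloc_uaf_init);
module_exit(pagealloc_uaf_exit);
MODULE_LICENSE("GPL");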