From: Alexander Nyberg

Introduces CONFIG_PAGE_OWNER, which keeps track of the call chain under
which a page was allocated.  Includes a user-space helper in
Documentation/page_owner.c to sort the enormous amount of output this may
give (thanks tridge).

The information is available through /proc/page_owner.

x86_64 introduces some stack noise in certain call chains, so for exact
output the use of x86 && CONFIG_FRAME_POINTER is suggested.

Tested on x86, x86 && CONFIG_FRAME_POINTER, and x86_64.

Output looks like:

4819 times:
Page allocated via order 0, mask 0x50
[0xc012b7b9] find_lock_page+25
[0xc012b8c8] find_or_create_page+152
[0xc0147d74] grow_dev_page+36
[0xc0148164] __find_get_block+84
[0xc0147ebc] __getblk_slow+124
[0xc0148164] __find_get_block+84
[0xc01481e7] __getblk+55
[0xc0185d14] do_readahead+100

We use a custom stack unwinder because using __builtin_return_address([0-7])
makes gcc generate code that walks the stack looking for function return
addresses and may "fall off" the stack, causing early panics when the call
chain is not deep enough.  With that approach we could only have used a
depth of around 3 functions in all traces (I experimented a bit with this).
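As an illustration only (not part of the patch), here is a minimal
user-space sketch of the same bounds-checked frame-pointer walk.  It
assumes x86 frames built with -fno-omit-frame-pointer, where each frame is
[saved ebp][return address], and it substitutes an arbitrary 64k window
for the kernel's THREAD_SIZE bound:

	/* gcc -m32 -fno-omit-frame-pointer -O0 unwind-sketch.c */
	#include <stdio.h>

	static void show_trace(void)
	{
		unsigned long bp, start;

		/* current frame pointer (%rbp and movq on x86_64) */
		asm("movl %%ebp, %0" : "=r" (bp));
		start = bp;

		/*
		 * Check every frame pointer before dereferencing it, so a
		 * shallow call chain terminates the walk instead of falling
		 * off the stack -- the failure mode __builtin_return_address(N)
		 * has for N > 0.
		 */
		while (bp >= start && bp < start + 0x10000) {
			/* return address sits one word above the saved ebp */
			printf("[0x%lx]\n", *(unsigned long *)(bp + sizeof(long)));
			bp = *(unsigned long *)bp;	/* caller's frame */
		}
	}

	static void c3(void) { show_trace(); }
	static void c2(void) { c3(); }
	static void c1(void) { c2(); }

	int main(void)
	{
		c1();
		return 0;
	}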
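The in-kernel unwinder below does not need such a guessed window: kernel
stacks are THREAD_SIZE-aligned, so masking the low bits off any on-stack
address yields the base of the stack (where thread_info lives), and
valid_stack_ptr() bounds the walk to that one stack.  A worked example of
the mask, using a made-up address and the 8k THREAD_SIZE of i386:

	#include <stdio.h>

	#define THREAD_SIZE 8192UL	/* illustrative: 8k stacks on i386 */

	int main(void)
	{
		unsigned long sp = 0xc75abe54UL; /* made-up on-stack address */
		unsigned long base = sp & ~(THREAD_SIZE - 1);

		/* prints "base: 0xc75aa000" -- the aligned stack bottom */
		printf("base: 0x%lx\n", base);
		return 0;
	}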
From: Dave Hansen

Make page_owner handle non-contiguous page ranges.

Signed-off-by: Alexander Nyberg
Signed-off-by: Dave Hansen
Signed-off-by: Andrew Morton
---

 Documentation/page_owner.c |  140 +++++++++++++++++++++++++++++++++++++++++++++
 fs/proc/proc_misc.c        |   68 +++++++++++++++++++++
 include/linux/mm.h         |    5 +
 lib/Kconfig.debug          |   10 +++
 mm/page_alloc.c            |   57 ++++++++++++++++++
 5 files changed, 280 insertions(+)

diff -puN /dev/null Documentation/page_owner.c
--- /dev/null	2003-09-15 06:40:47.000000000 -0700
+++ devel-akpm/Documentation/page_owner.c	2005-07-08 23:11:40.000000000 -0700
@@ -0,0 +1,140 @@
+/*
+ * User-space helper to sort the output of /proc/page_owner
+ *
+ * Example use:
+ *	cat /proc/page_owner > page_owner.txt
+ *	./sort page_owner.txt sorted_page_owner.txt
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+struct block_list {
+	char *txt;
+	int len;
+	int num;
+};
+
+
+static struct block_list *list;
+static int list_size;
+static int max_size;
+
+struct block_list *block_head;
+
+int read_block(char *buf, FILE *fin)
+{
+	int ret = 0;
+	int hit = 0;
+	char *curr = buf;
+
+	for (;;) {
+		*curr = getc(fin);
+		if (*curr == EOF) return -1;
+
+		ret++;
+		if (*curr == '\n' && hit == 1)	/* blank line ends a block */
+			return ret - 1;
+		else if (*curr == '\n')
+			hit = 1;
+		else
+			hit = 0;
+		curr++;
+	}
+}
+
+static int compare_txt(struct block_list *l1, struct block_list *l2)
+{
+	return strcmp(l1->txt, l2->txt);
+}
+
+static int compare_num(struct block_list *l1, struct block_list *l2)
+{
+	return l2->num - l1->num;
+}
+
+static void add_list(char *buf, int len)
+{
+	if (list_size != 0 &&
+	    len == list[list_size-1].len &&
+	    memcmp(buf, list[list_size-1].txt, len) == 0) {
+		list[list_size-1].num++;
+		return;
+	}
+	if (list_size == max_size) {
+		printf("max_size too small??\n");
+		exit(1);
+	}
+	list[list_size].txt = malloc(len+1);
+	list[list_size].len = len;
+	list[list_size].num = 1;
+	memcpy(list[list_size].txt, buf, len);
+	list[list_size].txt[len] = 0;
+	list_size++;
+	if (list_size % 1000 == 0) {
+		printf("loaded %d\r", list_size);
+		fflush(stdout);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	FILE *fin, *fout;
+	char buf[1024];
+	int ret, i, count;
+	struct block_list *list2;
+	struct stat st;
+
+	fin = fopen(argv[1], "r");
+	fout = fopen(argv[2], "w");
+	if (!fin || !fout) {
+		printf("Usage: ./program <input> <output>\n");
+		perror("open: ");
+		exit(2);
+	}
+
+	fstat(fileno(fin), &st);
+	max_size = st.st_size / 100; /* hack ... */
+
+	list = malloc(max_size * sizeof(*list));
+
+	for (;;) {
+		ret = read_block(buf, fin);
+		if (ret < 0)
+			break;
+
+		buf[ret] = '\0';
+		add_list(buf, ret);
+	}
+
+	printf("loaded %d\n", list_size);
+
+	printf("sorting ....\n");
+
+	qsort(list, list_size, sizeof(list[0]), compare_txt);
+
+	list2 = malloc(sizeof(*list) * list_size);
+
+	printf("culling\n");
+
+	for (i = count = 0; i < list_size; i++) {
+		if (count == 0 ||
+		    strcmp(list2[count-1].txt, list[i].txt) != 0) {
+			list2[count++] = list[i];
+		} else {
+			list2[count-1].num += list[i].num;
+		}
+	}
+
+	qsort(list2, count, sizeof(list[0]), compare_num);
+
+	for (i = 0; i < count; i++)
+		fprintf(fout, "%d times:\n%s\n", list2[i].num, list2[i].txt);
+
+	return 0;
+}
diff -puN fs/proc/proc_misc.c~page-owner-tracking-leak-detector fs/proc/proc_misc.c
--- devel/fs/proc/proc_misc.c~page-owner-tracking-leak-detector	2005-07-08 23:11:40.000000000 -0700
+++ devel-akpm/fs/proc/proc_misc.c	2005-07-08 23:11:40.000000000 -0700
+#ifdef CONFIG_PAGE_OWNER
+#include <linux/bootmem.h>
+#include <linux/kallsyms.h>
+static ssize_t
+read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	unsigned long start_pfn = min_low_pfn;
+	static unsigned long pfn;
+	struct page *page;
+	char *kbuf, *modname;
+	const char *symname;
+	int ret = 0, next_idx = 1;
+	char namebuf[128];
+	unsigned long offset = 0, symsize;
+	int i;
+
+	pfn = start_pfn + *ppos;
+	page = pfn_to_page(pfn);
+	for (; pfn < max_pfn; pfn++) {
+		if (!pfn_valid(pfn))
+			continue;
+		page = pfn_to_page(pfn);
+		if (page->order >= 0)
+			break;
+		next_idx++;
+	}
+
+	if (!pfn_valid(pfn))
+		return 0;
+
+	*ppos += next_idx;
+
+	kbuf = kmalloc(count, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	ret = snprintf(kbuf, count, "Page allocated via order %d, mask 0x%x\n",
+			page->order, page->gfp_mask);
+
+	for (i = 0; i < 8; i++) {
+		if (!page->trace[i])
+			break;
+		symname = kallsyms_lookup(page->trace[i], &symsize, &offset, &modname, namebuf);
+		ret += snprintf(kbuf + ret, count - ret, "[0x%lx] %s+%lu\n",
+				page->trace[i], namebuf, offset);
+	}
+
+	ret += snprintf(kbuf + ret, count - ret, "\n");
+
+	if (copy_to_user(buf, kbuf, ret))
+		ret = -EFAULT;
+
+	kfree(kbuf);
+	return ret;
+}
+
+static struct file_operations proc_page_owner_operations = {
+	.read = read_page_owner,
+};
+#endif
+
 struct proc_dir_entry *proc_root_kcore;
 
 void create_seq_entry(char *name, mode_t mode, struct file_operations *f)
@@ -637,4 +698,11 @@ void __init proc_misc_init(void)
 		entry->proc_fops = &ppc_htab_operations;
 	}
 #endif
+#ifdef CONFIG_PAGE_OWNER
+	entry = create_proc_entry("page_owner", S_IWUSR | S_IRUGO, NULL);
+	if (entry) {
+		entry->proc_fops = &proc_page_owner_operations;
+		entry->size = 1024;
+	}
+#endif
 }
diff -puN include/linux/mm.h~page-owner-tracking-leak-detector include/linux/mm.h
--- devel/include/linux/mm.h~page-owner-tracking-leak-detector	2005-07-08 23:11:40.000000000 -0700
+++ devel-akpm/include/linux/mm.h	2005-07-08 23:11:40.000000000 -0700
@@ -257,6 +257,11 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
+#ifdef CONFIG_PAGE_OWNER
+	int order;			/* -1 means not allocated */
+	unsigned int gfp_mask;
+	unsigned long trace[8];
+#endif
 };
 
 /*
diff -puN lib/Kconfig.debug~page-owner-tracking-leak-detector lib/Kconfig.debug
--- devel/lib/Kconfig.debug~page-owner-tracking-leak-detector	2005-07-08 23:11:40.000000000 -0700
+++ devel-akpm/lib/Kconfig.debug	2005-07-08 23:11:40.000000000 -0700
@@ -158,6 +158,16 @@ config DEBUG_IOREMAP
 	  automatically, but we'd like to make it more efficient by not
 	  having to do that.
 
+config PAGE_OWNER
+	bool "Track page owner"
+	depends on DEBUG_KERNEL && X86
+	help
+	  This keeps track of the call chain under which a page was allocated,
+	  which may help to find bare alloc_page(s) leaks.  Eats a fair amount
+	  of memory.  See Documentation/page_owner.c for the user-space helper.
+
+	  If unsure, say N.
+
 config DEBUG_FS
 	bool "Debug Filesystem"
 	depends on DEBUG_KERNEL
diff -puN mm/page_alloc.c~page-owner-tracking-leak-detector mm/page_alloc.c
--- devel/mm/page_alloc.c~page-owner-tracking-leak-detector	2005-07-08 23:11:40.000000000 -0700
+++ devel-akpm/mm/page_alloc.c	2005-07-08 23:11:40.000000000 -0700
@@ -769,6 +769,43 @@ should_reclaim_zone(struct zone *z, unsi
 	return 1;
 }
 
+#ifdef CONFIG_PAGE_OWNER
+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+{
+	return p > (void *)tinfo &&
+		p < (void *)tinfo + THREAD_SIZE - 3;
+}
+
+static inline void __stack_trace(struct page *page, unsigned long *stack, unsigned long bp)
+{
+	int i = 0;
+	unsigned long addr;
+	struct thread_info *tinfo = (struct thread_info *)
+		((unsigned long)stack & (~(THREAD_SIZE - 1)));
+
+	memset(page->trace, 0, sizeof(long) * 8);
+
+#ifdef CONFIG_FRAME_POINTER
+	while (valid_stack_ptr(tinfo, (void *)bp)) {
+		addr = *(unsigned long *)(bp + sizeof(long));
+		page->trace[i] = addr;
+		if (++i >= 8)
+			break;
+		bp = *(unsigned long *)bp;
+	}
+#else
+	while (valid_stack_ptr(tinfo, stack)) {
+		addr = *stack++;
+		if (__kernel_text_address(addr)) {
+			page->trace[i] = addr;
+			if (++i >= 8)
+				break;
+		}
+	}
+#endif
+}
+#endif /* CONFIG_PAGE_OWNER */
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
@@ -963,6 +1000,20 @@ nopage:
 	}
 	return NULL;
 got_pg:
+
+#ifdef CONFIG_PAGE_OWNER /* huga... */
+	{
+		unsigned long address, bp;
+#ifdef CONFIG_X86_64
+		asm ("movq %%rbp, %0" : "=r" (bp) : );
+#else
+		asm ("movl %%ebp, %0" : "=r" (bp) : );
+#endif
+		page->order = (int) order;
+		page->gfp_mask = gfp_mask;
+		__stack_trace(page, &address, bp);
+	}
+#endif /* CONFIG_PAGE_OWNER */
 	zone_statistics(zonelist, z);
 	return page;
 }
@@ -1016,6 +1067,9 @@ fastcall void __free_pages(struct page *
 			free_hot_page(page);
 		else
 			__free_pages_ok(page, order);
+#ifdef CONFIG_PAGE_OWNER
+		page->order = -1;
+#endif
 	}
 }
 
@@ -1663,6 +1717,9 @@ void __init memmap_init_zone(unsigned lo
 		if (!is_highmem_idx(zone))
 			set_page_address(page, __va(pfn << PAGE_SHIFT));
 #endif
+#ifdef CONFIG_PAGE_OWNER
+		page->order = -1;
+#endif
 	}
 }
_