per-vma rss and anon statistics

Track resident pages per VMA in addition to the existing per-mm count.

  * struct vm_area_struct gains two counters: rss (resident pages) and
    anon_rss (anonymous/COW pages).
  * New inline helpers rss_inc()/rss_dec() in include/linux/mm.h update
    mm->rss and vma->rss together.  The open-coded mm->rss++ / mm->rss--
    sites touched in mm/fremap.c, mm/memory.c, mm/rmap.c and mm/swapfile.c
    are converted to the helpers.
  * vma->anon_rss is incremented where an anonymous rmap is added
    (mm/rmap.c) and decremented when an anonymous page is replaced on the
    write-fault path (mm/memory.c) or has its pte replaced by a swap entry
    (mm/rmap.c).
  * The per-mapping line printed by fs/proc/task_mmu.c gets two extra
    %08lx columns, rss and anon_rss, each converted from pages to kB
    (value << (PAGE_SHIFT-10)).
  * split_vma() does not walk the page tables.  It halves both counters
    between the two VMAs and gives the odd page to the original VMA, so
    per-VMA numbers are approximate after a split while their sum is kept.

Fixed in this revision:

  * mm/rmap.c: the swap-out path incremented vma->anon_rss.  Unmapping an
    anonymous page has to decrement it, otherwise the counter only grows.
  * mm/mmap.c: comment typo ("loss" -> "lose").

Review notes (not addressed here):

  * split_vma() adjusts the counters before the mpol_copy() failure check
    that follows in the hunk context.  On that failure 'new' is freed but
    the original VMA keeps its halved counters.  TODO: move the split after
    the check, or restore vma->rss / vma->anon_rss on the error path.
  * The diffstat below lists fs/proc/rss, but this diff carries no hunk for
    that file.  TODO: confirm whether it was dropped on purpose.
  * The first mm/memory.c hunk is a whitespace-only change.

Signed-Off-By: Robert "Black Ninja" Love

 fs/proc/rss        |    9 +++++++++
 fs/proc/task_mmu.c |    6 ++++--
 include/linux/mm.h |   23 +++++++++++++++++++++++
 mm/fremap.c        |    4 ++--
 mm/memory.c        |   12 +++++++-----
 mm/mmap.c          |   26 +++++++++++++++++++++++++-
 mm/rmap.c          |    7 +++++--
 mm/swapfile.c      |    2 +-
 8 files changed, 76 insertions(+), 13 deletions(-)

diff -urN linux-2.6.8-20041108112901/fs/proc/task_mmu.c linux-rss/fs/proc/task_mmu.c
--- linux-2.6.8-20041108112901/fs/proc/task_mmu.c	2004-11-08 13:13:05.000000000 -0500
+++ linux-rss/fs/proc/task_mmu.c	2005-02-25 11:43:37.335859152 -0500
@@ -57,7 +57,7 @@
 		ino = inode->i_ino;
 	}
 
-	seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
+	seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %08lx %08lx %n",
 			map->vm_start,
 			map->vm_end,
 			flags & VM_READ ? 'r' : '-',
@@ -65,7 +65,9 @@
 			flags & VM_EXEC ? 'x' : '-',
 			flags & VM_MAYSHARE ? 's' : 'p',
 			map->vm_pgoff << PAGE_SHIFT,
-			MAJOR(dev), MINOR(dev), ino, &len);
+			MAJOR(dev), MINOR(dev), ino,
+			map->rss << (PAGE_SHIFT-10),
+			map->anon_rss << (PAGE_SHIFT-10), &len);
 
 	if (map->vm_file) {
 		len = 25 + sizeof(void*) * 6 - len;
diff -urN linux-2.6.8-20041108112901/include/linux/mm.h linux-rss/include/linux/mm.h
--- linux-2.6.8-20041108112901/include/linux/mm.h	2004-11-08 13:13:16.000000000 -0500
+++ linux-rss/include/linux/mm.h	2005-02-25 11:45:33.159251320 -0500
@@ -68,6 +68,9 @@
 	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
 	unsigned long vm_flags;		/* Flags, listed below. */
 
+	unsigned long rss;		/* RSS pages */
+	unsigned long anon_rss;		/* anonymous (COW) pages */
+
 	struct rb_node vm_rb;
 
 	/*
@@ -797,6 +800,26 @@
 							-vma_pages(vma));
 }
 
+/*
+ * rss_inc - increment the number of RSS pages globally for this address space
+ * and specifically for this VMA.
+ */
+static inline void rss_inc(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	mm->rss++;
+	vma->rss++;
+}
+
+/*
+ * rss_dec - decrement the number of RSS pages globally for this address space
+ * and specifically for this VMA.
+ */
+static inline void rss_dec(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	mm->rss--;
+	vma->rss--;
+}
+
 #ifndef CONFIG_DEBUG_PAGEALLOC
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable)
diff -urN linux-2.6.8-20041108112901/mm/fremap.c linux-rss/mm/fremap.c
--- linux-2.6.8-20041108112901/mm/fremap.c	2004-11-08 13:13:06.000000000 -0500
+++ linux-rss/mm/fremap.c	2005-02-24 16:25:08.960738336 -0500
@@ -38,7 +38,7 @@
 					set_page_dirty(page);
 				page_remove_rmap(page);
 				page_cache_release(page);
-				mm->rss--;
+				rss_dec(mm, vma);
 			}
 		}
 	} else {
@@ -86,7 +86,7 @@
 
 	zap_pte(mm, vma, addr, pte);
 
-	mm->rss++;
+	rss_inc(mm, vma);
 	flush_icache_page(vma, page);
 	set_pte(pte, mk_pte(page, prot));
 	page_add_file_rmap(page);
diff -urN linux-2.6.8-20041108112901/mm/memory.c linux-rss/mm/memory.c
--- linux-2.6.8-20041108112901/mm/memory.c	2004-11-08 13:13:08.000000000 -0500
+++ linux-rss/mm/memory.c	2005-02-25 11:45:37.159643168 -0500
@@ -236,7 +236,7 @@
 		pmd_t * src_pmd, * dst_pmd;
 
 		src_pgd++; dst_pgd++;
-		
+
 		/* copy_pmd_range */
 		
 		if (pgd_none(*src_pgd))
@@ -1095,8 +1095,10 @@
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, pte))) {
+		if (PageAnon(old_page))
+			vma->anon_rss--;
 		if (PageReserved(old_page))
-			++mm->rss;
+			rss_inc(mm, vma);
 		else
 			page_remove_rmap(old_page);
 		break_cow(vma, new_page, address, page_table);
@@ -1378,7 +1380,7 @@
 	if (vm_swap_full())
 		remove_exclusive_swap_page(page);
 
-	mm->rss++;
+	rss_inc(mm, vma);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if (write_access && can_share_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1443,7 +1445,7 @@
 			spin_unlock(&mm->page_table_lock);
 			goto out;
 		}
-		mm->rss++;
+		rss_inc(mm, vma);
 		entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
 							 vma->vm_page_prot)),
 				      vma);
@@ -1552,7 +1554,7 @@
 	/* Only go through if we didn't race with anybody else... */
 	if (pte_none(*page_table)) {
 		if (!PageReserved(new_page))
-			++mm->rss;
+			rss_inc(mm, vma);
 		flush_icache_page(vma, new_page);
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
diff -urN linux-2.6.8-20041108112901/mm/mmap.c linux-rss/mm/mmap.c
--- linux-2.6.8-20041108112901/mm/mmap.c	2004-11-08 13:13:16.000000000 -0500
+++ linux-rss/mm/mmap.c	2005-02-25 11:41:52.403811248 -0500
@@ -1618,13 +1618,14 @@
 
 /*
  * Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the the tail.
+ * either for the first part or the tail.
  */
 int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	      unsigned long addr, int new_below)
 {
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
+	unsigned long rss, anon;
 
 	if (mm->map_count >= sysctl_max_map_count)
 		return -ENOMEM;
@@ -1643,6 +1644,29 @@
 		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
 	}
 
+	/*
+	 * Hack alert.  In splitting the VMA, we have two options wrt per-VMA
+	 * RSS accounting.  We can walk the page tables and reallocate the
+	 * RSS statistics back to each VMA on a per-page basis, or we can fudge
+	 * the stats, like below, and lose some per-VMA accuracy in the split
+	 * case but keep the aggregate number correct.
+	 */
+	rss = vma->rss;
+	if (rss) {
+		vma->rss = new->rss = rss / 2;
+		if (rss % 2 == 1)
+			vma->rss++;
+	}
+
+	/* Same deal for per-VMA Anon statistics */
+	anon = vma->anon_rss;
+	if (anon) {
+		vma->anon_rss = new->anon_rss = anon / 2;
+		if (anon % 2 == 1)
+			vma->anon_rss++;
+	}
+
+
 	pol = mpol_copy(vma_policy(vma));
 	if (IS_ERR(pol)) {
 		kmem_cache_free(vm_area_cachep, new);
diff -urN linux-2.6.8-20041108112901/mm/rmap.c linux-rss/mm/rmap.c
--- linux-2.6.8-20041108112901/mm/rmap.c	2004-11-08 13:13:15.000000000 -0500
+++ linux-rss/mm/rmap.c	2005-02-25 11:32:50.315221240 -0500
@@ -434,6 +434,8 @@
 	BUG_ON(PageReserved(page));
 	BUG_ON(!anon_vma);
 
+	vma->anon_rss++;
+
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	index = (address - vma->vm_start) >> PAGE_SHIFT;
 	index += vma->vm_pgoff;
@@ -581,9 +583,10 @@
 		swap_duplicate(entry);
 		set_pte(pte, swp_entry_to_pte(entry));
 		BUG_ON(pte_file(*pte));
+		vma->anon_rss--;
 	}
 
-	mm->rss--;
+	rss_dec(mm, vma);
 	page_remove_rmap(page);
 	page_cache_release(page);
 
@@ -683,7 +686,7 @@
 
 		page_remove_rmap(page);
 		page_cache_release(page);
-		mm->rss--;
+		rss_dec(mm, vma);
 		(*mapcount)--;
 	}
 
diff -urN linux-2.6.8-20041108112901/mm/swapfile.c linux-rss/mm/swapfile.c
--- linux-2.6.8-20041108112901/mm/swapfile.c	2004-11-08 13:13:15.000000000 -0500
+++ linux-rss/mm/swapfile.c	2005-02-24 14:56:15.005622768 -0500
@@ -434,7 +434,7 @@
 unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
 	swp_entry_t entry, struct page *page)
 {
-	vma->vm_mm->rss++;
+	rss_inc(vma->vm_mm, vma);
 	get_page(page);
 	set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, address);