per-vma rss accounting, bitches Signed-Off-By: Robert "Sly Fox" Love fs/proc/task_mmu.c | 5 +++-- include/linux/mm.h | 22 ++++++++++++++++++++++ mm/fremap.c | 4 ++-- mm/memory.c | 11 ++++++----- mm/mmap.c | 17 ++++++++++++++++- mm/rmap.c | 4 ++-- mm/swapfile.c | 2 +- 7 files changed, 52 insertions(+), 13 deletions(-) diff -urN linux-2.6.8-20041108112901/fs/proc/task_mmu.c linux-rss/fs/proc/task_mmu.c --- linux-2.6.8-20041108112901/fs/proc/task_mmu.c 2004-11-08 13:13:05.000000000 -0500 +++ linux-rss/fs/proc/task_mmu.c 2005-02-24 16:23:16.650812040 -0500 @@ -57,7 +57,7 @@ ino = inode->i_ino; } - seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", + seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %0lx %n", map->vm_start, map->vm_end, flags & VM_READ ? 'r' : '-', @@ -65,7 +65,8 @@ flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? 's' : 'p', map->vm_pgoff << PAGE_SHIFT, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino, + map->rss << (PAGE_SHIFT-10), &len); if (map->vm_file) { len = 25 + sizeof(void*) * 6 - len; diff -urN linux-2.6.8-20041108112901/include/linux/mm.h linux-rss/include/linux/mm.h --- linux-2.6.8-20041108112901/include/linux/mm.h 2004-11-08 13:13:16.000000000 -0500 +++ linux-rss/include/linux/mm.h 2005-02-24 15:12:01.485735920 -0500 @@ -68,6 +68,8 @@ pgprot_t vm_page_prot; /* Access permissions of this VMA. */ unsigned long vm_flags; /* Flags, listed below. */ + unsigned long rss; /* RSS pages */ + struct rb_node vm_rb; /* @@ -797,6 +799,26 @@ -vma_pages(vma)); } +/* + * rss_inc - increment the number of RSS pages globally for this address space + * and specifically for this VMA. + */ +static inline void rss_inc(struct mm_struct *mm, struct vm_area_struct *vma) +{ + mm->rss++; + vma->rss++; +} + +/* + * rss_dec - decrement the number of RSS pages globally for this address space + * and specifically for this VMA. 
+ */ +static inline void rss_dec(struct mm_struct *mm, struct vm_area_struct *vma) +{ + mm->rss--; + vma->rss--; +} + #ifndef CONFIG_DEBUG_PAGEALLOC static inline void kernel_map_pages(struct page *page, int numpages, int enable) diff -urN linux-2.6.8-20041108112901/mm/fremap.c linux-rss/mm/fremap.c --- linux-2.6.8-20041108112901/mm/fremap.c 2004-11-08 13:13:06.000000000 -0500 +++ linux-rss/mm/fremap.c 2005-02-24 16:25:08.960738336 -0500 @@ -38,7 +38,7 @@ set_page_dirty(page); page_remove_rmap(page); page_cache_release(page); - mm->rss--; + rss_dec(mm, vma); } } } else { @@ -86,7 +86,7 @@ zap_pte(mm, vma, addr, pte); - mm->rss++; + rss_inc(mm, vma); flush_icache_page(vma, page); set_pte(pte, mk_pte(page, prot)); page_add_file_rmap(page); diff -urN linux-2.6.8-20041108112901/mm/memory.c linux-rss/mm/memory.c --- linux-2.6.8-20041108112901/mm/memory.c 2004-11-08 13:13:08.000000000 -0500 +++ linux-rss/mm/memory.c 2005-02-24 16:40:09.647813088 -0500 @@ -236,7 +236,7 @@ pmd_t * src_pmd, * dst_pmd; src_pgd++; dst_pgd++; - + /* copy_pmd_range */ if (pgd_none(*src_pgd)) @@ -325,6 +325,7 @@ pte = pte_mkclean(pte); pte = pte_mkold(pte); get_page(page); + /* no VMA to increment ? */ dst->rss++; set_pte(dst_pte, pte); page_dup_rmap(page); @@ -1096,7 +1097,7 @@ page_table = pte_offset_map(pmd, address); if (likely(pte_same(*page_table, pte))) { if (PageReserved(old_page)) - ++mm->rss; + rss_inc(mm, vma); else page_remove_rmap(old_page); break_cow(vma, new_page, address, page_table); @@ -1378,7 +1379,7 @@ if (vm_swap_full()) remove_exclusive_swap_page(page); - mm->rss++; + rss_inc(mm, vma); pte = mk_pte(page, vma->vm_page_prot); if (write_access && can_share_swap_page(page)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); @@ -1443,7 +1444,7 @@ spin_unlock(&mm->page_table_lock); goto out; } - mm->rss++; + rss_inc(mm, vma); entry = maybe_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)), vma); @@ -1552,7 +1553,7 @@ /* Only go through if we didn't race with anybody else... 
*/ if (pte_none(*page_table)) { if (!PageReserved(new_page)) - ++mm->rss; + rss_inc(mm, vma); flush_icache_page(vma, new_page); entry = mk_pte(new_page, vma->vm_page_prot); if (write_access) diff -urN linux-2.6.8-20041108112901/mm/mmap.c linux-rss/mm/mmap.c --- linux-2.6.8-20041108112901/mm/mmap.c 2004-11-08 13:13:16.000000000 -0500 +++ linux-rss/mm/mmap.c 2005-02-24 16:41:17.760458392 -0500 @@ -1618,13 +1618,14 @@ /* * Split a vma into two pieces at address 'addr', a new vma is allocated - * either for the first part or the the tail. + * either for the first part or the tail. */ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, int new_below) { struct mempolicy *pol; struct vm_area_struct *new; + unsigned long rss; if (mm->map_count >= sysctl_max_map_count) return -ENOMEM; @@ -1643,6 +1644,20 @@ new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); } + /* + * Hack alert. In splitting the VMA, we have two options wrt per-VMA + * RSS accounting. We can walk the page tables and reallocate the + * RSS statistics back to each VMA on a per-page basis, or we can fudge + * the stats, like below, and lose some per-VMA accuracy in the split + * case but keep the aggregate number correct. 
+ */ + rss = vma->rss; + if (rss) { + vma->rss = new->rss = rss / 2; + if (rss % 2 == 1) + vma->rss++; + } + pol = mpol_copy(vma_policy(vma)); if (IS_ERR(pol)) { kmem_cache_free(vm_area_cachep, new); diff -urN linux-2.6.8-20041108112901/mm/rmap.c linux-rss/mm/rmap.c --- linux-2.6.8-20041108112901/mm/rmap.c 2004-11-08 13:13:15.000000000 -0500 +++ linux-rss/mm/rmap.c 2005-02-24 14:54:09.876645264 -0500 @@ -583,7 +583,7 @@ BUG_ON(pte_file(*pte)); } - mm->rss--; + rss_dec(mm, vma); page_remove_rmap(page); page_cache_release(page); @@ -683,7 +683,7 @@ page_remove_rmap(page); page_cache_release(page); - mm->rss--; + rss_dec(mm, vma); (*mapcount)--; } diff -urN linux-2.6.8-20041108112901/mm/swapfile.c linux-rss/mm/swapfile.c --- linux-2.6.8-20041108112901/mm/swapfile.c 2004-11-08 13:13:15.000000000 -0500 +++ linux-rss/mm/swapfile.c 2005-02-24 14:56:15.005622768 -0500 @@ -434,7 +434,7 @@ unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir, swp_entry_t entry, struct page *page) { - vma->vm_mm->rss++; + rss_inc(vma->vm_mm, vma); get_page(page); set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); page_add_anon_rmap(page, vma, address);