We have a problem at present in vm_enough_memory(): it uses smoke-n-mirrors
to try to work out how much memory can be reclaimed from dcache and icache.
It sometimes gets it quite wrong, especially if the slab has internal
fragmentation.  And it often does.

So here we take a new approach.  Rather than trying to work out how many
pages are reclaimable by counting up the number of inodes and dentries, we
change the slab allocator to keep count of how many pages are currently used
by slabs which can be shrunk by the VM.

The creator of the slab marks the slab as being reclaimable at
kmem_cache_create()-time.  Slab keeps a global counter of pages which are
currently in use by thus-tagged slabs.

Of course, we now slightly overestimate the amount of reclaimable memory,
because not _all_ of the icache, dcache, mbcache and quota caches are
reclaimable.  But I think it's better to be a bit permissive rather than
bogusly failing brk() calls as we do at present.


 include/linux/slab.h |    2 ++
 mm/mmap.c            |   15 ++++++---------
 mm/slab.c            |   18 ++++++++++++++++--
 3 files changed, 24 insertions(+), 11 deletions(-)

diff -puN include/linux/slab.h~slab-reclaimable-accounting include/linux/slab.h
--- 25/include/linux/slab.h~slab-reclaimable-accounting	2003-05-23 18:46:53.000000000 -0700
+++ 25-akpm/include/linux/slab.h	2003-05-23 18:46:53.000000000 -0700
@@ -41,6 +41,8 @@ typedef struct kmem_cache_s kmem_cache_t
 #define	SLAB_CACHE_DMA		0x00004000UL	/* use GFP_DMA memory */
 #define	SLAB_MUST_HWCACHE_ALIGN	0x00008000UL	/* force alignment */
 #define SLAB_STORE_USER		0x00010000UL	/* store the last owner for bug hunting */
+#define SLAB_RECLAIM_ACCOUNT	0x00020000UL	/* track pages allocated to indicate
+						   what is reclaimable later */
 
 /* flags passed to a constructor func */
 #define	SLAB_CTOR_CONSTRUCTOR	0x001UL		/* if not set, then deconstructor */
diff -puN mm/mmap.c~slab-reclaimable-accounting mm/mmap.c
--- 25/mm/mmap.c~slab-reclaimable-accounting	2003-05-23 18:46:53.000000000 -0700
+++ 25-akpm/mm/mmap.c	2003-05-23 18:46:53.000000000 -0700
@@ -64,6 +64,7 @@ atomic_t vm_committed_space = ATOMIC_INI
  * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
  * Additional code 2002 Jul 20 by Robert Love.
  */
+extern atomic_t slab_reclaim_pages;
 int vm_enough_memory(long pages)
 {
 	unsigned long free, allowed;
@@ -82,16 +83,12 @@ int vm_enough_memory(long pages)
 		free += nr_swap_pages;
 
 		/*
-		 * The code below doesn't account for free space in the
-		 * inode and dentry slab cache, slab cache fragmentation,
-		 * inodes and dentries which will become freeable under
-		 * VM load, etc. Lets just hope all these (complex)
-		 * factors balance out...
+		 * Any slabs which are created with the
+		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
+		 * which are reclaimable, under pressure.  The dentry
+		 * cache and most inode caches should fall into this
 		 */
-		free += (dentry_stat.nr_unused * sizeof(struct dentry)) >>
-			PAGE_SHIFT;
-		free += (inodes_stat.nr_unused * sizeof(struct inode)) >>
-			PAGE_SHIFT;
+		free += atomic_read(&slab_reclaim_pages);
 
 		if (free > pages)
 			return 1;
diff -puN mm/slab.c~slab-reclaimable-accounting mm/slab.c
--- 25/mm/slab.c~slab-reclaimable-accounting	2003-05-23 18:46:53.000000000 -0700
+++ 25-akpm/mm/slab.c	2003-05-23 18:46:53.000000000 -0700
@@ -118,10 +118,12 @@
 # define CREATE_MASK	(SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
 			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
 			 SLAB_NO_REAP | SLAB_CACHE_DMA | \
-			 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER)
+			 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
+			 SLAB_RECLAIM_ACCOUNT)
 #else
 # define CREATE_MASK	(SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
-			 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN)
+			 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
+			 SLAB_RECLAIM_ACCOUNT)
 #endif
 
 /*
@@ -423,6 +425,14 @@ static struct semaphore	cache_chain_sem;
 struct list_head cache_chain;
 
 /*
+ * vm_enough_memory() looks at this to determine how many
+ * slab-allocated pages are possibly freeable under pressure
+ *
+ * SLAB_RECLAIM_ACCOUNT turns this on per-slab
+ */
+atomic_t slab_reclaim_pages;
+
+/*
  * chicken and egg problem: delay the per-cpu array allocation
  * until the general caches are up.
  */
@@ -723,6 +733,8 @@ static inline void * kmem_getpages (kmem
 	 * would be relatively rare and ignorable.
 	 */
 	flags |= cachep->gfpflags;
+	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+		atomic_add(1<<cachep->gfporder, &slab_reclaim_pages);
 	addr = (void*) __get_free_pages(flags, cachep->gfporder);
 	/* Assume that now we have the pages no one else can legally
 	 * messes with the 'struct page's.
@@ -753,6 +765,8 @@ static inline void kmem_freepages (kmem_
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
 	free_pages((unsigned long)addr, cachep->gfporder);
+	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+		atomic_sub(1<<cachep->gfporder, &slab_reclaim_pages);
 }
 
 #if DEBUG
_
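
For illustration only, not part of the patch: a minimal sketch of how a cache
whose objects can be shrunk under VM pressure would opt into the new
accounting.  The "foo" names are made up, and the kmem_cache_create() call
assumes the 2.5-era signature (name, size, offset, flags, ctor, dtor):

#include <linux/init.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/slab.h>

/*
 * A hypothetical object type whose instances sit on an LRU and can be
 * dropped under memory pressure, much like dentries and inodes.
 */
struct foo_object {
	struct list_head lru;
	unsigned long data;
};

static kmem_cache_t *foo_cachep;

static int __init foo_cache_init(void)
{
	/*
	 * Passing SLAB_RECLAIM_ACCOUNT means every page that
	 * kmem_getpages() hands to this cache is added to
	 * slab_reclaim_pages (and subtracted again in kmem_freepages()),
	 * so vm_enough_memory() counts those pages as potentially
	 * reclaimable.
	 */
	foo_cachep = kmem_cache_create("foo_cache",
				       sizeof(struct foo_object),
				       0,			/* offset */
				       SLAB_RECLAIM_ACCOUNT,
				       NULL, NULL);		/* no ctor/dtor */
	if (!foo_cachep)
		return -ENOMEM;
	return 0;
}

Note that the counter tracks whole pages backing tagged caches, not freeable
objects, which is where the deliberate overestimate described above comes from.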