diff -urN 2.4.0-test7/fs/dcache.c 2.4.0-test7-slab/fs/dcache.c
--- 2.4.0-test7/fs/dcache.c	Fri Aug 25 19:37:26 2000
+++ 2.4.0-test7-slab/fs/dcache.c	Sat Aug 26 18:22:21 2000
@@ -553,18 +553,19 @@
  */
 int shrink_dcache_memory(int priority, unsigned int gfp_mask)
 {
-	int count = 0;
+	int count = 0, nr_pages = 0;
 	if (priority)
 		count = dentry_stat.nr_unused / priority;
 	prune_dcache(count);
-	/* FIXME: kmem_cache_shrink here should tell us
-	   the number of pages freed, and it should
-	   work in a __GFP_DMA/__GFP_HIGHMEM behaviour
-	   to free only the interesting pages in
-	   function of the needs of the current allocation. */
-	kmem_cache_shrink(dentry_cache);
+	/*
+	 * FIXME: we should report the progress done only in the classzone
+	 * that the allocation is interested in, but since all the memory
+	 * balancing is GFP-zone-requested blind we can't do that at the moment
+	 * w/o first fixing the rest.
+	 */
+	kmem_cache_shrink(dentry_cache, NULL, &nr_pages);
 
-	return 0;
+	return nr_pages;
 }
 
 #define NAME_ALLOC_LEN(len)	((len+16) & ~15)
diff -urN 2.4.0-test7/fs/inode.c 2.4.0-test7-slab/fs/inode.c
--- 2.4.0-test7/fs/inode.c	Fri Aug 25 19:37:26 2000
+++ 2.4.0-test7-slab/fs/inode.c	Sat Aug 26 18:58:59 2000
@@ -456,19 +456,20 @@
 
 int shrink_icache_memory(int priority, int gfp_mask)
 {
-	int count = 0;
+	int count = 0, nr_pages = 0;
 
 	if (priority)
 		count = inodes_stat.nr_unused / priority;
 	prune_icache(count);
-	/* FIXME: kmem_cache_shrink here should tell us
-	   the number of pages freed, and it should
-	   work in a __GFP_DMA/__GFP_HIGHMEM behaviour
-	   to free only the interesting pages in
-	   function of the needs of the current allocation. */
-	kmem_cache_shrink(inode_cachep);
+	/*
+	 * FIXME: we should report the progress done only in the classzone
+	 * that the allocation is interested in, but since all the memory
+	 * balancing is GFP-zone-requested blind we can't do that at the moment
+	 * w/o first fixing the rest.
+	 */
+	kmem_cache_shrink(inode_cachep, NULL, &nr_pages);
 
-	return 0;
+	return nr_pages;
 }
 
 /*
diff -urN 2.4.0-test7/include/linux/mmzone.h 2.4.0-test7-slab/include/linux/mmzone.h
--- 2.4.0-test7/include/linux/mmzone.h	Sat Aug 26 17:40:15 2000
+++ 2.4.0-test7-slab/include/linux/mmzone.h	Sat Aug 26 18:15:42 2000
@@ -92,8 +92,7 @@
 extern pg_data_t *pgdat_list;
 
 #define memclass(pgzone, tzone)	(((pgzone)->zone_pgdat == (tzone)->zone_pgdat) \
-			&& (((pgzone) - (pgzone)->zone_pgdat->node_zones) <= \
-			((tzone) - (pgzone)->zone_pgdat->node_zones)))
+			&& ((pgzone) <= (tzone)))
 
 /*
  * The following two are not meant for general usage. They are here as
diff -urN 2.4.0-test7/include/linux/slab.h 2.4.0-test7-slab/include/linux/slab.h
--- 2.4.0-test7/include/linux/slab.h	Sat Aug 26 17:40:15 2000
+++ 2.4.0-test7-slab/include/linux/slab.h	Sat Aug 26 18:15:42 2000
@@ -52,14 +52,14 @@
 			       void (*)(void *, kmem_cache_t *, unsigned long),
 			       void (*)(void *, kmem_cache_t *, unsigned long));
 extern int kmem_cache_destroy(kmem_cache_t *);
-extern int kmem_cache_shrink(kmem_cache_t *);
+extern int kmem_cache_shrink(kmem_cache_t *, zone_t *, int *);
 extern void *kmem_cache_alloc(kmem_cache_t *, int);
 extern void kmem_cache_free(kmem_cache_t *, void *);
 
 extern void *kmalloc(size_t, int);
 extern void kfree(const void *);
 
-extern void kmem_cache_reap(int);
+extern int kmem_cache_reap(int, zone_t *);
 extern int slabinfo_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data);
 extern int slabinfo_write_proc(struct file *file, const char *buffer,
diff -urN 2.4.0-test7/mm/slab.c 2.4.0-test7-slab/mm/slab.c
--- 2.4.0-test7/mm/slab.c	Fri Aug 25 19:37:33 2000
+++ 2.4.0-test7-slab/mm/slab.c	Sat Aug 26 19:21:23 2000
@@ -140,8 +140,7 @@
  *
  * Manages the objs in a slab. Placed either at the beginning of mem allocated
  * for a slab, or allocated from an general cache.
- * Slabs are chained into one ordered list: fully used, partial, then fully
- * free slabs.
+ * Slabs are chained into three lists: fully used, partial, fully free slabs.
  */
 typedef struct slab_s {
 	struct list_head	list;
@@ -167,7 +166,7 @@
 } cpucache_t;
 
 #define cc_entry(cpucache) \
-	((void **)(((cpucache_t*)cpucache)+1))
+	((void **)(((cpucache_t*)(cpucache))+1))
 #define cc_data(cachep) \
 	((cachep)->cpudata[smp_processor_id()])
 /*
@@ -181,8 +180,9 @@
 struct kmem_cache_s {
 /* 1) each alloc & free */
 	/* full, partial first, then free */
-	struct list_head	slabs;
-	struct list_head	*firstnotfull;
+	struct list_head	slabs_full;
+	struct list_head	slabs_partial;
+	struct list_head	slabs_free;
 	unsigned int		objsize;
 	unsigned int		flags;	/* constant flags */
 	unsigned int		num;	/* # of objs per slab */
@@ -345,8 +345,9 @@
 
 /* internal cache of cache description objs */
 static kmem_cache_t cache_cache = {
-	slabs:		LIST_HEAD_INIT(cache_cache.slabs),
-	firstnotfull:	&cache_cache.slabs,
+	slabs_full:	LIST_HEAD_INIT(cache_cache.slabs_full),
+	slabs_partial:	LIST_HEAD_INIT(cache_cache.slabs_partial),
+	slabs_free:	LIST_HEAD_INIT(cache_cache.slabs_free),
 	objsize:	sizeof(kmem_cache_t),
 	flags:		SLAB_NO_REAP,
 	spinlock:	SPIN_LOCK_UNLOCKED,
@@ -778,8 +779,9 @@
 		cachep->gfpflags |= GFP_DMA;
 	spin_lock_init(&cachep->spinlock);
 	cachep->objsize = size;
-	INIT_LIST_HEAD(&cachep->slabs);
-	cachep->firstnotfull = &cachep->slabs;
+	INIT_LIST_HEAD(&cachep->slabs_full);
+	INIT_LIST_HEAD(&cachep->slabs_partial);
+	INIT_LIST_HEAD(&cachep->slabs_free);
 
 	if (flags & CFLGS_OFF_SLAB)
 		cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
@@ -884,10 +886,10 @@
 #define drain_cpu_caches(cachep)	do { } while (0)
 #endif
 
-static int __kmem_cache_shrink(kmem_cache_t *cachep)
+static int __kmem_cache_shrink(kmem_cache_t *cachep, zone_t * zone, int * nr_pages)
 {
 	slab_t *slabp;
-	int ret;
+	int ret, progress;
 
 	drain_cpu_caches(cachep);
 
@@ -897,23 +899,33 @@
 	while (!cachep->growing) {
 		struct list_head *p;
 
-		p = cachep->slabs.prev;
-		if (p == &cachep->slabs)
+		p = cachep->slabs_free.prev;
+		if (p == &cachep->slabs_free)
 			break;
 
-		slabp = list_entry(cachep->slabs.prev, slab_t, list);
+		slabp = list_entry(cachep->slabs_free.prev, slab_t, list);
 		if (slabp->inuse)
-			break;
+			BUG();
+
+		progress = 1;
+		if (zone) {
+			void * addr = slabp->s_mem - slabp->colouroff;
+			struct page * page = virt_to_page(addr);
+
+			if (!memclass(page->zone, zone))
+				progress = 0;
+		}
 
 		list_del(&slabp->list);
-		if (cachep->firstnotfull == &slabp->list)
-			cachep->firstnotfull = &cachep->slabs;
 
 		spin_unlock_irq(&cachep->spinlock);
 		kmem_slab_destroy(cachep, slabp);
+		if (nr_pages && progress)
+			/* don't need the cache's spinlock to read the order */
+			*nr_pages += 1UL << cachep->gfporder;
 		spin_lock_irq(&cachep->spinlock);
 	}
-	ret = !list_empty(&cachep->slabs);
+	ret = !list_empty(&cachep->slabs_full) || !list_empty(&cachep->slabs_partial);
 	spin_unlock_irq(&cachep->spinlock);
 	return ret;
 }
@@ -925,12 +937,12 @@
  * Releases as many slabs as possible for a cache.
  * To help debugging, a zero exit status indicates all slabs were released.
  */
-int kmem_cache_shrink(kmem_cache_t *cachep)
+int kmem_cache_shrink(kmem_cache_t *cachep, zone_t * zone, int * nr_pages)
 {
 	if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep))
 		BUG();
 
-	return __kmem_cache_shrink(cachep);
+	return __kmem_cache_shrink(cachep, zone, nr_pages);
 }
 
 /**
@@ -962,7 +974,7 @@
 	list_del(&cachep->next);
 	up(&cache_chain_sem);
 
-	if (__kmem_cache_shrink(cachep)) {
+	if (__kmem_cache_shrink(cachep, NULL, NULL)) {
 		printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
 		       cachep);
 		down(&cache_chain_sem);
@@ -1139,9 +1151,7 @@
 	cachep->growing--;
 
 	/* Make slab active. */
-	list_add_tail(&slabp->list,&cachep->slabs);
-	if (cachep->firstnotfull == &cachep->slabs)
-		cachep->firstnotfull = &slabp->list;
+	list_add_tail(&slabp->list, &cachep->slabs_free);
 	STATS_INC_GROWN(cachep);
 	cachep->failures = 0;
 
@@ -1198,7 +1208,7 @@
 }
 
 static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
-						slab_t *slabp)
+						slab_t *slabp, int partial)
 {
 	void *objp;
 
@@ -1211,9 +1221,15 @@
 	objp = slabp->s_mem + slabp->free*cachep->objsize;
 	slabp->free=slab_bufctl(slabp)[slabp->free];
 
-	if (slabp->free == BUFCTL_END)
-		/* slab now full: move to next slab for next alloc */
-		cachep->firstnotfull = slabp->list.next;
+	if (slabp->free == BUFCTL_END) {
+		list_del(&slabp->list);
+		list_add(&slabp->list, &cachep->slabs_full);
+	} else {
+		if (!partial) {
+			list_del(&slabp->list);
+			list_add(&slabp->list, &cachep->slabs_partial);
+		}
+	}
 #if DEBUG
 	if (cachep->flags & SLAB_POISON)
 		if (kmem_check_poison_obj(cachep, objp))
@@ -1239,16 +1255,20 @@
  */
 #define kmem_cache_alloc_one(cachep)				\
 ({								\
-	slab_t	*slabp;					\
+	slab_t	*slabp;						\
+	struct list_head * slab_freelist;			\
+	int partial = 1;					\
 								\
-	/* Get slab alloc is to come from. */			\
-	{							\
-		struct list_head* p = cachep->firstnotfull;	\
-		if (p == &cachep->slabs)			\
+	slab_freelist = &(cachep)->slabs_partial;		\
+	if (list_empty(slab_freelist)) {			\
+		partial = 0;					\
+		slab_freelist = &(cachep)->slabs_free;		\
+		if (list_empty(slab_freelist))			\
 			goto alloc_new_slab;			\
-		slabp = list_entry(p,slab_t, list);		\
 	}							\
-	kmem_cache_alloc_one_tail(cachep, slabp);		\
+								\
+	slabp = list_entry(slab_freelist->next, slab_t, list);	\
+	kmem_cache_alloc_one_tail(cachep, slabp, partial);	\
 })
 
 #ifdef CONFIG_SMP
@@ -1256,18 +1276,25 @@
 {
 	int batchcount = cachep->batchcount;
 	cpucache_t* cc = cc_data(cachep);
+	struct list_head * slab_freelist;
+	int partial;
+	slab_t *slabp;
 
 	spin_lock(&cachep->spinlock);
 	while (batchcount--) {
 		/* Get slab alloc is to come from. */
-		struct list_head *p = cachep->firstnotfull;
-		slab_t *slabp;
+		slab_freelist = &(cachep)->slabs_partial;
+		partial = 1;
+		if (list_empty(slab_freelist)) {
+			partial = 0;
+			slab_freelist = &(cachep)->slabs_free;
+			if (list_empty(slab_freelist))
+				break;
+		}
 
-		if (p == &cachep->slabs)
-			break;
-		slabp = list_entry(p,slab_t, list);
+		slabp = list_entry(slab_freelist->next, slab_t, list);
 		cc_entry(cc)[cc->avail++] =
-				kmem_cache_alloc_one_tail(cachep, slabp);
+				kmem_cache_alloc_one_tail(cachep, slabp, partial);
 	}
 	spin_unlock(&cachep->spinlock);
 
@@ -1397,43 +1424,24 @@
 	}
 	STATS_DEC_ACTIVE(cachep);
 
-	/* fixup slab chain */
-	if (slabp->inuse-- == cachep->num)
-		goto moveslab_partial;
-	if (!slabp->inuse)
+	/* fixup slab chains */
+	if (!--slabp->inuse)
 		goto moveslab_free;
+	if (slabp->inuse + 1 == cachep->num)
+		goto moveslab_partial;
 	return;
 
 moveslab_partial:
-	/* was full.
-	 * Even if the page is now empty, we can set c_firstnotfull to
-	 * slabp: there are no partial slabs in this case
-	 */
-	{
-		struct list_head *t = cachep->firstnotfull;
+	/* Was full. */
+	list_del(&slabp->list);
+	list_add(&slabp->list, &cachep->slabs_partial);
+	return;
 
-		cachep->firstnotfull = &slabp->list;
-		if (slabp->list.next == t)
-			return;
-		list_del(&slabp->list);
-		list_add_tail(&slabp->list, t);
-		return;
-	}
 moveslab_free:
-	/*
-	 * was partial, now empty.
-	 * c_firstnotfull might point to slabp
-	 * FIXME: optimize
-	 */
-	{
-		struct list_head *t = cachep->firstnotfull->prev;
-
-		list_del(&slabp->list);
-		list_add_tail(&slabp->list, &cachep->slabs);
-		if (cachep->firstnotfull == &slabp->list)
-			cachep->firstnotfull = t->next;
-		return;
-	}
+	/* Was partial, now empty. */
+	list_del(&slabp->list);
+	list_add(&slabp->list, &cachep->slabs_free);
+	return;
 }
 
 #ifdef CONFIG_SMP
@@ -1744,7 +1752,7 @@
  *
 * Called from try_to_free_page().
  */
-void kmem_cache_reap (int gfp_mask)
+int kmem_cache_reap (int gfp_mask, zone_t * zone)
 {
 	slab_t *slabp;
 	kmem_cache_t *searchp;
@@ -1752,12 +1760,13 @@
 	unsigned int best_pages;
 	unsigned int best_len;
 	unsigned int scan;
+	int nr_pages = 0;
 
 	if (gfp_mask & __GFP_WAIT)
 		down(&cache_chain_sem);
 	else if (down_trylock(&cache_chain_sem))
-		return;
+		return 0;
 
 	scan = REAP_SCANLEN;
 	best_len = 0;
@@ -1798,13 +1807,13 @@
 #endif
 
 		full_free = 0;
-		p = searchp->slabs.prev;
-		while (p != &searchp->slabs) {
+		p = searchp->slabs_free.next;
+		while (p != &searchp->slabs_free) {
 			slabp = list_entry(p, slab_t, list);
 			if (slabp->inuse)
-				break;
+				BUG();
 			full_free++;
-			p = p->prev;
+			p = p->next;
 		}
 
 		/*
@@ -1821,7 +1830,7 @@
 			best_cachep = searchp;
 			best_len = full_free;
 			best_pages = pages;
-			if (full_free >= REAP_PERFECT) {
+			if (pages >= REAP_PERFECT) {
 				clock_searchp = list_entry(searchp->next.next,
 							kmem_cache_t,next);
 				goto perfect;
@@ -1841,22 +1850,29 @@
 
 	spin_lock_irq(&best_cachep->spinlock);
 perfect:
-	/* free only 80% of the free slabs */
-	best_len = (best_len*4 + 1)/5;
+	/* free only 50% of the free slabs */
+	best_len = (best_len + 1)/2;
 	for (scan = 0; scan < best_len; scan++) {
 		struct list_head *p;
+		int progress;
 
 		if (best_cachep->growing)
 			break;
-		p = best_cachep->slabs.prev;
-		if (p == &best_cachep->slabs)
+		p = best_cachep->slabs_free.prev;
+		if (p == &best_cachep->slabs_free)
 			break;
 		slabp = list_entry(p,slab_t,list);
 		if (slabp->inuse)
-			break;
+			BUG();
+		progress = 1;
+		if (zone) {
+			void * addr = slabp->s_mem - slabp->colouroff;
+			struct page * page = virt_to_page(addr);
+
+			if (!memclass(page->zone, zone))
+				progress = 0;
+		}
 		list_del(&slabp->list);
-		if (best_cachep->firstnotfull == &slabp->list)
-			best_cachep->firstnotfull = &best_cachep->slabs;
 		STATS_INC_REAPED(best_cachep);
 
 		/* Safe to drop the lock. The slab is no longer linked to the
@@ -1864,12 +1880,15 @@
 		 */
 		spin_unlock_irq(&best_cachep->spinlock);
 		kmem_slab_destroy(best_cachep, slabp);
+		if (progress)
+			/* don't need the cache's spinlock to read the order */
+			nr_pages += 1UL << best_cachep->gfporder;
 		spin_lock_irq(&best_cachep->spinlock);
 	}
 	spin_unlock_irq(&best_cachep->spinlock);
 out:
 	up(&cache_chain_sem);
-	return;
+	return nr_pages;
 }
 
 #ifdef CONFIG_PROC_FS
@@ -1922,14 +1941,25 @@
 		spin_lock_irq(&cachep->spinlock);
 		active_objs = 0;
 		num_slabs = 0;
-		list_for_each(q,&cachep->slabs) {
+		list_for_each(q,&cachep->slabs_full) {
 			slabp = list_entry(q, slab_t, list);
+			if (slabp->inuse != cachep->num)
+				BUG();
+			active_objs += cachep->num;
+			active_slabs++;
+		}
+		list_for_each(q,&cachep->slabs_partial) {
+			slabp = list_entry(q, slab_t, list);
+			if (slabp->inuse == cachep->num || !slabp->inuse)
+				BUG();
 			active_objs += slabp->inuse;
-			num_objs += cachep->num;
+			active_slabs++;
+		}
+		list_for_each(q,&cachep->slabs_free) {
+			slabp = list_entry(q, slab_t, list);
 			if (slabp->inuse)
-				active_slabs++;
-			else
-				num_slabs++;
+				BUG();
+			num_slabs++;
 		}
 		num_slabs+=active_slabs;
 		num_objs = num_slabs*cachep->num;
diff -urN 2.4.0-test7/mm/vmscan.c 2.4.0-test7-slab/mm/vmscan.c
--- 2.4.0-test7/mm/vmscan.c	Thu Aug 17 19:57:44 2000
+++ 2.4.0-test7-slab/mm/vmscan.c	Sat Aug 26 18:21:48 2000
@@ -487,7 +487,9 @@
 	int swap_count;
 
 	/* Always trim SLAB caches when memory gets low. */
-	kmem_cache_reap(gfp_mask);
+	count -= kmem_cache_reap(gfp_mask, NULL);
+	if (count <= 0)
+		return 1;
 
 	priority = 64;
 	do {
@@ -516,15 +518,9 @@
 		 */
 		count -= shrink_dcache_memory(priority, gfp_mask);
 		count -= shrink_icache_memory(priority, gfp_mask);
-		/*
-		 * Not currently working, see fixme in shrink_?cache_memory
-		 * In the inner funtions there is a comment:
-		 * "To help debugging, a zero exit status indicates
-		 *  all slabs were released." (-arca?)
-		 * lets handle it in a primitive but working way...
-		 *	if (count <= 0)
-		 *		goto done;
-		 */
+		if (count <= 0)
+			goto done;
+
 		if (!keep_kswapd_awake())
 			goto done;