From: Manfred Spraul

I coded a poor man's magazine layer for slab: +13% performance for Robert's
packet routing.

Unfortunately this means even more arrays. Could you look at the patch?
It's not yet final: I want to make the size of the shared array tunable
(replace SHARED_ARRAY_FACTOR with a kmem_cache_t member), perhaps even
default it to 0; I must think about it a bit more.

The size of the new shared array may be altered at runtime by writing to
/proc/slabinfo. Additionally, the patch reduces the default sizes of the
per-cpu array caches.

TODO:
a) Disable the magazine-vs-head array duplication on UP.
b) Remove the hard-wired constant.
c) Some way for the slab owner to tune the magazine size?


 mm/slab.c |  182 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 139 insertions(+), 43 deletions(-)

diff -puN mm/slab.c~slab-magazine-layer mm/slab.c
--- 25/mm/slab.c~slab-magazine-layer	2003-05-23 18:44:13.000000000 -0700
+++ 25-akpm/mm/slab.c	2003-05-23 18:44:34.000000000 -0700
@@ -208,6 +208,7 @@ struct kmem_list3 {
 	unsigned long	free_objects;
 	int		free_touched;
 	unsigned long	next_reap;
+	struct array_cache	*shared;
 };
 
 #define LIST3_INIT(parent) \
@@ -1195,6 +1196,7 @@ static void smp_call_function_all_cpus(v
 }
 
 static void free_block (kmem_cache_t* cachep, void** objpp, int len);
+static void drain_array_locked(kmem_cache_t* cachep, struct array_cache *ac);
 
 static void do_drain(void *arg)
 {
@@ -1203,13 +1205,20 @@ static void do_drain(void *arg)
 
 	check_irq_off();
 	ac = ac_data(cachep);
+	spin_lock(&cachep->spinlock);
 	free_block(cachep, &ac_entry(ac)[0], ac->avail);
+	spin_unlock(&cachep->spinlock);
 	ac->avail = 0;
 }
 
 static void drain_cpu_caches(kmem_cache_t *cachep)
 {
 	smp_call_function_all_cpus(do_drain, cachep);
+	check_irq_on();
+	spin_lock_irq(&cachep->spinlock);
+	if (cachep->lists.shared)
+		drain_array_locked(cachep, cachep->lists.shared);
+	spin_unlock_irq(&cachep->spinlock);
 }
 
 
@@ -1307,6 +1316,8 @@ int kmem_cache_destroy (kmem_cache_t * c
 		for (i = 0; i < NR_CPUS; i++)
 			kfree(cachep->array[i]);
 		/* NUMA: free the list3 structures */
+		kfree(cachep->lists.shared);
+		cachep->lists.shared = NULL;
 	}
 	kmem_cache_free(&cache_cache, cachep);
 
@@ -1649,6 +1660,19 @@ retry:
 
 	BUG_ON(ac->avail > 0);
 	spin_lock(&cachep->spinlock);
+	if (l3->shared) {
+		struct array_cache *shared_array = l3->shared;
+		if (shared_array->avail) {
+			if (batchcount > shared_array->avail)
+				batchcount = shared_array->avail;
+			shared_array->avail -= batchcount;
+			ac->avail = batchcount;
+			memcpy(ac_entry(ac), &ac_entry(shared_array)[shared_array->avail],
+					sizeof(void*)*batchcount);
+			shared_array->touched = 1;
+			goto alloc_done;
+		}
+	}
 	while (batchcount > 0) {
 		struct list_head *entry;
 		struct slab *slabp;
@@ -1672,6 +1696,7 @@ retry:
 
 must_grow:
 	l3->free_objects -= ac->avail;
+alloc_done:
 	spin_unlock(&cachep->spinlock);
 
 	if (unlikely(!ac->avail)) {
@@ -1770,13 +1795,11 @@ static inline void * __cache_alloc (kmem
  * the l3 structure
  */
 
-static inline void
-__free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
+static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
 {
 	int i;
 
-	check_irq_off();
-	spin_lock(&cachep->spinlock);
+	check_spinlock_acquired(cachep);
 
 	/* NUMA: move add into loop */
 	cachep->lists.free_objects += nr_objects;
@@ -1814,12 +1837,6 @@ __free_block(kmem_cache_t *cachep, void
 					&list3_data_ptr(cachep, objp)->slabs_partial);
 		}
 	}
-	spin_unlock(&cachep->spinlock);
-}
-
-static void free_block(kmem_cache_t* cachep, void** objpp, int len)
-{
-	__free_block(cachep, objpp, len);
 }
 
 static void
 cache_flusharray (kmem_cache_t* cachep, struct array_cache *ac)
@@ -1831,14 +1848,28 @@ static void cache_flusharray (kmem_cache
 	BUG_ON(!batchcount || batchcount > ac->avail);
 #endif
 	check_irq_off();
-	__free_block(cachep, &ac_entry(ac)[0], batchcount);
+	spin_lock(&cachep->spinlock);
+	if (cachep->lists.shared) {
+		struct array_cache *shared_array = cachep->lists.shared;
+		int max = shared_array->limit-shared_array->avail;
+		if (max) {
+			if (batchcount > max)
+				batchcount = max;
+			memcpy(&ac_entry(shared_array)[shared_array->avail],
+					&ac_entry(ac)[0],
+					sizeof(void*)*batchcount);
+			shared_array->avail += batchcount;
+			goto free_done;
+		}
+	}
+	free_block(cachep, &ac_entry(ac)[0], batchcount);
+free_done:
 #if STATS
 	{
 		int i = 0;
 		struct list_head *p;
 
-		spin_lock(&cachep->spinlock);
 		p = list3_data(cachep)->slabs_free.next;
 		while (p != &(list3_data(cachep)->slabs_free)) {
 			struct slab *slabp;
@@ -1850,9 +1881,9 @@ static void cache_flusharray (kmem_cache
 			p = p->next;
 		}
 		STATS_SET_FREEABLE(cachep, i);
-		spin_unlock(&cachep->spinlock);
 	}
 #endif
+	spin_unlock(&cachep->spinlock);
 	ac->avail -= batchcount;
 	memmove(&ac_entry(ac)[0], &ac_entry(ac)[batchcount],
 			sizeof(void*)*ac->avail);
@@ -2086,9 +2117,10 @@ static void do_ccupdate_local(void *info
 }
 
 
-static int do_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
+static int do_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount, int shared)
 {
 	struct ccupdate_struct new;
+	struct array_cache *new_shared;
 	int i;
 
 	memset(&new.new,0,sizeof(new.new));
@@ -2122,11 +2154,29 @@ static int do_tune_cpucache (kmem_cache_
 		struct array_cache *ccold = new.new[i];
 		if (!ccold)
 			continue;
-		local_irq_disable();
+		spin_lock_irq(&cachep->spinlock);
 		free_block(cachep, ac_entry(ccold), ccold->avail);
-		local_irq_enable();
+		spin_unlock_irq(&cachep->spinlock);
 		kfree(ccold);
 	}
+	new_shared = kmalloc(sizeof(void*)*batchcount*shared+
+				sizeof(struct array_cache), GFP_KERNEL);
+	if (new_shared) {
+		struct array_cache *old;
+		new_shared->avail = 0;
+		new_shared->limit = batchcount*shared;
+		new_shared->batchcount = 0xbaadf00d;
+		new_shared->touched = 0;
+
+		spin_lock_irq(&cachep->spinlock);
+		old = cachep->lists.shared;
+		cachep->lists.shared = new_shared;
+		if (old)
+			free_block(cachep, ac_entry(old), old->avail);
+		spin_unlock_irq(&cachep->spinlock);
+		kfree(old);
+	}
+
 	return 0;
 }
 
@@ -2134,7 +2184,7 @@ static int do_tune_cpucache (kmem_cache_
 static void enable_cpucache (kmem_cache_t *cachep)
 {
 	int err;
-	int limit;
+	int limit, shared;
 
 	/* The head array serves three purposes:
 	 * - create a LIFO ordering, i.e. return objects that are cache-warm
@@ -2149,11 +2199,25 @@ static void enable_cpucache (kmem_cache_
 	else if (cachep->objsize > PAGE_SIZE)
 		limit = 8;
 	else if (cachep->objsize > 1024)
-		limit = 54;
+		limit = 24;
 	else if (cachep->objsize > 256)
-		limit = 120;
+		limit = 54;
 	else
-		limit = 248;
+		limit = 120;
+
+	/* Cpu bound tasks (e.g. network routing) can exhibit cpu bound
+	 * allocation behaviour: Most allocs on one cpu, most free operations
+	 * on another cpu. For these cases, an efficient object passing between
+	 * cpus is necessary. This is provided by a shared array. The array
+	 * replaces Bonwick's magazine layer.
+	 * On uniprocessor, it's functionally equivalent (but less efficient)
+	 * to a larger limit. Thus disabled by default.
+	 */
+	shared = 0;
+#ifdef CONFIG_SMP
+	if (cachep->objsize <= PAGE_SIZE)
+		shared = 8;
+#endif
 
 #if DEBUG
 	/* With debugging enabled, large batchcount lead to excessively
@@ -2163,12 +2227,53 @@ static void enable_cpucache (kmem_cache_
 	if (limit > 32)
 		limit = 32;
 #endif
-	err = do_tune_cpucache(cachep, limit, (limit+1)/2);
+	err = do_tune_cpucache(cachep, limit, (limit+1)/2, shared);
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 					cachep->name, -err);
 }
 
+static void drain_array(kmem_cache_t *cachep, struct array_cache *ac)
+{
+	int tofree;
+
+	check_irq_off();
+	if (ac->touched) {
+		ac->touched = 0;
+	} else if (ac->avail) {
+		tofree = (ac->limit+4)/5;
+		if (tofree > ac->avail) {
+			tofree = (ac->avail+1)/2;
+		}
+		spin_lock(&cachep->spinlock);
+		free_block(cachep, ac_entry(ac), tofree);
+		spin_unlock(&cachep->spinlock);
+		ac->avail -= tofree;
+		memmove(&ac_entry(ac)[0], &ac_entry(ac)[tofree],
+					sizeof(void*)*ac->avail);
+	}
+}
+
+static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac)
+{
+	int tofree;
+
+	check_spinlock_acquired(cachep);
+	if (ac->touched) {
+		ac->touched = 0;
+	} else if (ac->avail) {
+		tofree = (ac->limit+4)/5;
+		if (tofree > ac->avail) {
+			tofree = (ac->avail+1)/2;
+		}
+		free_block(cachep, ac_entry(ac), tofree);
+		ac->avail -= tofree;
+		memmove(&ac_entry(ac)[0], &ac_entry(ac)[tofree],
+					sizeof(void*)*ac->avail);
+	}
+}
+
+
 /**
  * cache_reap - Reclaim memory from caches.
  *
@@ -2195,7 +2300,6 @@ static inline void cache_reap (void)
 		kmem_cache_t *searchp;
 		struct list_head* p;
 		int tofree;
-		struct array_cache *ac;
 		struct slab *slabp;
 
 		searchp = list_entry(walk, kmem_cache_t, next);
@@ -2205,19 +2309,8 @@ static inline void cache_reap (void)
 
 		check_irq_on();
 		local_irq_disable();
-		ac = ac_data(searchp);
-		if (ac->touched) {
-			ac->touched = 0;
-		} else if (ac->avail) {
-			tofree = (ac->limit+4)/5;
-			if (tofree > ac->avail) {
-				tofree = (ac->avail+1)/2;
-			}
-			free_block(searchp, ac_entry(ac), tofree);
-			ac->avail -= tofree;
-			memmove(&ac_entry(ac)[0], &ac_entry(ac)[tofree],
-					sizeof(void*)*ac->avail);
-		}
+		drain_array(searchp, ac_data(searchp));
+
 		if(time_after(searchp->lists.next_reap, jiffies))
 			goto next_irqon;
 
@@ -2226,6 +2319,10 @@ static inline void cache_reap (void)
 			goto next_unlock;
 		}
 		searchp->lists.next_reap = jiffies + REAPTIMEOUT_LIST3;
+
+		if (searchp->lists.shared)
+			drain_array_locked(searchp, searchp->lists.shared);
+
 		if (searchp->lists.free_touched) {
 			searchp->lists.free_touched = 0;
 			goto next_unlock;
@@ -2435,7 +2532,7 @@ struct seq_operations slabinfo_op = {
 
 #define MAX_SLABINFO_WRITE 128
 /**
- * slabinfo_write - SMP tuning for the slab allocator
+ * slabinfo_write - Tuning for the slab allocator
  * @file: unused
  * @buffer: user buffer
 * @count: data len
@@ -2445,7 +2542,7 @@ ssize_t slabinfo_write(struct file *file
 		size_t count, loff_t *ppos)
 {
 	char kbuf[MAX_SLABINFO_WRITE+1], *tmp;
-	int limit, batchcount, res;
+	int limit, batchcount, shared, res;
 	struct list_head *p;
 
 	if (count > MAX_SLABINFO_WRITE)
@@ -2459,10 +2556,8 @@ ssize_t slabinfo_write(struct file *file
 		return -EINVAL;
 	*tmp = '\0';
 	tmp++;
-	limit = simple_strtol(tmp, &tmp, 10);
-	while (*tmp == ' ')
-		tmp++;
-	batchcount = simple_strtol(tmp, &tmp, 10);
+	if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
+		return -EINVAL;
 	/* Find the cache in the chain of caches.
 	 */
 	down(&cache_chain_sem);
@@ -2473,10 +2568,11 @@ ssize_t slabinfo_write(struct file *file
 		if (!strcmp(cachep->name, kbuf)) {
 			if (limit < 1 ||
 			    batchcount < 1 ||
-			    batchcount > limit) {
+			    batchcount > limit ||
+			    shared < 0) {
 				res = -EINVAL;
 			} else {
-				res = do_tune_cpucache(cachep, limit, batchcount,
+				res = do_tune_cpucache(cachep, limit, batchcount, shared);
 			}
 			break;
 		}
_
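
A usage example for the new tuning interface (illustration only, not part of
the patch): with the patch applied, a write of "cachename limit batchcount
shared" to /proc/slabinfo retunes that cache; the shared array ends up
holding batchcount*shared objects, and shared=0 effectively disables it.
The cache name "size-4096" and the numbers below are made-up example values.

	/* sketch: retune one slab cache at runtime via /proc/slabinfo */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* example values: limit=120, batchcount=60, shared=8 */
		const char *cmd = "size-4096 120 60 8\n";
		int fd = open("/proc/slabinfo", O_WRONLY);

		if (fd < 0) {
			perror("open /proc/slabinfo");
			return 1;
		}
		if (write(fd, cmd, strlen(cmd)) < 0)
			perror("write /proc/slabinfo");
		close(fd);
		return 0;
	}

The equivalent from a shell would be something like
echo "size-4096 120 60 8" > /proc/slabinfo.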