From: Manfred Spraul

I coded a poor man's magazine layer for slab: +13% performance for Robert's packet routing. Unfortunately this means even more arrays.

Could you look at the patch? It's not yet final: I want to make the size of the shared array tunable (replace SHARED_ARRAY_FACTOR with a kmem_cache_t member), perhaps even default it to 0. I must think about it a bit more.

TODO:

a) Disable the magazine-vs-head-array duplication on UP.
b) Remove the hard-wired constant.
c) Some way for the slab owner to tune the magazine size?


 25-akpm/mm/slab.c |  142 ++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 113 insertions(+), 29 deletions(-)

diff -puN mm/slab.c~slab-magazine-layer mm/slab.c
--- 25/mm/slab.c~slab-magazine-layer	Thu May  1 14:07:01 2003
+++ 25-akpm/mm/slab.c	Thu May  1 14:47:48 2003
@@ -200,6 +200,7 @@ struct arraycache_init {
  *	into this structure, too. Figure out what causes
  *	fewer cross-node spinlock operations.
  */
+#define SHARED_ARRAY_FACTOR	16
 struct kmem_list3 {
 	struct list_head	slabs_partial;	/* partial list first, better asm code */
 	struct list_head	slabs_full;
@@ -207,6 +208,7 @@ struct kmem_list3 {
 	unsigned long	free_objects;
 	int		free_touched;
 	unsigned long	next_reap;
+	struct array_cache	*shared;
 };

 #define LIST3_INIT(parent) \
@@ -1162,6 +1164,7 @@ static void smp_call_function_all_cpus(v
 }

 static void free_block (kmem_cache_t* cachep, void** objpp, int len);
+static void drain_array_locked(kmem_cache_t* cachep, struct array_cache *ac);

 static void do_drain(void *arg)
 {
@@ -1170,13 +1173,20 @@ static void do_drain(void *arg)

 	check_irq_off();
 	ac = ac_data(cachep);
+	spin_lock(&cachep->spinlock);
 	free_block(cachep, &ac_entry(ac)[0], ac->avail);
+	spin_unlock(&cachep->spinlock);
 	ac->avail = 0;
 }

 static void drain_cpu_caches(kmem_cache_t *cachep)
 {
 	smp_call_function_all_cpus(do_drain, cachep);
+	check_irq_on();
+	spin_lock_irq(&cachep->spinlock);
+	if (cachep->lists.shared)
+		drain_array_locked(cachep, cachep->lists.shared);
+	spin_unlock_irq(&cachep->spinlock);
 }


@@ -1274,6 +1284,8 @@ int kmem_cache_destroy (kmem_cache_t * c
 		for (i = 0; i < NR_CPUS; i++)
 			kfree(cachep->array[i]);
 		/* NUMA: free the list3 structures */
+		kfree(cachep->lists.shared);
+		cachep->lists.shared = NULL;
 	}
 	kmem_cache_free(&cache_cache, cachep);

@@ -1616,6 +1628,19 @@ retry:

 	BUG_ON(ac->avail > 0);
 	spin_lock(&cachep->spinlock);
+	if (l3->shared) {
+		struct array_cache *shared_array = l3->shared;
+		if (shared_array->avail) {
+			if (batchcount > shared_array->avail)
+				batchcount = shared_array->avail;
+			shared_array->avail -= batchcount;
+			ac->avail = batchcount;
+			memcpy(ac_entry(ac), &ac_entry(shared_array)[shared_array->avail],
+					sizeof(void*)*batchcount);
+			shared_array->touched = 1;
+			goto alloc_done;
+		}
+	}
 	while (batchcount > 0) {
 		struct list_head *entry;
 		struct slab *slabp;
@@ -1639,6 +1664,7 @@ retry:

 must_grow:
 	l3->free_objects -= ac->avail;
+alloc_done:
 	spin_unlock(&cachep->spinlock);

 	if (unlikely(!ac->avail)) {
@@ -1737,13 +1763,11 @@ static inline void * __cache_alloc (kmem
  * the l3 structure
  */

-static inline void
-__free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
+static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
 {
 	int i;

-	check_irq_off();
-	spin_lock(&cachep->spinlock);
+	check_spinlock_acquired(cachep);

 	/* NUMA: move add into loop */
 	cachep->lists.free_objects += nr_objects;
@@ -1779,12 +1803,6 @@ __free_block(kmem_cache_t *cachep, void
 				&list3_data_ptr(cachep, objp)->slabs_partial);
 		}
 	}
-	spin_unlock(&cachep->spinlock);
-}
-
-static void free_block(kmem_cache_t* cachep, void** objpp, int len)
-{
-	__free_block(cachep, objpp, len);
 }

 static void cache_flusharray (kmem_cache_t* cachep, struct array_cache *ac)
@@ -1796,14 +1814,28 @@ static void cache_flusharray (kmem_cache
 	BUG_ON(!batchcount || batchcount > ac->avail);
 #endif
 	check_irq_off();
-	__free_block(cachep, &ac_entry(ac)[0], batchcount);
+	spin_lock(&cachep->spinlock);
+	if (cachep->lists.shared) {
+		struct array_cache *shared_array = cachep->lists.shared;
+		int max = shared_array->limit-shared_array->avail;
+		if (max) {
+			if (batchcount > max)
+				batchcount = max;
+			memcpy(&ac_entry(shared_array)[shared_array->avail],
+					&ac_entry(ac)[0],
+					sizeof(void*)*batchcount);
+			shared_array->avail += batchcount;
+			goto free_done;
+		}
+	}
+	free_block(cachep, &ac_entry(ac)[0], batchcount);
+free_done:
 #if STATS
 	{
 		int i = 0;
 		struct list_head *p;

-		spin_lock(&cachep->spinlock);
 		p = list3_data(cachep)->slabs_free.next;
 		while (p != &(list3_data(cachep)->slabs_free)) {
 			struct slab *slabp;
@@ -1815,9 +1847,9 @@ static void cache_flusharray (kmem_cache
 			p = p->next;
 		}
 		STATS_SET_FREEABLE(cachep, i);
-		spin_unlock(&cachep->spinlock);
 	}
 #endif
+	spin_unlock(&cachep->spinlock);
 	ac->avail -= batchcount;
 	memmove(&ac_entry(ac)[0], &ac_entry(ac)[batchcount],
 			sizeof(void*)*ac->avail);
@@ -2061,6 +2093,7 @@ static void do_ccupdate_local(void *info
 static int do_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
 {
 	struct ccupdate_struct new;
+	struct array_cache *new_shared;
 	int i;

 	memset(&new.new,0,sizeof(new.new));
@@ -2094,11 +2127,29 @@ static int do_tune_cpucache (kmem_cache_
 		struct array_cache *ccold = new.new[i];
 		if (!ccold)
 			continue;
-		local_irq_disable();
+		spin_lock_irq(&cachep->spinlock);
 		free_block(cachep, ac_entry(ccold), ccold->avail);
-		local_irq_enable();
+		spin_unlock_irq(&cachep->spinlock);
 		kfree(ccold);
 	}
+	new_shared = kmalloc(sizeof(void*)*batchcount*SHARED_ARRAY_FACTOR+
+				sizeof(struct array_cache), GFP_KERNEL);
+	if (new_shared) {
+		struct array_cache *old;
+		new_shared->avail = 0;
+		new_shared->limit = batchcount*SHARED_ARRAY_FACTOR;
+		new_shared->batchcount = 0xbaadf00d;
+		new_shared->touched = 0;
+
+		spin_lock_irq(&cachep->spinlock);
+		old = cachep->lists.shared;
+		cachep->lists.shared = new_shared;
+		if (old)
+			free_block(cachep, ac_entry(old), old->avail);
+		spin_unlock_irq(&cachep->spinlock);
+		kfree(old);
+	}
+
 	return 0;
 }

@@ -2141,6 +2192,47 @@ static void enable_cpucache (kmem_cache_
 			cachep->name, -err);
 }

+static void drain_array(kmem_cache_t *cachep, struct array_cache *ac)
+{
+	int tofree;
+
+	check_irq_off();
+	if (ac->touched) {
+		ac->touched = 0;
+	} else if (ac->avail) {
+		tofree = (ac->limit+4)/5;
+		if (tofree > ac->avail) {
+			tofree = (ac->avail+1)/2;
+		}
+		spin_lock(&cachep->spinlock);
+		free_block(cachep, ac_entry(ac), tofree);
+		spin_unlock(&cachep->spinlock);
+		ac->avail -= tofree;
+		memmove(&ac_entry(ac)[0], &ac_entry(ac)[tofree],
+					sizeof(void*)*ac->avail);
+	}
+}
+
+static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac)
+{
+	int tofree;
+
+	check_spinlock_acquired(cachep);
+	if (ac->touched) {
+		ac->touched = 0;
+	} else if (ac->avail) {
+		tofree = (ac->limit+4)/5;
+		if (tofree > ac->avail) {
+			tofree = (ac->avail+1)/2;
+		}
+		free_block(cachep, ac_entry(ac), tofree);
+		ac->avail -= tofree;
+		memmove(&ac_entry(ac)[0], &ac_entry(ac)[tofree],
+					sizeof(void*)*ac->avail);
+	}
+}
+
+
 /**
  * cache_reap - Reclaim memory from caches.
  *
@@ -2167,7 +2259,6 @@ static inline void cache_reap (void)
 		kmem_cache_t *searchp;
 		struct list_head* p;
 		int tofree;
-		struct array_cache *ac;
 		struct slab *slabp;

 		searchp = list_entry(walk, kmem_cache_t, next);
@@ -2177,19 +2268,8 @@ static inline void cache_reap (void)

 		check_irq_on();
 		local_irq_disable();
-		ac = ac_data(searchp);
-		if (ac->touched) {
-			ac->touched = 0;
-		} else if (ac->avail) {
-			tofree = (ac->limit+4)/5;
-			if (tofree > ac->avail) {
-				tofree = (ac->avail+1)/2;
-			}
-			free_block(searchp, ac_entry(ac), tofree);
-			ac->avail -= tofree;
-			memmove(&ac_entry(ac)[0], &ac_entry(ac)[tofree],
-					sizeof(void*)*ac->avail);
-		}
+		drain_array(searchp, ac_data(searchp));
+
 		if(time_after(searchp->lists.next_reap, jiffies))
			goto next_irqon;

@@ -2198,6 +2278,10 @@ static inline void cache_reap (void)
 			goto next_unlock;
 		}
 		searchp->lists.next_reap = jiffies + REAPTIMEOUT_LIST3;
+
+		if (searchp->lists.shared)
+			drain_array_locked(searchp, searchp->lists.shared);
+
 		if (searchp->lists.free_touched) {
 			searchp->lists.free_touched = 0;
 			goto next_unlock;
_
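
For readers who want the idea without decoding the hunks: the patch parks a per-cache "shared" array (the magazine) between the per-CPU head arrays and the slab lists, so whole batches of object pointers can be handed from one CPU to another with a memcpy under cachep->spinlock instead of per-object free_block()/refill work. The userspace sketch below only distills those two slow paths under simplified assumptions; obj_array, refill_from_shared, flush_to_shared, SHARED_LIMIT and the pthread mutex are invented stand-ins for struct array_cache, cache_alloc_refill(), cache_flusharray() and cachep->spinlock, not code from the patch.

/*
 * Rough userspace sketch of the shared-array ("magazine") idea; not part of
 * the patch.  The real code works on struct array_cache hanging off
 * kmem_list3, under cachep->spinlock (a pthread mutex stands in here).
 */
#include <pthread.h>
#include <string.h>

#define SHARED_LIMIT 64			/* batchcount * SHARED_ARRAY_FACTOR in the patch */

struct obj_array {
	unsigned int avail;		/* object pointers currently cached */
	unsigned int limit;		/* capacity */
	void *entry[SHARED_LIMIT];	/* LIFO stack of object pointers */
};

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static struct obj_array shared = { .avail = 0, .limit = SHARED_LIMIT };

/*
 * Allocation slow path: the per-CPU array is empty.  Grab a whole batch from
 * the shared array before falling back to the slab lists; this mirrors the
 * l3->shared block the patch adds to cache_alloc_refill().
 */
static unsigned int refill_from_shared(struct obj_array *pcpu, unsigned int batch)
{
	unsigned int got = 0;

	pthread_mutex_lock(&cache_lock);
	if (shared.avail) {
		got = batch < shared.avail ? batch : shared.avail;
		shared.avail -= got;
		memcpy(pcpu->entry, &shared.entry[shared.avail],
		       sizeof(void *) * got);
		pcpu->avail = got;
	}
	pthread_mutex_unlock(&cache_lock);
	return got;		/* 0: caller must refill from the slab lists */
}

/*
 * Free slow path: the per-CPU array is full.  Park a batch of pointers (the
 * oldest entries, as in cache_flusharray()) in the shared array if it has
 * room; only a full shared array forces objects back onto the slab lists.
 */
static unsigned int flush_to_shared(struct obj_array *pcpu, unsigned int batch)
{
	unsigned int room, moved = 0;

	pthread_mutex_lock(&cache_lock);
	room = shared.limit - shared.avail;
	if (room) {
		moved = batch < room ? batch : room;
		memcpy(&shared.entry[shared.avail], pcpu->entry,
		       sizeof(void *) * moved);
		shared.avail += moved;
		pcpu->avail -= moved;
		memmove(pcpu->entry, &pcpu->entry[moved],
			sizeof(void *) * pcpu->avail);
	}
	pthread_mutex_unlock(&cache_lock);
	return moved;		/* 0: shared array full, caller free_block()s to the slab lists */
}

The win is not fewer lock acquisitions (the refill path still takes cachep->spinlock, just as free_block() would) but less work under the lock: objects freed on one CPU reach another CPU as a block of pointers rather than through per-object slab bookkeeping, which is presumably where the +13% on the packet-routing load comes from. SHARED_ARRAY_FACTOR only sizes this staging area, at 16 batchcounts per cache.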