From: Manfred Spraul

The redzone words are only checked during alloc and free - thus objects
that are never/rarely freed are not checked at all.

The attached patch adds a periodic scan over all objects and checks for
wrong redzone data or corrupted bufctl lists.  Most changes are under
#ifdef DEBUG; the only exception is a trivial correction of the initial
timeout calculation: divide the cachep address by L1_CACHE_BYTES before
taking the mod - the low order bits are always 0.

Signed-off-by: Manfred Spraul
Signed-off-by: Andrew Morton
---

 25-akpm/mm/slab.c |  114 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 107 insertions(+), 7 deletions(-)
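A note for reviewers (not part of the patch): the user-space sketch below
illustrates the timeout-spread arithmetic from the changelog.  Since
kmem_cache_t structures are L1_CACHE_BYTES-aligned, the low bits of a
cachep address are always zero; taking the address mod REAPTIMEOUT_LIST3
directly therefore reaches only a small subset of the possible per-cache
timer offsets, so the reap timers of different caches cluster onto a few
expiry slots.  Dividing by L1_CACHE_BYTES first makes every offset
reachable.  The HZ and L1_CACHE_BYTES values below are illustrative
assumptions, not taken from any particular config.

/* Illustration only - not part of the patch.  Counts how many distinct
 * timer offsets L1-aligned addresses can produce, with and without the
 * divide by L1_CACHE_BYTES.  HZ=1000 and 128-byte cache lines are
 * assumed for the demonstration.
 */
#include <stdio.h>

#define HZ			1000
#define REAPTIMEOUT_LIST3	(4*HZ)
#define L1_CACHE_BYTES		128

int main(void)
{
	static char hit_mod[REAPTIMEOUT_LIST3];	/* addr % timeout */
	static char hit_div[REAPTIMEOUT_LIST3];	/* (addr/line) % timeout */
	unsigned long addr;
	int nmod = 0, ndiv = 0, i;

	/* walk a large range of L1-aligned pseudo cachep addresses */
	for (addr = 0; addr < (1UL << 24); addr += L1_CACHE_BYTES) {
		hit_mod[addr % REAPTIMEOUT_LIST3] = 1;
		hit_div[(addr / L1_CACHE_BYTES) % REAPTIMEOUT_LIST3] = 1;
	}
	for (i = 0; i < REAPTIMEOUT_LIST3; i++) {
		nmod += hit_mod[i];
		ndiv += hit_div[i];
	}
	printf("offsets reachable without divide: %d of %d\n",
			nmod, REAPTIMEOUT_LIST3);
	printf("offsets reachable with divide:    %d of %d\n",
			ndiv, REAPTIMEOUT_LIST3);
	return 0;
}

With the assumed values this prints 125 of 4000 reachable offsets without
the divide, and 4000 of 4000 with it.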
diff -puN mm/slab.c~periodically-scan-redzone-entries-and-slab-control-structures mm/slab.c
--- 25/mm/slab.c~periodically-scan-redzone-entries-and-slab-control-structures	Fri Mar 11 17:39:30 2005
+++ 25-akpm/mm/slab.c	Fri Mar 11 17:39:33 2005
@@ -189,7 +189,7 @@
  */
 
 #define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
+#define BUFCTL_ALLOC	(((kmem_bufctl_t)(~0U))-1)
 #define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-2)
 
 /* Max number of objs-per-slab for caches which use off-slab slabs.
@@ -355,6 +355,7 @@ struct kmem_cache_s {
 #if DEBUG
 	int			dbghead;
 	int			reallen;
+	unsigned long		redzonetest;
 #endif
 };
 
@@ -370,6 +371,7 @@ struct kmem_cache_s {
  */
 #define REAPTIMEOUT_CPUC	(2*HZ)
 #define REAPTIMEOUT_LIST3	(4*HZ)
+#define REDZONETIMEOUT		(300*HZ)
 
 #if STATS
 #define	STATS_INC_ACTIVE(x)	((x)->num_active++)
@@ -1454,7 +1456,11 @@ next:
 	}
 	cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
-				((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+				((unsigned long)cachep/L1_CACHE_BYTES)%REAPTIMEOUT_LIST3;
+#if DEBUG
+	cachep->redzonetest = jiffies + REDZONETIMEOUT +
+			((unsigned long)cachep/L1_CACHE_BYTES)%REDZONETIMEOUT;
+#endif
 
 	/* Need the semaphore to access the chain. */
 	down(&cache_chain_sem);
 
@@ -2051,7 +2057,7 @@ retry:
 			slabp->inuse++;
 			next = slab_bufctl(slabp)[slabp->free];
 #if DEBUG
-			slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
+			slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
 #endif
 			slabp->free = next;
 		}
@@ -2189,9 +2195,9 @@ static void free_block(kmem_cache_t *cac
 		objnr = (objp - slabp->s_mem) / cachep->objsize;
 		check_slabp(cachep, slabp);
 #if DEBUG
-		if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
-			printk(KERN_ERR "slab: double free detected in cache '%s', objp %p.\n",
-						cachep->name, objp);
+		if (slab_bufctl(slabp)[objnr] != BUFCTL_ALLOC) {
+			printk(KERN_ERR "slab: double free detected in cache "
+					"'%s', objp %p\n", cachep->name, objp);
 			BUG();
 		}
 #endif
@@ -2417,7 +2423,7 @@ got_slabp:
 	slabp->inuse++;
 	next = slab_bufctl(slabp)[slabp->free];
 #if DEBUG
-	slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
+	slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
 #endif
 	slabp->free = next;
 	check_slabp(cachep, slabp);
@@ -2613,6 +2619,94 @@ unsigned int kmem_cache_size(kmem_cache_
 
 EXPORT_SYMBOL(kmem_cache_size);
 
+#if DEBUG
+static void check_slabuse(kmem_cache_t *cachep, struct slab *slabp)
+{
+	int i;
+
+	if (!(cachep->flags & SLAB_RED_ZONE))
+		return;	/* no redzone data to check */
+
+#if CONFIG_DEBUG_PAGEALLOC
+	/* Page alloc debugging on for this cache. Mapping & Unmapping happens
+	 * without any locking, thus parallel checks are impossible.
+	 */
+	if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
+		return;
+#endif
+
+	for (i=0;i<cachep->num;i++) {
+		void *objp = slabp->s_mem + cachep->objsize * i;
+		unsigned long red1, red2;
+
+		red1 = *dbg_redzone1(cachep, objp);
+		red2 = *dbg_redzone2(cachep, objp);
+
+		/* simplest case: marked as inactive */
+		if (red1 == RED_INACTIVE && red2 == RED_INACTIVE)
+			continue;
+
+		/* tricky case: if the bufctl value is BUFCTL_ALLOC, then
+		 * the object is either allocated or somewhere in a cpu
+		 * cache. The cpu caches are lockless and there might be
+		 * a concurrent alloc/free call, thus we must accept random
+		 * combinations of RED_ACTIVE and _INACTIVE
+		 */
+		if (slab_bufctl(slabp)[i] == BUFCTL_ALLOC &&
+				(red1 == RED_INACTIVE || red1 == RED_ACTIVE) &&
+				(red2 == RED_INACTIVE || red2 == RED_ACTIVE))
+			continue;
+
+		printk(KERN_ERR "slab %s: redzone mismatch in slabp %p, objp %p, bufctl 0x%x\n",
+				cachep->name, slabp, objp, slab_bufctl(slabp)[i]);
+		print_objinfo(cachep, objp, 2);
+	}
+}
+
+/*
+ * Perform a self test on all slabs from a cache
+ */
+static void check_redzone(kmem_cache_t *cachep)
+{
+	struct list_head *q;
+	struct slab *slabp;
+
+	check_spinlock_acquired(cachep);
+
+	list_for_each(q,&cachep->lists.slabs_full) {
+		slabp = list_entry(q, struct slab, list);
+
+		if (slabp->inuse != cachep->num) {
+			printk(KERN_INFO "slab %s: wrong slabp found in full slab chain at %p (%d/%d).\n",
+					cachep->name, slabp, slabp->inuse, cachep->num);
+		}
+		check_slabp(cachep, slabp);
+		check_slabuse(cachep, slabp);
+	}
+	list_for_each(q,&cachep->lists.slabs_partial) {
+		slabp = list_entry(q, struct slab, list);
+
+		if (slabp->inuse == cachep->num || slabp->inuse == 0) {
+			printk(KERN_INFO "slab %s: wrong slab found in partial chain at %p (%d/%d).\n",
+					cachep->name, slabp, slabp->inuse, cachep->num);
+		}
+		check_slabp(cachep, slabp);
+		check_slabuse(cachep, slabp);
+	}
+	list_for_each(q,&cachep->lists.slabs_free) {
+		slabp = list_entry(q, struct slab, list);
+
+		if (slabp->inuse != 0) {
+			printk(KERN_INFO "slab %s: wrong slab found in free chain at %p (%d/%d).\n",
+					cachep->name, slabp, slabp->inuse, cachep->num);
+		}
+		check_slabp(cachep, slabp);
+		check_slabuse(cachep, slabp);
+	}
+}
+
+#endif
+
 struct ccupdate_struct {
 	kmem_cache_t *cachep;
 	struct array_cache *new[NR_CPUS];
@@ -2796,6 +2890,12 @@ static void cache_reap(void *unused)
 
 		drain_array_locked(searchp, ac_data(searchp), 0);
 
+#if DEBUG
+		if(time_before(searchp->redzonetest, jiffies)) {
+			searchp->redzonetest = jiffies + REDZONETIMEOUT;
+			check_redzone(searchp);
+		}
+#endif
 		if(time_after(searchp->lists.next_reap, jiffies))
 			goto next_unlock;
_