From: Manfred Spraul

The patch performs the kmalloc cache lookup for constant kmalloc calls at
compile time.  The idea is that the size-lookup loop in kmalloc takes a
significant amount of time, and for a call such as kmalloc(4096,GFP_KERNEL)
that lookup can happen entirely at compile time.

The problem is the implementation: gcc's brain is lossy, i.e.

	if(__builtin_constant_p(size)) {
		if(size < 32) return kmem_cache_alloc(...);
		if(size < 64) return kmem_cache_alloc(...);
		if(size < 96) return kmem_cache_alloc(...);
		if(size < 128) return kmem_cache_alloc(...);
		...
	}

doesn't work, because gcc constant-folds only the first two or three
comparisons and then gives up and emits real comparison code for the rest.
I've solved that with a switch/case statement, but the source is not
pretty.
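The trick is easier to see in a standalone sketch.  Illustrative only:
the made-up three-entry size table below stands in for kmalloc_sizes.h,
and LCACHE is defined by hand instead of defaulting to CACHE.  Each
CACHE(x) expands to the tail of one case range plus the head of the
next, LCACHE closes the last range, and the fall-through leaves j equal
to the number of entries at or above the match, so count - j is the
index of the matching cache:

	/* gcc-only: case ranges (low ... high) are a gcc extension,
	 * just as in the patch below.
	 */
	#include <stdio.h>

	#define SIZES CACHE(32) CACHE(64) LCACHE(128)

	static int size_index(unsigned int n)
	{
		int i, j = 0;

		switch (n) {
		case 0 ...
	#define CACHE(x)	(x): j++; case (x+1) ...
	#define LCACHE(x)	(x): j++; break;
		SIZES
	#undef CACHE
	#undef LCACHE
		default:
			return -1;	/* larger than the largest entry */
		}
		i = 0;			/* count the table entries */
	#define CACHE(x)	i++;
	#define LCACHE(x)	i++;
		SIZES
	#undef CACHE
	#undef LCACHE
		return i - j;
	}

	int main(void)
	{
		/* prints "0 1 -1": 16 fits the 32-byte entry, 40 needs
		 * the 64-byte entry, 200 is too large for the table. */
		printf("%d %d %d\n", size_index(16), size_index(40),
			size_index(200));
		return 0;
	}

With -O2 and a constant argument gcc folds the whole switch away, which
is exactly what the if-chain version above fails to do.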
 25-akpm/include/linux/kmalloc_sizes.h |   26 ++++++++++++++++--
 25-akpm/include/linux/slab.h          |   47 +++++++++++++++++++++++++++++++++-
 25-akpm/kernel/ksyms.c                |    3 +-
 25-akpm/mm/slab.c                     |    8 +----
 4 files changed, 73 insertions(+), 11 deletions(-)

diff -puN include/linux/kmalloc_sizes.h~fixed-size-kmalloc-speedup include/linux/kmalloc_sizes.h
--- 25/include/linux/kmalloc_sizes.h~fixed-size-kmalloc-speedup	Mon Jun  9 16:54:24 2003
+++ 25-akpm/include/linux/kmalloc_sizes.h	Mon Jun  9 16:54:24 2003
@@ -1,3 +1,14 @@
+/*
+ * kmalloc cache sizes.
+ * - CACHE(x) is called for every entry except the last
+ * - for the last entry, LCACHE is called. LCACHE defaults
+ *   to CACHE.
+ */
+#ifndef LCACHE
+#define LCACHE(x) CACHE(x)
+#define __LCACHE_DEFINED
+#endif
+
 #if (PAGE_SIZE == 4096)
 	CACHE(32)
 #endif
@@ -18,16 +29,25 @@
 	CACHE(16384)
 	CACHE(32768)
 	CACHE(65536)
+#ifdef CONFIG_MMU
+	LCACHE(131072)
+#else
 	CACHE(131072)
-#ifndef CONFIG_MMU
 	CACHE(262144)
 	CACHE(524288)
+#ifndef CONFIG_LARGE_ALLOCS
+	LCACHE(1048576)
+#else
 	CACHE(1048576)
-#ifdef CONFIG_LARGE_ALLOCS
 	CACHE(2097152)
 	CACHE(4194304)
 	CACHE(8388608)
 	CACHE(16777216)
-	CACHE(33554432)
+	LCACHE(33554432)
 #endif /* CONFIG_LARGE_ALLOCS */
 #endif /* CONFIG_MMU */
+
+#ifdef __LCACHE_DEFINED
+#undef __LCACHE_DEFINED
+#undef LCACHE
+#endif
diff -puN include/linux/slab.h~fixed-size-kmalloc-speedup include/linux/slab.h
--- 25/include/linux/slab.h~fixed-size-kmalloc-speedup	Mon Jun  9 16:54:24 2003
+++ 25-akpm/include/linux/slab.h	Mon Jun  9 16:54:24 2003
@@ -62,7 +62,52 @@ extern void *kmem_cache_alloc(kmem_cache
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
 
-extern void *kmalloc(size_t, int);
+/* Size description struct for general caches. */
+struct cache_sizes {
+	size_t		 cs_size;
+	kmem_cache_t	*cs_cachep;
+	kmem_cache_t	*cs_dmacachep;
+};
+extern struct cache_sizes malloc_sizes[];
+extern void *__kmalloc(size_t, int);
+
+/*
+ * gcc's brain is lossy: it forgets that a number is known at compile
+ * time after a few accesses and produces bogus code if a sequence of
+ * if clauses is used.  This is avoided by using switch.
+ */
+static inline void * kmalloc(size_t size, int flags)
+{
+	if (__builtin_constant_p(size)) {
+extern void __you_cannot_kmalloc_that_much(void);
+		unsigned int i,j;
+		j = 0;
+		switch(size) {
+		case 0 ...
+#define CACHE(x) \
+		(x): j++; \
+		case (x+1) ...
+#define LCACHE(x) \
+		(x): j++; break;
+#include "kmalloc_sizes.h"
+#undef CACHE
+#undef LCACHE
+		default:
+			__you_cannot_kmalloc_that_much();
+		}
+		i = 0;
+#define CACHE(x) \
+		i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+		return kmem_cache_alloc( (flags & GFP_DMA)?
+				malloc_sizes[i-j].cs_dmacachep
+				: malloc_sizes[i-j].cs_cachep,
+			flags);
+	}
+	return __kmalloc(size,flags);
+}
+
 
 extern void kfree(const void *);
 extern unsigned int ksize(const void *);
diff -puN kernel/ksyms.c~fixed-size-kmalloc-speedup kernel/ksyms.c
--- 25/kernel/ksyms.c~fixed-size-kmalloc-speedup	Mon Jun  9 16:54:24 2003
+++ 25-akpm/kernel/ksyms.c	Mon Jun  9 16:54:24 2003
@@ -95,7 +95,8 @@ EXPORT_SYMBOL(kmem_cache_free);
 EXPORT_SYMBOL(kmem_cache_size);
 EXPORT_SYMBOL(set_shrinker);
 EXPORT_SYMBOL(remove_shrinker);
-EXPORT_SYMBOL(kmalloc);
+EXPORT_SYMBOL(malloc_sizes);
+EXPORT_SYMBOL(__kmalloc);
 EXPORT_SYMBOL(kfree);
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(__alloc_percpu);
diff -puN mm/slab.c~fixed-size-kmalloc-speedup mm/slab.c
--- 25/mm/slab.c~fixed-size-kmalloc-speedup	Mon Jun  9 16:54:24 2003
+++ 25-akpm/mm/slab.c	Mon Jun  9 16:54:24 2003
@@ -385,11 +385,7 @@ static int slab_break_gfp_order = BREAK_
 #define	GET_PAGE_SLAB(pg)    ((struct slab *)(pg)->list.prev)
 
 /* These are the default caches for kmalloc. Custom caches can have other sizes. */
-static struct cache_sizes {
-	size_t		 cs_size;
-	kmem_cache_t	*cs_cachep;
-	kmem_cache_t	*cs_dmacachep;
-} malloc_sizes[] = {
+struct cache_sizes malloc_sizes[] = {
 #define CACHE(x) { .cs_size = (x) },
 #include <linux/kmalloc_sizes.h>
 	{ 0, }
@@ -1967,7 +1963,7 @@ void * kmem_cache_alloc (kmem_cache_t *c
  * platforms.  For example, on i386, it means that the memory must come
  * from the first 16MB.
  */
-void * kmalloc (size_t size, int flags)
+void * __kmalloc (size_t size, int flags)
 {
 	struct cache_sizes *csizep = malloc_sizes;
_
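
An illustrative call site, assuming the patch is applied (not part of
the patch; get_buffer_len() is a hypothetical helper):

	size_t len = get_buffer_len();
	void *p, *q;

	p = kmalloc(4096, GFP_KERNEL);	/* constant size: the inline switch
					 * folds at compile time into a direct
					 * kmem_cache_alloc() on malloc_sizes[] */
	q = kmalloc(len, GFP_KERNEL);	/* non-constant size:
					 * __builtin_constant_p(len) is false,
					 * so this calls __kmalloc() */
	kfree(p);
	kfree(q);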